From b2e2b4cce7f913957317b5df3eddf930ac8407f8 Mon Sep 17 00:00:00 2001
From: liyang830
Date: Mon, 18 Apr 2022 19:58:57 +0800
Subject: [PATCH 0001/1997] fix attach table dictionaries function name
 normalizer

---
 src/Databases/DatabaseOrdinary.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp
index b5557d9a08d..baf93182a57 100644
--- a/src/Databases/DatabaseOrdinary.cpp
+++ b/src/Databases/DatabaseOrdinary.cpp
@@ -12,6 +12,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -106,6 +107,7 @@ void DatabaseOrdinary::loadStoredObjects(
         const auto & name = name_with_path_and_query.first;
         const auto & path = name_with_path_and_query.second.path;
         const auto & ast = name_with_path_and_query.second.ast;
+        FunctionNameNormalizer().visit(ast.get());
         const auto & create_query = ast->as();

         if (create_query.is_dictionary)
@@ -128,6 +130,7 @@ void DatabaseOrdinary::loadStoredObjects(
         const auto & name = name_with_path_and_query.first;
         const auto & path = name_with_path_and_query.second.path;
         const auto & ast = name_with_path_and_query.second.ast;
+        FunctionNameNormalizer().visit(ast.get());
         const auto & create_query = ast->as();

         if (!create_query.is_dictionary)
@@ -167,6 +170,7 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables
         auto ast = parseQueryFromMetadata(log, getContext(), full_path.string(), /*throw_on_error*/ true, /*remove_empty*/ false);
         if (ast)
         {
+            FunctionNameNormalizer().visit(ast.get());
             auto * create_query = ast->as();
             create_query->setDatabase(database_name);
@@ -220,6 +224,7 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables
 void DatabaseOrdinary::loadTableFromMetadata(ContextMutablePtr local_context, const String & file_path, const QualifiedTableName & name, const ASTPtr & ast, bool force_restore)
 {
     assert(name.database == database_name);
+    FunctionNameNormalizer().visit(ast.get());
     const auto & create_query = ast->as();

     tryAttachTable(

From f091c8d1d8ff0577e60bf1aed0d3f97d30cdb35f Mon Sep 17 00:00:00 2001
From: liyang830
Date: Fri, 17 Jun 2022 16:42:05 +0800
Subject: [PATCH 0002/1997] fix: attach table normalizer, add test

---
 src/Databases/DatabaseOrdinary.cpp            |  3 --
 src/Interpreters/InterpreterCreateQuery.cpp   |  1 +
 .../test_attach_table_normalizer/__init__.py  |  0
 .../configs/config.xml                        |  4 ++
 .../test_attach_table_normalizer/test.py      | 43 +++++++++++++++++++
 5 files changed, 48 insertions(+), 3 deletions(-)
 create mode 100644 tests/integration/test_attach_table_normalizer/__init__.py
 create mode 100644 tests/integration/test_attach_table_normalizer/configs/config.xml
 create mode 100644 tests/integration/test_attach_table_normalizer/test.py

diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp
index baf93182a57..5708ff50323 100644
--- a/src/Databases/DatabaseOrdinary.cpp
+++ b/src/Databases/DatabaseOrdinary.cpp
@@ -107,7 +107,6 @@ void DatabaseOrdinary::loadStoredObjects(
         const auto & name = name_with_path_and_query.first;
         const auto & path = name_with_path_and_query.second.path;
         const auto & ast = name_with_path_and_query.second.ast;
-        FunctionNameNormalizer().visit(ast.get());
         const auto & create_query = ast->as();

         if (create_query.is_dictionary)
@@ -170,7 +169,6 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables
         auto ast = parseQueryFromMetadata(log, getContext(), full_path.string(), /*throw_on_error*/ true, /*remove_empty*/ false);
         if (ast)
         {
-            FunctionNameNormalizer().visit(ast.get());
             auto * create_query = ast->as();
             create_query->setDatabase(database_name);
@@ -224,7 +222,6 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables
 void DatabaseOrdinary::loadTableFromMetadata(ContextMutablePtr local_context, const String & file_path, const QualifiedTableName & name, const ASTPtr & ast, bool force_restore)
 {
     assert(name.database == database_name);
-    FunctionNameNormalizer().visit(ast.get());
     const auto & create_query = ast->as();

     tryAttachTable(
diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp
index ed996430996..7eb293b1813 100644
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@@ -953,6 +953,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)

         // Table SQL definition is available even if the table is detached (even permanently)
         auto query = database->getCreateTableQuery(create.getTable(), getContext());
+        FunctionNameNormalizer().visit(query.get());
         auto create_query = query->as();

         if (!create.is_dictionary && create_query.is_dictionary)
diff --git a/tests/integration/test_attach_table_normalizer/__init__.py b/tests/integration/test_attach_table_normalizer/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/integration/test_attach_table_normalizer/configs/config.xml b/tests/integration/test_attach_table_normalizer/configs/config.xml
new file mode 100644
index 00000000000..0500e2ad554
--- /dev/null
+++ b/tests/integration/test_attach_table_normalizer/configs/config.xml
@@ -0,0 +1,4 @@
+
+    1
+    1
+
diff --git a/tests/integration/test_attach_table_normalizer/test.py b/tests/integration/test_attach_table_normalizer/test.py
new file mode 100644
index 00000000000..3e86d567c5b
--- /dev/null
+++ b/tests/integration/test_attach_table_normalizer/test.py
@@ -0,0 +1,43 @@
+import pytest
+
+from helpers.cluster import ClickHouseCluster
+
+cluster = ClickHouseCluster(__file__)
+node = cluster.add_instance('node', main_configs=["configs/config.xml"], with_zookeeper=True)
+
+
+@pytest.fixture(scope="module")
+def started_cluster():
+    try:
+        cluster.start()
+        yield cluster
+    finally:
+        cluster.shutdown()
+
+def replace_substring_to_substr(node):
+    node.exec_in_container(["bash", "-c", "sed -i 's/substring/substr/g' /var/lib/clickhouse/metadata/default/file.sql"], user="root")
+
+@pytest.mark.parametrize("engine", ['Ordinary', 'Atomic'])
+def test_attach_substr(started_cluster, engine):
+    # Initialize
+    node.query("CREATE TABLE default.file(`s` String, `n` UInt8) ENGINE = MergeTree PARTITION BY substring(s, 1, 2) ORDER BY n ")
+
+    # Detach table file
+    node.query("DETACH TABLE file")
+
+    # Replace subtring to substr
+    replace_substring_to_substr(node)
+
+    # Attach table file
+    node.query("ATTACH TABLE file")
+
+@pytest.mark.parametrize("engine", ['Ordinary', 'Atomic'])
+def test_attach_substr(started_cluster, engine):
+    # Initialize
+    node.query("CREATE TABLE default.file(`s` String, `n` UInt8) ENGINE = MergeTree PARTITION BY substring(s, 1, 2) ORDER BY n ")
+
+    # Replace subtring to substr
+    replace_substring_to_substr(node)
+
+    # Restart clickhouse
+    node.restart_clickhouse(kill=True)

From c7a85d565cb17c068528bdbf38a74d0ab29a1450 Mon Sep 17 00:00:00 2001
From: liyang830
Date: Fri, 17 Jun 2022 17:51:33 +0800
Subject: [PATCH 0003/1997] fix: rename restart test

---
 tests/integration/test_attach_table_normalizer/test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration/test_attach_table_normalizer/test.py b/tests/integration/test_attach_table_normalizer/test.py
index 3e86d567c5b..5a31801b99c 100644
--- a/tests/integration/test_attach_table_normalizer/test.py
+++ b/tests/integration/test_attach_table_normalizer/test.py
@@ -32,7 +32,7 @@ def test_attach_substr(started_cluster, engine):
     node.query("ATTACH TABLE file")

 @pytest.mark.parametrize("engine", ['Ordinary', 'Atomic'])
-def test_attach_substr(started_cluster, engine):
+def test_attach_substr_restart(started_cluster, engine):
     # Initialize
     node.query("CREATE TABLE default.file(`s` String, `n` UInt8) ENGINE = MergeTree PARTITION BY substring(s, 1, 2) ORDER BY n ")

From 701c687e7933f77ad51e91fa8bf1ef6ff2282f8d Mon Sep 17 00:00:00 2001
From: liyang830
Date: Sat, 18 Jun 2022 17:13:50 +0800
Subject: [PATCH 0004/1997] fix : test error

---
 src/Databases/DatabaseOrdinary.cpp                     | 2 +-
 tests/integration/test_attach_table_normalizer/test.py | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp
index 5708ff50323..1477014a869 100644
--- a/src/Databases/DatabaseOrdinary.cpp
+++ b/src/Databases/DatabaseOrdinary.cpp
@@ -129,7 +129,6 @@ void DatabaseOrdinary::loadStoredObjects(
         const auto & name = name_with_path_and_query.first;
         const auto & path = name_with_path_and_query.second.path;
         const auto & ast = name_with_path_and_query.second.ast;
-        FunctionNameNormalizer().visit(ast.get());
         const auto & create_query = ast->as();

         if (!create_query.is_dictionary)
@@ -169,6 +168,7 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables
         auto ast = parseQueryFromMetadata(log, getContext(), full_path.string(), /*throw_on_error*/ true, /*remove_empty*/ false);
         if (ast)
         {
+            FunctionNameNormalizer().visit(ast.get());
             auto * create_query = ast->as();
             create_query->setDatabase(database_name);
diff --git a/tests/integration/test_attach_table_normalizer/test.py b/tests/integration/test_attach_table_normalizer/test.py
index 5a31801b99c..80c4b99dfcc 100644
--- a/tests/integration/test_attach_table_normalizer/test.py
+++ b/tests/integration/test_attach_table_normalizer/test.py
@@ -20,12 +20,13 @@ def replace_substring_to_substr(node):
 @pytest.mark.parametrize("engine", ['Ordinary', 'Atomic'])
 def test_attach_substr(started_cluster, engine):
     # Initialize
+    node.query("DROP TABLE IF EXISTS default.file")
     node.query("CREATE TABLE default.file(`s` String, `n` UInt8) ENGINE = MergeTree PARTITION BY substring(s, 1, 2) ORDER BY n ")

     # Detach table file
     node.query("DETACH TABLE file")

-    # Replace subtring to substr
+    # Replace substring to substr
     replace_substring_to_substr(node)

     # Attach table file
@@ -34,9 +35,10 @@ def test_attach_substr_restart(started_cluster, engine):
     # Initialize
+    node.query("DROP TABLE IF EXISTS default.file")
     node.query("CREATE TABLE default.file(`s` String, `n` UInt8) ENGINE = MergeTree PARTITION BY substring(s, 1, 2) ORDER BY n ")

-    # Replace subtring to substr
+    # Replace substring to substr
     replace_substring_to_substr(node)

     # Restart clickhouse

From 252e750fd79090dc4fdb8bfb1317d8f8b1f3136c Mon Sep 17 00:00:00 2001
From: alesapin
Date: Fri, 8 Jul 2022 17:57:24 +0200
Subject: [PATCH 0005/1997] Update test.py

---
 tests/integration/test_attach_table_normalizer/test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration/test_attach_table_normalizer/test.py b/tests/integration/test_attach_table_normalizer/test.py
index 80c4b99dfcc..f2d99588b94 100644
--- a/tests/integration/test_attach_table_normalizer/test.py
+++ b/tests/integration/test_attach_table_normalizer/test.py
@@ -3,7 +3,7 @@ import pytest
 from helpers.cluster import ClickHouseCluster

 cluster = ClickHouseCluster(__file__)
-node = cluster.add_instance('node', main_configs=["configs/config.xml"], with_zookeeper=True)
+node = cluster.add_instance('node', main_configs=["configs/config.xml"], with_zookeeper=True, stay_alive=True)


 @pytest.fixture(scope="module")

From 2de309c34f366967b50aed8e504a6748b7543057 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 14 Nov 2021 00:56:52 +0300
Subject: [PATCH 0006/1997] Add Linux RISC-V 64 build to CI

---
 cmake/target.cmake                       | 11 +++++++++++
 docker/packager/packager                 | 18 ++++++++++++++++++
 docs/en/development/build-cross-riscv.md |  2 +-
 3 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/cmake/target.cmake b/cmake/target.cmake
index 0fb5e8a20de..6b78a9253b2 100644
--- a/cmake/target.cmake
+++ b/cmake/target.cmake
@@ -33,6 +33,17 @@ if (CMAKE_CROSSCOMPILING)
     elseif (ARCH_PPC64LE)
         set (ENABLE_GRPC OFF CACHE INTERNAL "")
         set (ENABLE_SENTRY OFF CACHE INTERNAL "")
+    elseif (ARCH_RISCV64 OFF CACHE INTERNAL "")
+        # RISC-V support is preliminary
+        set (GLIBC_COMPATIBILITY OFF CACHE INTERNAL "")
+        set (ENABLE_LDAP OFF CACHE INTERNAL "")
+        set (OPENSSL_NO_ASM ON CACHE INTERNAL "")
+        set (ENABLE_JEMALLOC ON CACHE INTERNAL "")
+        set (ENABLE_PARQUET OFF CACHE INTERNAL "")
+        set (USE_UNWIND OFF CACHE INTERNAL "")
+        set (ENABLE_GRPC OFF CACHE INTERNAL "")
+        set (ENABLE_HDFS OFF CACHE INTERNAL "")
+        set (ENABLE_MYSQL OFF CACHE INTERNAL "")
     endif ()
 elseif (OS_FREEBSD)
     # FIXME: broken dependencies
diff --git a/docker/packager/packager b/docker/packager/packager
index 66eb568d460..98b864edbc6 100755
--- a/docker/packager/packager
+++ b/docker/packager/packager
@@ -130,6 +130,7 @@ def parse_env_variables(
     ARM_SUFFIX = "-aarch64"
     FREEBSD_SUFFIX = "-freebsd"
     PPC_SUFFIX = "-ppc64le"
+    RISCV_SUFFIX = "-riscv64"

     result = []
     result.append("OUTPUT_DIR=/output")
@@ -140,6 +141,7 @@ def parse_env_variables(
     is_cross_darwin_arm = compiler.endswith(DARWIN_ARM_SUFFIX)
     is_cross_arm = compiler.endswith(ARM_SUFFIX)
     is_cross_ppc = compiler.endswith(PPC_SUFFIX)
+    is_cross_riscv = compiler.endswith(RISCV_SUFFIX)
     is_cross_freebsd = compiler.endswith(FREEBSD_SUFFIX)

     if is_cross_darwin:
@@ -186,6 +188,11 @@ def parse_env_variables(
         cmake_flags.append(
             "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-ppc64le.cmake"
         )
+    elif is_cross_riscv:
+        cc = compiler[: -len(RISCV_SUFFIX)]
+        cmake_flags.append(
+            "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-riscv64.cmake"
+        )
     else:
         cc = compiler
         result.append("DEB_ARCH=amd64")
@@ -329,6 +336,7 @@ if __name__ == "__main__":
     )
     parser.add_argument("--output-dir", type=dir_name, required=True)
     parser.add_argument("--build-type", choices=("debug", ""), default="")
+<<<<<<< HEAD

     parser.add_argument(
         "--compiler",
         choices=(
             "clang-14",
             "clang-14-darwin",
             "clang-14-darwin-aarch64",
             "clang-14-aarch64",
             "clang-14-ppc64le",
             "clang-14-freebsd",
             "gcc-11",
         ),
         default="clang-14",
     )
     parser.add_argument(
         "--sanitizer",
         choices=("address", "thread", "memory", "undefined", ""),
         default="",
     )
     parser.add_argument("--shared-libraries", action="store_true")

+=======
+    parser.add_argument("--compiler", choices=("clang-11", "clang-11-darwin", "clang-11-darwin-aarch64", "clang-11-aarch64",
+                                               "clang-12", "clang-12-darwin", "clang-12-darwin-aarch64", "clang-12-aarch64",
+                                               "clang-13", "clang-13-darwin", "clang-13-darwin-aarch64", "clang-13-aarch64",
+                                               "clang-13-ppc64le", "clang-13-riscv64",
+                                               "clang-11-freebsd", "clang-12-freebsd", "clang-13-freebsd", "gcc-11"), default="clang-13")
+    parser.add_argument("--sanitizer", choices=("address", "thread", "memory", "undefined", ""), default="")
+    parser.add_argument("--unbundled", action="store_true")
+    parser.add_argument("--split-binary", action="store_true")
+>>>>>>> 8bc7bf3d87 (Add Linux RISC-V 64 build to CI)
     parser.add_argument("--clang-tidy", action="store_true")
     parser.add_argument("--cache", choices=("ccache", "distcc", ""), default="")
     parser.add_argument(
diff --git a/docs/en/development/build-cross-riscv.md b/docs/en/development/build-cross-riscv.md
index a0b31ff131a..b94b1072f28 100644
--- a/docs/en/development/build-cross-riscv.md
+++ b/docs/en/development/build-cross-riscv.md
@@ -11,7 +11,7 @@ This is for the case when you have Linux machine and want to use it to build `cl

 The cross-build for RISC-V 64 is based on the [Build instructions](../development/build.md), follow them first.

-## Install Clang-13
+## Install Clang-14

 Follow the instructions from https://apt.llvm.org/ for your Ubuntu or Debian setup or do
 ```

From 1bd3b8825c8bf72b0d32ff6a0287f853eebbdcaf Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 14 Nov 2021 02:48:38 +0300
Subject: [PATCH 0007/1997] Fix typo

---
 cmake/target.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cmake/target.cmake b/cmake/target.cmake
index 6b78a9253b2..86b060f53e1 100644
--- a/cmake/target.cmake
+++ b/cmake/target.cmake
@@ -33,7 +33,7 @@ if (CMAKE_CROSSCOMPILING)
     elseif (ARCH_PPC64LE)
         set (ENABLE_GRPC OFF CACHE INTERNAL "")
         set (ENABLE_SENTRY OFF CACHE INTERNAL "")
-    elseif (ARCH_RISCV64 OFF CACHE INTERNAL "")
+    elseif (ARCH_RISCV64)
         # RISC-V support is preliminary
         set (GLIBC_COMPATIBILITY OFF CACHE INTERNAL "")
         set (ENABLE_LDAP OFF CACHE INTERNAL "")

From 1021b756ac33806bec7525bac9a1b45a76d9c507 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 14 Aug 2022 06:12:38 +0200
Subject: [PATCH 0008/1997] Fix conflict

---
 docker/packager/packager | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/docker/packager/packager b/docker/packager/packager
index 98b864edbc6..3769e321ccc 100755
--- a/docker/packager/packager
+++ b/docker/packager/packager
@@ -336,7 +336,6 @@ if __name__ == "__main__":
     )
     parser.add_argument("--output-dir", type=dir_name, required=True)
     parser.add_argument("--build-type", choices=("debug", ""), default="")
-<<<<<<< HEAD

     parser.add_argument(
         "--compiler",
@@ -347,6 +346,7 @@ if __name__ == "__main__":
             "clang-14-darwin-aarch64",
             "clang-14-aarch64",
             "clang-14-ppc64le",
+            "clang-14-riscv64",
             "clang-14-freebsd",
             "gcc-11",
         ),
@@ -360,16 +360,6 @@ if __name__ == "__main__":
     )
     parser.add_argument("--shared-libraries", action="store_true")

-=======
-    parser.add_argument("--compiler", choices=("clang-11", "clang-11-darwin", "clang-11-darwin-aarch64", "clang-11-aarch64",
-                                               "clang-12", "clang-12-darwin", "clang-12-darwin-aarch64", "clang-12-aarch64",
-                                               "clang-13", "clang-13-darwin", "clang-13-darwin-aarch64", "clang-13-aarch64",
-                                               "clang-13-ppc64le", "clang-13-riscv64",
-                                               "clang-11-freebsd", "clang-12-freebsd", "clang-13-freebsd", "gcc-11"), default="clang-13")
-    parser.add_argument("--sanitizer", choices=("address", "thread", "memory", "undefined", ""), default="")
-    parser.add_argument("--unbundled", action="store_true")
-    parser.add_argument("--split-binary", action="store_true")
->>>>>>> 8bc7bf3d87 (Add Linux RISC-V 64 build to CI)
     parser.add_argument("--clang-tidy", action="store_true")
     parser.add_argument("--cache", choices=("ccache", "distcc", ""), default="")
     parser.add_argument(

From edc99648ade4ef39e633da31b97995f6b5d3cd5c Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 14 Aug 2022 06:14:43 +0200
Subject: [PATCH 0009/1997] Add build to CI

---
 tests/ci/ci_config.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py
index 5b8f3b4227e..8dd4843cb88 100644
--- a/tests/ci/ci_config.py
+++ b/tests/ci/ci_config.py
@@ -177,6 +177,17 @@ CI_CONFIG = {
             "tidy": "disable",
             "with_coverage": False,
         },
+        "binary_riscv64": {
+            "compiler": "clang-14-riscv64",
+            "build_type": "",
+            "sanitizer": "",
+            "package_type": "binary",
+            "static_binary_name": "riscv64",
+            "bundled": "bundled",
+            "libraries": "static",
+            "tidy": "disable",
+            "with_coverage": False,
+        },
     },
     "builds_report_config": {
         "ClickHouse build check": [
@@ -198,6 +209,7 @@ CI_CONFIG = {
             "binary_freebsd",
             "binary_darwin_aarch64",
             "binary_ppc64le",
+            "binary_riscv64",
         ],
     },
     "tests_config": {

From ec334a3a0866a773b2bb34f1d08be789831df33d Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 14 Aug 2022 09:45:38 +0200
Subject: [PATCH 0010/1997] Programming in YAML with copy-paste

---
 .github/workflows/master.yml       | 49 +++++++++++++++++++++++++++++-
 .github/workflows/pull_request.yml | 47 +++++++++++++++++++++++++++++-
 2 files changed, 94 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml
index 2acc1468328..69a28350945 100644
--- a/.github/workflows/master.yml
+++ b/.github/workflows/master.yml
@@ -923,6 +923,53 @@ jobs:
           # shellcheck disable=SC2046
           docker rm -f $(docker ps -a -q) ||:
           sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+  BuilderBinRISCV64:
+    needs: [DockerHubPush]
+    runs-on: [self-hosted, builder]
+    steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/build_check
+          IMAGES_PATH=${{runner.temp}}/images_path
+          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
+          CACHES_PATH=${{runner.temp}}/../ccaches
+          BUILD_NAME=binary_riscv64
+          EOF
+      - name: Download changed images
+        uses: actions/download-artifact@v2
+        with:
+          name: changed_images
+          path: ${{ env.IMAGES_PATH }}
+      - name: Clear repository
+        run: |
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
+      - name: Check out repository code
+        uses: actions/checkout@v2
+        with:
+          fetch-depth: 0 # otherwise we will have no info about contributors
+      - name: Build
+        run: |
+          git -C "$GITHUB_WORKSPACE" submodule sync --recursive
+          git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
+      - name: Upload build URLs to artifacts
+        if: ${{ success() || failure() }}
+        uses: actions/upload-artifact@v2
+        with:
+          name: ${{ env.BUILD_URLS }}
+          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
+      - name: Cleanup
+        if: always()
+        run: |
+          # shellcheck disable=SC2046
+          docker kill $(docker ps -q) ||:
+          # shellcheck disable=SC2046
+          docker rm -f $(docker ps -a -q) ||:
+          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
 ############################################################################################
 ##################################### Docker images #######################################
 ############################################################################################
@@ -1009,8 +1056,8 @@ jobs:
       - BuilderBinDarwin
       - BuilderBinDarwinAarch64
      - BuilderBinFreeBSD
-      # - BuilderBinGCC
       - BuilderBinPPC64
+      - BuilderBinRISCV64
       - BuilderBinClangTidy
       - BuilderDebShared
     runs-on: [self-hosted, style-checker]
diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml
index 513df8487c4..f17c25cd164 100644
--- a/.github/workflows/pull_request.yml
+++ b/.github/workflows/pull_request.yml
@@ -935,6 +935,51 @@ jobs:
           # shellcheck disable=SC2046
           docker rm -f $(docker ps -a -q) ||:
           sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+  BuilderBinRISCV64:
+    needs: [DockerHubPush, FastTest, StyleCheck]
+    runs-on: [self-hosted, builder]
+    steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/build_check
+          IMAGES_PATH=${{runner.temp}}/images_path
+          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
+          CACHES_PATH=${{runner.temp}}/../ccaches
+          BUILD_NAME=binary_riscv64
+          EOF
+      - name: Download changed images
+        uses: actions/download-artifact@v2
+        with:
+          name: changed_images
+          path: ${{ env.IMAGES_PATH }}
+      - name: Clear repository
+        run: |
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
+      - name: Check out repository code
+        uses: actions/checkout@v2
+      - name: Build
+        run: |
+          git -C "$GITHUB_WORKSPACE" submodule sync --recursive
+          git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
+      - name: Upload build URLs to artifacts
+        if: ${{ success() || failure() }}
+        uses: actions/upload-artifact@v2
+        with:
+          name: ${{ env.BUILD_URLS }}
+          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
+      - name: Cleanup
+        if: always()
+        run: |
+          # shellcheck disable=SC2046
+          docker kill $(docker ps -q) ||:
+          # shellcheck disable=SC2046
+          docker rm -f $(docker ps -a -q) ||:
+          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
 ############################################################################################
 ##################################### Docker images #######################################
 ############################################################################################
@@ -1021,8 +1066,8 @@ jobs:
       - BuilderBinDarwin
       - BuilderBinDarwinAarch64
       - BuilderBinFreeBSD
-      # - BuilderBinGCC
       - BuilderBinPPC64
+      - BuilderBinRISCV64
       - BuilderBinClangTidy
       - BuilderDebShared
     runs-on: [self-hosted, style-checker]

From a517d9d65fcd6646944d45d295284edf3c87cf99 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 15 Aug 2022 04:23:56 +0200
Subject: [PATCH 0011/1997] Maybe make it possible

---
 docker/packager/binary/Dockerfile | 9 +++++++++
 docker/packager/packager          | 4 ++--
 tests/ci/ci_config.py             | 2 +-
 3 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile
index 74919bb2100..67e61f2036b 100644
--- a/docker/packager/binary/Dockerfile
+++ b/docker/packager/binary/Dockerfile
@@ -44,6 +44,15 @@ RUN add-apt-repository ppa:ubuntu-toolchain-r/test --yes \
 # A cross-linker for RISC-V 64 (we need it, because LLVM's LLD does not work):
 RUN apt-get install binutils-riscv64-linux-gnu

+# Build LLVM from trunk, because only that version can build the RISC-V binaries successfully.
+# It can be also used to check it for regular builds.
+RUN git clone git@github.com:llvm/llvm-project.git \
+    && mkdir llvm-build \
+    && cd llvm-build \
+    && cmake -GNinja -DCMAKE_BUILD_TYPE:STRING=Release -DLLVM_ENABLE_PROJECTS=all -DLLVM_TARGETS_TO_BUILD=all ../llvm-project/llvm \
+    && ninja \
+    && ninja install
+
 # Architecture of the image when BuildKit/buildx is used
 ARG TARGETARCH
 ARG NFPM_VERSION=2.16.0
diff --git a/docker/packager/packager b/docker/packager/packager
index 3769e321ccc..03f3bd80c96 100755
--- a/docker/packager/packager
+++ b/docker/packager/packager
@@ -189,7 +189,7 @@ def parse_env_variables(
             "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-ppc64le.cmake"
         )
     elif is_cross_riscv:
-        cc = compiler[: -len(RISCV_SUFFIX)]
+        cc = "clang"
         cmake_flags.append(
             "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-riscv64.cmake"
         )
@@ -346,7 +346,7 @@ if __name__ == "__main__":
             "clang-14-darwin-aarch64",
             "clang-14-aarch64",
             "clang-14-ppc64le",
-            "clang-14-riscv64",
+            "clang-trunk-riscv64",
             "clang-14-freebsd",
             "gcc-11",
         ),
diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py
index 8dd4843cb88..f66eba7d966 100644
--- a/tests/ci/ci_config.py
+++ b/tests/ci/ci_config.py
@@ -178,7 +178,7 @@ CI_CONFIG = {
             "with_coverage": False,
         },
         "binary_riscv64": {
-            "compiler": "clang-14-riscv64",
+            "compiler": "clang-trunk-riscv64",
             "build_type": "",
             "sanitizer": "",
             "package_type": "binary",

From e702adf0c967cafdc03d80b23d83a477c52eab09 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 15 Aug 2022 06:23:00 +0200
Subject: [PATCH 0012/1997] Fix error

---
 docker/packager/binary/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile
index 67e61f2036b..fdb2b324e06 100644
--- a/docker/packager/binary/Dockerfile
+++ b/docker/packager/binary/Dockerfile
@@ -46,7 +46,7 @@ RUN apt-get install binutils-riscv64-linux-gnu

 # Build LLVM from trunk, because only that version can build the RISC-V binaries successfully.
 # It can be also used to check it for regular builds.
-RUN git clone git@github.com:llvm/llvm-project.git \
+RUN git clone https://github.com/llvm/llvm-project.git \
     && mkdir llvm-build \
     && cd llvm-build \
     && cmake -GNinja -DCMAKE_BUILD_TYPE:STRING=Release -DLLVM_ENABLE_PROJECTS=all -DLLVM_TARGETS_TO_BUILD=all ../llvm-project/llvm \

From 7b99581e5c2a9d9e4cde387ae7cd6928cb9918bc Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Wed, 17 Aug 2022 07:47:42 +0200
Subject: [PATCH 0013/1997] Better machines to build Docker images

---
 .github/workflows/backport_branches.yml | 4 ++--
 .github/workflows/docs_check.yml        | 4 ++--
 .github/workflows/docs_release.yml      | 4 ++--
 .github/workflows/master.yml            | 4 ++--
 .github/workflows/nightly.yml           | 4 ++--
 .github/workflows/pull_request.yml      | 4 ++--
 .github/workflows/release_branches.yml  | 4 ++--
 7 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml
index e1b2b1fad01..bd399e48100 100644
--- a/.github/workflows/backport_branches.yml
+++ b/.github/workflows/backport_branches.yml
@@ -22,7 +22,7 @@ jobs:
         cd "$GITHUB_WORKSPACE/tests/ci"
         python3 -m unittest discover -s . -p '*_test.py'
   DockerHubPushAarch64:
-    runs-on: [self-hosted, style-checker-aarch64]
+    runs-on: [self-hosted, func-tester-aarch64]
     steps:
       - name: Clear repository
         run: |
@@ -39,7 +39,7 @@ jobs:
           name: changed_images_aarch64
           path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json
   DockerHubPushAmd64:
-    runs-on: [self-hosted, style-checker]
+    runs-on: [self-hosted, builder]
     steps:
       - name: Clear repository
         run: |
diff --git a/.github/workflows/docs_check.yml b/.github/workflows/docs_check.yml
index b50584a2c01..850f690f44d 100644
--- a/.github/workflows/docs_check.yml
+++ b/.github/workflows/docs_check.yml
@@ -31,7 +31,7 @@ jobs:
         python3 run_check.py
   DockerHubPushAarch64:
     needs: CheckLabels
-    runs-on: [self-hosted, style-checker-aarch64]
+    runs-on: [self-hosted, func-tester-aarch64]
     steps:
       - name: Clear repository
         run: |
@@ -49,7 +49,7 @@ jobs:
           path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json
   DockerHubPushAmd64:
     needs: CheckLabels
-    runs-on: [self-hosted, style-checker]
+    runs-on: [self-hosted, builder]
     steps:
       - name: Clear repository
         run: |
diff --git a/.github/workflows/docs_release.yml b/.github/workflows/docs_release.yml
index e0fdb0c2f7b..cb2f2b8453d 100644
--- a/.github/workflows/docs_release.yml
+++ b/.github/workflows/docs_release.yml
@@ -20,7 +20,7 @@ concurrency:
   workflow_dispatch:
jobs:
   DockerHubPushAarch64:
-    runs-on: [self-hosted, style-checker-aarch64]
+    runs-on: [self-hosted, func-tester-aarch64]
     steps:
       - name: Clear repository
         run: |
@@ -37,7 +37,7 @@ jobs:
           name: changed_images_aarch64
           path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json
   DockerHubPushAmd64:
-    runs-on: [self-hosted, style-checker]
+    runs-on: [self-hosted, builder]
     steps:
       - name: Clear repository
         run: |
diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml
index 69a28350945..552272b38e5 100644
--- a/.github/workflows/master.yml
+++ b/.github/workflows/master.yml
@@ -22,7 +22,7 @@ jobs:
         cd "$GITHUB_WORKSPACE/tests/ci"
         python3 -m unittest discover -s . -p '*_test.py'
   DockerHubPushAarch64:
-    runs-on: [self-hosted, style-checker-aarch64]
+    runs-on: [self-hosted, func-tester-aarch64]
     steps:
       - name: Clear repository
         run: |
@@ -39,7 +39,7 @@ jobs:
           name: changed_images_aarch64
           path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json
   DockerHubPushAmd64:
-    runs-on: [self-hosted, style-checker]
+    runs-on: [self-hosted, builder]
     steps:
       - name: Clear repository
         run: |
diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml
index e712ada1551..bff937b832f 100644
--- a/.github/workflows/nightly.yml
+++ b/.github/workflows/nightly.yml
@@ -11,7 +11,7 @@ env:

 jobs:
   DockerHubPushAarch64:
-    runs-on: [self-hosted, style-checker-aarch64]
+    runs-on: [self-hosted, func-tester-aarch64]
     steps:
       - name: Clear repository
         run: |
@@ -28,7 +28,7 @@ jobs:
           name: changed_images_aarch64
           path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json
   DockerHubPushAmd64:
-    runs-on: [self-hosted, style-checker]
+    runs-on: [self-hosted, builder]
     steps:
       - name: Clear repository
         run: |
diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml
index f17c25cd164..c10767c55e6 100644
--- a/.github/workflows/pull_request.yml
+++ b/.github/workflows/pull_request.yml
@@ -47,7 +47,7 @@ jobs:
         python3 -m unittest discover -s . -p '*_test.py'
   DockerHubPushAarch64:
     needs: CheckLabels
-    runs-on: [self-hosted, style-checker-aarch64]
+    runs-on: [self-hosted, func-tester-aarch64]
     steps:
       - name: Clear repository
         run: |
@@ -65,7 +65,7 @@ jobs:
           path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json
   DockerHubPushAmd64:
     needs: CheckLabels
-    runs-on: [self-hosted, style-checker]
+    runs-on: [self-hosted, builder]
     steps:
       - name: Clear repository
         run: |
diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml
index fdfedc56f5d..1f082f0ab64 100644
--- a/.github/workflows/release_branches.yml
+++ b/.github/workflows/release_branches.yml
@@ -13,7 +13,7 @@ on: # yamllint disable-line rule:truthy

 jobs:
   DockerHubPushAarch64:
-    runs-on: [self-hosted, style-checker-aarch64]
+    runs-on: [self-hosted, func-tester-aarch64]
     steps:
       - name: Clear repository
         run: |
@@ -30,7 +30,7 @@ jobs:
           name: changed_images_aarch64
           path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json
   DockerHubPushAmd64:
-    runs-on: [self-hosted, style-checker]
+    runs-on: [self-hosted, builder]
     steps:
       - name: Clear repository
         run: |

From 2b82916175f1eda4d0456fcbcb7784b5d6ba377a Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Wed, 17 Aug 2022 09:13:04 +0200
Subject: [PATCH 0014/1997] Do not put garbage in the Docker image

---
 docker/packager/binary/Dockerfile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile
index fdb2b324e06..cbab3d501d6 100644
--- a/docker/packager/binary/Dockerfile
+++ b/docker/packager/binary/Dockerfile
@@ -51,7 +51,8 @@ RUN git clone https://github.com/llvm/llvm-project.git \
     && cd llvm-build \
     && cmake -GNinja -DCMAKE_BUILD_TYPE:STRING=Release -DLLVM_ENABLE_PROJECTS=all -DLLVM_TARGETS_TO_BUILD=all ../llvm-project/llvm \
     && ninja \
-    && ninja install
+    && ninja install \
+    && cd .. && rm -rf llvm-build llvm-project

 # Architecture of the image when BuildKit/buildx is used
 ARG TARGETARCH

From 0eca4d9560ec20290aa35ae9765bf293dbfe01ce Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sat, 20 Aug 2022 18:50:27 +0200
Subject: [PATCH 0015/1997] Maybe fix error

---
 contrib/sysroot | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contrib/sysroot b/contrib/sysroot
index e9fb375d0a1..38c88421249 160000
--- a/contrib/sysroot
+++ b/contrib/sysroot
@@ -1 +1 @@
-Subproject commit e9fb375d0a1e5ebfd74c043f088f2342552103f8
+Subproject commit 38c8842124940a26e7e851c083cd61c651a83ee3

From c08766aa3476e7faea38187061993eeb1b76454e Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 21 Aug 2022 20:51:59 +0200
Subject: [PATCH 0016/1997] Revert "Do not put garbage in the Docker image"

This reverts commit 2b82916175f1eda4d0456fcbcb7784b5d6ba377a.

---
 docker/packager/binary/Dockerfile | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile
index cbab3d501d6..fdb2b324e06 100644
--- a/docker/packager/binary/Dockerfile
+++ b/docker/packager/binary/Dockerfile
@@ -51,8 +51,7 @@ RUN git clone https://github.com/llvm/llvm-project.git \
     && cd llvm-build \
     && cmake -GNinja -DCMAKE_BUILD_TYPE:STRING=Release -DLLVM_ENABLE_PROJECTS=all -DLLVM_TARGETS_TO_BUILD=all ../llvm-project/llvm \
     && ninja \
-    && ninja install \
-    && cd .. && rm -rf llvm-build llvm-project
+    && ninja install

 # Architecture of the image when BuildKit/buildx is used
 ARG TARGETARCH

From 47ccb28ad387642bfc1549642dd43bfea5c06f4a Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 21 Aug 2022 20:52:01 +0200
Subject: [PATCH 0017/1997] Revert "Better machines to build Docker images"

This reverts commit 7b99581e5c2a9d9e4cde387ae7cd6928cb9918bc.

---
 .github/workflows/backport_branches.yml | 4 ++--
 .github/workflows/docs_check.yml        | 4 ++--
 .github/workflows/docs_release.yml      | 4 ++--
 .github/workflows/master.yml            | 4 ++--
 .github/workflows/nightly.yml           | 4 ++--
 .github/workflows/pull_request.yml      | 4 ++--
 .github/workflows/release_branches.yml  | 4 ++--
 7 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml
index bd399e48100..e1b2b1fad01 100644
--- a/.github/workflows/backport_branches.yml
+++ b/.github/workflows/backport_branches.yml
@@ -22,7 +22,7 @@ jobs:
         cd "$GITHUB_WORKSPACE/tests/ci"
         python3 -m unittest discover -s . -p '*_test.py'
   DockerHubPushAarch64:
-    runs-on: [self-hosted, func-tester-aarch64]
+    runs-on: [self-hosted, style-checker-aarch64]
     steps:
       - name: Clear repository
         run: |
@@ -39,7 +39,7 @@ jobs:
           name: changed_images_aarch64
           path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json
   DockerHubPushAmd64:
-    runs-on: [self-hosted, builder]
+    runs-on: [self-hosted, style-checker]
     steps:
       - name: Clear repository
         run: |
diff --git a/.github/workflows/docs_check.yml b/.github/workflows/docs_check.yml
index 850f690f44d..b50584a2c01 100644
--- a/.github/workflows/docs_check.yml
+++ b/.github/workflows/docs_check.yml
@@ -31,7 +31,7 @@ jobs:
         python3 run_check.py
   DockerHubPushAarch64:
     needs: CheckLabels
-    runs-on: [self-hosted, func-tester-aarch64]
+    runs-on: [self-hosted, style-checker-aarch64]
     steps:
       - name: Clear repository
         run: |
@@ -49,7 +49,7 @@ jobs:
           path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json
   DockerHubPushAmd64:
     needs: CheckLabels
-    runs-on: [self-hosted, builder]
+    runs-on: [self-hosted, style-checker]
     steps:
       - name: Clear repository
         run: |
diff --git a/.github/workflows/docs_release.yml b/.github/workflows/docs_release.yml
index cb2f2b8453d..e0fdb0c2f7b 100644
--- a/.github/workflows/docs_release.yml
+++ b/.github/workflows/docs_release.yml
@@ -20,7 +20,7 @@ concurrency:
   workflow_dispatch:
jobs:
   DockerHubPushAarch64:
-    runs-on: [self-hosted, func-tester-aarch64]
+    runs-on: [self-hosted, style-checker-aarch64]
     steps:
       - name: Clear repository
         run: |
@@ -37,7 +37,7 @@ jobs:
           name: changed_images_aarch64
           path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json
   DockerHubPushAmd64:
-    runs-on: [self-hosted, builder]
+    runs-on: [self-hosted, style-checker]
     steps:
       - name: Clear repository
         run: |
diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml
index 552272b38e5..69a28350945 100644
--- a/.github/workflows/master.yml
+++ b/.github/workflows/master.yml
@@ -22,7 +22,7 @@ jobs:
         cd "$GITHUB_WORKSPACE/tests/ci"
         python3 -m unittest discover -s . -p '*_test.py'
   DockerHubPushAarch64:
-    runs-on: [self-hosted, func-tester-aarch64]
+    runs-on: [self-hosted, style-checker-aarch64]
     steps:
       - name: Clear repository
         run: |
@@ -39,7 +39,7 @@ jobs:
           name: changed_images_aarch64
           path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json
   DockerHubPushAmd64:
-    runs-on: [self-hosted, builder]
+    runs-on: [self-hosted, style-checker]
     steps:
       - name: Clear repository
         run: |
diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml
index bff937b832f..e712ada1551 100644
--- a/.github/workflows/nightly.yml
+++ b/.github/workflows/nightly.yml
@@ -11,7 +11,7 @@ env:

 jobs:
   DockerHubPushAarch64:
-    runs-on: [self-hosted, func-tester-aarch64]
+    runs-on: [self-hosted, style-checker-aarch64]
     steps:
       - name: Clear repository
         run: |
@@ -28,7 +28,7 @@ jobs:
           name: changed_images_aarch64
           path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json
   DockerHubPushAmd64:
-    runs-on: [self-hosted, builder]
+    runs-on: [self-hosted, style-checker]
     steps:
       - name: Clear repository
         run: |
diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml
index c10767c55e6..f17c25cd164 100644
--- a/.github/workflows/pull_request.yml
+++ b/.github/workflows/pull_request.yml
@@ -47,7 +47,7 @@ jobs:
         python3 -m unittest discover -s . -p '*_test.py'
   DockerHubPushAarch64:
     needs: CheckLabels
-    runs-on: [self-hosted, func-tester-aarch64]
+    runs-on: [self-hosted, style-checker-aarch64]
     steps:
       - name: Clear repository
         run: |
@@ -65,7 +65,7 @@ jobs:
           path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json
   DockerHubPushAmd64:
     needs: CheckLabels
-    runs-on: [self-hosted, builder]
+    runs-on: [self-hosted, style-checker]
     steps:
       - name: Clear repository
         run: |
diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml
index 1f082f0ab64..fdfedc56f5d 100644
--- a/.github/workflows/release_branches.yml
+++ b/.github/workflows/release_branches.yml
@@ -13,7 +13,7 @@ on: # yamllint disable-line rule:truthy

 jobs:
   DockerHubPushAarch64:
-    runs-on: [self-hosted, func-tester-aarch64]
+    runs-on: [self-hosted, style-checker-aarch64]
     steps:
       - name: Clear repository
         run: |
@@ -30,7 +30,7 @@ jobs:
           name: changed_images_aarch64
           path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json
   DockerHubPushAmd64:
-    runs-on: [self-hosted, builder]
+    runs-on: [self-hosted, style-checker]
     steps:
       - name: Clear repository
         run: |

From dd9085346af943c9dca0ab18fe7f4c16fda38ae5 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 21 Aug 2022 20:52:03 +0200
Subject: [PATCH 0018/1997] Revert "Fix error"

This reverts commit e702adf0c967cafdc03d80b23d83a477c52eab09.

---
 docker/packager/binary/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile
index fdb2b324e06..67e61f2036b 100644
--- a/docker/packager/binary/Dockerfile
+++ b/docker/packager/binary/Dockerfile
@@ -46,7 +46,7 @@ RUN apt-get install binutils-riscv64-linux-gnu

 # Build LLVM from trunk, because only that version can build the RISC-V binaries successfully.
 # It can be also used to check it for regular builds.
-RUN git clone https://github.com/llvm/llvm-project.git \
+RUN git clone git@github.com:llvm/llvm-project.git \
     && mkdir llvm-build \
     && cd llvm-build \
     && cmake -GNinja -DCMAKE_BUILD_TYPE:STRING=Release -DLLVM_ENABLE_PROJECTS=all -DLLVM_TARGETS_TO_BUILD=all ../llvm-project/llvm \

From cb2e5f316c442ac0528980f5204d0afa45d030ac Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 21 Aug 2022 20:52:04 +0200
Subject: [PATCH 0019/1997] Revert "Maybe make it possible"

This reverts commit a517d9d65fcd6646944d45d295284edf3c87cf99.

---
 docker/packager/binary/Dockerfile | 9 ---------
 docker/packager/packager          | 4 ++--
 tests/ci/ci_config.py             | 2 +-
 3 files changed, 3 insertions(+), 12 deletions(-)

diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile
index 67e61f2036b..74919bb2100 100644
--- a/docker/packager/binary/Dockerfile
+++ b/docker/packager/binary/Dockerfile
@@ -44,15 +44,6 @@ RUN add-apt-repository ppa:ubuntu-toolchain-r/test --yes \
 # A cross-linker for RISC-V 64 (we need it, because LLVM's LLD does not work):
 RUN apt-get install binutils-riscv64-linux-gnu

-# Build LLVM from trunk, because only that version can build the RISC-V binaries successfully.
-# It can be also used to check it for regular builds.
-RUN git clone git@github.com:llvm/llvm-project.git \
-    && mkdir llvm-build \
-    && cd llvm-build \
-    && cmake -GNinja -DCMAKE_BUILD_TYPE:STRING=Release -DLLVM_ENABLE_PROJECTS=all -DLLVM_TARGETS_TO_BUILD=all ../llvm-project/llvm \
-    && ninja \
-    && ninja install
-
 # Architecture of the image when BuildKit/buildx is used
 ARG TARGETARCH
 ARG NFPM_VERSION=2.16.0
diff --git a/docker/packager/packager b/docker/packager/packager
index 03f3bd80c96..3769e321ccc 100755
--- a/docker/packager/packager
+++ b/docker/packager/packager
@@ -189,7 +189,7 @@ def parse_env_variables(
             "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-ppc64le.cmake"
         )
     elif is_cross_riscv:
-        cc = "clang"
+        cc = compiler[: -len(RISCV_SUFFIX)]
         cmake_flags.append(
             "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-riscv64.cmake"
         )
@@ -346,7 +346,7 @@ if __name__ == "__main__":
             "clang-14-darwin-aarch64",
             "clang-14-aarch64",
             "clang-14-ppc64le",
-            "clang-trunk-riscv64",
+            "clang-14-riscv64",
             "clang-14-freebsd",
             "gcc-11",
         ),
diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py
index aa5762d7536..7110ff628ad 100644
--- a/tests/ci/ci_config.py
+++ b/tests/ci/ci_config.py
@@ -178,7 +178,7 @@ CI_CONFIG = {
             "with_coverage": False,
         },
         "binary_riscv64": {
-            "compiler": "clang-trunk-riscv64",
+            "compiler": "clang-14-riscv64",
             "build_type": "",
             "sanitizer": "",
             "package_type": "binary",

From be37730906230fcc4255a9802231cc4d3fe11679 Mon Sep 17 00:00:00 2001
From: zvonand
Date: Fri, 27 Jan 2023 16:23:27 +0300
Subject: [PATCH 0020/1997] upd

---
 src/Common/DateLUT.cpp | 10 +++++++++-
 src/Common/DateLUT.h   | 24 ++++++++++++++++++++++--
 src/Core/Settings.h    |  1 +
 3 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/src/Common/DateLUT.cpp b/src/Common/DateLUT.cpp
index ae6127670e5..fd10b799b66 100644
--- a/src/Common/DateLUT.cpp
+++ b/src/Common/DateLUT.cpp
@@ -7,6 +7,8 @@
 #include
 #include
+#include
+#include


 namespace
@@ -147,7 +149,7 @@ DateLUT::DateLUT()
 }


-const DateLUTImpl & DateLUT::getImplementation(const std::string & time_zone) const
+const ALWAYS_INLINE DateLUTImpl & DateLUT::getImplementation(const std::string & time_zone) const
 {
     std::lock_guard lock(mutex);

@@ -163,3 +165,9 @@ DateLUT & DateLUT::getInstance()
     static DateLUT ret;
     return ret;
 }
+
+std::string DateLUT::extractTimezoneFromContext(const DB::ContextPtr query_context)
+{
+    std::string ret = query_context->getSettingsRef().implicit_timezone.value;
+    return ret;
+}
diff --git a/src/Common/DateLUT.h b/src/Common/DateLUT.h
index b7ba37c2bec..bd7969bffa6 100644
--- a/src/Common/DateLUT.h
+++ b/src/Common/DateLUT.h
@@ -5,6 +5,10 @@
 #include
 #include
+//
+//#include "Interpreters/Context_fwd.h"
+//#include "Interpreters/Context.h"
+#include "Common/CurrentThread.h"

 #include
 #include
@@ -20,16 +24,30 @@ public:
     static ALWAYS_INLINE const DateLUTImpl & instance() // -V1071
     {
         const auto & date_lut = getInstance();
+
+        if (DB::CurrentThread::isInitialized())
+        {
+            const auto query_context = DB::CurrentThread::get().getQueryContext();
+
+            if (query_context)
+            {
+                auto implicit_timezone = extractTimezoneFromContext(query_context);
+
+                if (!implicit_timezone.empty())
+                    return instance(implicit_timezone);
+            }
+        }
+
         return *date_lut.default_impl.load(std::memory_order_acquire);
     }

     /// Return singleton DateLUTImpl instance for a given time zone.
     static ALWAYS_INLINE const DateLUTImpl & instance(const std::string & time_zone)
     {
-        const auto & date_lut = getInstance();
         if (time_zone.empty())
-            return *date_lut.default_impl.load(std::memory_order_acquire);
+            return instance();

+        const auto & date_lut = getInstance();
         return date_lut.getImplementation(time_zone);
     }
     static void setDefaultTimezone(const std::string & time_zone)
@@ -45,6 +63,8 @@ protected:
 private:
     static DateLUT & getInstance();

+    static std::string extractTimezoneFromContext(const DB::ContextPtr query_context);
+
     const DateLUTImpl & getImplementation(const std::string & time_zone) const;

     using DateLUTImplPtr = std::unique_ptr;
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 1948a6da012..2da5791ff81 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -688,6 +688,7 @@ class IColumn;
     M(Float, insert_keeper_fault_injection_probability, 0.0f, "Approximate probability of failure for a keeper request during insert. Valid value is in interval [0.0f, 1.0f]", 0) \
     M(UInt64, insert_keeper_fault_injection_seed, 0, "0 - random seed, otherwise the setting value", 0) \
     M(Bool, force_aggregation_in_order, false, "Force use of aggregation in order on remote nodes during distributed aggregation. PLEASE, NEVER CHANGE THIS SETTING VALUE MANUALLY!", IMPORTANT) \
+    M(String, implicit_timezone, "", "Use specified timezone for interpreting Date and DateTime instead of server's timezone.", 0) \

     // End of COMMON_SETTINGS
     // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS.
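A minimal usage sketch for the `implicit_timezone` setting introduced by the patch above (this snippet is not part of the patch series; the timezone literal and the query are illustrative only, mirroring the tests re-added in the next patch):

    SET implicit_timezone = 'Asia/Novosibirsk';
    -- With the setting in effect, session-level Date/DateTime values are
    -- interpreted in Asia/Novosibirsk instead of the server's default timezone.
    SELECT toDateTime64('1999-12-12 23:23:23.123', 3);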
From 537721e297e4ba94f30d5bd76ca0a7b01a080a5c Mon Sep 17 00:00:00 2001 From: zvonand Date: Fri, 27 Jan 2023 23:40:43 +0300 Subject: [PATCH 0021/1997] re-add tests --- .../0_stateless/02538_implicit_timezone.reference | 3 +++ tests/queries/0_stateless/02538_implicit_timezone.sql | 9 +++++++++ 2 files changed, 12 insertions(+) create mode 100644 tests/queries/0_stateless/02538_implicit_timezone.reference create mode 100644 tests/queries/0_stateless/02538_implicit_timezone.sql diff --git a/tests/queries/0_stateless/02538_implicit_timezone.reference b/tests/queries/0_stateless/02538_implicit_timezone.reference new file mode 100644 index 00000000000..8ed8024f652 --- /dev/null +++ b/tests/queries/0_stateless/02538_implicit_timezone.reference @@ -0,0 +1,3 @@ +1999-12-12 18:23:23.123 +1999-12-12 23:23:23.123 +1999-12-13 04:23:23.123 diff --git a/tests/queries/0_stateless/02538_implicit_timezone.sql b/tests/queries/0_stateless/02538_implicit_timezone.sql new file mode 100644 index 00000000000..663b218d235 --- /dev/null +++ b/tests/queries/0_stateless/02538_implicit_timezone.sql @@ -0,0 +1,9 @@ +SET implicit_timezone = 'Asia/Novosibirsk'; + +SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Europe/Zurich'); + +SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Europe/Zurich') SETTINGS implicit_timezone = 'Europe/Zurich'; + +SET implicit_timezone = 'Europe/Zurich'; + +SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Asia/Novosibirsk'); \ No newline at end of file From f29700bd2fe1fc79cb6fd5bf2b31129119e6978c Mon Sep 17 00:00:00 2001 From: LiuYangkuan Date: Wed, 1 Feb 2023 18:50:43 +0800 Subject: [PATCH 0022/1997] use IDisk to do IO in Keeper's snapshots and logs --- programs/keeper-converter/KeeperConverter.cpp | 3 +- src/Coordination/Changelog.cpp | 151 +++++++++++------- src/Coordination/Changelog.h | 6 +- src/Coordination/KeeperLogStore.cpp | 11 +- src/Coordination/KeeperLogStore.h | 3 + src/Coordination/KeeperServer.cpp | 23 ++- src/Coordination/KeeperSnapshotManager.cpp | 91 ++++++----- src/Coordination/KeeperSnapshotManager.h | 24 ++- src/Coordination/KeeperStateMachine.cpp | 4 +- src/Coordination/KeeperStateMachine.h | 2 +- src/Coordination/KeeperStateManager.cpp | 71 ++++---- src/Coordination/KeeperStateManager.h | 8 +- src/IO/ZstdDeflatingAppendableWriteBuffer.cpp | 2 +- src/IO/ZstdDeflatingAppendableWriteBuffer.h | 4 +- 14 files changed, 244 insertions(+), 159 deletions(-) diff --git a/programs/keeper-converter/KeeperConverter.cpp b/programs/keeper-converter/KeeperConverter.cpp index 7d25c1d5017..c81e61685fd 100644 --- a/programs/keeper-converter/KeeperConverter.cpp +++ b/programs/keeper-converter/KeeperConverter.cpp @@ -9,6 +9,7 @@ #include #include #include +#include int mainEntryClickHouseKeeperConverter(int argc, char ** argv) @@ -51,7 +52,7 @@ int mainEntryClickHouseKeeperConverter(int argc, char ** argv) DB::SnapshotMetadataPtr snapshot_meta = std::make_shared(storage.getZXID(), 1, std::make_shared()); DB::KeeperStorageSnapshot snapshot(&storage, snapshot_meta); - DB::KeeperSnapshotManager manager(options["output-dir"].as(), 1, keeper_context); + DB::KeeperSnapshotManager manager(std::make_shared("Keeper-snapshots", options["output-dir"].as(), 0), 1, keeper_context); auto snp = manager.serializeSnapshotToBuffer(snapshot); auto path = manager.serializeSnapshotBufferToDisk(*snp, storage.getZXID()); std::cout << "Snapshot serialized to path:" << path << std::endl; diff --git a/src/Coordination/Changelog.cpp 
b/src/Coordination/Changelog.cpp index 142a7209b42..2d3fb9cba47 100644 --- a/src/Coordination/Changelog.cpp +++ b/src/Coordination/Changelog.cpp @@ -31,12 +31,9 @@ namespace constexpr auto DEFAULT_PREFIX = "changelog"; -std::string formatChangelogPath( - const std::string & prefix, const std::string & name_prefix, uint64_t from_index, uint64_t to_index, const std::string & extension) +inline std::string formatChangelogPath(const std::string & name_prefix, uint64_t from_index, uint64_t to_index, const std::string & extension) { - std::filesystem::path path(prefix); - path /= std::filesystem::path(fmt::format("{}_{}_{}.{}", name_prefix, from_index, to_index, extension)); - return path; + return fmt::format("{}_{}_{}.{}", name_prefix, from_index, to_index, extension); } ChangelogFileDescriptionPtr getChangelogFileDescription(const std::filesystem::path & path) @@ -88,11 +85,11 @@ class ChangelogWriter public: ChangelogWriter( std::map & existing_changelogs_, - const std::filesystem::path & changelogs_dir_, + DiskPtr disk_, LogFileSettings log_file_settings_) : existing_changelogs(existing_changelogs_) , log_file_settings(log_file_settings_) - , changelogs_dir(changelogs_dir_) + , disk(disk_) , log(&Poco::Logger::get("Changelog")) { } @@ -109,7 +106,7 @@ public: file_description->expectedEntriesCountInLog()); // we have a file we need to finalize first - if (tryGetFileBuffer() && prealloc_done) + if (tryGetFileBaseBuffer() && prealloc_done) { finalizeCurrentFile(); @@ -121,18 +118,16 @@ public: && *last_index_written != current_file_description->to_log_index) { auto new_path = formatChangelogPath( - changelogs_dir, current_file_description->prefix, current_file_description->from_log_index, *last_index_written, current_file_description->extension); - std::filesystem::rename(current_file_description->path, new_path); + disk->moveFile(current_file_description->path, new_path); current_file_description->path = std::move(new_path); } } - file_buf = std::make_unique( - file_description->path, DBMS_DEFAULT_BUFFER_SIZE, mode == WriteMode::Rewrite ? -1 : (O_APPEND | O_CREAT | O_WRONLY)); + file_buf = disk->writeFile(file_description->path, DBMS_DEFAULT_BUFFER_SIZE, mode); last_index_written.reset(); current_file_description = std::move(file_description); @@ -148,12 +143,15 @@ public: } } - bool isFileSet() const { return tryGetFileBuffer() != nullptr; } - + /// There is bug when compressed_buffer has value, file_buf's ownership transfer to compressed_buffer + bool isFileSet() const + { + return compressed_buffer.get() != nullptr || file_buf.get() != nullptr; + } bool appendRecord(ChangelogRecord && record) { - const auto * file_buffer = tryGetFileBuffer(); + const auto * file_buffer = tryGetFileBaseBuffer(); assert(file_buffer && current_file_description); assert(record.header.index - getStartIndex() <= current_file_description->expectedEntriesCountInLog()); @@ -207,7 +205,7 @@ public: void flush() { - auto * file_buffer = tryGetFileBuffer(); + auto * file_buffer = tryGetFileBaseBuffer(); /// Fsync file system if needed if (file_buffer && log_file_settings.force_sync) file_buffer->sync(); @@ -232,7 +230,6 @@ public: new_description->extension += "." 
+ toContentEncodingName(CompressionMethod::Zstd); new_description->path = formatChangelogPath( - changelogs_dir, new_description->prefix, new_start_log_index, new_start_log_index + log_file_settings.rotate_interval - 1, @@ -254,14 +251,13 @@ private: void finalizeCurrentFile() { - const auto * file_buffer = tryGetFileBuffer(); - assert(file_buffer && prealloc_done); + assert(prealloc_done); assert(current_file_description); // compact can delete the file and we don't need to do anything if (current_file_description->deleted) { - LOG_WARNING(log, "Log {} is already deleted", file_buffer->getFileName()); + LOG_WARNING(log, "Log {} is already deleted", current_file_description->path); return; } @@ -270,7 +266,8 @@ private: flush(); - if (log_file_settings.max_size != 0) + const auto * file_buffer = tryGetFileBuffer(); + if (log_file_settings.max_size != 0 && file_buffer) ftruncate(file_buffer->getFD(), initial_file_size + file_buffer->count()); if (log_file_settings.compress_logs) @@ -281,6 +278,8 @@ private: WriteBuffer & getBuffer() { + /// TODO: unify compressed_buffer and file_buf, + /// compressed_buffer can use its NestedBuffer directly if compress_logs=false if (compressed_buffer) return *compressed_buffer; @@ -310,10 +309,15 @@ private: if (compressed_buffer) return dynamic_cast(compressed_buffer->getNestedBuffer()); - if (file_buf) - return file_buf.get(); + return dynamic_cast(file_buf.get()); + } - return nullptr; + WriteBufferFromFileBase * tryGetFileBaseBuffer() + { + if (compressed_buffer) + return dynamic_cast(compressed_buffer->getNestedBuffer()); + + return file_buf.get(); } void tryPreallocateForFile() @@ -325,13 +329,22 @@ private: return; } - const auto & file_buffer = getFileBuffer(); + const auto * file_buffer = tryGetFileBuffer(); + + if (!file_buffer) + { + initial_file_size = 0; + prealloc_done = true; + LOG_WARNING(log, "Could not preallocate space on disk {} using fallocate", disk->getName()); + return; + } + #ifdef OS_LINUX { int res = -1; do { - res = fallocate(file_buffer.getFD(), FALLOC_FL_KEEP_SIZE, 0, log_file_settings.max_size + log_file_settings.overallocate_size); + res = fallocate(file_buffer->getFD(), FALLOC_FL_KEEP_SIZE, 0, log_file_settings.max_size + log_file_settings.overallocate_size); } while (res < 0 && errno == EINTR); if (res != 0) @@ -346,7 +359,7 @@ private: } } #endif - initial_file_size = getSizeFromFileDescriptor(file_buffer.getFD()); + initial_file_size = getSizeFromFileDescriptor(file_buffer->getFD()); prealloc_done = true; } @@ -354,7 +367,7 @@ private: std::map & existing_changelogs; ChangelogFileDescriptionPtr current_file_description{nullptr}; - std::unique_ptr file_buf; + std::unique_ptr file_buf; std::optional last_index_written; size_t initial_file_size{0}; @@ -364,7 +377,7 @@ private: LogFileSettings log_file_settings; - const std::filesystem::path changelogs_dir; + DiskPtr disk; Poco::Logger * const log; }; @@ -394,10 +407,12 @@ struct ChangelogReadResult class ChangelogReader { public: - explicit ChangelogReader(const std::string & filepath_) : filepath(filepath_) + explicit ChangelogReader(DiskPtr disk_, const std::string & filepath_) + : disk(disk_) + , filepath(filepath_) { auto compression_method = chooseCompressionMethod(filepath, ""); - auto read_buffer_from_file = std::make_unique(filepath); + auto read_buffer_from_file = disk->readFile(filepath); read_buf = wrapReadBufferWithCompressionMethod(std::move(read_buffer_from_file), compression_method); } @@ -493,37 +508,35 @@ public: } private: + DiskPtr disk; std::string 
filepath;
    std::unique_ptr read_buf;
};

Changelog::Changelog(
-    const std::string & changelogs_dir_,
+    DiskPtr disk_,
    Poco::Logger * log_,
    LogFileSettings log_file_settings)
-    : changelogs_dir(changelogs_dir_)
-    , changelogs_detached_dir(changelogs_dir / "detached")
+    : disk(disk_)
+    , changelogs_detached_dir("detached")
    , rotate_interval(log_file_settings.rotate_interval)
    , log(log_)
    , write_operations(std::numeric_limits::max())
    , append_completion_queue(std::numeric_limits::max())
{
    /// Load all files in changelog directory
-    namespace fs = std::filesystem;
-    if (!fs::exists(changelogs_dir))
-        fs::create_directories(changelogs_dir);

-    for (const auto & p : fs::directory_iterator(changelogs_dir))
+    for (auto it = disk->iterateDirectory(""); it->isValid(); it->next())
    {
-        if (p == changelogs_detached_dir)
+        if (it->name() == changelogs_detached_dir)
            continue;

-        auto file_description = getChangelogFileDescription(p.path());
+        auto file_description = getChangelogFileDescription(it->path());
        existing_changelogs[file_description->from_log_index] = std::move(file_description);
    }

    if (existing_changelogs.empty())
-        LOG_WARNING(log, "No logs exists in {}. It's Ok if it's the first run of clickhouse-keeper.", changelogs_dir.generic_string());
+        LOG_WARNING(log, "No logs exist in {}. It's OK if it's the first run of clickhouse-keeper.", disk->getPath());

    clean_log_thread = ThreadFromGlobalPool([this] { cleanLogThread(); });

@@ -532,7 +545,7 @@ Changelog::Changelog(
    append_completion_thread = ThreadFromGlobalPool([this] { appendCompletionThread(); });

    current_writer = std::make_unique(
-        existing_changelogs, changelogs_dir, log_file_settings);
+        existing_changelogs, disk, log_file_settings);
}

void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uint64_t logs_to_keep)
@@ -604,7 +617,7 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin
            break;
        }

-        ChangelogReader reader(changelog_description.path);
+        ChangelogReader reader(disk, changelog_description.path);
        last_log_read_result = reader.readChangelog(logs, start_to_read_from, log);
        last_log_read_result->log_start_index = changelog_description.from_log_index;

@@ -671,7 +684,7 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin
        if (last_log_read_result->last_read_index == 0 || last_log_read_result->error) /// If it's broken log then remove it
        {
            LOG_INFO(log, "Removing changelog {} because it's empty or reading finished with an error", description->path);
-            std::filesystem::remove(description->path);
+            disk->removeFile(description->path);
            existing_changelogs.erase(last_log_read_result->log_start_index);
            std::erase_if(logs, [last_log_read_result](const auto & item) { return item.first >= last_log_read_result->log_start_index; });
        }
@@ -691,6 +704,9 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin

void Changelog::initWriter(ChangelogFileDescriptionPtr description)
{
+    if (description->expectedEntriesCountInLog() != rotate_interval)
+        LOG_TRACE(log, "Looks like rotate_logs_interval was changed, current {}, expected entries in last log {}", rotate_interval, description->expectedEntriesCountInLog());
+
    LOG_TRACE(log, "Continue to write into {}", description->path);
    current_writer->setFile(std::move(description), WriteMode::Append);
}
@@ -715,20 +731,20 @@

std::string getCurrentTimestampFolder()

void Changelog::removeExistingLogs(ChangelogIter begin, ChangelogIter end)
{
-    const auto timestamp_folder = changelogs_detached_dir /
getCurrentTimestampFolder(); + const auto timestamp_folder = (fs::path(changelogs_detached_dir) / getCurrentTimestampFolder()).generic_string(); for (auto itr = begin; itr != end;) { - if (!std::filesystem::exists(timestamp_folder)) + if (!disk->exists(timestamp_folder)) { - LOG_WARNING(log, "Moving broken logs to {}", timestamp_folder.generic_string()); - std::filesystem::create_directories(timestamp_folder); + LOG_WARNING(log, "Moving broken logs to {}", timestamp_folder); + disk->createDirectories(timestamp_folder); } LOG_WARNING(log, "Removing changelog {}", itr->second->path); const std::filesystem::path & path = itr->second->path; const auto new_path = timestamp_folder / path.filename(); - std::filesystem::rename(path, new_path); + disk->moveFile(path.generic_string(), new_path.generic_string()); itr = existing_changelogs.erase(itr); } } @@ -885,7 +901,7 @@ void Changelog::writeAt(uint64_t index, const LogEntryPtr & log_entry) auto to_remove_itr = existing_changelogs.upper_bound(index); for (auto itr = to_remove_itr; itr != existing_changelogs.end();) { - std::filesystem::remove(itr->second->path); + disk->removeFile(itr->second->path); itr = existing_changelogs.erase(itr); } } @@ -937,12 +953,19 @@ void Changelog::compact(uint64_t up_to_log_index) /// If failed to push to queue for background removing, then we will remove it now if (!log_files_to_delete_queue.tryPush(changelog_description.path, 1)) { - std::error_code ec; - std::filesystem::remove(changelog_description.path, ec); - if (ec) - LOG_WARNING(log, "Failed to remove changelog {} in compaction, error message: {}", changelog_description.path, ec.message()); - else - LOG_INFO(log, "Removed changelog {} because of compaction", changelog_description.path); + try + { + disk->removeFile(itr->second->path); + LOG_INFO(log, "Removed changelog {} because of compaction.", itr->second->path); + } + catch (Exception & e) + { + LOG_WARNING(log, "Failed to remove changelog {} in compaction, error message: {}", itr->second->path, e.message()); + } + catch (...) + { + tryLogCurrentException(log); + } } changelog_description.deleted = true; @@ -1135,11 +1158,19 @@ void Changelog::cleanLogThread() std::string path; while (log_files_to_delete_queue.pop(path)) { - std::error_code ec; - if (std::filesystem::remove(path, ec)) + try + { + disk->removeFile(path); LOG_INFO(log, "Removed changelog {} because of compaction.", path); - else - LOG_WARNING(log, "Failed to remove changelog {} in compaction, error message: {}", path, ec.message()); + } + catch (Exception & e) + { + LOG_WARNING(log, "Failed to remove changelog {} in compaction, error message: {}", path, e.message()); + } + catch (...) 
+ { + tryLogCurrentException(log); + } } } diff --git a/src/Coordination/Changelog.h b/src/Coordination/Changelog.h index 288f71bb915..052eba45858 100644 --- a/src/Coordination/Changelog.h +++ b/src/Coordination/Changelog.h @@ -86,7 +86,7 @@ class Changelog { public: Changelog( - const std::string & changelogs_dir_, + DiskPtr disk_, Poco::Logger * log_, LogFileSettings log_file_settings); @@ -168,8 +168,8 @@ private: /// Clean useless log files in a background thread void cleanLogThread(); - const std::filesystem::path changelogs_dir; - const std::filesystem::path changelogs_detached_dir; + DiskPtr disk; + const String changelogs_detached_dir; const uint64_t rotate_interval; Poco::Logger * log; diff --git a/src/Coordination/KeeperLogStore.cpp b/src/Coordination/KeeperLogStore.cpp index d1bd2f9db18..fcf9400a1d9 100644 --- a/src/Coordination/KeeperLogStore.cpp +++ b/src/Coordination/KeeperLogStore.cpp @@ -1,13 +1,13 @@ #include #include +#include namespace DB { -KeeperLogStore::KeeperLogStore( - const std::string & changelogs_path, LogFileSettings log_file_settings) +KeeperLogStore::KeeperLogStore(DiskPtr disk_, LogFileSettings log_file_settings) : log(&Poco::Logger::get("KeeperLogStore")) - , changelog(changelogs_path, log, log_file_settings) + , changelog(disk_, log, log_file_settings) { if (log_file_settings.force_sync) LOG_INFO(log, "force_sync enabled"); @@ -15,6 +15,11 @@ KeeperLogStore::KeeperLogStore( LOG_INFO(log, "force_sync disabled"); } +KeeperLogStore::KeeperLogStore(const std::string & changelogs_path, LogFileSettings log_file_settings) + : KeeperLogStore(std::make_shared("Keeper-logs", changelogs_path, 0), log_file_settings) +{ +} + uint64_t KeeperLogStore::start_index() const { std::lock_guard lock(changelog_lock); diff --git a/src/Coordination/KeeperLogStore.h b/src/Coordination/KeeperLogStore.h index 108241e024e..c2428413753 100644 --- a/src/Coordination/KeeperLogStore.h +++ b/src/Coordination/KeeperLogStore.h @@ -14,6 +14,9 @@ namespace DB class KeeperLogStore : public nuraft::log_store { public: + KeeperLogStore(DiskPtr disk_, LogFileSettings log_file_settings); + + /// For gtest KeeperLogStore(const std::string & changelogs_path, LogFileSettings log_file_settings); /// Read log storage from filesystem starting from last_commited_log_index diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index b823fbc697c..4de194fe9ae 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -25,6 +25,7 @@ #include #include #include +#include namespace DB { @@ -121,20 +122,36 @@ KeeperServer::KeeperServer( keeper_context->digest_enabled = config.getBool("keeper_server.digest_enabled", false); keeper_context->ignore_system_path_on_startup = config.getBool("keeper_server.ignore_system_path_on_startup", false); + if (!fs::exists(configuration_and_settings_->snapshot_storage_path)) + fs::create_directories(configuration_and_settings_->snapshot_storage_path); + auto snapshots_disk = std::make_shared("Keeper-snapshots", configuration_and_settings_->snapshot_storage_path, 0); + state_machine = nuraft::cs_new( responses_queue_, snapshots_queue_, - configuration_and_settings_->snapshot_storage_path, + snapshots_disk, coordination_settings, keeper_context, config.getBool("keeper_server.upload_snapshot_on_exit", true) ? 
&snapshot_manager_s3 : nullptr, checkAndGetSuperdigest(configuration_and_settings_->super_digest)); + auto state_path = fs::path(configuration_and_settings_->state_file_path).parent_path().generic_string(); + auto state_file_name = fs::path(configuration_and_settings_->state_file_path).filename().generic_string(); + + if (!fs::exists(state_path)) + fs::create_directories(state_path); + auto state_disk = std::make_shared("Keeper-state", state_path, 0); + + if (!fs::exists(configuration_and_settings_->log_storage_path)) + fs::create_directories(configuration_and_settings_->log_storage_path); + auto logs_disk = std::make_shared("Keeper-logs", configuration_and_settings_->log_storage_path, 0); + state_manager = nuraft::cs_new( server_id, "keeper_server", - configuration_and_settings_->log_storage_path, - configuration_and_settings_->state_file_path, + logs_disk, + state_disk, + state_file_name, config, coordination_settings); } diff --git a/src/Coordination/KeeperSnapshotManager.cpp b/src/Coordination/KeeperSnapshotManager.cpp index a3f7dbc2c6a..3d308fbabda 100644 --- a/src/Coordination/KeeperSnapshotManager.cpp +++ b/src/Coordination/KeeperSnapshotManager.cpp @@ -16,6 +16,7 @@ #include #include #include +#include namespace DB @@ -507,39 +508,45 @@ KeeperSnapshotManager::KeeperSnapshotManager( bool compress_snapshots_zstd_, const std::string & superdigest_, size_t storage_tick_time_) - : snapshots_path(snapshots_path_) + : KeeperSnapshotManager( + std::make_shared("Keeper-snapshots", snapshots_path_, 0), + snapshots_to_keep_, + keeper_context_, + compress_snapshots_zstd_, + superdigest_, + storage_tick_time_) +{ +} + +KeeperSnapshotManager::KeeperSnapshotManager( + DiskPtr disk_, + size_t snapshots_to_keep_, + const KeeperContextPtr & keeper_context_, + bool compress_snapshots_zstd_, + const std::string & superdigest_, + size_t storage_tick_time_) + : disk(disk_) , snapshots_to_keep(snapshots_to_keep_) , compress_snapshots_zstd(compress_snapshots_zstd_) , superdigest(superdigest_) , storage_tick_time(storage_tick_time_) , keeper_context(keeper_context_) { - namespace fs = std::filesystem; - - if (!fs::exists(snapshots_path)) - fs::create_directories(snapshots_path); - - for (const auto & p : fs::directory_iterator(snapshots_path)) + for (auto it = disk->iterateDirectory(""); it->isValid(); it->next()) { - const auto & path = p.path(); - - if (!path.has_filename()) + const auto & name = it->name(); + if (name.empty()) continue; - - if (startsWith(path.filename(), "tmp_")) /// Unfinished tmp files + if (startsWith(name, "tmp_")) { - std::filesystem::remove(p); + disk->removeFile(it->path()); continue; } - /// Not snapshot file - if (!startsWith(path.filename(), "snapshot_")) - { + if (!startsWith(name, "snapshot_")) continue; - } - - size_t snapshot_up_to = getSnapshotPathUpToLogIdx(p.path()); - existing_snapshots[snapshot_up_to] = p.path(); + size_t snapshot_up_to = getSnapshotPathUpToLogIdx(name); + existing_snapshots[snapshot_up_to] = it->path(); } removeOutdatedSnapshotsIfNeeded(); @@ -552,19 +559,17 @@ std::string KeeperSnapshotManager::serializeSnapshotBufferToDisk(nuraft::buffer auto snapshot_file_name = getSnapshotFileName(up_to_log_idx, compress_snapshots_zstd); auto tmp_snapshot_file_name = "tmp_" + snapshot_file_name; - std::string tmp_snapshot_path = std::filesystem::path{snapshots_path} / tmp_snapshot_file_name; - std::string new_snapshot_path = std::filesystem::path{snapshots_path} / snapshot_file_name; - WriteBufferFromFile plain_buf(tmp_snapshot_path); - copyData(reader, 
plain_buf); - plain_buf.sync(); + auto plain_buf = disk->writeFile(tmp_snapshot_file_name); + copyData(reader, *plain_buf); + plain_buf->sync(); - std::filesystem::rename(tmp_snapshot_path, new_snapshot_path); + disk->moveFile(tmp_snapshot_file_name, snapshot_file_name); - existing_snapshots.emplace(up_to_log_idx, new_snapshot_path); + existing_snapshots.emplace(up_to_log_idx, snapshot_file_name); removeOutdatedSnapshotsIfNeeded(); - return new_snapshot_path; + return snapshot_file_name; } nuraft::ptr KeeperSnapshotManager::deserializeLatestSnapshotBufferFromDisk() @@ -578,7 +583,7 @@ nuraft::ptr KeeperSnapshotManager::deserializeLatestSnapshotBuff } catch (const DB::Exception &) { - std::filesystem::remove(latest_itr->second); + disk->removeFile(latest_itr->second); existing_snapshots.erase(latest_itr->first); tryLogCurrentException(__PRETTY_FUNCTION__); } @@ -591,8 +596,8 @@ nuraft::ptr KeeperSnapshotManager::deserializeSnapshotBufferFrom { const std::string & snapshot_path = existing_snapshots.at(up_to_log_idx); WriteBufferFromNuraftBuffer writer; - ReadBufferFromFile reader(snapshot_path); - copyData(reader, writer); + auto reader = disk->readFile(snapshot_path); + copyData(*reader, writer); return writer.getBuffer(); } @@ -664,7 +669,7 @@ void KeeperSnapshotManager::removeSnapshot(uint64_t log_idx) auto itr = existing_snapshots.find(log_idx); if (itr == existing_snapshots.end()) throw Exception(ErrorCodes::UNKNOWN_SNAPSHOT, "Unknown snapshot with log index {}", log_idx); - std::filesystem::remove(itr->second); + disk->removeFile(itr->second); existing_snapshots.erase(itr); } @@ -673,10 +678,8 @@ std::pair KeeperSnapshotManager::serializeSnapshot auto up_to_log_idx = snapshot.snapshot_meta->get_last_log_idx(); auto snapshot_file_name = getSnapshotFileName(up_to_log_idx, compress_snapshots_zstd); auto tmp_snapshot_file_name = "tmp_" + snapshot_file_name; - std::string tmp_snapshot_path = std::filesystem::path{snapshots_path} / tmp_snapshot_file_name; - std::string new_snapshot_path = std::filesystem::path{snapshots_path} / snapshot_file_name; - auto writer = std::make_unique(tmp_snapshot_path, O_WRONLY | O_TRUNC | O_CREAT | O_CLOEXEC | O_APPEND); + auto writer = disk->writeFile(tmp_snapshot_file_name); std::unique_ptr compressed_writer; if (compress_snapshots_zstd) compressed_writer = wrapWriteBufferWithCompressionMethod(std::move(writer), CompressionMethod::Zstd, 3); @@ -688,13 +691,21 @@ std::pair KeeperSnapshotManager::serializeSnapshot compressed_writer->sync(); std::error_code ec; - std::filesystem::rename(tmp_snapshot_path, new_snapshot_path, ec); - if (!ec) + + try { - existing_snapshots.emplace(up_to_log_idx, new_snapshot_path); - removeOutdatedSnapshotsIfNeeded(); + disk->moveFile(tmp_snapshot_file_name, snapshot_file_name); } - return {new_snapshot_path, ec}; + catch (fs::filesystem_error & e) + { + ec = e.code(); + return {snapshot_file_name, ec}; + } + + existing_snapshots.emplace(up_to_log_idx, snapshot_file_name); + removeOutdatedSnapshotsIfNeeded(); + + return {snapshot_file_name, ec}; } } diff --git a/src/Coordination/KeeperSnapshotManager.h b/src/Coordination/KeeperSnapshotManager.h index 52647712083..9babad9ed98 100644 --- a/src/Coordination/KeeperSnapshotManager.h +++ b/src/Coordination/KeeperSnapshotManager.h @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB { @@ -97,6 +98,15 @@ using SnapshotMetaAndStorage = std::pair; class KeeperSnapshotManager { public: + KeeperSnapshotManager( + DiskPtr disk_, + size_t snapshots_to_keep_, + const KeeperContextPtr 
& keeper_context_, + bool compress_snapshots_zstd_ = true, + const std::string & superdigest_ = "", + size_t storage_tick_time_ = 500); + + /// For gtest KeeperSnapshotManager( const std::string & snapshots_path_, size_t snapshots_to_keep_, @@ -144,9 +154,15 @@ public: if (!existing_snapshots.empty()) { const auto & path = existing_snapshots.at(getLatestSnapshotIndex()); - std::error_code ec; - if (std::filesystem::exists(path, ec)) - return path; + + try + { + if (disk->exists(path)) + return path; + } + catch (...) + { + } } return ""; } @@ -158,7 +174,7 @@ private: /// ZSTD codec. static bool isZstdCompressed(nuraft::ptr buffer); - const std::string snapshots_path; + DiskPtr disk; /// How many snapshots to keep before remove const size_t snapshots_to_keep; /// All existing snapshots in our path (log_index -> path) diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index e591f87c6f1..56b8c7e5f2e 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -41,14 +41,14 @@ namespace KeeperStateMachine::KeeperStateMachine( ResponsesQueue & responses_queue_, SnapshotsQueue & snapshots_queue_, - const std::string & snapshots_path_, + DiskPtr disk_, const CoordinationSettingsPtr & coordination_settings_, const KeeperContextPtr & keeper_context_, KeeperSnapshotManagerS3 * snapshot_manager_s3_, const std::string & superdigest_) : coordination_settings(coordination_settings_) , snapshot_manager( - snapshots_path_, + disk_, coordination_settings->snapshots_to_keep, keeper_context_, coordination_settings->compress_snapshots_with_zstd_format, diff --git a/src/Coordination/KeeperStateMachine.h b/src/Coordination/KeeperStateMachine.h index ffc7fce1cfe..ba209f63a0c 100644 --- a/src/Coordination/KeeperStateMachine.h +++ b/src/Coordination/KeeperStateMachine.h @@ -25,7 +25,7 @@ public: KeeperStateMachine( ResponsesQueue & responses_queue_, SnapshotsQueue & snapshots_queue_, - const std::string & snapshots_path_, + DiskPtr disk_, const CoordinationSettingsPtr & coordination_settings_, const KeeperContextPtr & keeper_context_, KeeperSnapshotManagerS3 * snapshot_manager_s3_, diff --git a/src/Coordination/KeeperStateManager.cpp b/src/Coordination/KeeperStateManager.cpp index cfb3519e597..36a88f5591f 100644 --- a/src/Coordination/KeeperStateManager.cpp +++ b/src/Coordination/KeeperStateManager.cpp @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB { @@ -214,8 +215,8 @@ KeeperStateManager::KeeperStateManager( int server_id_, const std::string & host, int port, const std::string & logs_path, const std::string & state_file_path) : my_server_id(server_id_) , secure(false) - , log_store(nuraft::cs_new(logs_path, LogFileSettings{.force_sync =false, .compress_logs = false, .rotate_interval = 5000})) - , server_state_path(state_file_path) + , log_store(nuraft::cs_new(std::make_shared("Keeper-logs", logs_path, 0), LogFileSettings{.force_sync =false, .compress_logs = false, .rotate_interval = 5000})) + , server_state_file_name(fs::path(state_file_path).filename().generic_string()) , logger(&Poco::Logger::get("KeeperStateManager")) { auto peer_config = nuraft::cs_new(my_server_id, host + ":" + std::to_string(port)); @@ -228,8 +229,9 @@ KeeperStateManager::KeeperStateManager( KeeperStateManager::KeeperStateManager( int my_server_id_, const std::string & config_prefix_, - const std::string & log_storage_path, - const std::string & state_file_path, + DiskPtr log_disk_, + DiskPtr state_disk_, + const std::string & 
server_state_file_name_, const Poco::Util::AbstractConfiguration & config, const CoordinationSettingsPtr & coordination_settings) : my_server_id(my_server_id_) @@ -237,7 +239,7 @@ KeeperStateManager::KeeperStateManager( , config_prefix(config_prefix_) , configuration_wrapper(parseServersConfiguration(config, false)) , log_store(nuraft::cs_new( - log_storage_path, + log_disk_, LogFileSettings { .force_sync = coordination_settings->force_sync, @@ -246,7 +248,8 @@ KeeperStateManager::KeeperStateManager( .max_size = coordination_settings->max_log_file_size, .overallocate_size = coordination_settings->log_file_overallocate_size })) - , server_state_path(state_file_path) + , disk(state_disk_) + , server_state_file_name(server_state_file_name_) , logger(&Poco::Logger::get("KeeperStateManager")) { } @@ -285,11 +288,11 @@ void KeeperStateManager::save_config(const nuraft::cluster_config & config) configuration_wrapper.cluster_config = nuraft::cluster_config::deserialize(*buf); } -const std::filesystem::path & KeeperStateManager::getOldServerStatePath() +const String & KeeperStateManager::getOldServerStatePath() { static auto old_path = [this] { - return server_state_path.parent_path() / (server_state_path.filename().generic_string() + "-OLD"); + return server_state_file_name + "-OLD"; }(); return old_path; @@ -310,25 +313,24 @@ void KeeperStateManager::save_state(const nuraft::srv_state & state) { const auto & old_path = getOldServerStatePath(); - if (std::filesystem::exists(server_state_path)) - std::filesystem::rename(server_state_path, old_path); + if (disk->exists(server_state_file_name)) + disk->moveFile(server_state_file_name, old_path); - WriteBufferFromFile server_state_file(server_state_path, DBMS_DEFAULT_BUFFER_SIZE, O_TRUNC | O_CREAT | O_WRONLY); + auto server_state_file = disk->writeFile(server_state_file_name); auto buf = state.serialize(); // calculate checksum SipHash hash; hash.update(current_server_state_version); hash.update(reinterpret_cast(buf->data_begin()), buf->size()); - writeIntBinary(hash.get64(), server_state_file); + writeIntBinary(hash.get64(), *server_state_file); - writeIntBinary(static_cast(current_server_state_version), server_state_file); + writeIntBinary(static_cast(current_server_state_version), *server_state_file); - server_state_file.write(reinterpret_cast(buf->data_begin()), buf->size()); - server_state_file.sync(); - server_state_file.close(); + server_state_file->write(reinterpret_cast(buf->data_begin()), buf->size()); + server_state_file->sync(); - std::filesystem::remove(old_path); + disk->removeFileIfExists(old_path); } nuraft::ptr KeeperStateManager::read_state() @@ -339,22 +341,22 @@ nuraft::ptr KeeperStateManager::read_state() { try { - ReadBufferFromFile read_buf(path); - auto content_size = read_buf.getFileSize(); + auto read_buf = disk->readFile(path); + auto content_size = read_buf->getFileSize(); if (content_size == 0) return nullptr; uint64_t read_checksum{0}; - readIntBinary(read_checksum, read_buf); + readIntBinary(read_checksum, *read_buf); uint8_t version; - readIntBinary(version, read_buf); + readIntBinary(version, *read_buf); auto buffer_size = content_size - sizeof read_checksum - sizeof version; auto state_buf = nuraft::buffer::alloc(buffer_size); - read_buf.readStrict(reinterpret_cast(state_buf->data_begin()), buffer_size); + read_buf->readStrict(reinterpret_cast(state_buf->data_begin()), buffer_size); SipHash hash; hash.update(version); @@ -364,15 +366,15 @@ nuraft::ptr KeeperStateManager::read_state() { constexpr auto error_format = 
"Invalid checksum while reading state from {}. Got {}, expected {}"; #ifdef NDEBUG - LOG_ERROR(logger, error_format, path.generic_string(), hash.get64(), read_checksum); + LOG_ERROR(logger, error_format, path, hash.get64(), read_checksum); return nullptr; #else - throw Exception(ErrorCodes::CORRUPTED_DATA, error_format, path.generic_string(), hash.get64(), read_checksum); + throw Exception(ErrorCodes::CORRUPTED_DATA, error_format, disk->getPath() + path, hash.get64(), read_checksum); #endif } auto state = nuraft::srv_state::deserialize(*state_buf); - LOG_INFO(logger, "Read state from {}", path.generic_string()); + LOG_INFO(logger, "Read state from {}", disk->getPath() + path); return state; } catch (const std::exception & e) @@ -383,37 +385,34 @@ nuraft::ptr KeeperStateManager::read_state() throw; } - LOG_ERROR(logger, "Failed to deserialize state from {}", path.generic_string()); + LOG_ERROR(logger, "Failed to deserialize state from {}", disk->getPath() + path); return nullptr; } }; - if (std::filesystem::exists(server_state_path)) + if (disk->exists(server_state_file_name)) { - auto state = try_read_file(server_state_path); + auto state = try_read_file(server_state_file_name); if (state) { - if (std::filesystem::exists(old_path)) - std::filesystem::remove(old_path); + disk->removeFileIfExists(old_path); return state; } - std::filesystem::remove(server_state_path); + disk->removeFile(server_state_file_name); } - if (std::filesystem::exists(old_path)) + if (disk->exists(old_path)) { auto state = try_read_file(old_path); - if (state) { - std::filesystem::rename(old_path, server_state_path); + disk->moveFile(old_path, server_state_file_name); return state; } - - std::filesystem::remove(old_path); + disk->removeFile(old_path); } LOG_WARNING(logger, "No state was read"); diff --git a/src/Coordination/KeeperStateManager.h b/src/Coordination/KeeperStateManager.h index 5d210f8c0ea..d8369100d1c 100644 --- a/src/Coordination/KeeperStateManager.h +++ b/src/Coordination/KeeperStateManager.h @@ -39,7 +39,8 @@ public: KeeperStateManager( int server_id_, const std::string & config_prefix_, - const std::string & log_storage_path, + DiskPtr logs_disk_, + DiskPtr state_disk_, const std::string & state_file_path, const Poco::Util::AbstractConfiguration & config, const CoordinationSettingsPtr & coordination_settings); @@ -111,7 +112,7 @@ public: ConfigUpdateActions getConfigurationDiff(const Poco::Util::AbstractConfiguration & config) const; private: - const std::filesystem::path & getOldServerStatePath(); + const String & getOldServerStatePath(); /// Wrapper struct for Keeper cluster config. We parse this /// info from XML files. 
@@ -136,7 +137,8 @@ private:

    nuraft::ptr log_store;

-    const std::filesystem::path server_state_path;
+    DiskPtr disk;
+    const String server_state_file_name;

    Poco::Logger * logger;

diff --git a/src/IO/ZstdDeflatingAppendableWriteBuffer.cpp b/src/IO/ZstdDeflatingAppendableWriteBuffer.cpp
index f8c4d0e2bac..d079a0d230d 100644
--- a/src/IO/ZstdDeflatingAppendableWriteBuffer.cpp
+++ b/src/IO/ZstdDeflatingAppendableWriteBuffer.cpp
@@ -11,7 +11,7 @@ namespace ErrorCodes
}

ZstdDeflatingAppendableWriteBuffer::ZstdDeflatingAppendableWriteBuffer(
-    std::unique_ptr out_,
+    std::unique_ptr out_,
    int compression_level,
    bool append_to_existing_file_,
    size_t buf_size,
diff --git a/src/IO/ZstdDeflatingAppendableWriteBuffer.h b/src/IO/ZstdDeflatingAppendableWriteBuffer.h
index a0715480737..b64e5d5c4cf 100644
--- a/src/IO/ZstdDeflatingAppendableWriteBuffer.h
+++ b/src/IO/ZstdDeflatingAppendableWriteBuffer.h
@@ -29,7 +29,7 @@ public:
    static inline constexpr ZSTDLastBlock ZSTD_CORRECT_TERMINATION_LAST_BLOCK = {0x01, 0x00, 0x00};

    ZstdDeflatingAppendableWriteBuffer(
-        std::unique_ptr out_,
+        std::unique_ptr out_,
        int compression_level,
        bool append_to_existing_file_,
        size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
@@ -68,7 +68,7 @@ private:
    /// Adding zstd empty block (ZSTD_CORRECT_TERMINATION_LAST_BLOCK) to out.working_buffer
    void addEmptyBlock();

-    std::unique_ptr out;
+    std::unique_ptr out;

    bool append_to_existing_file = false;
    ZSTD_CCtx * cctx;

From 935bc723299056c816646fb9067638a60ddfb085 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Tue, 14 Feb 2023 18:51:10 +0100
Subject: [PATCH 0023/1997] [TEST] Tune allocator

Processing a block of the default max_block_size can be faster than
running mmap()/munmap() plus the memory dependencies they introduce.

Here is an example:

    SELECT count() FROM zeros(10_000_000) WHERE NOT ignore(randomString(1000)) SETTINGS function_implementation='avx2'

- Before this patch it takes: ~6sec
- After: 1.3sec

And even though 128MiB should be enough (for this query the size of the
string allocation will be 65409*(1000+1)=65474409 bytes), due to rounding
to a power of two it will not be, so let's simply try 256MiB (another
option is to use a strict comparison for MMAP_THRESHOLD) and see the perf
tests.

But also note that this has other allocator side effects (performance,
fragmentation), so it is unlikely to go upstream.

I've found this while I was playing with PODArray [1].
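To make the rounding arithmetic above concrete, here is a minimal standalone C++ sketch (not ClickHouse's allocator; the request size is taken from the numbers above, and the power-of-two rounding plus the non-strict threshold comparison are assumptions about the allocation path being described):

    #include <bit>
    #include <cstdint>
    #include <iostream>

    int main()
    {
        /// Bytes requested for the strings of one block in the query above:
        /// assumed 65409 rows, each 1000 bytes of data plus 1 terminating byte.
        const uint64_t requested = 65409ULL * (1000 + 1); /// 65474409 bytes, ~62.4 MiB

        /// Capacity is rounded up to the next power of two before allocating.
        const uint64_t rounded = std::bit_ceil(requested); /// 67108864 bytes, exactly 64 MiB

        for (const uint64_t threshold_mib : {64, 128, 256})
        {
            const uint64_t threshold = threshold_mib * (1ULL << 20);
            /// Non-strict comparison, mirroring the MMAP_THRESHOLD note above.
            std::cout << threshold_mib << " MiB threshold -> "
                      << (rounded >= threshold ? "mmap()/munmap()" : "ordinary malloc") << '\n';
        }
    }

With the old 64 MiB threshold the rounded size hits it exactly, so only the larger thresholds keep this allocation on malloc; the patch picks 256 MiB to leave headroom for other allocations that round up further.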
[1]: https://s3.amazonaws.com/clickhouse-test-reports/45654/2101b66570cbb9eb9a492afa8ab82d562c34336b/performance_comparison_[1/4]/report.html Signed-off-by: Azat Khuzhin --- src/Common/Allocator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/Allocator.cpp b/src/Common/Allocator.cpp index 5a66ddb63a2..c02210f2ece 100644 --- a/src/Common/Allocator.cpp +++ b/src/Common/Allocator.cpp @@ -8,7 +8,7 @@ * See also: https://gcc.gnu.org/legacy-ml/gcc-help/2017-12/msg00021.html */ #ifdef NDEBUG - __attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 64 * (1ULL << 20); + __attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 256 * (1ULL << 20); #else /** * In debug build, use small mmap threshold to reproduce more memory From 010edbf2aad3508402e82b8ce62f90ce62fc9f09 Mon Sep 17 00:00:00 2001 From: zvonand Date: Wed, 22 Feb 2023 10:39:23 +0100 Subject: [PATCH 0024/1997] do another way and logs work --- src/Common/DateLUT.h | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/src/Common/DateLUT.h b/src/Common/DateLUT.h index bd7969bffa6..6ec9cf1646d 100644 --- a/src/Common/DateLUT.h +++ b/src/Common/DateLUT.h @@ -5,9 +5,6 @@ #include #include -// -//#include "Interpreters/Context_fwd.h" -//#include "Interpreters/Context.h" #include "Common/CurrentThread.h" #include @@ -24,30 +21,31 @@ public: static ALWAYS_INLINE const DateLUTImpl & instance() // -V1071 { const auto & date_lut = getInstance(); - - if (DB::CurrentThread::isInitialized()) - { - const auto query_context = DB::CurrentThread::get().getQueryContext(); - - if (query_context) - { - auto implicit_timezone = extractTimezoneFromContext(query_context); - - if (!implicit_timezone.empty()) - return instance(implicit_timezone); - } - } - return *date_lut.default_impl.load(std::memory_order_acquire); } /// Return singleton DateLUTImpl instance for a given time zone. 
static ALWAYS_INLINE const DateLUTImpl & instance(const std::string & time_zone)
    {
-        if (time_zone.empty())
-            return instance();
-
        const auto & date_lut = getInstance();
+
+        if (time_zone.empty())
+        {
+            if (DB::CurrentThread::isInitialized())
+            {
+                const auto query_context = DB::CurrentThread::get().getQueryContext();
+
+                if (query_context)
+                {
+                    auto implicit_timezone = extractTimezoneFromContext(query_context);
+
+                    if (!implicit_timezone.empty())
+                        return instance(implicit_timezone);
+                }
+            }
+            return *date_lut.default_impl.load(std::memory_order_acquire);
+        }
+
        return date_lut.getImplementation(time_zone);
    }

    static void setDefaultTimezone(const std::string & time_zone)

From 1cf6c3a9c0fa4867684dee56c651d4131aa3b0fe Mon Sep 17 00:00:00 2001
From: zvonand
Date: Wed, 22 Feb 2023 10:51:32 +0100
Subject: [PATCH 0025/1997] update test names

---
 ...licit_timezone.reference => 02668_implicit_timezone.reference} | 0
 .../{02538_implicit_timezone.sql => 02668_implicit_timezone.sql}  | 0
 2 files changed, 0 insertions(+), 0 deletions(-)
 rename tests/queries/0_stateless/{02538_implicit_timezone.reference => 02668_implicit_timezone.reference} (100%)
 rename tests/queries/0_stateless/{02538_implicit_timezone.sql => 02668_implicit_timezone.sql} (100%)

diff --git a/tests/queries/0_stateless/02538_implicit_timezone.reference b/tests/queries/0_stateless/02668_implicit_timezone.reference
similarity index 100%
rename from tests/queries/0_stateless/02538_implicit_timezone.reference
rename to tests/queries/0_stateless/02668_implicit_timezone.reference
diff --git a/tests/queries/0_stateless/02538_implicit_timezone.sql b/tests/queries/0_stateless/02668_implicit_timezone.sql
similarity index 100%
rename from tests/queries/0_stateless/02538_implicit_timezone.sql
rename to tests/queries/0_stateless/02668_implicit_timezone.sql

From 393830ecdc78cd2745cc439d7ac95c3421fe9044 Mon Sep 17 00:00:00 2001
From: zvonand
Date: Wed, 22 Feb 2023 16:30:46 +0100
Subject: [PATCH 0026/1997] add docs + tiny cleanup

---
 docs/en/operations/settings/settings.md | 27 ++++++++++++++++++++++++
 docs/ru/operations/settings/settings.md | 28 +++++++++++++++++++++++++
 src/Common/DateLUT.cpp                  |  3 +--
 3 files changed, 56 insertions(+), 2 deletions(-)

diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 1060eae1b0e..4e105124086 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -3939,3 +3939,30 @@ Default value: `0`.
 :::note
 Use this setting only for backward compatibility if your use cases depend on old syntax.
 :::
+
+## implicit_timezone {#implicit_timezone}
+
+If specified, sets an implicit timezone (instead of the server default). All DateTime/DateTime64 values (and/or function results) that have no explicit timezone specified are treated as having this timezone instead of the default.
+Examples: +``` +SELECT timeZone(), timeZoneOf(now()) +┌─timeZone()────┬─timeZoneOf(now())─┐ +│ Europe/Berlin │ Europe/Berlin │ +└───────────────┴───────────────────┘ + +:) SELECT timeZone(), timeZoneOf(now()) SETTINGS implicit_timezone = 'Asia/Novosibirsk' +┌─timeZone()────┬─timeZoneOf(now())─┐ +│ Europe/Berlin │ Asia/Novosibirsk │ +└───────────────┴───────────────────┘ + +SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Europe/Zurich') SETTINGS implicit_timezone = 'America/Denver'; +┌─toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Europe/Zurich')─┐ +│ 1999-12-13 07:23:23.123 │ +└──────────────────────────────────────────────────────────────────────────────┘ +``` + +Possible values: + +- Any valid timezone in `Region/Place` notation, e.g. `Europe/Berlin` + +Default value: `''`. \ No newline at end of file diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 4025966ac21..8d3f2706585 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -4084,3 +4084,31 @@ ALTER TABLE test FREEZE SETTINGS alter_partition_verbose_result = 1; Задает символ, который интерпретируется как суффикс после результирующего набора данных формата [CustomSeparated](../../interfaces/formats.md#format-customseparated). Значение по умолчанию: `''`. + +## implicit_timezone {#implicit_timezone} + +Задаёт значение часового пояса (timezone) по умолчанию для текущей сессии вместо часового пояса сервера. То есть, все значения DateTime/DateTime64, для которых явно не задан параметр timezone, будут интерпретированы как относящиеся к указанной зоне. + +Примеры: +``` +SELECT timeZone(), timeZoneOf(now()) +┌─timeZone()────┬─timeZoneOf(now())─┐ +│ Europe/Berlin │ Europe/Berlin │ +└───────────────┴───────────────────┘ + +:) SELECT timeZone(), timeZoneOf(now()) SETTINGS implicit_timezone = 'Asia/Novosibirsk' +┌─timeZone()────┬─timeZoneOf(now())─┐ +│ Europe/Berlin │ Asia/Novosibirsk │ +└───────────────┴───────────────────┘ + +SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Europe/Zurich') SETTINGS implicit_timezone = 'America/Denver'; +┌─toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Europe/Zurich')─┐ +│ 1999-12-13 07:23:23.123 │ +└──────────────────────────────────────────────────────────────────────────────┘ +``` + +Возможные значения: + +- Строка вида `Регион/Город`, например `Europe/Zurich` + +Значение по умолчанию: `''`. 
\ No newline at end of file
diff --git a/src/Common/DateLUT.cpp b/src/Common/DateLUT.cpp
index fd10b799b66..e309b0cb28a 100644
--- a/src/Common/DateLUT.cpp
+++ b/src/Common/DateLUT.cpp
@@ -8,7 +8,6 @@
 #include 
 #include 
 #include 
-#include 


namespace
@@ -149,7 +148,7 @@ DateLUT::DateLUT()
}


-const ALWAYS_INLINE DateLUTImpl & DateLUT::getImplementation(const std::string & time_zone) const
+const DateLUTImpl & DateLUT::getImplementation(const std::string & time_zone) const
{
    std::lock_guard lock(mutex);

From c61aff7cac2e5cc79dc4591d9228308e017e5b28 Mon Sep 17 00:00:00 2001
From: zvonand
Date: Thu, 23 Feb 2023 13:38:13 +0100
Subject: [PATCH 0027/1997] Added standalone function to get server's own
 timezone Fix missing global_context

---
 src/Common/DateLUT.h              | 10 ++++++++++
 src/Functions/serverConstants.cpp | 19 +++++++++++++++++--
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/src/Common/DateLUT.h b/src/Common/DateLUT.h
index b66821989e3..18ef5ee2e30 100644
--- a/src/Common/DateLUT.h
+++ b/src/Common/DateLUT.h
@@ -42,6 +42,16 @@ public:
                if (!implicit_timezone.empty())
                    return instance(implicit_timezone);
            }
+
+            const auto global_context = DB::CurrentThread::get().getGlobalContext();
+            if (global_context)
+            {
+                auto implicit_timezone = extractTimezoneFromContext(global_context);
+
+                if (!implicit_timezone.empty())
+                    return instance(implicit_timezone);
+            }
+
        }
        return *date_lut.default_impl.load(std::memory_order_acquire);
    }
diff --git a/src/Functions/serverConstants.cpp b/src/Functions/serverConstants.cpp
index 96615d0a4c9..a89e1564f28 100644
--- a/src/Functions/serverConstants.cpp
+++ b/src/Functions/serverConstants.cpp
@@ -60,13 +60,22 @@ namespace
    };


-    /// Returns the server time zone.
+    /// Returns the default timezone for the current session.
    class FunctionTimezone : public FunctionConstantBase
    {
    public:
        static constexpr auto name = "timezone";
        static FunctionPtr create(ContextPtr context) { return std::make_shared(context); }
-        explicit FunctionTimezone(ContextPtr context) : FunctionConstantBase(String{DateLUT::instance().getTimeZone()}, context->isDistributed()) {}
+        explicit FunctionTimezone(ContextPtr context) : FunctionConstantBase(String{DateLUT::instance("").getTimeZone()}, context->isDistributed()) {}
+    };
+
+    /// Returns the server time zone (the timezone in which the server runs).
+    class FunctionServerTimezone : public FunctionConstantBase
+    {
+    public:
+        static constexpr auto name = "serverTimezone";
+        static FunctionPtr create(ContextPtr context) { return std::make_shared(context); }
+        explicit FunctionServerTimezone(ContextPtr context) : FunctionConstantBase(String{DateLUT::instance().getTimeZone()}, context->isDistributed()) {}
    };


@@ -155,6 +164,12 @@ REGISTER_FUNCTION(Timezone)
    factory.registerAlias("timeZone", "timezone");
}

+REGISTER_FUNCTION(ServerTimezone)
+{
+    factory.registerFunction();
+    factory.registerAlias("serverTimeZone", "serverTimezone");
+}
+
REGISTER_FUNCTION(Uptime)
{
    factory.registerFunction();

From a9d0f7e7dbb281def311b22e4ae6300c73b5e979 Mon Sep 17 00:00:00 2001
From: zvonand
Date: Thu, 23 Feb 2023 19:14:49 +0100
Subject: [PATCH 0028/1997] Added docs for new serverTimeZone function Updated
 tests and docs

---
 docs/en/operations/settings/settings.md       | 28 ++++++++++---------
 .../functions/date-time-functions.md          | 21 +++++++++++++-
 docs/ru/operations/settings/settings.md       | 27 +++++++++---------
 .../functions/date-time-functions.md          | 21 +++++++++++++-
 src/Common/DateLUT.cpp                        |  3 +-
 src/Common/DateLUT.h                          |  8 ++++--
 src/Core/Settings.h                           |  2 +-
 src/Functions/serverConstants.cpp             | 23 +++++++++++++--
 ...rence => 02668_timezone_setting.reference} |  0
 ...imezone.sql => 02668_timezone_setting.sql} |  6 ++--
 10 files changed, 101 insertions(+), 38 deletions(-)
 rename tests/queries/0_stateless/{02668_implicit_timezone.reference => 02668_timezone_setting.reference} (100%)
 rename tests/queries/0_stateless/{02668_implicit_timezone.sql => 02668_timezone_setting.sql} (61%)

diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 4e105124086..c1c4483d341 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -3940,25 +3940,27 @@ Default value: `0`.
 Use this setting only for backward compatibility if your use cases depend on old syntax.
 :::
 
-## implicit_timezone {#implicit_timezone}
+## timezone {#timezone}
 
 If specified, sets an implicit timezone (instead of the server default). All DateTime/DateTime64 values (and/or function results) that have no explicit timezone specified are treated as having this timezone instead of the default.
Examples:
+
+```clickhouse
+SELECT timeZone(), serverTimezone() FORMAT TSV
+
+Europe/Berlin Europe/Berlin
 ```
-SELECT timeZone(), timeZoneOf(now())
-┌─timeZone()────┬─timeZoneOf(now())─┐
-│ Europe/Berlin │ Europe/Berlin     │
-└───────────────┴───────────────────┘

+```clickhouse
+SELECT timeZone(), serverTimezone() SETTINGS timezone = 'Asia/Novosibirsk' FORMAT TSV

-:) SELECT timeZone(), timeZoneOf(now()) SETTINGS implicit_timezone = 'Asia/Novosibirsk'
-┌─timeZone()────┬─timeZoneOf(now())─┐
-│ Europe/Berlin │ Asia/Novosibirsk  │
-└───────────────┴───────────────────┘
+Asia/Novosibirsk Europe/Berlin
+```

-SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Europe/Zurich') SETTINGS implicit_timezone = 'America/Denver';
-┌─toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Europe/Zurich')─┐
-│                                                      1999-12-13 07:23:23.123 │
-└──────────────────────────────────────────────────────────────────────────────┘
+```clickhouse
+SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Europe/Zurich') SETTINGS timezone = 'America/Denver' FORMAT TSV
+
+1999-12-13 07:23:23.123
 ```

 Possible values:

diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md
index f6af8abcbaf..8d31cb3872f 100644
--- a/docs/en/sql-reference/functions/date-time-functions.md
+++ b/docs/en/sql-reference/functions/date-time-functions.md
@@ -26,7 +26,7 @@ SELECT

 ## timeZone

-Returns the timezone of the server.
+Returns the default timezone of the server for the current session. This can be modified using `SET timezone = 'New/Value'`.
 If it is executed in the context of a distributed table, then it generates a normal column with values relevant to each shard. Otherwise it produces a constant value.

 **Syntax**

 ``` sql
 timeZone()
 ```

 Alias: `timezone`.

 **Returned value**

 - Timezone.

 Type: [String](../../sql-reference/data-types/string.md).

+## serverTimeZone
+
+Returns the actual timezone in which the server runs.
+If it is executed in the context of a distributed table, then it generates a normal column with values relevant to each shard. Otherwise it produces a constant value.
+
+**Syntax**
+
+``` sql
+serverTimeZone()
+```
+
+Aliases: `serverTimezone`, `servertimezone`.
+
+**Returned value**
+
+- Timezone.
+
+Type: [String](../../sql-reference/data-types/string.md).
+
 ## toTimeZone

 Converts time or date and time to the specified time zone. The time zone is an attribute of the `Date` and `DateTime` data types. The internal value (number of seconds) of the table field or of the resultset's column does not change, the column's type changes and its string representation changes accordingly.
diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index 8d3f2706585..dd1e9d98427 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -4085,26 +4085,27 @@

 Значение по умолчанию: `''`.

-## implicit_timezone {#implicit_timezone}
+## timezone {#timezone}

 Задаёт значение часового пояса (timezone) по умолчанию для текущей сессии вместо часового пояса сервера. То есть, все значения DateTime/DateTime64, для которых явно не задан параметр timezone, будут интерпретированы как относящиеся к указанной зоне.
Примеры: +```clickhouse +SELECT timeZone(), serverTimezone() FORMAT TSV + +Europe/Berlin Europe/Berlin ``` -SELECT timeZone(), timeZoneOf(now()) -┌─timeZone()────┬─timeZoneOf(now())─┐ -│ Europe/Berlin │ Europe/Berlin │ -└───────────────┴───────────────────┘ -:) SELECT timeZone(), timeZoneOf(now()) SETTINGS implicit_timezone = 'Asia/Novosibirsk' -┌─timeZone()────┬─timeZoneOf(now())─┐ -│ Europe/Berlin │ Asia/Novosibirsk │ -└───────────────┴───────────────────┘ +```clickhouse +SELECT timeZone(), serverTimezone() SETTINGS timezone = 'Asia/Novosibirsk' FORMAT TSV -SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Europe/Zurich') SETTINGS implicit_timezone = 'America/Denver'; -┌─toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Europe/Zurich')─┐ -│ 1999-12-13 07:23:23.123 │ -└──────────────────────────────────────────────────────────────────────────────┘ +Asia/Novosibirsk Europe/Berlin +``` + +```clickhouse +SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Europe/Zurich') SETTINGS timezone = 'America/Denver' FORMAT TSV + +1999-12-13 07:23:23.123 ``` Возможные значения: diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md index 8fbcaf9568b..77188ea2797 100644 --- a/docs/ru/sql-reference/functions/date-time-functions.md +++ b/docs/ru/sql-reference/functions/date-time-functions.md @@ -26,7 +26,7 @@ SELECT ## timeZone {#timezone} -Возвращает часовой пояс сервера. +Возвращает часовой пояс сервера, считающийся умолчанием для текущей сессии. Если функция вызывается в контексте распределенной таблицы, то она генерирует обычный столбец со значениями, актуальными для каждого шарда. Иначе возвращается константа. **Синтаксис** @@ -43,6 +43,25 @@ timeZone() Тип: [String](../../sql-reference/data-types/string.md). +## serverTimeZone {#servertimezone} + +Возвращает (истинный) часовой пояс сервера, в котором тот работает. +Если функция вызывается в контексте распределенной таблицы, то она генерирует обычный столбец со значениями, актуальными для каждого шарда. Иначе возвращается константа. + +**Синтаксис** + +``` sql +serverTimeZone() +``` + +Синонимы: `servertimezone`, `serverTimezone`. + +**Возвращаемое значение** + +- Часовой пояс. + +Тип: [String](../../sql-reference/data-types/string.md). + ## toTimeZone {#totimezone} Переводит дату или дату с временем в указанный часовой пояс. Часовой пояс - это атрибут типов `Date` и `DateTime`. Внутреннее значение (количество секунд) поля таблицы или результирующего столбца не изменяется, изменяется тип поля и, соответственно, его текстовое отображение. diff --git a/src/Common/DateLUT.cpp b/src/Common/DateLUT.cpp index e309b0cb28a..3698fe45aa7 100644 --- a/src/Common/DateLUT.cpp +++ b/src/Common/DateLUT.cpp @@ -167,6 +167,5 @@ DateLUT & DateLUT::getInstance() std::string DateLUT::extractTimezoneFromContext(const DB::ContextPtr query_context) { - std::string ret = query_context->getSettingsRef().implicit_timezone.value; - return ret; + return query_context->getSettingsRef().timezone.value; } diff --git a/src/Common/DateLUT.h b/src/Common/DateLUT.h index 18ef5ee2e30..a9ee61dc8ab 100644 --- a/src/Common/DateLUT.h +++ b/src/Common/DateLUT.h @@ -17,14 +17,18 @@ class DateLUT : private boost::noncopyable { public: - /// Return singleton DateLUTImpl instance for the default time zone. + /// Return singleton DateLUTImpl instance for server's timezone (the one which server has). 
static ALWAYS_INLINE const DateLUTImpl & instance()
    {
        const auto & date_lut = getInstance();
        return *date_lut.default_impl.load(std::memory_order_acquire);
    }

-    /// Return singleton DateLUTImpl instance for a given time zone.
+    /*
+      Return singleton DateLUTImpl instance for a given time zone. If the timezone is an empty string,
+      the timezone set by the `timezone` setting for the current session is used. If it is not set, the server's timezone is used,
+      and the result is the same as calling instance().
+    */
    static ALWAYS_INLINE const DateLUTImpl & instance(const std::string & time_zone)
    {
        const auto & date_lut = getInstance();
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index e3ed1b26269..e70b8c131b3 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -712,7 +712,7 @@ class IColumn;
    M(Float, insert_keeper_fault_injection_probability, 0.0f, "Approximate probability of failure for a keeper request during insert. Valid value is in interval [0.0f, 1.0f]", 0) \
    M(UInt64, insert_keeper_fault_injection_seed, 0, "0 - random seed, otherwise the setting value", 0) \
    M(Bool, force_aggregation_in_order, false, "Force use of aggregation in order on remote nodes during distributed aggregation. PLEASE, NEVER CHANGE THIS SETTING VALUE MANUALLY!", IMPORTANT) \
-    M(String, implicit_timezone, "", "Use specified timezone for interpreting Date and DateTime instead of server's timezone.", 0) \
+    M(String, timezone, "", "Use specified timezone for interpreting Date and DateTime instead of server's timezone.", 0) \

    // End of COMMON_SETTINGS
    // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS.
diff --git a/src/Functions/serverConstants.cpp b/src/Functions/serverConstants.cpp
index a89e1564f28..b57f7e40e07 100644
--- a/src/Functions/serverConstants.cpp
+++ b/src/Functions/serverConstants.cpp
@@ -160,14 +160,33 @@ REGISTER_FUNCTION(TcpPort)

REGISTER_FUNCTION(Timezone)
{
-    factory.registerFunction();
+    factory.registerFunction({
+        R"(
+Returns the default timezone for the current session.
+Used as the default timezone for parsing DateTime|DateTime64 without an explicitly specified timezone.
+Can be changed with SET timezone = 'New/Tz'
+
+[example:timezone]
+)",
+        Documentation::Examples{{"serverTimezone", "SELECT timezone();"}},
+        Documentation::Categories{"Constant", "Miscellaneous"}
+    });
    factory.registerAlias("timeZone", "timezone");
}

REGISTER_FUNCTION(ServerTimezone)
{
-    factory.registerFunction();
+    factory.registerFunction({
+        R"(
+Returns the timezone name in which the server operates.
+ +[example:serverTimezone] +)", + Documentation::Examples{{"serverTimezone", "SELECT serverTimezone();"}}, + Documentation::Categories{"Constant", "Miscellaneous"} + }); factory.registerAlias("serverTimeZone", "serverTimezone"); + factory.registerAlias("servertimezone", "serverTimezone"); } REGISTER_FUNCTION(Uptime) diff --git a/tests/queries/0_stateless/02668_implicit_timezone.reference b/tests/queries/0_stateless/02668_timezone_setting.reference similarity index 100% rename from tests/queries/0_stateless/02668_implicit_timezone.reference rename to tests/queries/0_stateless/02668_timezone_setting.reference diff --git a/tests/queries/0_stateless/02668_implicit_timezone.sql b/tests/queries/0_stateless/02668_timezone_setting.sql similarity index 61% rename from tests/queries/0_stateless/02668_implicit_timezone.sql rename to tests/queries/0_stateless/02668_timezone_setting.sql index 663b218d235..3748b536614 100644 --- a/tests/queries/0_stateless/02668_implicit_timezone.sql +++ b/tests/queries/0_stateless/02668_timezone_setting.sql @@ -1,9 +1,9 @@ -SET implicit_timezone = 'Asia/Novosibirsk'; +SET timezone = 'Asia/Novosibirsk'; SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Europe/Zurich'); -SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Europe/Zurich') SETTINGS implicit_timezone = 'Europe/Zurich'; +SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Europe/Zurich') SETTINGS timezone = 'Europe/Zurich'; -SET implicit_timezone = 'Europe/Zurich'; +SET timezone = 'Europe/Zurich'; SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Asia/Novosibirsk'); \ No newline at end of file From c3a6efe0310ec23521eb97c1b53c6616f72ba7a0 Mon Sep 17 00:00:00 2001 From: zvonand Date: Fri, 24 Feb 2023 20:51:12 +0100 Subject: [PATCH 0029/1997] update --- src/Client/ClientBase.cpp | 2 +- src/Common/DateLUT.h | 13 +++++++------ src/Functions/serverConstants.cpp | 2 +- src/IO/ReadHelpers.h | 8 ++++---- src/IO/WriteHelpers.h | 6 +++--- .../queries/0_stateless/02668_timezone_setting.sql | 6 +++--- 6 files changed, 19 insertions(+), 18 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index bc8c43af8c6..9ebe115d408 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -2139,7 +2139,7 @@ void ClientBase::runInteractive() initQueryIdFormats(); /// Initialize DateLUT here to avoid counting time spent here as query execution time. 
- const auto local_tz = DateLUT::instance().getTimeZone(); + const auto local_tz = DateLUT::instance("").getTimeZone(); suggest.emplace(); if (load_suggestions) diff --git a/src/Common/DateLUT.h b/src/Common/DateLUT.h index a9ee61dc8ab..29a4ee13d87 100644 --- a/src/Common/DateLUT.h +++ b/src/Common/DateLUT.h @@ -32,6 +32,7 @@ public: static ALWAYS_INLINE const DateLUTImpl & instance(const std::string & time_zone) { const auto & date_lut = getInstance(); + std::string effective_time_zone; if (time_zone.empty()) { @@ -41,19 +42,19 @@ public: if (query_context) { - auto implicit_timezone = extractTimezoneFromContext(query_context); + effective_time_zone = extractTimezoneFromContext(query_context); - if (!implicit_timezone.empty()) - return instance(implicit_timezone); + if (!effective_time_zone.empty()) + return date_lut.getImplementation(effective_time_zone); } const auto global_context = DB::CurrentThread::get().getGlobalContext(); if (global_context) { - auto implicit_timezone = extractTimezoneFromContext(global_context); + effective_time_zone = extractTimezoneFromContext(global_context); - if (!implicit_timezone.empty()) - return instance(implicit_timezone); + if (!effective_time_zone.empty()) + return date_lut.getImplementation(effective_time_zone); } } diff --git a/src/Functions/serverConstants.cpp b/src/Functions/serverConstants.cpp index b57f7e40e07..ea74d7a89bb 100644 --- a/src/Functions/serverConstants.cpp +++ b/src/Functions/serverConstants.cpp @@ -168,7 +168,7 @@ Can be changed with SET timezone = 'New/Tz' [example:timezone] )", - Documentation::Examples{{"serverTimezone", "SELECT timezone();"}}, + Documentation::Examples{{"timezone", "SELECT timezone();"}}, Documentation::Categories{"Constant", "Miscellaneous"} }); factory.registerAlias("timeZone", "timezone"); diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index f8931a7f622..9f5358ee141 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -1032,22 +1032,22 @@ inline ReturnType readDateTimeTextImpl(DateTime64 & datetime64, UInt32 scale, Re return ReturnType(is_ok); } -inline void readDateTimeText(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance()) +inline void readDateTimeText(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance("")) { readDateTimeTextImpl(datetime, buf, time_zone); } -inline void readDateTime64Text(DateTime64 & datetime64, UInt32 scale, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance()) +inline void readDateTime64Text(DateTime64 & datetime64, UInt32 scale, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance("")) { readDateTimeTextImpl(datetime64, scale, buf, date_lut); } -inline bool tryReadDateTimeText(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance()) +inline bool tryReadDateTimeText(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance("")) { return readDateTimeTextImpl(datetime, buf, time_zone); } -inline bool tryReadDateTime64Text(DateTime64 & datetime64, UInt32 scale, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance()) +inline bool tryReadDateTime64Text(DateTime64 & datetime64, UInt32 scale, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance("")) { return readDateTimeTextImpl(datetime64, scale, buf, date_lut); } diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index 1c0b48c53c3..d408e2bed42 100644 --- a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -755,14 
+755,14 @@ inline void writeDateTimeText(const LocalDateTime & datetime, WriteBuffer & buf) /// In the format YYYY-MM-DD HH:MM:SS, according to the specified time zone. template -inline void writeDateTimeText(time_t datetime, WriteBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance()) +inline void writeDateTimeText(time_t datetime, WriteBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance("")) { writeDateTimeText(LocalDateTime(datetime, time_zone), buf); } /// In the format YYYY-MM-DD HH:MM:SS.NNNNNNNNN, according to the specified time zone. template -inline void writeDateTimeText(DateTime64 datetime64, UInt32 scale, WriteBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance()) +inline void writeDateTimeText(DateTime64 datetime64, UInt32 scale, WriteBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance("")) { static constexpr UInt32 MaxScale = DecimalUtils::max_precision; scale = scale > MaxScale ? MaxScale : scale; @@ -796,7 +796,7 @@ inline void writeDateTimeText(DateTime64 datetime64, UInt32 scale, WriteBuffer & /// In the RFC 1123 format: "Tue, 03 Dec 2019 00:11:50 GMT". You must provide GMT DateLUT. /// This is needed for HTTP requests. -inline void writeDateTimeTextRFC1123(time_t datetime, WriteBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance()) +inline void writeDateTimeTextRFC1123(time_t datetime, WriteBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance("")) { const auto & values = time_zone.getValues(datetime); diff --git a/tests/queries/0_stateless/02668_timezone_setting.sql b/tests/queries/0_stateless/02668_timezone_setting.sql index 3748b536614..f331ab58307 100644 --- a/tests/queries/0_stateless/02668_timezone_setting.sql +++ b/tests/queries/0_stateless/02668_timezone_setting.sql @@ -1,9 +1,9 @@ SET timezone = 'Asia/Novosibirsk'; - SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Europe/Zurich'); - SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Europe/Zurich') SETTINGS timezone = 'Europe/Zurich'; SET timezone = 'Europe/Zurich'; +SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Asia/Novosibirsk'); -SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Asia/Novosibirsk'); \ No newline at end of file +SET timezone = 'Абырвалг'; +select now(); -- { serverError POCO_EXCEPTION } \ No newline at end of file From 5781eb67cba3e827ecf47b7929c47777a6e48094 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 26 Feb 2023 01:28:13 +0300 Subject: [PATCH 0030/1997] Update test.py --- tests/integration/test_attach_table_normalizer/test.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_attach_table_normalizer/test.py b/tests/integration/test_attach_table_normalizer/test.py index f2d99588b94..526da39935a 100644 --- a/tests/integration/test_attach_table_normalizer/test.py +++ b/tests/integration/test_attach_table_normalizer/test.py @@ -17,8 +17,7 @@ def started_cluster(): def replace_substring_to_substr(node): node.exec_in_container(["bash", "-c", "sed -i 's/substring/substr/g' /var/lib/clickhouse/metadata/default/file.sql"], user="root") -@pytest.mark.parametrize("engine", ['Ordinary', 'Atomic']) -def test_attach_substr(started_cluster, engine): +def test_attach_substr(started_cluster): # Initialize node.query("DROP TABLE IF EXISTS default.file") node.query("CREATE TABLE default.file(`s` String, `n` UInt8) ENGINE = MergeTree PARTITION BY substring(s, 1, 2) ORDER BY n ") @@ -32,8 +31,7 @@ def 
test_attach_substr(started_cluster, engine): # Attach table file node.query("ATTACH TABLE file") -@pytest.mark.parametrize("engine", ['Ordinary', 'Atomic']) -def test_attach_substr_restart(started_cluster, engine): +def test_attach_substr_restart(started_cluster): # Initialize node.query("DROP TABLE IF EXISTS default.file") node.query("CREATE TABLE default.file(`s` String, `n` UInt8) ENGINE = MergeTree PARTITION BY substring(s, 1, 2) ORDER BY n ") From 63982a20936bb384a4c4f88f9e4ed2282680e33b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 26 Feb 2023 01:29:29 +0300 Subject: [PATCH 0031/1997] Delete config.xml --- .../test_attach_table_normalizer/configs/config.xml | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 tests/integration/test_attach_table_normalizer/configs/config.xml diff --git a/tests/integration/test_attach_table_normalizer/configs/config.xml b/tests/integration/test_attach_table_normalizer/configs/config.xml deleted file mode 100644 index 0500e2ad554..00000000000 --- a/tests/integration/test_attach_table_normalizer/configs/config.xml +++ /dev/null @@ -1,4 +0,0 @@ - - 1 - 1 - From 0e01991eb7b1331d2fca09c94b3e41fdd5c32bb3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 26 Feb 2023 04:33:56 +0300 Subject: [PATCH 0032/1997] Update test.py --- .../test_attach_table_normalizer/test.py | 24 +++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_attach_table_normalizer/test.py b/tests/integration/test_attach_table_normalizer/test.py index 526da39935a..ddbb02bf4ef 100644 --- a/tests/integration/test_attach_table_normalizer/test.py +++ b/tests/integration/test_attach_table_normalizer/test.py @@ -3,7 +3,9 @@ import pytest from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -node = cluster.add_instance('node', main_configs=["configs/config.xml"], with_zookeeper=True, stay_alive=True) +node = cluster.add_instance( + 'node', main_configs=["configs/config.xml"], with_zookeeper=True, stay_alive=True +) @pytest.fixture(scope="module") def started_cluster(): try: cluster.start() yield cluster finally: cluster.shutdown() + def replace_substring_to_substr(node): node.exec_in_container( + [ + "bash", + "-c", + "sed -i 's/substring/substr/g' /var/lib/clickhouse/metadata/default/file.sql", + ], + user="root", + ) + def test_attach_substr(started_cluster): # Initialize node.query("DROP TABLE IF EXISTS default.file") - node.query("CREATE TABLE default.file(`s` String, `n` UInt8) ENGINE = MergeTree PARTITION BY substring(s, 1, 2) ORDER BY n ") + node.query( + "CREATE TABLE default.file(`s` String, `n` UInt8) ENGINE = MergeTree PARTITION BY substring(s, 1, 2) ORDER BY n " + ) # Detach table file node.query("DETACH TABLE file") # Replace substring to substr replace_substring_to_substr(node) # Attach table file node.query("ATTACH TABLE file") + def test_attach_substr_restart(started_cluster): # Initialize node.query("DROP TABLE IF EXISTS default.file") - node.query("CREATE TABLE default.file(`s` String, `n` UInt8) ENGINE = MergeTree PARTITION BY substring(s, 1, 2) ORDER BY n ") + node.query( + "CREATE TABLE default.file(`s` String, `n` UInt8) ENGINE = MergeTree PARTITION BY substring(s, 1, 2) ORDER BY n " + ) # Replace substring to substr replace_substring_to_substr(node) # Restart clickhouse node.restart_clickhouse(kill=True) From 6a996f552b9cf70f88d2a6c7c8f1ef2780268666 Mon Sep 17 00:00:00 2001 From: zvonand Date: Sun, 26 Feb
2023 10:06:27 +0100 Subject: [PATCH 0033/1997] update undocumented funcs reference --- .../02415_all_new_functions_must_be_documented.reference | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index e41249af54c..ce14ee871f5 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -683,7 +683,6 @@ throwIf tid timeSlot timeSlots -timezone timezoneOf timezoneOffset toBool From a69425326de20dcf5814c39a0962023cabec27ec Mon Sep 17 00:00:00 2001 From: zvonand Date: Mon, 27 Feb 2023 01:40:00 +0100 Subject: [PATCH 0034/1997] upd --- src/Client/ClientBase.cpp | 2 +- src/Common/DateLUT.h | 59 +++++++++++++++---------------- src/DataTypes/TimezoneMixin.h | 2 +- src/Functions/serverConstants.cpp | 4 +-- src/IO/ReadHelpers.h | 8 ++--- src/IO/WriteHelpers.h | 6 ++-- 6 files changed, 39 insertions(+), 42 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index a335dca0602..96aff9aa304 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -2139,7 +2139,7 @@ void ClientBase::runInteractive() initQueryIdFormats(); /// Initialize DateLUT here to avoid counting time spent here as query execution time. - const auto local_tz = DateLUT::instance("").getTimeZone(); + const auto local_tz = DateLUT::instance().getTimeZone(); suggest.emplace(); if (load_suggestions) diff --git a/src/Common/DateLUT.h b/src/Common/DateLUT.h index 29a4ee13d87..efbf56b59b2 100644 --- a/src/Common/DateLUT.h +++ b/src/Common/DateLUT.h @@ -17,49 +17,46 @@ class DateLUT : private boost::noncopyable { public: - /// Return singleton DateLUTImpl instance for server's timezone (the one which server has). + /// Return singleton DateLUTImpl instance for the timezone set by the `timezone` setting of the current session. + /// If it is not set, the server's timezone (the one which the server has) is used. static ALWAYS_INLINE const DateLUTImpl & instance() { + std::string effective_time_zone; const auto & date_lut = getInstance(); + + if (DB::CurrentThread::isInitialized()) + { + const auto query_context = DB::CurrentThread::get().getQueryContext(); + + if (query_context) + { + effective_time_zone = extractTimezoneFromContext(query_context); + + if (!effective_time_zone.empty()) + return date_lut.getImplementation(effective_time_zone); + } + + const auto global_context = DB::CurrentThread::get().getGlobalContext(); + if (global_context) + { + effective_time_zone = extractTimezoneFromContext(global_context); + + if (!effective_time_zone.empty()) + return date_lut.getImplementation(effective_time_zone); + } + + } return *date_lut.default_impl.load(std::memory_order_acquire); } - /* - Return singleton DateLUTImpl instance for a given time zone. If timezone is an empty string, - timezone set by `timezone` setting for current session is used. If it is not set, server's timezone is used, - and return is the same as calling instance(). - */ + /// Return singleton DateLUTImpl instance for a given time zone. If timezone is an empty string, + /// server's timezone is used. The `timezone` setting is not considered here.
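// ---- editorial note (not part of the patch) ----
// A minimal usage sketch of the two overloads in this hunk; the helper names
// below are illustrative, only DateLUT::instance()/instance(time_zone) and
// DateLUTImpl come from the code above.
#include <Common/DateLUT.h>

inline const DateLUTImpl & sessionAwareLut()
{
    // Honours the `timezone` setting of the current query or global context
    // and falls back to the server default when the setting is unset.
    return DateLUT::instance();
}

inline const DateLUTImpl & explicitLut()
{
    // Resolves a named zone directly; after this commit an empty string means
    // the server default and deliberately bypasses the `timezone` setting.
    return DateLUT::instance("");
}
// ---- end editorial note ----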
static ALWAYS_INLINE const DateLUTImpl & instance(const std::string & time_zone) { const auto & date_lut = getInstance(); - std::string effective_time_zone; if (time_zone.empty()) - { - if (DB::CurrentThread::isInitialized()) - { - const auto query_context = DB::CurrentThread::get().getQueryContext(); - - if (query_context) - { - effective_time_zone = extractTimezoneFromContext(query_context); - - if (!effective_time_zone.empty()) - return date_lut.getImplementation(effective_time_zone); - } - - const auto global_context = DB::CurrentThread::get().getGlobalContext(); - if (global_context) - { - effective_time_zone = extractTimezoneFromContext(global_context); - - if (!effective_time_zone.empty()) - return date_lut.getImplementation(effective_time_zone); - } - - } return *date_lut.default_impl.load(std::memory_order_acquire); - } return date_lut.getImplementation(time_zone); } diff --git a/src/DataTypes/TimezoneMixin.h b/src/DataTypes/TimezoneMixin.h index 03ecde5dd0a..5b7870c7b9a 100644 --- a/src/DataTypes/TimezoneMixin.h +++ b/src/DataTypes/TimezoneMixin.h @@ -15,7 +15,7 @@ public: explicit TimezoneMixin(const String & time_zone_name = "") : has_explicit_time_zone(!time_zone_name.empty()) - , time_zone(DateLUT::instance(time_zone_name)) + , time_zone(time_zone_name.empty() ? DateLUT::instance() : DateLUT::instance(time_zone_name)) , utc_time_zone(DateLUT::instance("UTC")) { } diff --git a/src/Functions/serverConstants.cpp b/src/Functions/serverConstants.cpp index ea74d7a89bb..57a6279bd7a 100644 --- a/src/Functions/serverConstants.cpp +++ b/src/Functions/serverConstants.cpp @@ -66,7 +66,7 @@ namespace public: static constexpr auto name = "timezone"; static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - explicit FunctionTimezone(ContextPtr context) : FunctionConstantBase(String{DateLUT::instance("").getTimeZone()}, context->isDistributed()) {} + explicit FunctionTimezone(ContextPtr context) : FunctionConstantBase(String{DateLUT::instance().getTimeZone()}, context->isDistributed()) {} }; /// Returns the server time zone (timezone in which server runs). 
@@ -75,7 +75,7 @@ namespace public: static constexpr auto name = "serverTimezone"; static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - explicit FunctionServerTimezone(ContextPtr context) : FunctionConstantBase(String{DateLUT::instance().getTimeZone()}, context->isDistributed()) {} + explicit FunctionServerTimezone(ContextPtr context) : FunctionConstantBase(String{DateLUT::instance("").getTimeZone()}, context->isDistributed()) {} }; diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index 9f5358ee141..f8931a7f622 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -1032,22 +1032,22 @@ inline ReturnType readDateTimeTextImpl(DateTime64 & datetime64, UInt32 scale, Re return ReturnType(is_ok); } -inline void readDateTimeText(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance("")) +inline void readDateTimeText(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance()) { readDateTimeTextImpl(datetime, buf, time_zone); } -inline void readDateTime64Text(DateTime64 & datetime64, UInt32 scale, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance("")) +inline void readDateTime64Text(DateTime64 & datetime64, UInt32 scale, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance()) { readDateTimeTextImpl(datetime64, scale, buf, date_lut); } -inline bool tryReadDateTimeText(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance("")) +inline bool tryReadDateTimeText(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance()) { return readDateTimeTextImpl(datetime, buf, time_zone); } -inline bool tryReadDateTime64Text(DateTime64 & datetime64, UInt32 scale, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance("")) +inline bool tryReadDateTime64Text(DateTime64 & datetime64, UInt32 scale, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance()) { return readDateTimeTextImpl(datetime64, scale, buf, date_lut); } diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index d408e2bed42..1c0b48c53c3 100644 --- a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -755,14 +755,14 @@ inline void writeDateTimeText(const LocalDateTime & datetime, WriteBuffer & buf) /// In the format YYYY-MM-DD HH:MM:SS, according to the specified time zone. template -inline void writeDateTimeText(time_t datetime, WriteBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance("")) +inline void writeDateTimeText(time_t datetime, WriteBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance()) { writeDateTimeText(LocalDateTime(datetime, time_zone), buf); } /// In the format YYYY-MM-DD HH:MM:SS.NNNNNNNNN, according to the specified time zone. template -inline void writeDateTimeText(DateTime64 datetime64, UInt32 scale, WriteBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance("")) +inline void writeDateTimeText(DateTime64 datetime64, UInt32 scale, WriteBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance()) { static constexpr UInt32 MaxScale = DecimalUtils::max_precision; scale = scale > MaxScale ? MaxScale : scale; @@ -796,7 +796,7 @@ inline void writeDateTimeText(DateTime64 datetime64, UInt32 scale, WriteBuffer & /// In the RFC 1123 format: "Tue, 03 Dec 2019 00:11:50 GMT". You must provide GMT DateLUT. /// This is needed for HTTP requests. 
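// ---- editorial note (not part of the patch) ----
// The comment above still requires a GMT LUT, while this change makes the
// default argument the session-aware DateLUT::instance(). A hedged sketch of
// a safer HTTP call site that pins UTC explicitly (`writeHttpDate` is
// illustrative, not from the patch):
#include <ctime>
#include <IO/WriteHelpers.h>

inline void writeHttpDate(time_t t, DB::WriteBuffer & out)
{
    // RFC 1123 dates are defined in GMT, so never rely on the session default.
    DB::writeDateTimeTextRFC1123(t, out, DateLUT::instance("UTC"));
}
// ---- end editorial note ----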
-inline void writeDateTimeTextRFC1123(time_t datetime, WriteBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance("")) +inline void writeDateTimeTextRFC1123(time_t datetime, WriteBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance()) { const auto & values = time_zone.getValues(datetime); From f3e19144d81449c1e2bdec52ebc38e85ea1e8ee9 Mon Sep 17 00:00:00 2001 From: zvonand Date: Mon, 27 Feb 2023 14:38:15 +0100 Subject: [PATCH 0035/1997] update --- src/Interpreters/executeQuery.cpp | 3 ++- src/Loggers/OwnPatternFormatter.cpp | 2 +- src/Loggers/OwnPatternFormatter.h | 2 ++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 435401796a0..cda7ec2b0d3 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -49,6 +49,7 @@ #include #include #include +#include #include #include #include @@ -1279,7 +1280,7 @@ void executeQuery( QueryResultDetails result_details { .query_id = context->getClientInfo().current_query_id, - .timezone = DateLUT::instance().getTimeZone(), + .timezone = DateLUT::instance("").getTimeZone(), }; std::unique_ptr compressed_buffer; diff --git a/src/Loggers/OwnPatternFormatter.cpp b/src/Loggers/OwnPatternFormatter.cpp index 02a2c2e510b..54d2b995d15 100644 --- a/src/Loggers/OwnPatternFormatter.cpp +++ b/src/Loggers/OwnPatternFormatter.cpp @@ -22,7 +22,7 @@ void OwnPatternFormatter::formatExtended(const DB::ExtendedLogMessage & msg_ext, const Poco::Message & msg = msg_ext.base; /// Change delimiters in date for compatibility with old logs. - DB::writeDateTimeText<'.', ':'>(msg_ext.time_seconds, wb); + DB::writeDateTimeText<'.', ':'>(msg_ext.time_seconds, wb, server_timezone); DB::writeChar('.', wb); DB::writeChar('0' + ((msg_ext.time_microseconds / 100000) % 10), wb); diff --git a/src/Loggers/OwnPatternFormatter.h b/src/Loggers/OwnPatternFormatter.h index d776b097cb2..07d0409b0ae 100644 --- a/src/Loggers/OwnPatternFormatter.h +++ b/src/Loggers/OwnPatternFormatter.h @@ -2,6 +2,7 @@ #include +#include #include "ExtendedLogChannel.h" @@ -30,5 +31,6 @@ public: virtual void formatExtended(const DB::ExtendedLogMessage & msg_ext, std::string & text) const; private: + const DateLUTImpl & server_timezone = DateLUT::instance(""); bool color; }; From 548d79c2e80bb23f246c63fc7e33d0c01eb6b944 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Thu, 2 Mar 2023 12:31:09 +0000 Subject: [PATCH 0036/1997] Remove perf test duplicate_order_by_and_distinct.xml --- tests/performance/duplicate_order_by_and_distinct.xml | 8 -------- 1 file changed, 8 deletions(-) delete mode 100644 tests/performance/duplicate_order_by_and_distinct.xml diff --git a/tests/performance/duplicate_order_by_and_distinct.xml b/tests/performance/duplicate_order_by_and_distinct.xml deleted file mode 100644 index e36bc470512..00000000000 --- a/tests/performance/duplicate_order_by_and_distinct.xml +++ /dev/null @@ -1,8 +0,0 @@ - - 1 - - - SELECT * FROM (SELECT CounterID, EventDate FROM hits_10m_single ORDER BY CounterID DESC) ORDER BY EventDate, CounterID FORMAT Null - SELECT DISTINCT * FROM (SELECT DISTINCT CounterID, EventDate FROM hits_10m_single) FORMAT Null - SELECT DISTINCT * FROM (SELECT DISTINCT CounterID, EventDate FROM hits_10m_single ORDER BY CounterID DESC) ORDER BY toStartOfWeek(EventDate) FORMAT Null - From f2fbf2d61e8ede663ba37065d8ea8fe9b430de3e Mon Sep 17 00:00:00 2001 From: zvonand Date: Mon, 6 Mar 2023 02:52:05 +0100 Subject: [PATCH 0037/1997] tcp protocol modification (min 
revision to be updated) --- src/Client/ClientBase.cpp | 4 ++++ src/Client/Connection.cpp | 5 +++++ src/Client/IServerConnection.h | 2 ++ src/Client/MultiplexedConnections.cpp | 2 ++ src/Client/Suggest.cpp | 1 + src/Common/DateLUT.h | 1 + src/Core/Protocol.h | 4 +++- src/Core/ProtocolDefines.h | 2 ++ src/Interpreters/executeQuery.cpp | 2 +- src/Server/TCPHandler.cpp | 12 ++++++++++++ src/Server/TCPHandler.h | 1 + 11 files changed, 34 insertions(+), 2 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 96aff9aa304..65d04a6bb9d 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1020,6 +1020,10 @@ bool ClientBase::receiveAndProcessPacket(ASTPtr parsed_query, bool cancelled_) onProfileEvents(packet.block); return true; + case Protocol::Server::TimezoneUpdate: + DateLUT::setDefaultTimezone(packet.server_timezone); + return true; + default: throw Exception( ErrorCodes::UNKNOWN_PACKET_FROM_SERVER, "Unknown packet {} from server {}", packet.type, connection->getDescription()); diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index eea007a8608..87e9e20e8f7 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -972,6 +972,11 @@ Packet Connection::receivePacket() res.block = receiveProfileEvents(); return res; + case Protocol::Server::TimezoneUpdate: + readStringBinary(server_timezone, *in); + res.server_timezone = server_timezone; + return res; + default: /// In unknown state, disconnect - to not leave unsynchronised connection. disconnect(); diff --git a/src/Client/IServerConnection.h b/src/Client/IServerConnection.h index cd4db8f5258..52382ff9d45 100644 --- a/src/Client/IServerConnection.h +++ b/src/Client/IServerConnection.h @@ -38,6 +38,8 @@ struct Packet ParallelReadRequest request; ParallelReadResponse response; + std::string server_timezone; + Packet() : type(Protocol::Server::Hello) {} }; diff --git a/src/Client/MultiplexedConnections.cpp b/src/Client/MultiplexedConnections.cpp index cc260353339..668833b2a84 100644 --- a/src/Client/MultiplexedConnections.cpp +++ b/src/Client/MultiplexedConnections.cpp @@ -258,6 +258,7 @@ Packet MultiplexedConnections::drain() switch (packet.type) { + case Protocol::Server::TimezoneUpdate: case Protocol::Server::MergeTreeAllRangesAnnounecement: case Protocol::Server::MergeTreeReadTaskRequest: case Protocol::Server::ReadTaskRequest: @@ -339,6 +340,7 @@ Packet MultiplexedConnections::receivePacketUnlocked(AsyncCallback async_callbac switch (packet.type) { + case Protocol::Server::TimezoneUpdate: case Protocol::Server::MergeTreeAllRangesAnnounecement: case Protocol::Server::MergeTreeReadTaskRequest: case Protocol::Server::ReadTaskRequest: diff --git a/src/Client/Suggest.cpp b/src/Client/Suggest.cpp index 7027f35d21a..4a29bead540 100644 --- a/src/Client/Suggest.cpp +++ b/src/Client/Suggest.cpp @@ -158,6 +158,7 @@ void Suggest::fetch(IServerConnection & connection, const ConnectionTimeouts & t fillWordsFromBlock(packet.block); continue; + case Protocol::Server::TimezoneUpdate: case Protocol::Server::Progress: case Protocol::Server::ProfileInfo: case Protocol::Server::Totals: diff --git a/src/Common/DateLUT.h b/src/Common/DateLUT.h index efbf56b59b2..f17fe772dbc 100644 --- a/src/Common/DateLUT.h +++ b/src/Common/DateLUT.h @@ -60,6 +60,7 @@ public: return date_lut.getImplementation(time_zone); } + static void setDefaultTimezone(const std::string & time_zone) { auto & date_lut = getInstance(); diff --git a/src/Core/Protocol.h b/src/Core/Protocol.h index 
86c0a851c60..97a2831ffe8 100644 --- a/src/Core/Protocol.h +++ b/src/Core/Protocol.h @@ -83,7 +83,8 @@ namespace Protocol ProfileEvents = 14, /// Packet with profile events from server. MergeTreeAllRangesAnnounecement = 15, MergeTreeReadTaskRequest = 16, /// Request from a MergeTree replica to a coordinator - MAX = MergeTreeReadTaskRequest, + TimezoneUpdate = 17, /// Receive server's (session-wide) default timezone + MAX = TimezoneUpdate, }; @@ -111,6 +112,7 @@ namespace Protocol "ProfileEvents", "MergeTreeAllRangesAnnounecement", "MergeTreeReadTaskRequest", + "TimezoneUpdate", }; return packet <= MAX ? data[packet] diff --git a/src/Core/ProtocolDefines.h b/src/Core/ProtocolDefines.h index 3bbfb95f020..e56ae0305cc 100644 --- a/src/Core/ProtocolDefines.h +++ b/src/Core/ProtocolDefines.h @@ -72,3 +72,5 @@ #define DBMS_MIN_PROTOCOL_VERSION_WITH_SERVER_QUERY_TIME_IN_PROGRESS 54460 #define DBMS_MIN_PROTOCOL_VERSION_WITH_PASSWORD_COMPLEXITY_RULES 54461 + +#define DBMS_MIN_PROTOCOL_VERSION_WITH_TIMEZONE_UPDATES 54461 diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index cda7ec2b0d3..85e623dc17d 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -1280,7 +1280,7 @@ void executeQuery( QueryResultDetails result_details { .query_id = context->getClientInfo().current_query_id, - .timezone = DateLUT::instance("").getTimeZone(), + .timezone = DateLUT::instance().getTimeZone(), }; std::unique_ptr compressed_buffer; diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index a307b472a64..9bb11f34916 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -446,6 +446,7 @@ void TCPHandler::runImpl() sendSelectProfileEvents(); sendLogs(); + return false; }; @@ -483,6 +484,9 @@ void TCPHandler::runImpl() { std::lock_guard lock(task_callback_mutex); sendLogs(); + if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_TIMEZONE_UPDATES + && client_tcp_protocol_version >= DBMS_MIN_REVISION_WITH_SERVER_TIMEZONE) + sendTimezone(); sendEndOfStream(); } @@ -1035,6 +1039,14 @@ void TCPHandler::sendInsertProfileEvents() sendProfileEvents(); } +void TCPHandler::sendTimezone() +{ + writeVarUInt(Protocol::Server::TimezoneUpdate, *out); + writeStringBinary(DateLUT::instance().getTimeZone(), *out); + out->next(); +} + + bool TCPHandler::receiveProxyHeader() { if (in->eof()) diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index f06b0b060b3..b19f908bc27 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -262,6 +262,7 @@ private: void sendProfileEvents(); void sendSelectProfileEvents(); void sendInsertProfileEvents(); + void sendTimezone(); /// Creates state.block_in/block_out for blocks read/write, depending on whether compression is enabled. 
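// ---- editorial note (not part of the patch) ----
// On the wire the new packet is just a varint type tag followed by one binary
// string. A hedged sketch of the matching client read path, mirroring the
// Connection::receivePacket() hunk earlier in this commit (the dispatch loop
// is assumed to have already consumed the Protocol::Server::TimezoneUpdate tag):
#include <string>
#include <IO/ReadHelpers.h>

inline void readTimezoneUpdate(DB::ReadBuffer & in, std::string & server_timezone)
{
    // Only the payload remains after the tag: the server's effective timezone name.
    DB::readStringBinary(server_timezone, in);
}
// ---- end editorial note ----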
void initBlockInput(); From 0706108b683ab5d67885b81a16b24a76c4d59513 Mon Sep 17 00:00:00 2001 From: zvonand Date: Mon, 6 Mar 2023 11:16:53 +0100 Subject: [PATCH 0038/1997] typo fix --- src/Interpreters/executeQuery.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 85e623dc17d..435401796a0 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -49,7 +49,6 @@ #include #include #include -#include #include #include #include From 57c5a637217779fbcc999cbaa5bd965f8892d092 Mon Sep 17 00:00:00 2001 From: zvonand Date: Mon, 6 Mar 2023 16:39:47 +0100 Subject: [PATCH 0039/1997] fix receive of timezone update on processing --- src/Client/ClientBase.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 65d04a6bb9d..7ca6bbed6ba 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1188,6 +1188,10 @@ bool ClientBase::receiveSampleBlock(Block & out, ColumnsDescription & columns_de columns_description = ColumnsDescription::parse(packet.multistring_message[1]); return receiveSampleBlock(out, columns_description, parsed_query); + case Protocol::Server::TimezoneUpdate: + DateLUT::setDefaultTimezone(packet.server_timezone); + break; + default: throw NetException(ErrorCodes::UNEXPECTED_PACKET_FROM_SERVER, "Unexpected packet from server (expected Data, Exception or Log, got {})", @@ -1533,6 +1537,10 @@ bool ClientBase::receiveEndOfQuery() onProfileEvents(packet.block); break; + case Protocol::Server::TimezoneUpdate: + DateLUT::setDefaultTimezone(packet.server_timezone); + break; + default: throw NetException(ErrorCodes::UNEXPECTED_PACKET_FROM_SERVER, "Unexpected packet from server (expected Exception, EndOfStream, Log, Progress or ProfileEvents. Got {})", From d93937cc5e92ae4612259e9e57bca15489aabc8f Mon Sep 17 00:00:00 2001 From: zvonand Date: Mon, 6 Mar 2023 16:45:38 +0100 Subject: [PATCH 0040/1997] increment protocol version --- src/Core/ProtocolDefines.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Core/ProtocolDefines.h b/src/Core/ProtocolDefines.h index e56ae0305cc..5483489d5c4 100644 --- a/src/Core/ProtocolDefines.h +++ b/src/Core/ProtocolDefines.h @@ -54,7 +54,7 @@ /// NOTE: DBMS_TCP_PROTOCOL_VERSION has nothing common with VERSION_REVISION, /// later is just a number for server version (one number instead of commit SHA) /// for simplicity (sometimes it may be more convenient in some use cases). 
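// ---- editorial note (not part of the patch) ----
// Why the bump below matters: the server only emits TimezoneUpdate to clients
// whose negotiated revision is at least the feature constant, so pre-54462
// clients keep the old wire format. A hedged sketch of that guard, condensed
// from the TCPHandler hunks in this series (the real check also tests
// DBMS_MIN_REVISION_WITH_SERVER_TIMEZONE; `maybeSendTimezone` is illustrative):

void maybeSendTimezone(UInt64 client_tcp_protocol_version)
{
    if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_TIMEZONE_UPDATES)
        sendTimezone();   // otherwise stay silent and keep the old wire format
}
// ---- end editorial note ----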
-#define DBMS_TCP_PROTOCOL_VERSION 54461 +#define DBMS_TCP_PROTOCOL_VERSION 54462 #define DBMS_MIN_PROTOCOL_VERSION_WITH_INITIAL_QUERY_START_TIME 54449 @@ -73,4 +73,4 @@ #define DBMS_MIN_PROTOCOL_VERSION_WITH_PASSWORD_COMPLEXITY_RULES 54461 -#define DBMS_MIN_PROTOCOL_VERSION_WITH_TIMEZONE_UPDATES 54461 +#define DBMS_MIN_PROTOCOL_VERSION_WITH_TIMEZONE_UPDATES 54462 From 3a918ae66a984451e0db0f56ffa6232b897ad62f Mon Sep 17 00:00:00 2001 From: zvonand Date: Tue, 7 Mar 2023 02:33:46 +0100 Subject: [PATCH 0041/1997] revert protocol changes, found better way --- src/Client/ClientBase.cpp | 25 ++++++++++++------------- src/Client/Connection.cpp | 5 ----- src/Client/MultiplexedConnections.cpp | 2 -- src/Client/Suggest.cpp | 1 - src/Core/Protocol.h | 4 +--- src/Core/ProtocolDefines.h | 4 +--- src/Server/TCPHandler.cpp | 10 ---------- src/Server/TCPHandler.h | 1 - 8 files changed, 14 insertions(+), 38 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 7ca6bbed6ba..09c510f01f3 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1020,10 +1020,6 @@ bool ClientBase::receiveAndProcessPacket(ASTPtr parsed_query, bool cancelled_) onProfileEvents(packet.block); return true; - case Protocol::Server::TimezoneUpdate: - DateLUT::setDefaultTimezone(packet.server_timezone); - return true; - default: throw Exception( ErrorCodes::UNKNOWN_PACKET_FROM_SERVER, "Unknown packet {} from server {}", packet.type, connection->getDescription()); @@ -1188,10 +1184,6 @@ bool ClientBase::receiveSampleBlock(Block & out, ColumnsDescription & columns_de columns_description = ColumnsDescription::parse(packet.multistring_message[1]); return receiveSampleBlock(out, columns_description, parsed_query); - case Protocol::Server::TimezoneUpdate: - DateLUT::setDefaultTimezone(packet.server_timezone); - break; - default: throw NetException(ErrorCodes::UNEXPECTED_PACKET_FROM_SERVER, "Unexpected packet from server (expected Data, Exception or Log, got {})", @@ -1500,7 +1492,7 @@ void ClientBase::receiveLogsAndProfileEvents(ASTPtr parsed_query) { auto packet_type = connection->checkPacket(0); - while (packet_type && (*packet_type == Protocol::Server::Log || *packet_type == Protocol::Server::ProfileEvents)) + while (packet_type && (*packet_type == Protocol::Server::Log || *packet_type == Protocol::Server::ProfileEvents )) { receiveAndProcessPacket(parsed_query, false); packet_type = connection->checkPacket(0); @@ -1537,10 +1529,6 @@ bool ClientBase::receiveEndOfQuery() onProfileEvents(packet.block); break; - case Protocol::Server::TimezoneUpdate: - DateLUT::setDefaultTimezone(packet.server_timezone); - break; - default: throw NetException(ErrorCodes::UNEXPECTED_PACKET_FROM_SERVER, "Unexpected packet from server (expected Exception, EndOfStream, Log, Progress or ProfileEvents. Got {})", @@ -1611,6 +1599,8 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin progress_indication.resetProgress(); profile_events.watch.restart(); + const std::string old_timezone = DateLUT::instance().getTimeZone(); + { /// Temporarily apply query settings to context. 
std::optional old_settings; @@ -1659,6 +1649,9 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin bool is_async_insert = global_context->getSettingsRef().async_insert && insert && insert->hasInlinedData(); + if (!global_context->getSettingsRef().timezone.toString().empty()) + DateLUT::setDefaultTimezone(global_context->getSettingsRef().timezone); + /// INSERT query for which data transfer is needed (not an INSERT SELECT or input()) is processed separately. if (insert && (!insert->select || input_function) && !insert->watch && !is_async_insert) { @@ -1693,6 +1686,10 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin query_parameters.insert_or_assign(name, value); global_context->addQueryParameters(set_query->query_parameters); + + if (!global_context->getSettingsRef().timezone.toString().empty()) + DateLUT::setDefaultTimezone(global_context->getSettingsRef().timezone); + } if (const auto * use_query = parsed_query->as()) { @@ -1703,6 +1700,8 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin connection->setDefaultDatabase(new_database); } } + else + DateLUT::setDefaultTimezone(old_timezone); /// Always print last block (if it was not printed already) if (profile_events.last_block) diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 87e9e20e8f7..eea007a8608 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -972,11 +972,6 @@ Packet Connection::receivePacket() res.block = receiveProfileEvents(); return res; - case Protocol::Server::TimezoneUpdate: - readStringBinary(server_timezone, *in); - res.server_timezone = server_timezone; - return res; - default: /// In unknown state, disconnect - to not leave unsynchronised connection. disconnect(); diff --git a/src/Client/MultiplexedConnections.cpp b/src/Client/MultiplexedConnections.cpp index 668833b2a84..cc260353339 100644 --- a/src/Client/MultiplexedConnections.cpp +++ b/src/Client/MultiplexedConnections.cpp @@ -258,7 +258,6 @@ Packet MultiplexedConnections::drain() switch (packet.type) { - case Protocol::Server::TimezoneUpdate: case Protocol::Server::MergeTreeAllRangesAnnounecement: case Protocol::Server::MergeTreeReadTaskRequest: case Protocol::Server::ReadTaskRequest: @@ -340,7 +339,6 @@ Packet MultiplexedConnections::receivePacketUnlocked(AsyncCallback async_callbac switch (packet.type) { - case Protocol::Server::TimezoneUpdate: case Protocol::Server::MergeTreeAllRangesAnnounecement: case Protocol::Server::MergeTreeReadTaskRequest: case Protocol::Server::ReadTaskRequest: diff --git a/src/Client/Suggest.cpp b/src/Client/Suggest.cpp index 4a29bead540..7027f35d21a 100644 --- a/src/Client/Suggest.cpp +++ b/src/Client/Suggest.cpp @@ -158,7 +158,6 @@ void Suggest::fetch(IServerConnection & connection, const ConnectionTimeouts & t fillWordsFromBlock(packet.block); continue; - case Protocol::Server::TimezoneUpdate: case Protocol::Server::Progress: case Protocol::Server::ProfileInfo: case Protocol::Server::Totals: diff --git a/src/Core/Protocol.h b/src/Core/Protocol.h index 97a2831ffe8..86c0a851c60 100644 --- a/src/Core/Protocol.h +++ b/src/Core/Protocol.h @@ -83,8 +83,7 @@ namespace Protocol ProfileEvents = 14, /// Packet with profile events from server. 
MergeTreeAllRangesAnnounecement = 15, MergeTreeReadTaskRequest = 16, /// Request from a MergeTree replica to a coordinator - TimezoneUpdate = 17, /// Receive server's (session-wide) default timezone - MAX = TimezoneUpdate, + MAX = MergeTreeReadTaskRequest, }; @@ -112,7 +111,6 @@ namespace Protocol "ProfileEvents", "MergeTreeAllRangesAnnounecement", "MergeTreeReadTaskRequest", - "TimezoneUpdate", }; return packet <= MAX ? data[packet] diff --git a/src/Core/ProtocolDefines.h b/src/Core/ProtocolDefines.h index 5483489d5c4..3bbfb95f020 100644 --- a/src/Core/ProtocolDefines.h +++ b/src/Core/ProtocolDefines.h @@ -54,7 +54,7 @@ /// NOTE: DBMS_TCP_PROTOCOL_VERSION has nothing common with VERSION_REVISION, /// later is just a number for server version (one number instead of commit SHA) /// for simplicity (sometimes it may be more convenient in some use cases). -#define DBMS_TCP_PROTOCOL_VERSION 54462 +#define DBMS_TCP_PROTOCOL_VERSION 54461 #define DBMS_MIN_PROTOCOL_VERSION_WITH_INITIAL_QUERY_START_TIME 54449 @@ -72,5 +72,3 @@ #define DBMS_MIN_PROTOCOL_VERSION_WITH_SERVER_QUERY_TIME_IN_PROGRESS 54460 #define DBMS_MIN_PROTOCOL_VERSION_WITH_PASSWORD_COMPLEXITY_RULES 54461 - -#define DBMS_MIN_PROTOCOL_VERSION_WITH_TIMEZONE_UPDATES 54462 diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 9bb11f34916..617b084a149 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -484,9 +484,6 @@ void TCPHandler::runImpl() { std::lock_guard lock(task_callback_mutex); sendLogs(); - if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_TIMEZONE_UPDATES - && client_tcp_protocol_version >= DBMS_MIN_REVISION_WITH_SERVER_TIMEZONE) - sendTimezone(); sendEndOfStream(); } @@ -1039,13 +1036,6 @@ void TCPHandler::sendInsertProfileEvents() sendProfileEvents(); } -void TCPHandler::sendTimezone() -{ - writeVarUInt(Protocol::Server::TimezoneUpdate, *out); - writeStringBinary(DateLUT::instance().getTimeZone(), *out); - out->next(); -} - bool TCPHandler::receiveProxyHeader() { diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index b19f908bc27..f06b0b060b3 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -262,7 +262,6 @@ private: void sendProfileEvents(); void sendSelectProfileEvents(); void sendInsertProfileEvents(); - void sendTimezone(); /// Creates state.block_in/block_out for blocks read/write, depending on whether compression is enabled. 
void initBlockInput(); From c859478db3a3964c49457e49bab62bdf975bed7f Mon Sep 17 00:00:00 2001 From: zvonand Date: Tue, 7 Mar 2023 02:36:02 +0100 Subject: [PATCH 0042/1997] upd --- src/Client/IServerConnection.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Client/IServerConnection.h b/src/Client/IServerConnection.h index 52382ff9d45..cd4db8f5258 100644 --- a/src/Client/IServerConnection.h +++ b/src/Client/IServerConnection.h @@ -38,8 +38,6 @@ struct Packet ParallelReadRequest request; ParallelReadResponse response; - std::string server_timezone; - Packet() : type(Protocol::Server::Hello) {} }; From 5e7a861e688dea04ecfe9c54d30c642f65a28569 Mon Sep 17 00:00:00 2001 From: zvonand Date: Tue, 7 Mar 2023 02:45:47 +0100 Subject: [PATCH 0043/1997] fix --- src/Server/TCPHandler.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 617b084a149..a307b472a64 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -446,7 +446,6 @@ void TCPHandler::runImpl() sendSelectProfileEvents(); sendLogs(); - return false; }; @@ -1036,7 +1035,6 @@ void TCPHandler::sendInsertProfileEvents() sendProfileEvents(); } - bool TCPHandler::receiveProxyHeader() { if (in->eof()) From a7a3c9d1a675743e776fde32c96ccd9bbfc94e46 Mon Sep 17 00:00:00 2001 From: zvonand Date: Tue, 7 Mar 2023 02:52:42 +0100 Subject: [PATCH 0044/1997] fix style --- src/Client/ClientBase.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 09c510f01f3..25442c89f99 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1492,7 +1492,7 @@ void ClientBase::receiveLogsAndProfileEvents(ASTPtr parsed_query) { auto packet_type = connection->checkPacket(0); - while (packet_type && (*packet_type == Protocol::Server::Log || *packet_type == Protocol::Server::ProfileEvents )) + while (packet_type && (*packet_type == Protocol::Server::Log || *packet_type == Protocol::Server::ProfileEvents)) { receiveAndProcessPacket(parsed_query, false); packet_type = connection->checkPacket(0); From e92501d5dd7a9f3a77ad38f8750432a2286e9f0b Mon Sep 17 00:00:00 2001 From: zvonand Date: Tue, 7 Mar 2023 13:02:02 +0100 Subject: [PATCH 0045/1997] update tests + exception --- src/Client/ClientBase.cpp | 28 +++++++++++++++++-- ...rence => 02674_timezone_setting.reference} | 0 ...setting.sql => 02674_timezone_setting.sql} | 4 +-- 3 files changed, 27 insertions(+), 5 deletions(-) rename tests/queries/0_stateless/{02668_timezone_setting.reference => 02674_timezone_setting.reference} (100%) rename tests/queries/0_stateless/{02668_timezone_setting.sql => 02674_timezone_setting.sql} (73%) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 25442c89f99..13f28806066 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -107,6 +107,7 @@ namespace ErrorCodes extern const int UNRECOGNIZED_ARGUMENTS; extern const int LOGICAL_ERROR; extern const int CANNOT_OPEN_FILE; + extern const int CANNOT_PARSE_DATETIME; } } @@ -1599,6 +1600,9 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin progress_indication.resetProgress(); profile_events.watch.restart(); + /// A query may contain timezone setting. To handle this, old client-wide tz is saved here. + /// If timezone was set for a query, after its execution client tz will be back to old one. + /// If it was a settings query, new setting will be applied to client. 
const std::string old_timezone = DateLUT::instance().getTimeZone(); { @@ -1649,8 +1653,18 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin bool is_async_insert = global_context->getSettingsRef().async_insert && insert && insert->hasInlinedData(); - if (!global_context->getSettingsRef().timezone.toString().empty()) - DateLUT::setDefaultTimezone(global_context->getSettingsRef().timezone); + /// pre-load timezone from (query) settings -- new timezone may also be specified in query. + try + { + if (!global_context->getSettingsRef().timezone.toString().empty()) + DateLUT::setDefaultTimezone(global_context->getSettingsRef().timezone); + } + catch (Poco::Exception &) + { + throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, + "Invalid time zone {} in client settings. Use `SET timezone = \'New/TZ\'` to set a proper timezone.", + global_context->getSettingsRef().timezone.toString()); + } /// INSERT query for which data transfer is needed (not an INSERT SELECT or input()) is processed separately. if (insert && (!insert->select || input_function) && !insert->watch && !is_async_insert) @@ -1687,9 +1701,17 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin global_context->addQueryParameters(set_query->query_parameters); + try + { if (!global_context->getSettingsRef().timezone.toString().empty()) DateLUT::setDefaultTimezone(global_context->getSettingsRef().timezone); - + } + catch (Poco::Exception &) + { + throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, + "Invalid time zone {} in client settings. Use `SET timezone = \'New/TZ\'` to set a proper timezone.", + global_context->getSettingsRef().timezone.toString()); + } } if (const auto * use_query = parsed_query->as()) { diff --git a/tests/queries/0_stateless/02668_timezone_setting.reference b/tests/queries/0_stateless/02674_timezone_setting.reference similarity index 100% rename from tests/queries/0_stateless/02668_timezone_setting.reference rename to tests/queries/0_stateless/02674_timezone_setting.reference diff --git a/tests/queries/0_stateless/02668_timezone_setting.sql b/tests/queries/0_stateless/02674_timezone_setting.sql similarity index 73% rename from tests/queries/0_stateless/02668_timezone_setting.sql rename to tests/queries/0_stateless/02674_timezone_setting.sql index f331ab58307..51820fc2dca 100644 --- a/tests/queries/0_stateless/02668_timezone_setting.sql +++ b/tests/queries/0_stateless/02674_timezone_setting.sql @@ -5,5 +5,5 @@ SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Europe/Zuric SET timezone = 'Europe/Zurich'; SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Asia/Novosibirsk'); -SET timezone = 'Абырвалг'; -select now(); -- { serverError POCO_EXCEPTION } \ No newline at end of file +SET timezone = 'Абырвалг'; -- { clientError CANNOT_PARSE_DATETIME } +select now(); -- { clientError CANNOT_PARSE_DATETIME } \ No newline at end of file From 1fd6e3f23b41dac6fde5b238e1a5da11a976b5ae Mon Sep 17 00:00:00 2001 From: zvonand Date: Tue, 7 Mar 2023 16:02:30 +0100 Subject: [PATCH 0046/1997] Revert "fix style" This reverts commit a7a3c9d1a675743e776fde32c96ccd9bbfc94e46. 
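// ---- editorial note (not part of the patch) ----
// Both validation approaches in this series - the client-side try/catch above
// and the SettingFieldTimezone field added later - reduce to checking the name
// against the tzdata database before the setting takes effect. A hedged sketch
// of that eager check, assuming the cctz API used by the later SettingsFields.h
// hunk (`validateTimezoneName` is illustrative):
#include <string>
#include <stdexcept>
#include <cctz/time_zone.h>

inline void validateTimezoneName(const std::string & name)
{
    cctz::time_zone tz;
    // An empty name means "server default" and is always accepted; anything
    // else must load from tzdata or the setting is rejected up front.
    if (!name.empty() && !cctz::load_time_zone(name, &tz))
        throw std::invalid_argument("Invalid time zone: " + name);
}
// ---- end editorial note ----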
--- src/Client/ClientBase.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 13f28806066..cfef1a5d3fe 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1493,7 +1493,7 @@ void ClientBase::receiveLogsAndProfileEvents(ASTPtr parsed_query) { auto packet_type = connection->checkPacket(0); - while (packet_type && (*packet_type == Protocol::Server::Log || *packet_type == Protocol::Server::ProfileEvents)) + while (packet_type && (*packet_type == Protocol::Server::Log || *packet_type == Protocol::Server::ProfileEvents )) { receiveAndProcessPacket(parsed_query, false); packet_type = connection->checkPacket(0); From 1ce697d8c06ce7f44e078f9b8809dcaa3e3ba8f8 Mon Sep 17 00:00:00 2001 From: zvonand Date: Tue, 7 Mar 2023 16:05:23 +0100 Subject: [PATCH 0047/1997] Revert "revert protocol changes, found better way" This reverts commit 3a918ae66a984451e0db0f56ffa6232b897ad62f. --- src/Client/ClientBase.cpp | 47 +++++-------------- src/Client/Connection.cpp | 5 ++ src/Client/IServerConnection.h | 2 + src/Client/MultiplexedConnections.cpp | 2 + src/Client/Suggest.cpp | 1 + src/Core/Protocol.h | 4 +- src/Core/ProtocolDefines.h | 4 +- src/Server/TCPHandler.cpp | 12 +++++ src/Server/TCPHandler.h | 1 + ...rence => 02668_timezone_setting.reference} | 0 ...setting.sql => 02668_timezone_setting.sql} | 4 +- 11 files changed, 44 insertions(+), 38 deletions(-) rename tests/queries/0_stateless/{02674_timezone_setting.reference => 02668_timezone_setting.reference} (100%) rename tests/queries/0_stateless/{02674_timezone_setting.sql => 02668_timezone_setting.sql} (73%) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index cfef1a5d3fe..7ca6bbed6ba 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -107,7 +107,6 @@ namespace ErrorCodes extern const int UNRECOGNIZED_ARGUMENTS; extern const int LOGICAL_ERROR; extern const int CANNOT_OPEN_FILE; - extern const int CANNOT_PARSE_DATETIME; } } @@ -1021,6 +1020,10 @@ bool ClientBase::receiveAndProcessPacket(ASTPtr parsed_query, bool cancelled_) onProfileEvents(packet.block); return true; + case Protocol::Server::TimezoneUpdate: + DateLUT::setDefaultTimezone(packet.server_timezone); + return true; + default: throw Exception( ErrorCodes::UNKNOWN_PACKET_FROM_SERVER, "Unknown packet {} from server {}", packet.type, connection->getDescription()); @@ -1185,6 +1188,10 @@ bool ClientBase::receiveSampleBlock(Block & out, ColumnsDescription & columns_de columns_description = ColumnsDescription::parse(packet.multistring_message[1]); return receiveSampleBlock(out, columns_description, parsed_query); + case Protocol::Server::TimezoneUpdate: + DateLUT::setDefaultTimezone(packet.server_timezone); + break; + default: throw NetException(ErrorCodes::UNEXPECTED_PACKET_FROM_SERVER, "Unexpected packet from server (expected Data, Exception or Log, got {})", @@ -1493,7 +1500,7 @@ void ClientBase::receiveLogsAndProfileEvents(ASTPtr parsed_query) { auto packet_type = connection->checkPacket(0); - while (packet_type && (*packet_type == Protocol::Server::Log || *packet_type == Protocol::Server::ProfileEvents )) + while (packet_type && (*packet_type == Protocol::Server::Log || *packet_type == Protocol::Server::ProfileEvents)) { receiveAndProcessPacket(parsed_query, false); packet_type = connection->checkPacket(0); @@ -1530,6 +1537,10 @@ bool ClientBase::receiveEndOfQuery() onProfileEvents(packet.block); break; + case Protocol::Server::TimezoneUpdate: + 
DateLUT::setDefaultTimezone(packet.server_timezone); + break; + default: throw NetException(ErrorCodes::UNEXPECTED_PACKET_FROM_SERVER, "Unexpected packet from server (expected Exception, EndOfStream, Log, Progress or ProfileEvents. Got {})", @@ -1600,11 +1611,6 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin progress_indication.resetProgress(); profile_events.watch.restart(); - /// A query may contain timezone setting. To handle this, old client-wide tz is saved here. - /// If timezone was set for a query, after its execution client tz will be back to old one. - /// If it was a settings query, new setting will be applied to client. - const std::string old_timezone = DateLUT::instance().getTimeZone(); - { /// Temporarily apply query settings to context. std::optional old_settings; @@ -1653,19 +1659,6 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin bool is_async_insert = global_context->getSettingsRef().async_insert && insert && insert->hasInlinedData(); - /// pre-load timezone from (query) settings -- new timezone may also be specified in query. - try - { - if (!global_context->getSettingsRef().timezone.toString().empty()) - DateLUT::setDefaultTimezone(global_context->getSettingsRef().timezone); - } - catch (Poco::Exception &) - { - throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, - "Invalid time zone {} in client settings. Use `SET timezone = \'New/TZ\'` to set a proper timezone.", - global_context->getSettingsRef().timezone.toString()); - } - /// INSERT query for which data transfer is needed (not an INSERT SELECT or input()) is processed separately. if (insert && (!insert->select || input_function) && !insert->watch && !is_async_insert) { @@ -1700,18 +1693,6 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin query_parameters.insert_or_assign(name, value); global_context->addQueryParameters(set_query->query_parameters); - - try - { - if (!global_context->getSettingsRef().timezone.toString().empty()) - DateLUT::setDefaultTimezone(global_context->getSettingsRef().timezone); - } - catch (Poco::Exception &) - { - throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, - "Invalid time zone {} in client settings. Use `SET timezone = \'New/TZ\'` to set a proper timezone.", - global_context->getSettingsRef().timezone.toString()); - } } if (const auto * use_query = parsed_query->as()) { @@ -1722,8 +1703,6 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin connection->setDefaultDatabase(new_database); } } - else - DateLUT::setDefaultTimezone(old_timezone); /// Always print last block (if it was not printed already) if (profile_events.last_block) diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index eea007a8608..87e9e20e8f7 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -972,6 +972,11 @@ Packet Connection::receivePacket() res.block = receiveProfileEvents(); return res; + case Protocol::Server::TimezoneUpdate: + readStringBinary(server_timezone, *in); + res.server_timezone = server_timezone; + return res; + default: /// In unknown state, disconnect - to not leave unsynchronised connection. 
disconnect(); diff --git a/src/Client/IServerConnection.h b/src/Client/IServerConnection.h index cd4db8f5258..52382ff9d45 100644 --- a/src/Client/IServerConnection.h +++ b/src/Client/IServerConnection.h @@ -38,6 +38,8 @@ struct Packet ParallelReadRequest request; ParallelReadResponse response; + std::string server_timezone; + Packet() : type(Protocol::Server::Hello) {} }; diff --git a/src/Client/MultiplexedConnections.cpp b/src/Client/MultiplexedConnections.cpp index cc260353339..668833b2a84 100644 --- a/src/Client/MultiplexedConnections.cpp +++ b/src/Client/MultiplexedConnections.cpp @@ -258,6 +258,7 @@ Packet MultiplexedConnections::drain() switch (packet.type) { + case Protocol::Server::TimezoneUpdate: case Protocol::Server::MergeTreeAllRangesAnnounecement: case Protocol::Server::MergeTreeReadTaskRequest: case Protocol::Server::ReadTaskRequest: @@ -339,6 +340,7 @@ Packet MultiplexedConnections::receivePacketUnlocked(AsyncCallback async_callbac switch (packet.type) { + case Protocol::Server::TimezoneUpdate: case Protocol::Server::MergeTreeAllRangesAnnounecement: case Protocol::Server::MergeTreeReadTaskRequest: case Protocol::Server::ReadTaskRequest: diff --git a/src/Client/Suggest.cpp b/src/Client/Suggest.cpp index 7027f35d21a..4a29bead540 100644 --- a/src/Client/Suggest.cpp +++ b/src/Client/Suggest.cpp @@ -158,6 +158,7 @@ void Suggest::fetch(IServerConnection & connection, const ConnectionTimeouts & t fillWordsFromBlock(packet.block); continue; + case Protocol::Server::TimezoneUpdate: case Protocol::Server::Progress: case Protocol::Server::ProfileInfo: case Protocol::Server::Totals: diff --git a/src/Core/Protocol.h b/src/Core/Protocol.h index 86c0a851c60..97a2831ffe8 100644 --- a/src/Core/Protocol.h +++ b/src/Core/Protocol.h @@ -83,7 +83,8 @@ namespace Protocol ProfileEvents = 14, /// Packet with profile events from server. MergeTreeAllRangesAnnounecement = 15, MergeTreeReadTaskRequest = 16, /// Request from a MergeTree replica to a coordinator - MAX = MergeTreeReadTaskRequest, + TimezoneUpdate = 17, /// Receive server's (session-wide) default timezone + MAX = TimezoneUpdate, }; @@ -111,6 +112,7 @@ namespace Protocol "ProfileEvents", "MergeTreeAllRangesAnnounecement", "MergeTreeReadTaskRequest", + "TimezoneUpdate", }; return packet <= MAX ? data[packet] diff --git a/src/Core/ProtocolDefines.h b/src/Core/ProtocolDefines.h index 3bbfb95f020..5483489d5c4 100644 --- a/src/Core/ProtocolDefines.h +++ b/src/Core/ProtocolDefines.h @@ -54,7 +54,7 @@ /// NOTE: DBMS_TCP_PROTOCOL_VERSION has nothing common with VERSION_REVISION, /// later is just a number for server version (one number instead of commit SHA) /// for simplicity (sometimes it may be more convenient in some use cases). 
-#define DBMS_TCP_PROTOCOL_VERSION 54461 +#define DBMS_TCP_PROTOCOL_VERSION 54462 #define DBMS_MIN_PROTOCOL_VERSION_WITH_INITIAL_QUERY_START_TIME 54449 @@ -72,3 +72,5 @@ #define DBMS_MIN_PROTOCOL_VERSION_WITH_SERVER_QUERY_TIME_IN_PROGRESS 54460 #define DBMS_MIN_PROTOCOL_VERSION_WITH_PASSWORD_COMPLEXITY_RULES 54461 + +#define DBMS_MIN_PROTOCOL_VERSION_WITH_TIMEZONE_UPDATES 54462 diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index a307b472a64..9bb11f34916 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -446,6 +446,7 @@ void TCPHandler::runImpl() sendSelectProfileEvents(); sendLogs(); + return false; }; @@ -483,6 +484,9 @@ void TCPHandler::runImpl() { std::lock_guard lock(task_callback_mutex); sendLogs(); + if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_TIMEZONE_UPDATES + && client_tcp_protocol_version >= DBMS_MIN_REVISION_WITH_SERVER_TIMEZONE) + sendTimezone(); sendEndOfStream(); } @@ -1035,6 +1039,14 @@ void TCPHandler::sendInsertProfileEvents() sendProfileEvents(); } +void TCPHandler::sendTimezone() +{ + writeVarUInt(Protocol::Server::TimezoneUpdate, *out); + writeStringBinary(DateLUT::instance().getTimeZone(), *out); + out->next(); +} + + bool TCPHandler::receiveProxyHeader() { if (in->eof()) diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index f06b0b060b3..b19f908bc27 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -262,6 +262,7 @@ private: void sendProfileEvents(); void sendSelectProfileEvents(); void sendInsertProfileEvents(); + void sendTimezone(); /// Creates state.block_in/block_out for blocks read/write, depending on whether compression is enabled. void initBlockInput(); diff --git a/tests/queries/0_stateless/02674_timezone_setting.reference b/tests/queries/0_stateless/02668_timezone_setting.reference similarity index 100% rename from tests/queries/0_stateless/02674_timezone_setting.reference rename to tests/queries/0_stateless/02668_timezone_setting.reference diff --git a/tests/queries/0_stateless/02674_timezone_setting.sql b/tests/queries/0_stateless/02668_timezone_setting.sql similarity index 73% rename from tests/queries/0_stateless/02674_timezone_setting.sql rename to tests/queries/0_stateless/02668_timezone_setting.sql index 51820fc2dca..f331ab58307 100644 --- a/tests/queries/0_stateless/02674_timezone_setting.sql +++ b/tests/queries/0_stateless/02668_timezone_setting.sql @@ -5,5 +5,5 @@ SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Europe/Zuric SET timezone = 'Europe/Zurich'; SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Asia/Novosibirsk'); -SET timezone = 'Абырвалг'; -- { clientError CANNOT_PARSE_DATETIME } -select now(); -- { clientError CANNOT_PARSE_DATETIME } \ No newline at end of file +SET timezone = 'Абырвалг'; +select now(); -- { serverError POCO_EXCEPTION } \ No newline at end of file From 14cb7ed773f671ef4f6fa3f41aa6d05ce4705228 Mon Sep 17 00:00:00 2001 From: Dan Roscigno Date: Fri, 10 Mar 2023 10:05:06 -0500 Subject: [PATCH 0048/1997] Update system tables doc closes #47030 --- docs/en/operations/system-tables/index.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/en/operations/system-tables/index.md b/docs/en/operations/system-tables/index.md index 284ba866cc8..65efe8b01b3 100644 --- a/docs/en/operations/system-tables/index.md +++ b/docs/en/operations/system-tables/index.md @@ -13,6 +13,7 @@ System tables provide information about: - Server states, processes, and environment. - Server’s internal processes.
+- Options used when the ClickHouse binary was built. System tables: From bbb31cf8913527eb21823216821ca536c2779563 Mon Sep 17 00:00:00 2001 From: zvonand Date: Wed, 15 Mar 2023 18:37:23 +0100 Subject: [PATCH 0049/1997] added validation on setting modification --- src/Client/ClientBase.cpp | 17 ++++++++++++----- src/Client/ClientBase.h | 1 + src/Core/Settings.h | 2 +- src/Core/SettingsFields.cpp | 11 +++++++++++ src/Core/SettingsFields.h | 38 +++++++++++++++++++++++++++++++++++++ src/Server/TCPHandler.cpp | 17 +++++++++++++---- 6 files changed, 76 insertions(+), 10 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 7ca6bbed6ba..5b086d675ba 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1021,7 +1021,7 @@ bool ClientBase::receiveAndProcessPacket(ASTPtr parsed_query, bool cancelled_) return true; case Protocol::Server::TimezoneUpdate: - DateLUT::setDefaultTimezone(packet.server_timezone); + onTimezoneUpdate(packet.server_timezone); return true; default: @@ -1046,6 +1046,11 @@ void ClientBase::onProgress(const Progress & value) progress_indication.writeProgress(*tty_buf); } +void ClientBase::onTimezoneUpdate(const String & tz) +{ + DateLUT::setDefaultTimezone(tz); +} + void ClientBase::onEndOfStream() { @@ -1189,12 +1194,12 @@ bool ClientBase::receiveSampleBlock(Block & out, ColumnsDescription & columns_de return receiveSampleBlock(out, columns_description, parsed_query); case Protocol::Server::TimezoneUpdate: - DateLUT::setDefaultTimezone(packet.server_timezone); + onTimezoneUpdate(packet.server_timezone); break; default: throw NetException(ErrorCodes::UNEXPECTED_PACKET_FROM_SERVER, - "Unexpected packet from server (expected Data, Exception or Log, got {})", + "Unexpected packet from server (expected Data, Exception, Log or TimezoneUpdate, got {})", String(Protocol::Server::toString(packet.type))); } } @@ -1500,7 +1505,9 @@ void ClientBase::receiveLogsAndProfileEvents(ASTPtr parsed_query) { auto packet_type = connection->checkPacket(0); - while (packet_type && (*packet_type == Protocol::Server::Log || *packet_type == Protocol::Server::ProfileEvents)) + while (packet_type && (*packet_type == Protocol::Server::Log || + *packet_type == Protocol::Server::ProfileEvents || + *packet_type == Protocol::Server::TimezoneUpdate)) { receiveAndProcessPacket(parsed_query, false); packet_type = connection->checkPacket(0); @@ -1538,7 +1545,7 @@ bool ClientBase::receiveEndOfQuery() break; case Protocol::Server::TimezoneUpdate: - DateLUT::setDefaultTimezone(packet.server_timezone); + onTimezoneUpdate(packet.server_timezone); break; default: diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 52e15a1a075..18d9a30cac0 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -139,6 +139,7 @@ private: void cancelQuery(); void onProgress(const Progress & value); + void onTimezoneUpdate(const String & tz); void onData(Block & block, ASTPtr parsed_query); void onLogData(Block & block); void onTotals(Block & block, ASTPtr parsed_query); diff --git a/src/Core/Settings.h b/src/Core/Settings.h index e508818a26a..ced59219a5b 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -712,7 +712,7 @@ class IColumn; M(Float, insert_keeper_fault_injection_probability, 0.0f, "Approximate probability of failure for a keeper request during insert. 
Valid value is in interval [0.0f, 1.0f]", 0) \ M(UInt64, insert_keeper_fault_injection_seed, 0, "0 - random seed, otherwise the setting value", 0) \ M(Bool, force_aggregation_in_order, false, "Force use of aggregation in order on remote nodes during distributed aggregation. PLEASE, NEVER CHANGE THIS SETTING VALUE MANUALLY!", IMPORTANT) \ - M(String, timezone, "", "Use specified timezone for interpreting Date and DateTime instead of server's timezone.", 0) \ + M(Timezone, timezone, "", "Use specified timezone for interpreting Date and DateTime instead of server's timezone.", 0) \ // End of COMMON_SETTINGS // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS. diff --git a/src/Core/SettingsFields.cpp b/src/Core/SettingsFields.cpp index 4164bf1e27e..44369c7c8a0 100644 --- a/src/Core/SettingsFields.cpp +++ b/src/Core/SettingsFields.cpp @@ -445,6 +445,17 @@ String SettingFieldEnumHelpers::readBinary(ReadBuffer & in) return str; } +void SettingFieldTimezone::writeBinary(WriteBuffer & out) const +{ + writeStringBinary(value, out); +} + +void SettingFieldTimezone::readBinary(ReadBuffer & in) +{ + String str; + readStringBinary(str, in); + *this = std::move(str); +} String SettingFieldCustom::toString() const { diff --git a/src/Core/SettingsFields.h b/src/Core/SettingsFields.h index c6fe46c9f6b..b580122d3db 100644 --- a/src/Core/SettingsFields.h +++ b/src/Core/SettingsFields.h @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -539,6 +540,43 @@ void SettingFieldMultiEnum::readBinary(ReadBuffer & in) return std::initializer_list> __VA_ARGS__ .size();\ } +/* Setting field for specifying user-defined timezone. It is basically a string, but it needs validation. + */ +struct SettingFieldTimezone +{ + String value; + bool changed = false; + + explicit SettingFieldTimezone(std::string_view str = {}) { validateTimezone(std::string(str)); value = str; } + explicit SettingFieldTimezone(const String & str) { validateTimezone(str); value = str; } + explicit SettingFieldTimezone(String && str) { validateTimezone(std::string(str)); value = std::move(str); } + explicit SettingFieldTimezone(const char * str) { validateTimezone(str); value = str; } + explicit SettingFieldTimezone(const Field & f) { const String & str = f.safeGet(); validateTimezone(str); value = str; } + + SettingFieldTimezone & operator =(std::string_view str) { validateTimezone(std::string(str)); value = str; changed = true; return *this; } + SettingFieldTimezone & operator =(const String & str) { *this = std::string_view{str}; return *this; } + SettingFieldTimezone & operator =(String && str) { validateTimezone(str); value = std::move(str); changed = true; return *this; } + SettingFieldTimezone & operator =(const char * str) { *this = std::string_view{str}; return *this; } + SettingFieldTimezone & operator =(const Field & f) { *this = f.safeGet(); return *this; } + + operator const String &() const { return value; } /// NOLINT + explicit operator Field() const { return value; } + + const String & toString() const { return value; } + void parseFromString(const String & str) { *this = str; } + + void writeBinary(WriteBuffer & out) const; + void readBinary(ReadBuffer & in); + +private: + cctz::time_zone validated_tz; + void validateTimezone(const std::string & str) + { + if (str != "" && !cctz::load_time_zone(str, &validated_tz)) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Invalid time zone: {}", str); + } +}; + /// Can keep a value of any type. 
Used for user-defined settings. struct SettingFieldCustom { diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 9bb11f34916..f43982c5133 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -442,6 +442,9 @@ void TCPHandler::runImpl() if (isQueryCancelled()) return true; + if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_TIMEZONE_UPDATES + && client_tcp_protocol_version >= DBMS_MIN_REVISION_WITH_SERVER_TIMEZONE) + sendTimezone(); sendProgress(); sendSelectProfileEvents(); sendLogs(); @@ -483,10 +486,10 @@ void TCPHandler::runImpl() { std::lock_guard lock(task_callback_mutex); - sendLogs(); if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_TIMEZONE_UPDATES && client_tcp_protocol_version >= DBMS_MIN_REVISION_WITH_SERVER_TIMEZONE) sendTimezone(); + sendLogs(); sendEndOfStream(); } @@ -1041,9 +1044,15 @@ void TCPHandler::sendInsertProfileEvents() void TCPHandler::sendTimezone() { - writeVarUInt(Protocol::Server::TimezoneUpdate, *out); - writeStringBinary(DateLUT::instance().getTimeZone(), *out); - out->next(); +// const String & tz = CurrentThread::get().getQueryContext()->getSettingsRef().timezone.toString(); + const String & tz = query_context->getSettingsRef().timezone.toString(); + if (!tz.empty()) + { + LOG_DEBUG(log, "Sent timezone: {}", tz); + writeVarUInt(Protocol::Server::TimezoneUpdate, *out); + writeStringBinary(tz, *out); + out->next(); + } } From e36addb96a7eaaba8f9a90383d3e77020a1a61e8 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Tue, 11 Apr 2023 13:03:03 +0200 Subject: [PATCH 0050/1997] Hackish way of setting up timezone on the client Warning: lots of debug logging --- programs/client/Client.cpp | 12 ++++- src/Client/ClientBase.cpp | 47 +++++++++++++++++++ src/Functions/timezoneOf.cpp | 13 +++++ src/Interpreters/Context.cpp | 9 ++++ src/Server/TCPHandler.cpp | 21 ++++++--- .../0_stateless/02668_timezone_setting.sql | 8 ++-- 6 files changed, 99 insertions(+), 11 deletions(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 660b8d7c00a..2aa75e60294 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -4,8 +4,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -307,7 +309,7 @@ int Client::main(const std::vector & /*args*/) try { UseSSL use_ssl; - MainThreadStatus::getInstance(); + auto & thread_status = MainThreadStatus::getInstance(); setupSignalHandler(); std::cout << std::fixed << std::setprecision(3); @@ -320,6 +322,14 @@ try processConfig(); initTtyBuffer(toProgressOption(config().getString("progress", "default"))); + { + // All that just to set DB::CurrentThread::get().getGlobalContext() + // which is required for client timezone (pushed as from server) to work. + auto thread_group = std::make_shared(); + thread_group->global_context = global_context; + thread_status.attachQuery(thread_group, false); + } + /// Includes delayed_interactive. 
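// Editor's note (annotation, not part of the original patch): the thread-group
// attachment above exists solely so that DB::CurrentThread::get().getGlobalContext()
// returns the client's global context; DateLUT::instance(), as reworked later in
// this series, falls back to that context to resolve the session `timezone`
// setting, so without the attachment a server-pushed timezone update would have
// no context to apply to.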
if (is_interactive) { diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index ca32b9b97d7..e3e0364523a 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -65,6 +65,7 @@ #include #include +#include #include #include #include @@ -73,11 +74,44 @@ #include "config_version.h" #include "config.h" +#include + namespace fs = std::filesystem; using namespace std::literals; +namespace +{ +using namespace DB; +using ContetGetterFunc = std::function const; +const void* getContextPtrOrNull(ContetGetterFunc contextFunc) +{ + try + { + return contextFunc().get(); + } + catch(...) + { + } + return nullptr; +} + +void LogContextes(const std::string_view scope, const ContextPtr global_context) +{ + const auto * context = global_context.get(); + std::cerr << scope << " contextes" + << "\n\tglobal: " << reinterpret_cast(context) + << "\n\tsession: " << getContextPtrOrNull([&]() { return context ? context->getSessionContext() : nullptr; }) + << "\n\tquery: " << getContextPtrOrNull([&]() { return context ? context->getQueryContext() : nullptr; }) + << "\n\tcurrent T query: " << getContextPtrOrNull([&]() { return DB::CurrentThread::get().getQueryContext(); }) + << "\n\tcurrent T global: " << getContextPtrOrNull([&]() { return DB::CurrentThread::get().getGlobalContext(); }) +// << "\n\tbuffer: " << getContextPtrOrNull(context, &Context::getBufferContext) + << std::endl; +} + +} + namespace CurrentMetrics { extern const Metric MemoryTracking; @@ -438,7 +472,12 @@ void ClientBase::onData(Block & block, ASTPtr parsed_query) /// output_format, do not output it. /// Also do not output too much data if we're fuzzing. if (block.rows() == 0 || (query_fuzzer_runs != 0 && processed_rows >= 100)) + { + LogContextes("ClientBase::onData header", global_context); return; + } + + LogContextes("ClientBase::onData DATA block", global_context); /// If results are written INTO OUTFILE, we can avoid clearing progress to avoid flicker. if (need_render_progress && tty_buf && (!select_into_file || select_into_file_and_stdout)) @@ -1048,7 +1087,15 @@ void ClientBase::onProgress(const Progress & value) void ClientBase::onTimezoneUpdate(const String & tz) { + std::cerr << "ClientBase::onTimezoneUpdate received new TZ from server: " << tz << std::endl; DateLUT::setDefaultTimezone(tz); + + Settings settings; + settings.timezone = tz; + global_context->applySettingsChanges(settings.changes()); +// DB::CurrentThread::get().getQueryContext()->applySettingsChanges(settings.changes()); + + LogContextes("ClientBase::onTimezoneUpdate", global_context); } diff --git a/src/Functions/timezoneOf.cpp b/src/Functions/timezoneOf.cpp index 6454b1cd735..ce419b7b4cd 100644 --- a/src/Functions/timezoneOf.cpp +++ b/src/Functions/timezoneOf.cpp @@ -5,7 +5,11 @@ #include #include #include +#include "Poco/Logger.h" +#include +#include +#include namespace DB { @@ -52,6 +56,15 @@ public: { DataTypePtr type_no_nullable = removeNullable(arguments[0].type); + { + const auto query_context = DB::CurrentThread::get().getQueryContext(); + + LOG_DEBUG(&Poco::Logger::get("Function timezoneOf"), "query context: {}, timezone: {} ({})", + reinterpret_cast(query_context.get()), + query_context->getSettingsRef().timezone.toString(), + (query_context->getSettingsRef().timezone.changed ? 
"changed" : "UNCHANGED")); + } + return DataTypeString().createColumnConst(input_rows_count, dynamic_cast(*type_no_nullable).getTimeZone().getTimeZone()); } diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index cf1d5203bf7..e27889702c5 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -118,6 +118,8 @@ #include #endif +#include + namespace fs = std::filesystem; namespace ProfileEvents @@ -1543,6 +1545,13 @@ void Context::applySettingChange(const SettingChange & change) void Context::applySettingsChanges(const SettingsChanges & changes) { auto lock = getLock(); + LOG_DEBUG(shared->log, "Context::applySettingsChanges {} applying settings changes: {}", reinterpret_cast(this), + fmt::join(std::ranges::transform_view(changes, + [](const SettingChange & change) + { + return change.name + ": " + change.value.dump(); + }), ", ")); + for (const SettingChange & change : changes) applySettingChange(change); applySettingsQuirks(settings); diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index ef4bf81a5c1..4d5402d65d5 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -345,6 +345,7 @@ void TCPHandler::runImpl() /// Send block to the client - input storage structure. state.input_header = metadata_snapshot->getSampleBlock(); sendData(state.input_header); + sendTimezone(); }); query_context->setInputBlocksReaderCallback([this] (ContextPtr context) -> Block @@ -452,9 +453,7 @@ void TCPHandler::runImpl() if (isQueryCancelled()) return true; - if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_TIMEZONE_UPDATES - && client_tcp_protocol_version >= DBMS_MIN_REVISION_WITH_SERVER_TIMEZONE) - sendTimezone(); +// sendTimezone(); sendProgress(); sendSelectProfileEvents(); sendLogs(); @@ -496,9 +495,7 @@ void TCPHandler::runImpl() { std::lock_guard lock(task_callback_mutex); - if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_TIMEZONE_UPDATES - && client_tcp_protocol_version >= DBMS_MIN_REVISION_WITH_SERVER_TIMEZONE) - sendTimezone(); +// sendTimezone(); sendLogs(); sendEndOfStream(); } @@ -764,7 +761,7 @@ void TCPHandler::processInsertQuery() /// Send block to the client - table structure. sendData(executor.getHeader()); - + sendTimezone(); sendLogs(); while (readDataNext()) @@ -809,6 +806,7 @@ void TCPHandler::processOrdinaryQueryWithProcessors() { std::lock_guard lock(task_callback_mutex); sendData(header); + sendTimezone(); } } @@ -1061,7 +1059,16 @@ void TCPHandler::sendInsertProfileEvents() void TCPHandler::sendTimezone() { +// if (client_tcp_protocol_version <= DBMS_MIN_PROTOCOL_VERSION_WITH_TIMEZONE_UPDATES +// || client_tcp_protocol_version <= DBMS_MIN_REVISION_WITH_SERVER_TIMEZONE) +// return; + // const String & tz = CurrentThread::get().getQueryContext()->getSettingsRef().timezone.toString(); + LOG_DEBUG(log, "TCPHandler::sendTimezone() query context: {}, timezone: {} ({})", + reinterpret_cast(query_context.get()), + query_context->getSettingsRef().timezone.toString(), + (query_context->getSettingsRef().timezone.changed ? 
"changed" : "UNCHANGED")); + const String & tz = query_context->getSettingsRef().timezone.toString(); if (!tz.empty()) { diff --git a/tests/queries/0_stateless/02668_timezone_setting.sql b/tests/queries/0_stateless/02668_timezone_setting.sql index f331ab58307..d85efaa8a39 100644 --- a/tests/queries/0_stateless/02668_timezone_setting.sql +++ b/tests/queries/0_stateless/02668_timezone_setting.sql @@ -1,9 +1,11 @@ +SET timezone = 'Абырвалг'; -- { serverError BAD_ARGUMENTS} + SET timezone = 'Asia/Novosibirsk'; SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Europe/Zurich'); SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Europe/Zurich') SETTINGS timezone = 'Europe/Zurich'; -SET timezone = 'Europe/Zurich'; +SET timezone = 'Asia/Manila'; SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Asia/Novosibirsk'); -SET timezone = 'Абырвалг'; -select now(); -- { serverError POCO_EXCEPTION } \ No newline at end of file +SELECT timezone(), serverTimeZone(), timezoneOf(now()) SETTINGS timezone = 'Europe/Zurich'; +SELECT timezone(), serverTimeZone(), timezoneOf(now()) SETTINGS timezone = 'Pacific/Pitcairn'; From 1e7080a9aae4403ea613d401501f5b6498bf2df9 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 11 Apr 2023 17:35:47 +0000 Subject: [PATCH 0051/1997] ReadFromMergeTree: update sort description after applying prewhere info --- .../QueryPlan/ReadFromMergeTree.cpp | 66 +++++++++++-------- 1 file changed, 39 insertions(+), 27 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 291499ff412..64a3a4c74ae 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -133,6 +133,35 @@ static bool checkAllPartsOnRemoteFS(const RangesInDataParts & parts) return true; } +/// build sort description for output stream +static void updateSortDescriptionForOutputStream( + DataStream & output_stream, const Names & sorting_key_columns, const int sort_direction, InputOrderInfoPtr input_order_info) +{ + SortDescription sort_description; + const Block & header = output_stream.header; + for (const auto & column_name : sorting_key_columns) + { + if (std::find_if(header.begin(), header.end(), [&](ColumnWithTypeAndName const & col) { return col.name == column_name; }) + == header.end()) + break; + sort_description.emplace_back(column_name, sort_direction); + } + if (!sort_description.empty()) + { + if (input_order_info) + { + output_stream.sort_scope = DataStream::SortScope::Stream; + const size_t used_prefix_of_sorting_key_size = input_order_info->used_prefix_of_sorting_key_size; + if (sort_description.size() > used_prefix_of_sorting_key_size) + sort_description.resize(used_prefix_of_sorting_key_size); + } + else + output_stream.sort_scope = DataStream::SortScope::Chunk; + } + + output_stream.sort_description = std::move(sort_description); +} + void ReadFromMergeTree::AnalysisResult::checkLimits(const Settings & settings, const SelectQueryInfo & query_info_) const { @@ -244,33 +273,11 @@ ReadFromMergeTree::ReadFromMergeTree( /// Add explicit description. 
setStepDescription(data.getStorageID().getFullNameNotQuoted()); - { /// build sort description for output stream - SortDescription sort_description; - const Names & sorting_key_columns = storage_snapshot->getMetadataForQuery()->getSortingKeyColumns(); - const Block & header = output_stream->header; - const int sort_direction = getSortDirection(); - for (const auto & column_name : sorting_key_columns) - { - if (std::find_if(header.begin(), header.end(), [&](ColumnWithTypeAndName const & col) { return col.name == column_name; }) - == header.end()) - break; - sort_description.emplace_back(column_name, sort_direction); - } - if (!sort_description.empty()) - { - if (query_info.getInputOrderInfo()) - { - output_stream->sort_scope = DataStream::SortScope::Stream; - const size_t used_prefix_of_sorting_key_size = query_info.getInputOrderInfo()->used_prefix_of_sorting_key_size; - if (sort_description.size() > used_prefix_of_sorting_key_size) - sort_description.resize(used_prefix_of_sorting_key_size); - } - else - output_stream->sort_scope = DataStream::SortScope::Chunk; - } - - output_stream->sort_description = std::move(sort_description); - } + updateSortDescriptionForOutputStream( + *output_stream, + storage_snapshot->getMetadataForQuery()->getSortingKeyColumns(), + getSortDirection(), + query_info.getInputOrderInfo()); } @@ -1425,6 +1432,11 @@ void ReadFromMergeTree::updatePrewhereInfo(const PrewhereInfoPtr & prewhere_info prewhere_info_value, data.getPartitionValueType(), virt_column_names)}; + updateSortDescriptionForOutputStream( + *output_stream, + storage_snapshot->getMetadataForQuery()->getSortingKeyColumns(), + getSortDirection(), + query_info.getInputOrderInfo()); } bool ReadFromMergeTree::requestOutputEachPartitionThroughSeparatePort() From 5d18343fb8ac1b0cae8085a660b8c995b9e33ea2 Mon Sep 17 00:00:00 2001 From: zvonand Date: Wed, 12 Apr 2023 00:15:07 +0200 Subject: [PATCH 0052/1997] fixed delay --- src/Client/ClientBase.cpp | 36 +++++++++++++++++------------------- src/Server/TCPHandler.cpp | 12 ++++++------ 2 files changed, 23 insertions(+), 25 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index e3e0364523a..8da4ac200d9 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -65,7 +65,7 @@ #include #include -#include +//#include #include #include #include @@ -74,7 +74,7 @@ #include "config_version.h" #include "config.h" -#include +//#include namespace fs = std::filesystem; @@ -97,18 +97,18 @@ const void* getContextPtrOrNull(ContetGetterFunc contextFunc) return nullptr; } -void LogContextes(const std::string_view scope, const ContextPtr global_context) -{ - const auto * context = global_context.get(); - std::cerr << scope << " contextes" - << "\n\tglobal: " << reinterpret_cast(context) - << "\n\tsession: " << getContextPtrOrNull([&]() { return context ? context->getSessionContext() : nullptr; }) - << "\n\tquery: " << getContextPtrOrNull([&]() { return context ? 
context->getQueryContext() : nullptr; }) - << "\n\tcurrent T query: " << getContextPtrOrNull([&]() { return DB::CurrentThread::get().getQueryContext(); }) - << "\n\tcurrent T global: " << getContextPtrOrNull([&]() { return DB::CurrentThread::get().getGlobalContext(); }) -// << "\n\tbuffer: " << getContextPtrOrNull(context, &Context::getBufferContext) - << std::endl; -} +//void LogContextes(const std::string_view scope, const ContextPtr global_context) +//{ +// const auto * context = global_context.get(); +// std::cerr << scope << " contextes" +// << "\n\tglobal: " << reinterpret_cast(context) +// << "\n\tsession: " << getContextPtrOrNull([&]() { return context ? context->getSessionContext() : nullptr; }) +// << "\n\tquery: " << getContextPtrOrNull([&]() { return context ? context->getQueryContext() : nullptr; }) +// << "\n\tcurrent T query: " << getContextPtrOrNull([&]() { return DB::CurrentThread::get().getQueryContext(); }) +// << "\n\tcurrent T global: " << getContextPtrOrNull([&]() { return DB::CurrentThread::get().getGlobalContext(); }) +//// << "\n\tbuffer: " << getContextPtrOrNull(context, &Context::getBufferContext) +// << std::endl; +//} } @@ -473,11 +473,11 @@ void ClientBase::onData(Block & block, ASTPtr parsed_query) /// Also do not output too much data if we're fuzzing. if (block.rows() == 0 || (query_fuzzer_runs != 0 && processed_rows >= 100)) { - LogContextes("ClientBase::onData header", global_context); +// LogContextes("ClientBase::onData header", global_context); return; } - LogContextes("ClientBase::onData DATA block", global_context); +// LogContextes("ClientBase::onData DATA block", global_context); /// If results are written INTO OUTFILE, we can avoid clearing progress to avoid flicker. if (need_render_progress && tty_buf && (!select_into_file || select_into_file_and_stdout)) @@ -1088,14 +1088,12 @@ void ClientBase::onProgress(const Progress & value) void ClientBase::onTimezoneUpdate(const String & tz) { std::cerr << "ClientBase::onTimezoneUpdate received new TZ from server: " << tz << std::endl; - DateLUT::setDefaultTimezone(tz); Settings settings; settings.timezone = tz; global_context->applySettingsChanges(settings.changes()); -// DB::CurrentThread::get().getQueryContext()->applySettingsChanges(settings.changes()); - LogContextes("ClientBase::onTimezoneUpdate", global_context); +// LogContextes("ClientBase::onTimezoneUpdate", global_context); } diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 4d5402d65d5..6ff7acf025a 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -453,7 +453,7 @@ void TCPHandler::runImpl() if (isQueryCancelled()) return true; -// sendTimezone(); + sendTimezone(); sendProgress(); sendSelectProfileEvents(); sendLogs(); @@ -495,7 +495,7 @@ void TCPHandler::runImpl() { std::lock_guard lock(task_callback_mutex); -// sendTimezone(); + sendTimezone(); sendLogs(); sendEndOfStream(); } @@ -1069,14 +1069,14 @@ void TCPHandler::sendTimezone() query_context->getSettingsRef().timezone.toString(), (query_context->getSettingsRef().timezone.changed ? 
"changed" : "UNCHANGED")); - const String & tz = query_context->getSettingsRef().timezone.toString(); - if (!tz.empty()) - { + const String & tz = CurrentThread::get().getQueryContext()->getSettingsRef().timezone.toString(); +// if (!tz.empty()) +// { LOG_DEBUG(log, "Sent timezone: {}", tz); writeVarUInt(Protocol::Server::TimezoneUpdate, *out); writeStringBinary(tz, *out); out->next(); - } +// } } From a9499eed794731a3fed2305e4d5f0e3607815816 Mon Sep 17 00:00:00 2001 From: zvonand Date: Wed, 12 Apr 2023 12:47:05 +0200 Subject: [PATCH 0053/1997] moved getting server TZ DateLUT to separate place, upd tests and fix --- programs/copier/ClusterCopierApp.cpp | 2 +- programs/keeper/Keeper.cpp | 4 +-- programs/obfuscator/Obfuscator.cpp | 2 +- programs/server/Server.cpp | 4 +-- src/Client/ClientBase.cpp | 26 +++++++++---------- src/Common/DateLUT.h | 7 +++++ src/Daemon/BaseDaemon.cpp | 2 +- src/Functions/serverConstants.cpp | 2 +- src/Loggers/OwnPatternFormatter.h | 2 +- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 2 +- .../MergeTree/MergeFromLogEntryTask.cpp | 2 +- .../MergeTree/MergeTreeDataWriter.cpp | 6 ++--- .../MergeTree/MergeTreeMutationEntry.cpp | 2 +- src/Storages/MergeTree/MergeTreePartInfo.cpp | 4 +-- src/Storages/MergeTree/MergeTreePartition.cpp | 4 +-- .../MergeTree/ReplicatedMergeTreeLogEntry.cpp | 2 +- .../ReplicatedMergeTreeMutationEntry.cpp | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- src/Storages/WindowView/StorageWindowView.cpp | 2 +- .../02668_timezone_setting.reference | 3 --- .../0_stateless/02668_timezone_setting.sql | 11 -------- .../02681_timezone_setting.reference | 5 ++++ .../0_stateless/02681_timezone_setting.sql | 11 ++++++++ 23 files changed, 59 insertions(+), 50 deletions(-) delete mode 100644 tests/queries/0_stateless/02668_timezone_setting.reference delete mode 100644 tests/queries/0_stateless/02668_timezone_setting.sql create mode 100644 tests/queries/0_stateless/02681_timezone_setting.reference create mode 100644 tests/queries/0_stateless/02681_timezone_setting.sql diff --git a/programs/copier/ClusterCopierApp.cpp b/programs/copier/ClusterCopierApp.cpp index b2994b90e23..92657f81c2a 100644 --- a/programs/copier/ClusterCopierApp.cpp +++ b/programs/copier/ClusterCopierApp.cpp @@ -43,7 +43,7 @@ void ClusterCopierApp::initialize(Poco::Util::Application & self) time_t timestamp = Poco::Timestamp().epochTime(); auto curr_pid = Poco::Process::id(); - process_id = std::to_string(DateLUT::instance().toNumYYYYMMDDhhmmss(timestamp)) + "_" + std::to_string(curr_pid); + process_id = std::to_string(DateLUT::serverTimezoneInstance().toNumYYYYMMDDhhmmss(timestamp)) + "_" + std::to_string(curr_pid); host_id = escapeForFileName(getFQDNOrHostName()) + '#' + process_id; process_path = fs::weakly_canonical(fs::path(base_dir) / ("clickhouse-copier_" + process_id)); fs::create_directories(process_path); diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index ed3297ed7cb..58a87057363 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -399,8 +399,8 @@ try /// Initialize DateLUT early, to not interfere with running time of first query. 
LOG_DEBUG(log, "Initializing DateLUT."); - DateLUT::instance(); - LOG_TRACE(log, "Initialized DateLUT with time zone '{}'.", DateLUT::instance().getTimeZone()); + DateLUT::serverTimezoneInstance(); + LOG_TRACE(log, "Initialized DateLUT with time zone '{}'.", DateLUT::serverTimezoneInstance().getTimeZone()); /// Don't want to use DNS cache DNSResolver::instance().setDisableCacheFlag(); diff --git a/programs/obfuscator/Obfuscator.cpp b/programs/obfuscator/Obfuscator.cpp index 274ad29a174..9b7f2c424d3 100644 --- a/programs/obfuscator/Obfuscator.cpp +++ b/programs/obfuscator/Obfuscator.cpp @@ -492,7 +492,7 @@ private: const DateLUTImpl & date_lut; public: - explicit DateTimeModel(UInt64 seed_) : seed(seed_), date_lut(DateLUT::instance()) {} + explicit DateTimeModel(UInt64 seed_) : seed(seed_), date_lut(DateLUT::serverTimezoneInstance()) {} void train(const IColumn &) override {} void finalize() override {} diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 711dfb3820a..23113686aa1 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1041,8 +1041,8 @@ try /// Initialize DateLUT early, to not interfere with running time of first query. LOG_DEBUG(log, "Initializing DateLUT."); - DateLUT::instance(); - LOG_TRACE(log, "Initialized DateLUT with time zone '{}'.", DateLUT::instance().getTimeZone()); + DateLUT::serverTimezoneInstance(); + LOG_TRACE(log, "Initialized DateLUT with time zone '{}'.", DateLUT::serverTimezoneInstance().getTimeZone()); /// Storage with temporary data for processing of heavy queries. if (!server_settings.tmp_policy.value.empty()) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 8da4ac200d9..7a91a382787 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -84,18 +84,18 @@ using namespace std::literals; namespace { using namespace DB; -using ContetGetterFunc = std::function const; -const void* getContextPtrOrNull(ContetGetterFunc contextFunc) -{ - try - { - return contextFunc().get(); - } - catch(...) - { - } - return nullptr; -} +//using ContetGetterFunc = std::function const; +//const void* getContextPtrOrNull(ContetGetterFunc contextFunc) +//{ +// try +// { +// return contextFunc().get(); +// } +// catch(...) +// { +// } +// return nullptr; +//} //void LogContextes(const std::string_view scope, const ContextPtr global_context) //{ @@ -1087,7 +1087,7 @@ void ClientBase::onProgress(const Progress & value) void ClientBase::onTimezoneUpdate(const String & tz) { - std::cerr << "ClientBase::onTimezoneUpdate received new TZ from server: " << tz << std::endl; +// std::cerr << "ClientBase::onTimezoneUpdate received new TZ from server: " << tz << std::endl; Settings settings; settings.timezone = tz; diff --git a/src/Common/DateLUT.h b/src/Common/DateLUT.h index f17fe772dbc..810810edb6c 100644 --- a/src/Common/DateLUT.h +++ b/src/Common/DateLUT.h @@ -17,6 +17,13 @@ class DateLUT : private boost::noncopyable { public: + /// Return singleton DateLUTImpl instance for server's (native) time zone. + static ALWAYS_INLINE const DateLUTImpl & serverTimezoneInstance() + { + const auto & date_lut = getInstance(); + return *date_lut.default_impl.load(std::memory_order_acquire); + } + /// Return singleton DateLUTImpl instance for timezone set by `timezone` setting for current session is used. /// If it is not set, server's timezone (the one which server has) is being used. 
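/// Editor's note (annotation): per the implementation shown later in this
/// series, instance() resolves in order: (1) the `timezone` setting of the
/// current query context, (2) the setting of the thread's global context (the
/// client path, where no query context exists), and finally (3) the server
/// default LUT, which is exactly what serverTimezoneInstance() above returns
/// unconditionally. Invalid names never reach this lookup: SettingFieldTimezone
/// validates against cctz at SET time, so e.g. SET timezone = 'Абырвалг' fails
/// with BAD_ARGUMENTS (tests/queries/0_stateless/02681_timezone_setting.sql).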
static ALWAYS_INLINE const DateLUTImpl & instance() diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index 18c4c0d97a0..c6b5be3ea87 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -1003,7 +1003,7 @@ void BaseDaemon::shouldSetupWatchdog(char * argv0_) void BaseDaemon::setupWatchdog() { /// Initialize in advance to avoid double initialization in forked processes. - DateLUT::instance(); + DateLUT::serverTimezoneInstance(); std::string original_process_name; if (argv0) diff --git a/src/Functions/serverConstants.cpp b/src/Functions/serverConstants.cpp index 57a6279bd7a..5d54815818d 100644 --- a/src/Functions/serverConstants.cpp +++ b/src/Functions/serverConstants.cpp @@ -75,7 +75,7 @@ namespace public: static constexpr auto name = "serverTimezone"; static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - explicit FunctionServerTimezone(ContextPtr context) : FunctionConstantBase(String{DateLUT::instance("").getTimeZone()}, context->isDistributed()) {} + explicit FunctionServerTimezone(ContextPtr context) : FunctionConstantBase(String{DateLUT::serverTimezoneInstance().getTimeZone()}, context->isDistributed()) {} }; diff --git a/src/Loggers/OwnPatternFormatter.h b/src/Loggers/OwnPatternFormatter.h index 07d0409b0ae..8b0d11bcec1 100644 --- a/src/Loggers/OwnPatternFormatter.h +++ b/src/Loggers/OwnPatternFormatter.h @@ -31,6 +31,6 @@ public: virtual void formatExtended(const DB::ExtendedLogMessage & msg_ext, std::string & text) const; private: - const DateLUTImpl & server_timezone = DateLUT::instance(""); + const DateLUTImpl & server_timezone = DateLUT::serverTimezoneInstance(); bool color; }; diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 85420cabb8d..f50a7169d39 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1030,7 +1030,7 @@ void IMergeTreeDataPart::loadPartitionAndMinMaxIndex() DayNum max_date; MergeTreePartInfo::parseMinMaxDatesFromPartName(name, min_date, max_date); - const auto & date_lut = DateLUT::instance(); + const auto & date_lut = DateLUT::serverTimezoneInstance(); partition = MergeTreePartition(date_lut.toNumYYYYMM(min_date)); minmax_idx = std::make_shared(min_date, max_date); } diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp index e017c9681e8..28e30b5f64f 100644 --- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp @@ -57,7 +57,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare() { LOG_INFO(log, "Will try to fetch part {} until '{}' because this part assigned to recompression merge. " "Source replica {} will try to merge this part first", entry.new_part_name, - DateLUT::instance().timeToString(entry.create_time + storage_settings_ptr->try_fetch_recompressed_part_timeout.totalSeconds()), entry.source_replica); + DateLUT::serverTimezoneInstance().timeToString(entry.create_time + storage_settings_ptr->try_fetch_recompressed_part_timeout.totalSeconds()), entry.source_replica); /// Waiting other replica to recompress part. No need to check it. 
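// Editor's note (annotation): the timestamp rendered into the log message above
// now goes through the server LUT explicitly, so a session-level `timezone`
// setting cannot change how replication-related times are formatted; presumably
// the same motivation applies to the other call sites this commit migrates
// (mutation entries, replication log entries, partition and part-name helpers).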
return PrepareResult{ .prepared_successfully = false, diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 37cfe4d065e..48a1cb97c89 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -115,7 +115,7 @@ void updateTTL( if (const ColumnUInt16 * column_date = typeid_cast(ttl_column.get())) { - const auto & date_lut = DateLUT::instance(); + const auto & date_lut = DateLUT::serverTimezoneInstance(); for (const auto & val : column_date->getData()) ttl_info.update(date_lut.fromDayNum(DayNum(val))); } @@ -128,7 +128,7 @@ void updateTTL( { if (typeid_cast(&column_const->getDataColumn())) { - const auto & date_lut = DateLUT::instance(); + const auto & date_lut = DateLUT::serverTimezoneInstance(); ttl_info.update(date_lut.fromDayNum(DayNum(column_const->getValue()))); } else if (typeid_cast(&column_const->getDataColumn())) @@ -369,7 +369,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl( DayNum min_date(minmax_idx->hyperrectangle[data.minmax_idx_date_column_pos].left.get()); DayNum max_date(minmax_idx->hyperrectangle[data.minmax_idx_date_column_pos].right.get()); - const auto & date_lut = DateLUT::instance(); + const auto & date_lut = DateLUT::serverTimezoneInstance(); auto min_month = date_lut.toNumYYYYMM(min_date); auto max_month = date_lut.toNumYYYYMM(max_date); diff --git a/src/Storages/MergeTree/MergeTreeMutationEntry.cpp b/src/Storages/MergeTree/MergeTreeMutationEntry.cpp index 2e30a3f3986..2c0359b0f3f 100644 --- a/src/Storages/MergeTree/MergeTreeMutationEntry.cpp +++ b/src/Storages/MergeTree/MergeTreeMutationEntry.cpp @@ -127,7 +127,7 @@ MergeTreeMutationEntry::MergeTreeMutationEntry(DiskPtr disk_, const String & pat LocalDateTime create_time_dt; *buf >> "create time: " >> create_time_dt >> "\n"; - create_time = DateLUT::instance().makeDateTime( + create_time = DateLUT::serverTimezoneInstance().makeDateTime( create_time_dt.year(), create_time_dt.month(), create_time_dt.day(), create_time_dt.hour(), create_time_dt.minute(), create_time_dt.second()); diff --git a/src/Storages/MergeTree/MergeTreePartInfo.cpp b/src/Storages/MergeTree/MergeTreePartInfo.cpp index 84432a293d7..e1b52d8a7b7 100644 --- a/src/Storages/MergeTree/MergeTreePartInfo.cpp +++ b/src/Storages/MergeTree/MergeTreePartInfo.cpp @@ -148,7 +148,7 @@ void MergeTreePartInfo::parseMinMaxDatesFromPartName(const String & part_name, D throw Exception(ErrorCodes::BAD_DATA_PART_NAME, "Unexpected part name: {}", part_name); } - const auto & date_lut = DateLUT::instance(); + const auto & date_lut = DateLUT::serverTimezoneInstance(); min_date = date_lut.YYYYMMDDToDayNum(min_yyyymmdd); max_date = date_lut.YYYYMMDDToDayNum(max_yyyymmdd); @@ -219,7 +219,7 @@ String MergeTreePartInfo::getPartNameV1() const String MergeTreePartInfo::getPartNameV0(DayNum left_date, DayNum right_date) const { - const auto & date_lut = DateLUT::instance(); + const auto & date_lut = DateLUT::serverTimezoneInstance(); /// Directory name for the part has form: `YYYYMMDD_YYYYMMDD_N_N_L`. 
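Editor's note (annotation): V0 part names embed dates as YYYYMMDD, so the
formatting and parsing sides must agree on one fixed LUT no matter what the
session timezone is. A minimal illustrative fragment of the invariant this
commit preserves (not from the patch; it only uses the DateLUTImpl calls
visible in the hunks above):

    const auto & lut = DateLUT::serverTimezoneInstance();
    const DayNum day = lut.YYYYMMDDToDayNum(20221212);
    assert(lut.toNumYYYYMMDD(day) == 20221212); /// same result on every replica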
diff --git a/src/Storages/MergeTree/MergeTreePartition.cpp b/src/Storages/MergeTree/MergeTreePartition.cpp index 3b28012e7d6..b0fc34ac2f7 100644 --- a/src/Storages/MergeTree/MergeTreePartition.cpp +++ b/src/Storages/MergeTree/MergeTreePartition.cpp @@ -239,7 +239,7 @@ String MergeTreePartition::getID(const Block & partition_key_sample) const result += '-'; if (typeid_cast(partition_key_sample.getByPosition(i).type.get())) - result += toString(DateLUT::instance().toNumYYYYMMDD(DayNum(value[i].safeGet()))); + result += toString(DateLUT::serverTimezoneInstance().toNumYYYYMMDD(DayNum(value[i].safeGet()))); else if (typeid_cast(partition_key_sample.getByPosition(i).type.get())) result += toString(value[i].get().toUnderType()); else @@ -320,7 +320,7 @@ std::optional MergeTreePartition::tryParseValueFromID(const String & partit throw Exception( ErrorCodes::INVALID_PARTITION_VALUE, "Cannot parse partition_id: got unexpected Date: {}", date_yyyymmdd); - UInt32 date = DateLUT::instance().YYYYMMDDToDayNum(date_yyyymmdd); + UInt32 date = DateLUT::serverTimezoneInstance().YYYYMMDDToDayNum(date_yyyymmdd); res.emplace_back(date); break; } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp index 79b0beb0933..ac956433eab 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp @@ -199,7 +199,7 @@ void ReplicatedMergeTreeLogEntryData::readText(ReadBuffer & in, MergeTreeDataFor { LocalDateTime create_time_dt; in >> "create_time: " >> create_time_dt >> "\n"; - create_time = DateLUT::instance().makeDateTime( + create_time = DateLUT::serverTimezoneInstance().makeDateTime( create_time_dt.year(), create_time_dt.month(), create_time_dt.day(), create_time_dt.hour(), create_time_dt.minute(), create_time_dt.second()); } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeMutationEntry.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeMutationEntry.cpp index 1efb3f6826b..17f3637e722 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeMutationEntry.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeMutationEntry.cpp @@ -38,7 +38,7 @@ void ReplicatedMergeTreeMutationEntry::readText(ReadBuffer & in) LocalDateTime create_time_dt; in >> "create time: " >> create_time_dt >> "\n"; - create_time = DateLUT::instance().makeDateTime( + create_time = DateLUT::serverTimezoneInstance().makeDateTime( create_time_dt.year(), create_time_dt.month(), create_time_dt.day(), create_time_dt.hour(), create_time_dt.minute(), create_time_dt.second()); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index fe4a144deaa..356663496a6 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -5318,7 +5318,7 @@ String getPartNamePossiblyFake(MergeTreeDataFormatVersion format_version, const if (format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { /// The date range is all month long. 
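// Editor's note (annotation): for format V0 the partition_id is just YYYYMM,
// so the code below rebuilds the month-long [left_date, right_date] range by
// parsing partition_id + "01" and adding daysInMonth() - 1; routing this
// through the server LUT keeps the reconstructed range identical on every
// replica regardless of per-session timezone settings.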
- const auto & lut = DateLUT::instance(); + const auto & lut = DateLUT::serverTimezoneInstance(); time_t start_time = lut.YYYYMMDDToDate(parse(part_info.partition_id + "01")); DayNum left_date = DayNum(lut.toDayNum(start_time).toUnderType()); DayNum right_date = DayNum(static_cast(left_date) + lut.daysInMonth(start_time) - 1); diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 3471e4ea6bf..8546fdd3c9f 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -1335,7 +1335,7 @@ ASTPtr StorageWindowView::innerQueryParser(const ASTSelectQuery & query) time_zone = &DateLUT::instance(window_view_timezone); } else - time_zone = &DateLUT::instance(); + time_zone = &DateLUT::serverTimezoneInstance(); return result; } diff --git a/tests/queries/0_stateless/02668_timezone_setting.reference b/tests/queries/0_stateless/02668_timezone_setting.reference deleted file mode 100644 index 8ed8024f652..00000000000 --- a/tests/queries/0_stateless/02668_timezone_setting.reference +++ /dev/null @@ -1,3 +0,0 @@ -1999-12-12 18:23:23.123 -1999-12-12 23:23:23.123 -1999-12-13 04:23:23.123 diff --git a/tests/queries/0_stateless/02668_timezone_setting.sql b/tests/queries/0_stateless/02668_timezone_setting.sql deleted file mode 100644 index d85efaa8a39..00000000000 --- a/tests/queries/0_stateless/02668_timezone_setting.sql +++ /dev/null @@ -1,11 +0,0 @@ -SET timezone = 'Абырвалг'; -- { serverError BAD_ARGUMENTS} - -SET timezone = 'Asia/Novosibirsk'; -SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Europe/Zurich'); -SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Europe/Zurich') SETTINGS timezone = 'Europe/Zurich'; - -SET timezone = 'Asia/Manila'; -SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Asia/Novosibirsk'); - -SELECT timezone(), serverTimeZone(), timezoneOf(now()) SETTINGS timezone = 'Europe/Zurich'; -SELECT timezone(), serverTimeZone(), timezoneOf(now()) SETTINGS timezone = 'Pacific/Pitcairn'; diff --git a/tests/queries/0_stateless/02681_timezone_setting.reference b/tests/queries/0_stateless/02681_timezone_setting.reference new file mode 100644 index 00000000000..8850d77ab03 --- /dev/null +++ b/tests/queries/0_stateless/02681_timezone_setting.reference @@ -0,0 +1,5 @@ +2022-12-12 17:23:23.123 +2022-12-12 23:23:23.123 +2022-12-12 22:23:23.123 +Europe/Zurich Europe/Zurich +Pacific/Pitcairn Pacific/Pitcairn diff --git a/tests/queries/0_stateless/02681_timezone_setting.sql b/tests/queries/0_stateless/02681_timezone_setting.sql new file mode 100644 index 00000000000..73afb4c029b --- /dev/null +++ b/tests/queries/0_stateless/02681_timezone_setting.sql @@ -0,0 +1,11 @@ +SET timezone = 'Абырвалг'; -- { serverError BAD_ARGUMENTS} + +SET timezone = 'Asia/Novosibirsk'; +SELECT toDateTime64(toDateTime64('2022-12-12 23:23:23.123', 3), 3, 'Europe/Zurich'); +SELECT toDateTime64(toDateTime64('2022-12-12 23:23:23.123', 3), 3, 'Europe/Zurich') SETTINGS timezone = 'Europe/Zurich'; + +SET timezone = 'Asia/Manila'; +SELECT toDateTime64(toDateTime64('2022-12-12 23:23:23.123', 3), 3, 'Asia/Novosibirsk'); + +SELECT timezone(), timezoneOf(now()) SETTINGS timezone = 'Europe/Zurich' FORMAT TSV; +SELECT timezone(), timezoneOf(now()) SETTINGS timezone = 'Pacific/Pitcairn' FORMAT TSV; From daef5d818a5cdc6e358efaa49e56d90ee530e6bf Mon Sep 17 00:00:00 2001 From: zvonand Date: Wed, 12 Apr 2023 15:31:58 +0200 Subject: [PATCH 0054/1997] fix according to updates in 
ThreadStatus.h --- programs/client/Client.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index aa563198c82..528c504e555 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -325,9 +325,8 @@ try { // All that just to set DB::CurrentThread::get().getGlobalContext() // which is required for client timezone (pushed as from server) to work. - auto thread_group = std::make_shared(); - thread_group->global_context = global_context; - thread_status.attachQuery(thread_group, false); + auto thread_group = std::make_shared(); + thread_status.attachToGroup(thread_group, false); } /// Includes delayed_interactive. From 3f8956f854253a5b17c6fa4163372f7e0f6cf664 Mon Sep 17 00:00:00 2001 From: zvonand Date: Wed, 12 Apr 2023 17:45:11 +0200 Subject: [PATCH 0055/1997] remove additional logging --- src/Client/ClientBase.cpp | 30 ------------------------------ src/Functions/timezoneOf.cpp | 14 -------------- src/Interpreters/Context.cpp | 8 -------- src/Server/TCPHandler.cpp | 24 +++++++----------------- 4 files changed, 7 insertions(+), 69 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index d722d39e8f6..f4253ab90f6 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -66,7 +66,6 @@ #include #include -//#include #include #include #include @@ -75,9 +74,6 @@ #include "config_version.h" #include "config.h" -//#include - - namespace fs = std::filesystem; using namespace std::literals; @@ -85,32 +81,6 @@ using namespace std::literals; namespace { using namespace DB; -//using ContetGetterFunc = std::function const; -//const void* getContextPtrOrNull(ContetGetterFunc contextFunc) -//{ -// try -// { -// return contextFunc().get(); -// } -// catch(...) -// { -// } -// return nullptr; -//} - -//void LogContextes(const std::string_view scope, const ContextPtr global_context) -//{ -// const auto * context = global_context.get(); -// std::cerr << scope << " contextes" -// << "\n\tglobal: " << reinterpret_cast(context) -// << "\n\tsession: " << getContextPtrOrNull([&]() { return context ? context->getSessionContext() : nullptr; }) -// << "\n\tquery: " << getContextPtrOrNull([&]() { return context ? context->getQueryContext() : nullptr; }) -// << "\n\tcurrent T query: " << getContextPtrOrNull([&]() { return DB::CurrentThread::get().getQueryContext(); }) -// << "\n\tcurrent T global: " << getContextPtrOrNull([&]() { return DB::CurrentThread::get().getGlobalContext(); }) -//// << "\n\tbuffer: " << getContextPtrOrNull(context, &Context::getBufferContext) -// << std::endl; -//} - } namespace CurrentMetrics diff --git a/src/Functions/timezoneOf.cpp b/src/Functions/timezoneOf.cpp index ce419b7b4cd..7a5957a5dbc 100644 --- a/src/Functions/timezoneOf.cpp +++ b/src/Functions/timezoneOf.cpp @@ -5,11 +5,6 @@ #include #include #include -#include "Poco/Logger.h" - -#include -#include -#include namespace DB { @@ -56,15 +51,6 @@ public: { DataTypePtr type_no_nullable = removeNullable(arguments[0].type); - { - const auto query_context = DB::CurrentThread::get().getQueryContext(); - - LOG_DEBUG(&Poco::Logger::get("Function timezoneOf"), "query context: {}, timezone: {} ({})", - reinterpret_cast(query_context.get()), - query_context->getSettingsRef().timezone.toString(), - (query_context->getSettingsRef().timezone.changed ? 
"changed" : "UNCHANGED")); - } - return DataTypeString().createColumnConst(input_rows_count, dynamic_cast(*type_no_nullable).getTimeZone().getTimeZone()); } diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 9e0b1dfd032..e888902ae29 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -119,8 +119,6 @@ #include #endif -#include - namespace fs = std::filesystem; namespace ProfileEvents @@ -1683,12 +1681,6 @@ void Context::applySettingChange(const SettingChange & change) void Context::applySettingsChanges(const SettingsChanges & changes) { auto lock = getLock(); - LOG_DEBUG(shared->log, "Context::applySettingsChanges {} applying settings changes: {}", reinterpret_cast(this), - fmt::join(std::ranges::transform_view(changes, - [](const SettingChange & change) - { - return change.name + ": " + change.value.dump(); - }), ", ")); for (const SettingChange & change : changes) applySettingChange(change); diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index a48097a649f..9a1b64eaf89 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -767,7 +767,6 @@ void TCPHandler::processInsertQuery() /// Send block to the client - table structure. sendData(executor.getHeader()); - sendTimezone(); sendLogs(); while (readDataNext()) @@ -1070,24 +1069,15 @@ void TCPHandler::sendInsertProfileEvents() void TCPHandler::sendTimezone() { -// if (client_tcp_protocol_version <= DBMS_MIN_PROTOCOL_VERSION_WITH_TIMEZONE_UPDATES -// || client_tcp_protocol_version <= DBMS_MIN_REVISION_WITH_SERVER_TIMEZONE) -// return; + if (client_tcp_protocol_version < DBMS_MIN_PROTOCOL_VERSION_WITH_TIMEZONE_UPDATES) + return; -// const String & tz = CurrentThread::get().getQueryContext()->getSettingsRef().timezone.toString(); - LOG_DEBUG(log, "TCPHandler::sendTimezone() query context: {}, timezone: {} ({})", - reinterpret_cast(query_context.get()), - query_context->getSettingsRef().timezone.toString(), - (query_context->getSettingsRef().timezone.changed ? 
"changed" : "UNCHANGED")); + const String & tz = query_context->getSettingsRef().timezone.toString(); - const String & tz = CurrentThread::get().getQueryContext()->getSettingsRef().timezone.toString(); -// if (!tz.empty()) -// { - LOG_DEBUG(log, "Sent timezone: {}", tz); - writeVarUInt(Protocol::Server::TimezoneUpdate, *out); - writeStringBinary(tz, *out); - out->next(); -// } + LOG_DEBUG(log, "TCPHandler::sendTimezone(): {}", tz); + writeVarUInt(Protocol::Server::TimezoneUpdate, *out); + writeStringBinary(tz, *out); + out->next(); } From 73675cd8d29ffd3c5e1b1a57a023ee1ac946ef8c Mon Sep 17 00:00:00 2001 From: zvonand Date: Wed, 12 Apr 2023 19:17:48 +0200 Subject: [PATCH 0056/1997] tryfix fasttest --- src/Server/TCPHandler.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 9a1b64eaf89..152c7aba56e 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -458,7 +458,6 @@ void TCPHandler::runImpl() if (getQueryCancellationStatus() == CancellationStatus::FULLY_CANCELLED) return true; - sendTimezone(); sendProgress(); sendSelectProfileEvents(); sendLogs(); @@ -811,7 +810,6 @@ void TCPHandler::processOrdinaryQueryWithProcessors() { std::lock_guard lock(task_callback_mutex); sendData(header); - sendTimezone(); } } From 981a73cd867c435c74adf06cece8ec279fb8fde8 Mon Sep 17 00:00:00 2001 From: zvonand Date: Wed, 12 Apr 2023 21:20:12 +0200 Subject: [PATCH 0057/1997] upd remotequeryexecutor on receive timezone --- src/QueryPipeline/RemoteQueryExecutor.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/QueryPipeline/RemoteQueryExecutor.cpp b/src/QueryPipeline/RemoteQueryExecutor.cpp index b7490a2ad9c..23c1412dc76 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.cpp +++ b/src/QueryPipeline/RemoteQueryExecutor.cpp @@ -449,6 +449,9 @@ RemoteQueryExecutor::ReadResult RemoteQueryExecutor::processPacket(Packet packet throw Exception(ErrorCodes::SYSTEM_ERROR, "Could not push into profile queue"); break; + case Protocol::Server::TimezoneUpdate: + break; + default: got_unknown_packet_from_replica = true; throw Exception( @@ -546,6 +549,9 @@ void RemoteQueryExecutor::finish(std::unique_ptr * read_context) if (!profile_queue->emplace(std::move(packet.block))) throw Exception(ErrorCodes::SYSTEM_ERROR, "Could not push into profile queue"); break; + + case Protocol::Server::TimezoneUpdate: + break; default: got_unknown_packet_from_replica = true; From 0c616ac0a287ce017c18561013b87e576ac8e74b Mon Sep 17 00:00:00 2001 From: zvonand Date: Wed, 12 Apr 2023 21:31:13 +0200 Subject: [PATCH 0058/1997] fix style --- src/QueryPipeline/RemoteQueryExecutor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/QueryPipeline/RemoteQueryExecutor.cpp b/src/QueryPipeline/RemoteQueryExecutor.cpp index 23c1412dc76..56b5357c522 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.cpp +++ b/src/QueryPipeline/RemoteQueryExecutor.cpp @@ -549,7 +549,7 @@ void RemoteQueryExecutor::finish(std::unique_ptr * read_context) if (!profile_queue->emplace(std::move(packet.block))) throw Exception(ErrorCodes::SYSTEM_ERROR, "Could not push into profile queue"); break; - + case Protocol::Server::TimezoneUpdate: break; From 1ec32d374d8872aedd2f13bfdbbb263d98feed17 Mon Sep 17 00:00:00 2001 From: zvonand Date: Wed, 12 Apr 2023 22:27:54 +0200 Subject: [PATCH 0059/1997] update Timezone packet processing --- src/Client/HedgedConnections.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Client/HedgedConnections.cpp 
b/src/Client/HedgedConnections.cpp index 13e4fe75b3d..d11954f3838 100644 --- a/src/Client/HedgedConnections.cpp +++ b/src/Client/HedgedConnections.cpp @@ -418,6 +418,7 @@ Packet HedgedConnections::receivePacketFromReplica(const ReplicaLocation & repli } replica_with_last_received_packet = replica_location; break; + case Protocol::Server::TimezoneUpdate: case Protocol::Server::PartUUIDs: case Protocol::Server::ProfileInfo: case Protocol::Server::Totals: From 16292eb5a18d1f410421217461a7b3e44b39dbec Mon Sep 17 00:00:00 2001 From: zvonand Date: Wed, 12 Apr 2023 22:54:51 +0200 Subject: [PATCH 0060/1997] update timezone packet handling in remote inserter --- src/QueryPipeline/RemoteInserter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/QueryPipeline/RemoteInserter.cpp b/src/QueryPipeline/RemoteInserter.cpp index b8a878b56c3..134c169e35f 100644 --- a/src/QueryPipeline/RemoteInserter.cpp +++ b/src/QueryPipeline/RemoteInserter.cpp @@ -130,7 +130,7 @@ void RemoteInserter::onFinish() break; else if (Protocol::Server::Exception == packet.type) packet.exception->rethrow(); - else if (Protocol::Server::Log == packet.type) + else if (Protocol::Server::Log == packet.type || Protocol::Server::TimezoneUpdate == packet.type) { // Do nothing } From bac5fbc3d2cf1b9606d66543244036797221a4b3 Mon Sep 17 00:00:00 2001 From: zvonand Date: Thu, 13 Apr 2023 13:26:09 +0200 Subject: [PATCH 0061/1997] fix error on connection drop after 1st query --- src/Client/ClientBase.cpp | 4 ---- src/Client/Connection.cpp | 4 ++++ src/Server/TCPHandler.cpp | 4 +++- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index f4253ab90f6..2e82144e64d 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1074,13 +1074,9 @@ void ClientBase::onProgress(const Progress & value) void ClientBase::onTimezoneUpdate(const String & tz) { -// std::cerr << "ClientBase::onTimezoneUpdate received new TZ from server: " << tz << std::endl; - Settings settings; settings.timezone = tz; global_context->applySettingsChanges(settings.changes()); - -// LogContextes("ClientBase::onTimezoneUpdate", global_context); } diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index a0025eafd64..08549265848 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -335,6 +335,10 @@ void Connection::receiveHello() nonce.emplace(read_nonce); } } + else if (packet_type == Protocol::Server::TimezoneUpdate) + { + // skip this packet at hello, will receive and process it later + } else if (packet_type == Protocol::Server::Exception) receiveException()->rethrow(); else diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 152c7aba56e..a875507d227 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -183,8 +183,11 @@ void TCPHandler::runImpl() /// User will be authenticated here. It will also set settings from user profile into connection_context. 
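/// Editor's note (annotation): the "connection drop after 1st query" fix above
/// guards both ends of the handshake: Connection::receiveHello() on the client
/// now skips an early TimezoneUpdate packet instead of treating it as an
/// unexpected reply, and the server drops the extra sendTimezone() before
/// sendEndOfStream(). More broadly, every client-side packet loop touched in
/// this series (HedgedConnections, RemoteQueryExecutor, RemoteInserter) gains a
/// TimezoneUpdate case, since a reader without one fails with an
/// unexpected-packet error.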
try { + LOG_DEBUG(log, "Before receiveHello"); receiveHello(); + LOG_DEBUG(log, "Before sendHello"); sendHello(); + LOG_DEBUG(log, "Before receiveAddendum"); if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_ADDENDUM) receiveAddendum(); @@ -499,7 +502,6 @@ void TCPHandler::runImpl() { std::lock_guard lock(task_callback_mutex); - sendTimezone(); sendLogs(); sendEndOfStream(); } From d5ea52e4b9b4005356f79e3eaadd4d6458fa116e Mon Sep 17 00:00:00 2001 From: zvonand Date: Fri, 14 Apr 2023 01:28:59 +0200 Subject: [PATCH 0062/1997] optimize --- src/Common/DateLUT.h | 11 +++++------ src/Core/SettingsFields.h | 6 +++--- src/DataTypes/TimezoneMixin.h | 2 +- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/src/Common/DateLUT.h b/src/Common/DateLUT.h index 810810edb6c..59b280240ea 100644 --- a/src/Common/DateLUT.h +++ b/src/Common/DateLUT.h @@ -28,11 +28,11 @@ public: /// If it is not set, server's timezone (the one which server has) is being used. static ALWAYS_INLINE const DateLUTImpl & instance() { - std::string effective_time_zone; const auto & date_lut = getInstance(); if (DB::CurrentThread::isInitialized()) { + std::string effective_time_zone; const auto query_context = DB::CurrentThread::get().getQueryContext(); if (query_context) @@ -43,6 +43,8 @@ public: return date_lut.getImplementation(effective_time_zone); } + /// Timezone is passed in query_context, but on CH-Client we have no query context, + /// and each time we modify client's global context const auto global_context = DB::CurrentThread::get().getGlobalContext(); if (global_context) { @@ -56,15 +58,12 @@ public: return *date_lut.default_impl.load(std::memory_order_acquire); } - /// Return singleton DateLUTImpl instance for a given time zone. If timezone is an empty string, - /// server's timezone is used. The `timezone` setting is not considered here. static ALWAYS_INLINE const DateLUTImpl & instance(const std::string & time_zone) { - const auto & date_lut = getInstance(); - if (time_zone.empty()) - return *date_lut.default_impl.load(std::memory_order_acquire); + return instance(); + const auto & date_lut = getInstance(); return date_lut.getImplementation(time_zone); } diff --git a/src/Core/SettingsFields.h b/src/Core/SettingsFields.h index 8e9ffe03008..0ee3ddd4862 100644 --- a/src/Core/SettingsFields.h +++ b/src/Core/SettingsFields.h @@ -576,10 +576,10 @@ struct SettingFieldTimezone private: cctz::time_zone validated_tz; - void validateTimezone(const std::string & str) + void validateTimezone(const std::string & tz_str) { - if (str != "" && !cctz::load_time_zone(str, &validated_tz)) - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Invalid time zone: {}", str); + if (!tz_str.empty() && !cctz::load_time_zone(tz_str, &validated_tz)) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Invalid time zone: {}", tz_str); } }; diff --git a/src/DataTypes/TimezoneMixin.h b/src/DataTypes/TimezoneMixin.h index 5b7870c7b9a..03ecde5dd0a 100644 --- a/src/DataTypes/TimezoneMixin.h +++ b/src/DataTypes/TimezoneMixin.h @@ -15,7 +15,7 @@ public: explicit TimezoneMixin(const String & time_zone_name = "") : has_explicit_time_zone(!time_zone_name.empty()) - , time_zone(time_zone_name.empty() ? 
DateLUT::instance() : DateLUT::instance(time_zone_name)) + , time_zone(DateLUT::instance(time_zone_name)) , utc_time_zone(DateLUT::instance("UTC")) { } From 267bbcab007d02748af2b2b18c63de73c4fa327b Mon Sep 17 00:00:00 2001 From: Aleksei Golub Date: Fri, 14 Apr 2023 00:09:57 +0300 Subject: [PATCH 0063/1997] Added ability to implicitly use file table function in clickhouse-local --- programs/local/LocalServer.cpp | 3 +- src/Databases/DatabaseFactory.cpp | 21 +- src/Databases/DatabaseFileSystem.cpp | 132 +++++++++ src/Databases/DatabaseFileSystem.h | 51 ++++ src/Databases/DatabasesOverlay.cpp | 267 ++++++++++++++++++ src/Databases/DatabasesOverlay.h | 68 +++++ ...cal_implicit_file_table_function.reference | 9 + ...ouse_local_implicit_file_table_function.sh | 43 +++ 8 files changed, 591 insertions(+), 3 deletions(-) create mode 100644 src/Databases/DatabaseFileSystem.cpp create mode 100644 src/Databases/DatabaseFileSystem.h create mode 100644 src/Databases/DatabasesOverlay.cpp create mode 100644 src/Databases/DatabasesOverlay.h create mode 100644 tests/queries/0_stateless/02707_clickhouse_local_implicit_file_table_function.reference create mode 100755 tests/queries/0_stateless/02707_clickhouse_local_implicit_file_table_function.sh diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 5768e744f94..566d11791ca 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -647,7 +648,7 @@ void LocalServer::processConfig() * if such tables will not be dropped, clickhouse-server will not be able to load them due to security reasons. */ std::string default_database = config().getString("default_database", "_local"); - DatabaseCatalog::instance().attachDatabase(default_database, std::make_shared(default_database, global_context)); + DatabaseCatalog::instance().attachDatabase(default_database, CreateClickHouseLocalDatabaseOverlay(default_database, global_context)); global_context->setCurrentDatabase(default_database); applyCmdOptions(global_context); diff --git a/src/Databases/DatabaseFactory.cpp b/src/Databases/DatabaseFactory.cpp index 89a799349bf..b023bb06ad1 100644 --- a/src/Databases/DatabaseFactory.cpp +++ b/src/Databases/DatabaseFactory.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -132,13 +133,13 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String static const std::unordered_set database_engines{"Ordinary", "Atomic", "Memory", "Dictionary", "Lazy", "Replicated", "MySQL", "MaterializeMySQL", "MaterializedMySQL", - "PostgreSQL", "MaterializedPostgreSQL", "SQLite"}; + "PostgreSQL", "MaterializedPostgreSQL", "SQLite", "FileSystem"}; if (!database_engines.contains(engine_name)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Database engine name `{}` does not exist", engine_name); static const std::unordered_set engines_with_arguments{"MySQL", "MaterializeMySQL", "MaterializedMySQL", - "Lazy", "Replicated", "PostgreSQL", "MaterializedPostgreSQL", "SQLite"}; + "Lazy", "Replicated", "PostgreSQL", "MaterializedPostgreSQL", "SQLite", "FileSystem"}; static const std::unordered_set engines_with_table_overrides{"MaterializeMySQL", "MaterializedMySQL", "MaterializedPostgreSQL"}; bool engine_may_have_arguments = engines_with_arguments.contains(engine_name); @@ -432,6 +433,22 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String return std::make_shared(context, engine_define, 
create.attach, database_path); } #endif + else if (engine_name == "FileSystem") { + const ASTFunction * engine = engine_define->engine; + + // If init_path is empty, then the current path from Poco will be used + std::string init_path; + + if (engine->arguments && engine->arguments->children.size() > 0) { + if (engine->arguments->children.size() != 1) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "FileSystem database requires at most 1 argument: file_system_path"); + + const auto & arguments = engine->arguments->children; + init_path = safeGetLiteralValue(arguments[0], engine_name); + } + + return std::make_shared(database_name, init_path, context); + } throw Exception(ErrorCodes::UNKNOWN_DATABASE_ENGINE, "Unknown database engine: {}", engine_name); } diff --git a/src/Databases/DatabaseFileSystem.cpp b/src/Databases/DatabaseFileSystem.cpp new file mode 100644 index 00000000000..9e2273970c3 --- /dev/null +++ b/src/Databases/DatabaseFileSystem.cpp @@ -0,0 +1,132 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +DatabaseFileSystem::DatabaseFileSystem(const String & name_, const String & path_, ContextPtr context_) + : IDatabase(name_), WithContext(context_->getGlobalContext()), path(path_), log(&Poco::Logger::get("DatabaseFileSystem(" + name_ + ")")) +{ + if (path.empty()) + path = Poco::Path::current(); +} + +std::string DatabaseFileSystem::getTablePath(const std::string& table_name) const +{ + return Poco::Path(path, table_name).toString(); +} + +void DatabaseFileSystem::addTable(const std::string& table_name, StoragePtr table_storage) const +{ + std::lock_guard lock(mutex); + loaded_tables.emplace(table_name, table_storage); +} + +bool DatabaseFileSystem::isTableExist(const String & name, ContextPtr) const +{ + { + std::lock_guard lock(mutex); + if (loaded_tables.find(name) != loaded_tables.end()) + return true; + } + + Poco::File table_file(getTablePath(name)); + return table_file.exists() && table_file.isFile(); +} + +StoragePtr DatabaseFileSystem::tryGetTable(const String & name, ContextPtr context_) const +{ + // Check if the table exists in the loaded tables map + { + std::lock_guard lock(mutex); + auto it = loaded_tables.find(name); + if (it != loaded_tables.end()) + return it->second; + } + + auto table_path = getTablePath(name); + + // If the table doesn't exist in the tables map, check if the corresponding file exists + Poco::File table_file(table_path); + if (!table_file.exists()) + return nullptr; + + // If the file exists, create a new table using TableFunctionFile and return it. 
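// Editor's note (annotation): the AST assembled below is the programmatic
// equivalent of the SQL table function call file('<table_path>'), so inside a
// FileSystem database a query like SELECT * FROM "data.csv" behaves like
// SELECT * FROM file('data.csv'); the storage produced by execute() is then
// cached via addTable() so later lookups hit loaded_tables directly.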
+ auto args = makeASTFunction("file", std::make_shared(table_path)); + + auto table_function = TableFunctionFactory::instance().get(args, context_); + if (!table_function) + return nullptr; + + auto table_storage = table_function->execute(args, context_, name); + if (table_storage) + addTable(name, table_storage); + + return table_storage; +} + +ASTPtr DatabaseFileSystem::getCreateDatabaseQuery() const +{ + auto settings = getContext()->getSettingsRef(); + ParserCreateQuery parser; + + String query = "CREATE DATABASE " + backQuoteIfNeed(getDatabaseName()) + " ENGINE = FileSystem(" + backQuoteIfNeed(path) + ")"; + ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(), "", 0, settings.max_parser_depth); + + if (const auto database_comment = getDatabaseComment(); !database_comment.empty()) + { + auto & ast_create_query = ast->as(); + ast_create_query.set(ast_create_query.comment, std::make_shared(database_comment)); + } + + return ast; +} + +void DatabaseFileSystem::shutdown() +{ + Tables tables_snapshot; + { + std::lock_guard lock(mutex); + tables_snapshot = loaded_tables; + } + + for (const auto & kv : tables_snapshot) + { + auto table_id = kv.second->getStorageID(); + kv.second->flushAndShutdown(); + } + + std::lock_guard lock(mutex); + loaded_tables.clear(); +} + +/** + * Returns an empty vector because the database is read-only and no tables can be backed up. + */ +std::vector> DatabaseFileSystem::getTablesForBackup(const FilterByNameFunction&, const ContextPtr&) const { + return {}; +} + +/** + * + * Returns an empty iterator because the database does not have its own tables + * But only caches them for quick access. + */ +DatabaseTablesIteratorPtr DatabaseFileSystem::getTablesIterator(ContextPtr, const FilterByNameFunction&) const { + return std::make_unique(Tables{}, getDatabaseName()); +} + +} // DB diff --git a/src/Databases/DatabaseFileSystem.h b/src/Databases/DatabaseFileSystem.h new file mode 100644 index 00000000000..474a7e78335 --- /dev/null +++ b/src/Databases/DatabaseFileSystem.h @@ -0,0 +1,51 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace DB +{ + +class Context; + +/** + * DatabaseFileSystem allows to interact with files stored on the file system + * Uses TableFunctionFile to implicitly load file when a user requests the table, and provides read-only access to the data in the file + * Tables are cached inside the database for quick access + * + * Used in clickhouse-local to access local files + */ +class DatabaseFileSystem : public IDatabase, protected WithContext +{ +public: + DatabaseFileSystem(const String & name, const String & path, ContextPtr context); + + String getEngineName() const override { return "FileSystem"; } + + bool isTableExist(const String & name, ContextPtr context) const override; + + StoragePtr tryGetTable(const String & name, ContextPtr context) const override; + + bool empty() const override { return true; } + + ASTPtr getCreateDatabaseQuery() const override; + + void shutdown() override; + + std::vector> getTablesForBackup(const FilterByNameFunction &, const ContextPtr &) const override; + DatabaseTablesIteratorPtr getTablesIterator(ContextPtr, const FilterByNameFunction &) const override; + +protected: + std::string getTablePath(const std::string & table_name) const; + void addTable(const std::string & table_name, StoragePtr table_storage) const; + +private: + String path; + mutable Tables loaded_tables TSA_GUARDED_BY(mutex); + Poco::Logger * log; +}; + +} // DB diff --git 
a/src/Databases/DatabasesOverlay.cpp b/src/Databases/DatabasesOverlay.cpp new file mode 100644 index 00000000000..9c3d802df73 --- /dev/null +++ b/src/Databases/DatabasesOverlay.cpp @@ -0,0 +1,267 @@ +#include + +#include +#include +#include + +#include +#include + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; + extern const int LOGICAL_ERROR; + extern const int CANNOT_GET_CREATE_TABLE_QUERY; +} + +DatabasesOverlay::DatabasesOverlay(const String & name_, ContextPtr context_) + : IDatabase(name_), WithContext(context_->getGlobalContext()), log(&Poco::Logger::get("DatabaseOverlay(" + name_ + ")")) +{ +} + +DatabasesOverlay & DatabasesOverlay::registerNextDatabase(DatabasePtr database) +{ + databases.push_back(std::move(database)); + return *this; +} + +bool DatabasesOverlay::isTableExist(const String & table_name, ContextPtr context_) const +{ + for (const auto & db : databases) + { + if (db->isTableExist(table_name, context_)) + return true; + } + return false; +} + +StoragePtr DatabasesOverlay::tryGetTable(const String & table_name, ContextPtr context_) const +{ + StoragePtr result = nullptr; + for (const auto & db : databases) + { + result = db->tryGetTable(table_name, context_); + if (result) + break; + } + return result; +} + +void DatabasesOverlay::createTable(ContextPtr context_, const String & table_name, const StoragePtr & table, const ASTPtr & query) +{ + for (auto & db : databases) + { + try + { + db->createTable(context_, table_name, table, query); + return; + } + catch (...) + { + continue; + } + } + throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no databases for CREATE TABLE {} query in Database{}", table_name, getEngineName()); +} + +void DatabasesOverlay::dropTable(ContextPtr context_, const String & table_name, bool sync) +{ + for (auto & db : databases) + { + try + { + db->dropTable(context_, table_name, sync); + return; + } + catch (...) + { + continue; + } + } + throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no databases for DROP TABLE {} query in Database{}", table_name, getEngineName()); +} + +void DatabasesOverlay::attachTable( + ContextPtr context_, const String & table_name, const StoragePtr & table, const String & relative_table_path) +{ + for (auto & db : databases) + { + try + { + db->attachTable(context_, table_name, table, relative_table_path); + return; + } + catch (...) + { + continue; + } + } + throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no databases for ATTACH TABLE query in Database{}", getEngineName()); +} + +StoragePtr DatabasesOverlay::detachTable(ContextPtr context_, const String & table_name) +{ + StoragePtr result = nullptr; + for (auto & db : databases) + { + try + { + result = db->detachTable(context_, table_name); + if (result) + return result; + } + catch (...) 
+ { + continue; + } + } + throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no databases for DETACH TABLE {} query in Database{}", table_name, getEngineName()); +} + +ASTPtr DatabasesOverlay::getCreateTableQueryImpl(const String & name, ContextPtr context_, bool throw_on_error) const +{ + ASTPtr result = nullptr; + for (const auto & db : databases) + { + result = db->tryGetCreateTableQuery(name, context_); + if (result) + break; + } + if (!result && throw_on_error) + throw Exception(ErrorCodes::CANNOT_GET_CREATE_TABLE_QUERY, "There is no metadata of table {} in Database{}", name, getEngineName()); + return result; +} + +/* + * DatabaseOverlay cannot be constructed by "CREATE DATABASE" query, as it is not a traditional ClickHouse database + * To use DatabaseOverlay, it must be constructed programmatically in code + */ +ASTPtr DatabasesOverlay::getCreateDatabaseQuery() const +{ + return std::make_shared(); +} + +String DatabasesOverlay::getTableDataPath(const String & table_name) const +{ + String result; + for (const auto & db : databases) + { + result = db->getTableDataPath(table_name); + if (!result.empty()) + break; + } + return result; +} + +String DatabasesOverlay::getTableDataPath(const ASTCreateQuery & query) const +{ + String result; + for (const auto & db : databases) + { + result = db->getTableDataPath(query); + if (!result.empty()) + break; + } + return result; +} + +UUID DatabasesOverlay::tryGetTableUUID(const String & table_name) const +{ + UUID result = UUIDHelpers::Nil; + for (const auto & db : databases) + { + result = db->tryGetTableUUID(table_name); + if (result != UUIDHelpers::Nil) + break; + } + return result; +} + +void DatabasesOverlay::drop(ContextPtr context_) +{ + for (auto & db : databases) + db->drop(context_); +} + +void DatabasesOverlay::alterTable(ContextPtr local_context, const StorageID & table_id, const StorageInMemoryMetadata & metadata) +{ + for (auto & db : databases) + { + try + { + db->alterTable(local_context, table_id, metadata); + return; + } + catch (...) + { + continue; + } + } + throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no databases for alterTable in Database{}", getEngineName()); +} + +std::vector> +DatabasesOverlay::getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & local_context) const +{ + std::vector> result; + for (const auto & db : databases) + { + auto dbBackup = db->getTablesForBackup(filter, local_context); + result.insert(result.end(), std::make_move_iterator(dbBackup.begin()), std::make_move_iterator(dbBackup.end())); + } + return result; +} + +void DatabasesOverlay::createTableRestoredFromBackup( + const ASTPtr & create_table_query, + ContextMutablePtr local_context, + std::shared_ptr /*restore_coordination*/, + UInt64 /*timeout_ms*/) +{ + /// Creates a tables by executing a "CREATE TABLE" query. 
+ InterpreterCreateQuery interpreter{create_table_query, local_context}; + interpreter.setInternal(true); + interpreter.execute(); +} + +bool DatabasesOverlay::empty() const +{ + for (const auto & db : databases) + { + if (!db->empty()) + return false; + } + return true; +} + +void DatabasesOverlay::shutdown() +{ + for (auto & db : databases) + db->shutdown(); +} + +DatabaseTablesIteratorPtr DatabasesOverlay::getTablesIterator(ContextPtr context_, const FilterByNameFunction & filter_by_table_name) const +{ + Tables tables; + for (const auto & db : databases) + { + for (auto table_it = db->getTablesIterator(context_, filter_by_table_name); table_it->isValid(); table_it->next()) + tables.insert({table_it->name(), table_it->table()}); + } + return std::make_unique(std::move(tables), getDatabaseName()); +} + +DatabasePtr CreateClickHouseLocalDatabaseOverlay(const String & name_, ContextPtr context_) +{ + auto databaseCombiner = std::make_shared(name_, context_); + databaseCombiner->registerNextDatabase(std::make_shared(name_, "", context_)); + databaseCombiner->registerNextDatabase(std::make_shared(name_, context_)); + return databaseCombiner; +} + +} diff --git a/src/Databases/DatabasesOverlay.h b/src/Databases/DatabasesOverlay.h new file mode 100644 index 00000000000..77f0085161b --- /dev/null +++ b/src/Databases/DatabasesOverlay.h @@ -0,0 +1,68 @@ +#pragma once + +#include +#include + +namespace DB +{ + +/** + * Implements the IDatabase interface and combines multiple other databases + * Searches for tables in each database in order until found, and delegates operations to the appropriate database + * Useful for combining databases + * + * Used in clickhouse-local to combine DatabaseFileSystem and DatabaseMemory + */ +class DatabasesOverlay : public IDatabase, protected WithContext +{ +public: + DatabasesOverlay(const String & name_, ContextPtr context_); + + /// Not thread-safe. 
Use only as factory to initialize database + DatabasesOverlay & registerNextDatabase(DatabasePtr database); + + String getEngineName() const override { return "Overlay"; } + +public: + bool isTableExist(const String & table_name, ContextPtr context) const override; + + StoragePtr tryGetTable(const String & table_name, ContextPtr context) const override; + + void createTable(ContextPtr context, const String & table_name, const StoragePtr & table, const ASTPtr & query) override; + + void dropTable(ContextPtr context, const String & table_name, bool sync) override; + + void attachTable(ContextPtr context, const String & table_name, const StoragePtr & table, const String & relative_table_path) override; + + StoragePtr detachTable(ContextPtr context, const String & table_name) override; + + ASTPtr getCreateTableQueryImpl(const String & name, ContextPtr context, bool throw_on_error) const override; + ASTPtr getCreateDatabaseQuery() const override; + + String getTableDataPath(const String & table_name) const override; + String getTableDataPath(const ASTCreateQuery & query) const override; + + UUID tryGetTableUUID(const String & table_name) const override; + + void drop(ContextPtr context) override; + + void alterTable(ContextPtr local_context, const StorageID & table_id, const StorageInMemoryMetadata & metadata) override; + + std::vector> getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & local_context) const override; + + void createTableRestoredFromBackup(const ASTPtr & create_table_query, ContextMutablePtr local_context, std::shared_ptr restore_coordination, UInt64 timeout_ms) override; + + DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override; + + bool empty() const override; + + void shutdown() override; + +protected: + std::vector databases; + Poco::Logger * log; +}; + +DatabasePtr CreateClickHouseLocalDatabaseOverlay(const String & name_, ContextPtr context_); + +} diff --git a/tests/queries/0_stateless/02707_clickhouse_local_implicit_file_table_function.reference b/tests/queries/0_stateless/02707_clickhouse_local_implicit_file_table_function.reference new file mode 100644 index 00000000000..0fcd843e737 --- /dev/null +++ b/tests/queries/0_stateless/02707_clickhouse_local_implicit_file_table_function.reference @@ -0,0 +1,9 @@ +Test 1: check explicit and implicit call of the file table function +explicit: +4 +implicit: +4 +Test 2: check FileSystem database +4 +Test 3: check show database with FileSystem +test02707 diff --git a/tests/queries/0_stateless/02707_clickhouse_local_implicit_file_table_function.sh b/tests/queries/0_stateless/02707_clickhouse_local_implicit_file_table_function.sh new file mode 100755 index 00000000000..4d8d7b1395a --- /dev/null +++ b/tests/queries/0_stateless/02707_clickhouse_local_implicit_file_table_function.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +dir=02707_clickhouse_local_tmp +[[ -d $dir ]] && rm -r $dir +mkdir $dir +# Create temporary csv file for tests +echo '"id","str","int","text"' > $dir/tmp.csv +echo '1,"abc",123,"abacaba"' >> $dir/tmp.csv +echo '2,"def",456,"bacabaa"' >> $dir/tmp.csv +echo '3,"story",78912,"acabaab"' >> $dir/tmp.csv +echo '4,"history",21321321,"cabaaba"' >> $dir/tmp.csv + +################# +echo "Test 1: check explicit and implicit call of the file table function" + +echo "explicit:" +$CLICKHOUSE_LOCAL -q 'SELECT COUNT(*) FROM file("02707_clickhouse_local_tmp/tmp.csv")' +echo "implicit:" +$CLICKHOUSE_LOCAL -q 'SELECT COUNT(*) FROM "02707_clickhouse_local_tmp/tmp.csv"' + +################# +echo "Test 2: check FileSystem database" +$CLICKHOUSE_LOCAL --multiline --multiquery -q """ +DROP DATABASE IF EXISTS test; +CREATE DATABASE test ENGINE = FileSystem('02707_clickhouse_local_tmp'); +SELECT COUNT(*) FROM test.\`tmp.csv\`; +DROP DATABASE test; +""" + +################# +echo "Test 3: check show database with FileSystem" +$CLICKHOUSE_LOCAL --multiline --multiquery -q """ +DROP DATABASE IF EXISTS test02707; +CREATE DATABASE test02707 ENGINE = FileSystem('02707_clickhouse_local_tmp'); +SHOW DATABASES; +DROP DATABASE test02707; +""" | grep "test02707" + +rm -r $dir \ No newline at end of file From 22be85d9764d6ebe3511313c9dadcbdf070c53ad Mon Sep 17 00:00:00 2001 From: zvonand Date: Tue, 18 Apr 2023 02:42:30 +0200 Subject: [PATCH 0064/1997] renamed setting --- docs/en/operations/settings/settings.md | 8 +++---- .../functions/date-time-functions.md | 2 +- docs/ru/operations/settings/settings.md | 8 +++---- .../functions/date-time-functions.md | 2 +- src/Client/ClientBase.cpp | 5 +--- src/Common/DateLUT.cpp | 2 +- src/Core/Settings.h | 2 +- src/Core/SettingsFields.cpp | 15 ++++++++++++ src/Core/SettingsFields.h | 23 +++++++++++-------- src/Interpreters/Context.cpp | 1 - src/Server/TCPHandler.cpp | 6 +---- .../0_stateless/02681_timezone_setting.sql | 12 +++++----- 12 files changed, 48 insertions(+), 38 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 7caf3d4333f..dd81b07b9c0 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -4008,9 +4008,9 @@ Default value: `0`. Use this setting only for backward compatibility if your use cases depend on old syntax. ::: -## timezone {#timezone} +## session_timezone {#session_timezone} -If specified, sets a implicit timezone (instead of server-default). All DateTime/DateTime64 values (and/or functions results) that have no explicit timezone specified are treated as having this timezone instead of default. +If specified, sets an implicit timezone (instead of server-default). All DateTime/DateTime64 values (and/or functions results) that have no explicit timezone specified are treated as having this timezone instead of default. 
Examples: ```clickhouse @@ -4020,13 +4020,13 @@ Europe/Berlin Europe/Berlin ``` ```clickhouse -SELECT timeZone(), serverTimezone() SETTINGS timezone = 'Asia/Novosibirsk' FORMAT TSV +SELECT timeZone(), serverTimezone() SETTINGS session_timezone = 'Asia/Novosibirsk' FORMAT TSV Asia/Novosibirsk Europe/Berlin ``` ```clickhouse -SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Europe/Zurich') SETTINGS timezone = 'America/Denver' FORMAT TSV +SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Europe/Zurich') SETTINGS session_timezone = 'America/Denver' FORMAT TSV 1999-12-13 07:23:23.123 ``` diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index f96041996d4..c1b8d201745 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -26,7 +26,7 @@ SELECT ## timeZone -Returns the default timezone of the server for current session. This can be modified using `SET timezone = 'New/Value'` +Returns the default timezone of the server for current session. This can be modified using `SET session_timezone = 'New/Value'` If it is executed in the context of a distributed table, then it generates a normal column with values relevant to each shard. Otherwise it produces a constant value. **Syntax** diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 1687e37dba2..fd4d1e11df7 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -4075,9 +4075,9 @@ SELECT sum(number) FROM numbers(10000000000) SETTINGS partial_result_on_first_ca Значение по умолчанию: `false` -## timezone {#timezone} +## session_timezone {#session_timezone} -Задаёт значение часового пояса (timezone) по умолчанию для текущей сессии вместо часового пояса сервера. То есть, все значения DateTime/DateTime64, для которых явно не задан параметр timezone, будут интерпретированы как относящиеся к указанной зоне. +Задаёт значение часового пояса (session_timezone) по умолчанию для текущей сессии вместо часового пояса сервера. То есть, все значения DateTime/DateTime64, для которых явно не задан параметр timezone, будут интерпретированы как относящиеся к указанной зоне. Примеры: ```clickhouse @@ -4087,13 +4087,13 @@ Europe/Berlin Europe/Berlin ``` ```clickhouse -SELECT timeZone(), serverTimezone() SETTINGS timezone = 'Asia/Novosibirsk' FORMAT TSV +SELECT timeZone(), serverTimezone() SETTINGS session_timezone = 'Asia/Novosibirsk' FORMAT TSV Asia/Novosibirsk Europe/Berlin ``` ```clickhouse -SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Europe/Zurich') SETTINGS timezone = 'America/Denver' FORMAT TSV +SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Europe/Zurich') SETTINGS session_timezone = 'America/Denver' FORMAT TSV 1999-12-13 07:23:23.123 ``` diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md index 132b54c1040..3e378c08308 100644 --- a/docs/ru/sql-reference/functions/date-time-functions.md +++ b/docs/ru/sql-reference/functions/date-time-functions.md @@ -26,7 +26,7 @@ SELECT ## timeZone {#timezone} -Возвращает часовой пояс сервера, считающийся умолчанием для текущей сессии. +Возвращает часовой пояс сервера, считающийся умолчанием для текущей сессии. 
Можно изменить значение с помощью `SET session_timezone = 'New/Timezone''` Если функция вызывается в контексте распределенной таблицы, то она генерирует обычный столбец со значениями, актуальными для каждого шарда. Иначе возвращается константа. **Синтаксис** diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 2e82144e64d..5126777fa1e 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -451,12 +451,9 @@ void ClientBase::onData(Block & block, ASTPtr parsed_query) /// Also do not output too much data if we're fuzzing. if (block.rows() == 0 || (query_fuzzer_runs != 0 && processed_rows >= 100)) { -// LogContextes("ClientBase::onData header", global_context); return; } -// LogContextes("ClientBase::onData DATA block", global_context); - /// If results are written INTO OUTFILE, we can avoid clearing progress to avoid flicker. if (need_render_progress && tty_buf && (!select_into_file || select_into_file_and_stdout)) progress_indication.clearProgressOutput(*tty_buf); @@ -1075,7 +1072,7 @@ void ClientBase::onProgress(const Progress & value) void ClientBase::onTimezoneUpdate(const String & tz) { Settings settings; - settings.timezone = tz; + settings.session_timezone = tz; global_context->applySettingsChanges(settings.changes()); } diff --git a/src/Common/DateLUT.cpp b/src/Common/DateLUT.cpp index 16bd69a20f8..f7e7df016cb 100644 --- a/src/Common/DateLUT.cpp +++ b/src/Common/DateLUT.cpp @@ -167,5 +167,5 @@ DateLUT & DateLUT::getInstance() std::string DateLUT::extractTimezoneFromContext(const DB::ContextPtr query_context) { - return query_context->getSettingsRef().timezone.value; + return query_context->getSettingsRef().session_timezone.value; } diff --git a/src/Core/Settings.h b/src/Core/Settings.h index b3e9f3fb220..837958aa5b0 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -734,7 +734,7 @@ class IColumn; M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \ M(Bool, function_json_value_return_type_allow_nullable, false, "Allow function to return nullable type.", 0) \ M(Bool, function_json_value_return_type_allow_complex, false, "Allow function to return complex type, such as: struct, array, map.", 0) \ - M(Timezone, timezone, "", "Use specified timezone for interpreting Date and DateTime instead of server's timezone.", 0) \ + M(Timezone, session_timezone, "", "Use specified timezone for interpreting Date and DateTime instead of server's timezone in current session.", 0) \ // End of COMMON_SETTINGS // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS. 
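Taken together, the hunks in this patch route timezone resolution through the renamed setting: `DateLUT::extractTimezoneFromContext` now reads `session_timezone`, and an empty value means "no session override". A minimal sketch of the resulting lookup path, using only names that appear in the diffs; this is an illustration, not code from the patch:

```cpp
// Sketch: resolving the effective timezone for a query after this patch.
std::string tz = query_context->getSettingsRef().session_timezone.value;

const auto & lut = tz.empty()
    ? DateLUT::instance()      // fall back to the server-wide default
    : DateLUT::instance(tz);   // session override, e.g. "Asia/Novosibirsk"
```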
diff --git a/src/Core/SettingsFields.cpp b/src/Core/SettingsFields.cpp index f4169aa0c64..65720056c8a 100644 --- a/src/Core/SettingsFields.cpp +++ b/src/Core/SettingsFields.cpp @@ -13,6 +13,7 @@ #include +extern const char * auto_time_zones[]; namespace DB { @@ -26,6 +27,14 @@ namespace ErrorCodes namespace { + bool checkIsExitingTimeZone(const std::string_view timezone) + { + for (auto * it = auto_time_zones; *it; ++it) + if (timezone == *it) + return true; + return false; + } + template T stringToNumber(const String & str) { @@ -463,6 +472,12 @@ void SettingFieldTimezone::readBinary(ReadBuffer & in) *this = std::move(str); } +void SettingFieldTimezone::validateTimezone(std::string_view str) +{ + if (str != "" && !checkIsExitingTimeZone(str)) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Invalid time zone: {}", str); +} + String SettingFieldCustom::toString() const { return value.dump(); diff --git a/src/Core/SettingsFields.h b/src/Core/SettingsFields.h index 0ee3ddd4862..e3b18a606a1 100644 --- a/src/Core/SettingsFields.h +++ b/src/Core/SettingsFields.h @@ -2,7 +2,6 @@ #include #include -#include #include #include #include @@ -553,13 +552,16 @@ struct SettingFieldTimezone String value; bool changed = false; - explicit SettingFieldTimezone(std::string_view str = {}) { validateTimezone(std::string(str)); value = str; } + explicit SettingFieldTimezone(std::string_view str = {}) { validateTimezone(str); value = str; } +// explicit SettingFieldTimezone(std::string_view str = {}) { validateTimezone(std::string(str)); value = str; } explicit SettingFieldTimezone(const String & str) { validateTimezone(str); value = str; } - explicit SettingFieldTimezone(String && str) { validateTimezone(std::string(str)); value = std::move(str); } +// explicit SettingFieldTimezone(String && str) { validateTimezone(std::string(str)); value = std::move(str); } + explicit SettingFieldTimezone(String && str) { validateTimezone(str); value = std::move(str); } explicit SettingFieldTimezone(const char * str) { validateTimezone(str); value = str; } explicit SettingFieldTimezone(const Field & f) { const String & str = f.safeGet(); validateTimezone(str); value = str; } - SettingFieldTimezone & operator =(std::string_view str) { validateTimezone(std::string(str)); value = str; changed = true; return *this; } +// SettingFieldTimezone & operator =(std::string_view str) { validateTimezone(std::string(str)); value = str; changed = true; return *this; } + SettingFieldTimezone & operator =(std::string_view str) { validateTimezone(str); value = str; changed = true; return *this; } SettingFieldTimezone & operator =(const String & str) { *this = std::string_view{str}; return *this; } SettingFieldTimezone & operator =(String && str) { validateTimezone(str); value = std::move(str); changed = true; return *this; } SettingFieldTimezone & operator =(const char * str) { *this = std::string_view{str}; return *this; } @@ -575,12 +577,13 @@ struct SettingFieldTimezone void readBinary(ReadBuffer & in); private: - cctz::time_zone validated_tz; - void validateTimezone(const std::string & tz_str) - { - if (!tz_str.empty() && !cctz::load_time_zone(tz_str, &validated_tz)) - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Invalid time zone: {}", tz_str); - } + static void validateTimezone(std::string_view str); +// cctz::time_zone validated_tz; +// void validateTimezone(const std::string & tz_str) +// { +// if (!tz_str.empty() && !cctz::load_time_zone(tz_str, &validated_tz)) +// throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, 
"Invalid time zone: {}", tz_str); +// } }; /// Can keep a value of any type. Used for user-defined settings. diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 82ecd87faa0..400eb570131 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1680,7 +1680,6 @@ void Context::applySettingChange(const SettingChange & change) void Context::applySettingsChanges(const SettingsChanges & changes) { auto lock = getLock(); - for (const SettingChange & change : changes) applySettingChange(change); applySettingsQuirks(settings); diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index a875507d227..e44609529ba 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -183,11 +183,8 @@ void TCPHandler::runImpl() /// User will be authenticated here. It will also set settings from user profile into connection_context. try { - LOG_DEBUG(log, "Before receiveHello"); receiveHello(); - LOG_DEBUG(log, "Before sendHello"); sendHello(); - LOG_DEBUG(log, "Before receiveAddendum"); if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_ADDENDUM) receiveAddendum(); @@ -465,7 +462,6 @@ void TCPHandler::runImpl() sendSelectProfileEvents(); sendLogs(); - return false; }; @@ -1072,7 +1068,7 @@ void TCPHandler::sendTimezone() if (client_tcp_protocol_version < DBMS_MIN_PROTOCOL_VERSION_WITH_TIMEZONE_UPDATES) return; - const String & tz = query_context->getSettingsRef().timezone.toString(); + const String & tz = query_context->getSettingsRef().session_timezone.toString(); LOG_DEBUG(log, "TCPHandler::sendTimezone(): {}", tz); writeVarUInt(Protocol::Server::TimezoneUpdate, *out); diff --git a/tests/queries/0_stateless/02681_timezone_setting.sql b/tests/queries/0_stateless/02681_timezone_setting.sql index 73afb4c029b..f66e8d2b646 100644 --- a/tests/queries/0_stateless/02681_timezone_setting.sql +++ b/tests/queries/0_stateless/02681_timezone_setting.sql @@ -1,11 +1,11 @@ -SET timezone = 'Абырвалг'; -- { serverError BAD_ARGUMENTS} +SET session_timezone = 'Абырвалг'; -- { serverError BAD_ARGUMENTS} -SET timezone = 'Asia/Novosibirsk'; +SET session_timezone = 'Asia/Novosibirsk'; SELECT toDateTime64(toDateTime64('2022-12-12 23:23:23.123', 3), 3, 'Europe/Zurich'); -SELECT toDateTime64(toDateTime64('2022-12-12 23:23:23.123', 3), 3, 'Europe/Zurich') SETTINGS timezone = 'Europe/Zurich'; +SELECT toDateTime64(toDateTime64('2022-12-12 23:23:23.123', 3), 3, 'Europe/Zurich') SETTINGS session_timezone = 'Europe/Zurich'; -SET timezone = 'Asia/Manila'; +SET session_timezone = 'Asia/Manila'; SELECT toDateTime64(toDateTime64('2022-12-12 23:23:23.123', 3), 3, 'Asia/Novosibirsk'); -SELECT timezone(), timezoneOf(now()) SETTINGS timezone = 'Europe/Zurich' FORMAT TSV; -SELECT timezone(), timezoneOf(now()) SETTINGS timezone = 'Pacific/Pitcairn' FORMAT TSV; +SELECT timezone(), timezoneOf(now()) SETTINGS session_timezone = 'Europe/Zurich' FORMAT TSV; +SELECT timezone(), timezoneOf(now()) SETTINGS session_timezone = 'Pacific/Pitcairn' FORMAT TSV; From 0550b0640ce0020d4e4f0015447631c4b742ab13 Mon Sep 17 00:00:00 2001 From: zvonand Date: Tue, 18 Apr 2023 03:35:29 +0200 Subject: [PATCH 0065/1997] fix linking issue --- src/Core/SettingsFields.cpp | 15 --------------- src/Core/SettingsFields.h | 23 ++++++++++------------- 2 files changed, 10 insertions(+), 28 deletions(-) diff --git a/src/Core/SettingsFields.cpp b/src/Core/SettingsFields.cpp index 65720056c8a..f4169aa0c64 100644 --- a/src/Core/SettingsFields.cpp +++ b/src/Core/SettingsFields.cpp @@ -13,7 +13,6 @@ 
#include -extern const char * auto_time_zones[]; namespace DB { @@ -27,14 +26,6 @@ namespace ErrorCodes namespace { - bool checkIsExitingTimeZone(const std::string_view timezone) - { - for (auto * it = auto_time_zones; *it; ++it) - if (timezone == *it) - return true; - return false; - } - template T stringToNumber(const String & str) { @@ -472,12 +463,6 @@ void SettingFieldTimezone::readBinary(ReadBuffer & in) *this = std::move(str); } -void SettingFieldTimezone::validateTimezone(std::string_view str) -{ - if (str != "" && !checkIsExitingTimeZone(str)) - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Invalid time zone: {}", str); -} - String SettingFieldCustom::toString() const { return value.dump(); diff --git a/src/Core/SettingsFields.h b/src/Core/SettingsFields.h index e3b18a606a1..0ee3ddd4862 100644 --- a/src/Core/SettingsFields.h +++ b/src/Core/SettingsFields.h @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -552,16 +553,13 @@ struct SettingFieldTimezone String value; bool changed = false; - explicit SettingFieldTimezone(std::string_view str = {}) { validateTimezone(str); value = str; } -// explicit SettingFieldTimezone(std::string_view str = {}) { validateTimezone(std::string(str)); value = str; } + explicit SettingFieldTimezone(std::string_view str = {}) { validateTimezone(std::string(str)); value = str; } explicit SettingFieldTimezone(const String & str) { validateTimezone(str); value = str; } -// explicit SettingFieldTimezone(String && str) { validateTimezone(std::string(str)); value = std::move(str); } - explicit SettingFieldTimezone(String && str) { validateTimezone(str); value = std::move(str); } + explicit SettingFieldTimezone(String && str) { validateTimezone(std::string(str)); value = std::move(str); } explicit SettingFieldTimezone(const char * str) { validateTimezone(str); value = str; } explicit SettingFieldTimezone(const Field & f) { const String & str = f.safeGet(); validateTimezone(str); value = str; } -// SettingFieldTimezone & operator =(std::string_view str) { validateTimezone(std::string(str)); value = str; changed = true; return *this; } - SettingFieldTimezone & operator =(std::string_view str) { validateTimezone(str); value = str; changed = true; return *this; } + SettingFieldTimezone & operator =(std::string_view str) { validateTimezone(std::string(str)); value = str; changed = true; return *this; } SettingFieldTimezone & operator =(const String & str) { *this = std::string_view{str}; return *this; } SettingFieldTimezone & operator =(String && str) { validateTimezone(str); value = std::move(str); changed = true; return *this; } SettingFieldTimezone & operator =(const char * str) { *this = std::string_view{str}; return *this; } @@ -577,13 +575,12 @@ struct SettingFieldTimezone void readBinary(ReadBuffer & in); private: - static void validateTimezone(std::string_view str); -// cctz::time_zone validated_tz; -// void validateTimezone(const std::string & tz_str) -// { -// if (!tz_str.empty() && !cctz::load_time_zone(tz_str, &validated_tz)) -// throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Invalid time zone: {}", tz_str); -// } + cctz::time_zone validated_tz; + void validateTimezone(const std::string & tz_str) + { + if (!tz_str.empty() && !cctz::load_time_zone(tz_str, &validated_tz)) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Invalid time zone: {}", tz_str); + } }; /// Can keep a value of any type. Used for user-defined settings. 
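Patch 0064 above introduced list-based timezone validation, this patch reverts to loading the zone via cctz because the generated list broke linking, and patch 0067 below switches back once the generated file is linked everywhere. A side-by-side sketch of the two variants, built from the calls that appear in the diffs; the function names here are illustrative, error handling is omitted, and the cctz header name is assumed:

```cpp
#include <string>
#include <string_view>
#include <cctz/time_zone.h>  // assumed header for cctz::load_time_zone

extern const char * auto_time_zones[];  // from StorageSystemTimeZones.generated.cpp

bool validateViaCctz(const std::string & tz_str)
{
    // Variant A (patch 0065): actually load the zone. Correct by construction,
    // but it pulls cctz into every binary that links SettingsFields.cpp.
    cctz::time_zone validated_tz;
    return tz_str.empty() || cctz::load_time_zone(tz_str, &validated_tz);
}

bool validateViaGeneratedList(std::string_view tz_str)
{
    // Variant B (patches 0064/0067): scan the pregenerated, NULL-terminated
    // array of zone names; needs StorageSystemTimeZones.generated.cpp linked in.
    if (tz_str.empty())
        return true;
    for (auto * it = auto_time_zones; *it; ++it)
        if (tz_str == *it)
            return true;
    return false;
}
```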
From 96553bc3d8e70d06e03191f4b848ed07c91e5c6a Mon Sep 17 00:00:00 2001 From: Aleksei Golub Date: Sun, 16 Apr 2023 23:25:57 +0300 Subject: [PATCH 0066/1997] Fix style and tests --- src/Databases/DatabaseFactory.cpp | 6 ++- src/Databases/DatabaseFileSystem.cpp | 45 +++++++++++-------- src/Databases/DatabasesOverlay.cpp | 1 - ...ouse_local_implicit_file_table_function.sh | 14 +++--- 4 files changed, 38 insertions(+), 28 deletions(-) diff --git a/src/Databases/DatabaseFactory.cpp b/src/Databases/DatabaseFactory.cpp index b023bb06ad1..9c13881fc7b 100644 --- a/src/Databases/DatabaseFactory.cpp +++ b/src/Databases/DatabaseFactory.cpp @@ -433,13 +433,15 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String return std::make_shared(context, engine_define, create.attach, database_path); } #endif - else if (engine_name == "FileSystem") { + else if (engine_name == "FileSystem") + { const ASTFunction * engine = engine_define->engine; // If init_path is empty, then the current path from Poco will be used std::string init_path; - if (engine->arguments && engine->arguments->children.size() > 0) { + if (engine->arguments && !engine->arguments->children.empty()) + { if (engine->arguments->children.size() != 1) throw Exception(ErrorCodes::BAD_ARGUMENTS, "FileSystem database requires at most 1 argument: file_system_path"); diff --git a/src/Databases/DatabaseFileSystem.cpp b/src/Databases/DatabaseFileSystem.cpp index 9e2273970c3..8b92ad8080a 100644 --- a/src/Databases/DatabaseFileSystem.cpp +++ b/src/Databases/DatabaseFileSystem.cpp @@ -59,23 +59,30 @@ StoragePtr DatabaseFileSystem::tryGetTable(const String & name, ContextPtr conte auto table_path = getTablePath(name); - // If the table doesn't exist in the tables map, check if the corresponding file exists - Poco::File table_file(table_path); - if (!table_file.exists()) + try + { + // If the table doesn't exist in the tables map, check if the corresponding file exists + Poco::File table_file(table_path); + if (!table_file.exists()) + return nullptr; + + // If the file exists, create a new table using TableFunctionFile and return it. + auto args = makeASTFunction("file", std::make_shared(table_path)); + + auto table_function = TableFunctionFactory::instance().get(args, context_); + if (!table_function) + return nullptr; + + auto table_storage = table_function->execute(args, context_, name); + if (table_storage) + addTable(name, table_storage); + + return table_storage; + } + catch (...) + { return nullptr; - - // If the file exists, create a new table using TableFunctionFile and return it. - auto args = makeASTFunction("file", std::make_shared(table_path)); - - auto table_function = TableFunctionFactory::instance().get(args, context_); - if (!table_function) - return nullptr; - - auto table_storage = table_function->execute(args, context_, name); - if (table_storage) - addTable(name, table_storage); - - return table_storage; + } } ASTPtr DatabaseFileSystem::getCreateDatabaseQuery() const @@ -116,7 +123,8 @@ void DatabaseFileSystem::shutdown() /** * Returns an empty vector because the database is read-only and no tables can be backed up. 
*/ -std::vector> DatabaseFileSystem::getTablesForBackup(const FilterByNameFunction&, const ContextPtr&) const { +std::vector> DatabaseFileSystem::getTablesForBackup(const FilterByNameFunction&, const ContextPtr&) const +{ return {}; } @@ -125,7 +133,8 @@ std::vector> DatabaseFileSystem::getTablesForBacku * Returns an empty iterator because the database does not have its own tables * But only caches them for quick access. */ -DatabaseTablesIteratorPtr DatabaseFileSystem::getTablesIterator(ContextPtr, const FilterByNameFunction&) const { +DatabaseTablesIteratorPtr DatabaseFileSystem::getTablesIterator(ContextPtr, const FilterByNameFunction&) const +{ return std::make_unique(Tables{}, getDatabaseName()); } diff --git a/src/Databases/DatabasesOverlay.cpp b/src/Databases/DatabasesOverlay.cpp index 9c3d802df73..da26f9282a0 100644 --- a/src/Databases/DatabasesOverlay.cpp +++ b/src/Databases/DatabasesOverlay.cpp @@ -14,7 +14,6 @@ namespace DB namespace ErrorCodes { - extern const int NOT_IMPLEMENTED; extern const int LOGICAL_ERROR; extern const int CANNOT_GET_CREATE_TABLE_QUERY; } diff --git a/tests/queries/0_stateless/02707_clickhouse_local_implicit_file_table_function.sh b/tests/queries/0_stateless/02707_clickhouse_local_implicit_file_table_function.sh index 4d8d7b1395a..eea1e47ba7f 100755 --- a/tests/queries/0_stateless/02707_clickhouse_local_implicit_file_table_function.sh +++ b/tests/queries/0_stateless/02707_clickhouse_local_implicit_file_table_function.sh @@ -4,8 +4,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -dir=02707_clickhouse_local_tmp -[[ -d $dir ]] && rm -r $dir +dir=${CLICKHOUSE_TEST_UNIQUE_NAME} +[[ -d $dir ]] && rm -rd $dir mkdir $dir # Create temporary csv file for tests echo '"id","str","int","text"' > $dir/tmp.csv @@ -18,15 +18,15 @@ echo '4,"history",21321321,"cabaaba"' >> $dir/tmp.csv echo "Test 1: check explicit and implicit call of the file table function" echo "explicit:" -$CLICKHOUSE_LOCAL -q 'SELECT COUNT(*) FROM file("02707_clickhouse_local_tmp/tmp.csv")' +$CLICKHOUSE_LOCAL -q "SELECT COUNT(*) FROM file('${dir}/tmp.csv')" echo "implicit:" -$CLICKHOUSE_LOCAL -q 'SELECT COUNT(*) FROM "02707_clickhouse_local_tmp/tmp.csv"' +$CLICKHOUSE_LOCAL -q "SELECT COUNT(*) FROM \"${dir}/tmp.csv\"" ################# echo "Test 2: check FileSystem database" $CLICKHOUSE_LOCAL --multiline --multiquery -q """ DROP DATABASE IF EXISTS test; -CREATE DATABASE test ENGINE = FileSystem('02707_clickhouse_local_tmp'); +CREATE DATABASE test ENGINE = FileSystem('${dir}'); SELECT COUNT(*) FROM test.\`tmp.csv\`; DROP DATABASE test; """ @@ -35,9 +35,9 @@ DROP DATABASE test; echo "Test 3: check show database with FileSystem" $CLICKHOUSE_LOCAL --multiline --multiquery -q """ DROP DATABASE IF EXISTS test02707; -CREATE DATABASE test02707 ENGINE = FileSystem('02707_clickhouse_local_tmp'); +CREATE DATABASE test02707 ENGINE = FileSystem('${dir}'); SHOW DATABASES; DROP DATABASE test02707; """ | grep "test02707" -rm -r $dir \ No newline at end of file +rm -rd $dir From 1e8c0a2db9671f0862975499f16b923a49c3a2ec Mon Sep 17 00:00:00 2001 From: zvonand Date: Wed, 19 Apr 2023 00:06:15 +0200 Subject: [PATCH 0067/1997] Lighter timezone validation Reused external variable from src/Storages/System/StorageSystemTimeZones.generated.cpp Required changes to CMakeLists of some standalone modules to link properly --- programs/library-bridge/CMakeLists.txt | 8 +++++++- programs/odbc-bridge/CMakeLists.txt | 6 +++++- src/Core/SettingsFields.cpp | 
15 +++++++++++++++ src/Core/SettingsFields.h | 24 ++++++++++++++---------- utils/check-marks/CMakeLists.txt | 6 +++++- utils/keeper-data-dumper/CMakeLists.txt | 8 +++++++- utils/wal-dump/CMakeLists.txt | 6 +++++- 7 files changed, 58 insertions(+), 15 deletions(-) diff --git a/programs/library-bridge/CMakeLists.txt b/programs/library-bridge/CMakeLists.txt index 1cacc391ca5..97af7c3b22e 100644 --- a/programs/library-bridge/CMakeLists.txt +++ b/programs/library-bridge/CMakeLists.txt @@ -13,11 +13,17 @@ set (CLICKHOUSE_LIBRARY_BRIDGE_SOURCES library-bridge.cpp ) +set(CLICKHOUSE_LIBRARY_BRIDGE_EXTERNAL_SOURCES + ${CMAKE_CURRENT_BINARY_DIR}/../../src/Storages/System/StorageSystemTimeZones.generated.cpp +) + if (OS_LINUX) set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic") endif () -clickhouse_add_executable(clickhouse-library-bridge ${CLICKHOUSE_LIBRARY_BRIDGE_SOURCES}) +clickhouse_add_executable(clickhouse-library-bridge + ${CLICKHOUSE_LIBRARY_BRIDGE_SOURCES} + ${CLICKHOUSE_LIBRARY_BRIDGE_EXTERNAL_SOURCES}) target_link_libraries(clickhouse-library-bridge PRIVATE daemon diff --git a/programs/odbc-bridge/CMakeLists.txt b/programs/odbc-bridge/CMakeLists.txt index 118610e4dcd..bf1b42df026 100644 --- a/programs/odbc-bridge/CMakeLists.txt +++ b/programs/odbc-bridge/CMakeLists.txt @@ -15,13 +15,17 @@ set (CLICKHOUSE_ODBC_BRIDGE_SOURCES validateODBCConnectionString.cpp ) +set(ODBC_BRIDGE_EXTERNAL_SOURCES + ${CMAKE_CURRENT_BINARY_DIR}/../../src/Storages/System/StorageSystemTimeZones.generated.cpp +) + if (OS_LINUX) # clickhouse-odbc-bridge is always a separate binary. # Reason: it must not export symbols from SSL, mariadb-client, etc. to not break ABI compatibility with ODBC drivers. set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic") endif () -clickhouse_add_executable(clickhouse-odbc-bridge ${CLICKHOUSE_ODBC_BRIDGE_SOURCES}) +clickhouse_add_executable(clickhouse-odbc-bridge ${CLICKHOUSE_ODBC_BRIDGE_SOURCES} ${ODBC_BRIDGE_EXTERNAL_SOURCES}) target_link_libraries(clickhouse-odbc-bridge PRIVATE daemon diff --git a/src/Core/SettingsFields.cpp b/src/Core/SettingsFields.cpp index f4169aa0c64..65720056c8a 100644 --- a/src/Core/SettingsFields.cpp +++ b/src/Core/SettingsFields.cpp @@ -13,6 +13,7 @@ #include +extern const char * auto_time_zones[]; namespace DB { @@ -26,6 +27,14 @@ namespace ErrorCodes namespace { + bool checkIsExitingTimeZone(const std::string_view timezone) + { + for (auto * it = auto_time_zones; *it; ++it) + if (timezone == *it) + return true; + return false; + } + template T stringToNumber(const String & str) { @@ -463,6 +472,12 @@ void SettingFieldTimezone::readBinary(ReadBuffer & in) *this = std::move(str); } +void SettingFieldTimezone::validateTimezone(std::string_view str) +{ + if (str != "" && !checkIsExitingTimeZone(str)) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Invalid time zone: {}", str); +} + String SettingFieldCustom::toString() const { return value.dump(); diff --git a/src/Core/SettingsFields.h b/src/Core/SettingsFields.h index 0ee3ddd4862..e78fef9f455 100644 --- a/src/Core/SettingsFields.h +++ b/src/Core/SettingsFields.h @@ -2,7 +2,7 @@ #include #include -#include +//#include #include #include #include @@ -553,13 +553,16 @@ struct SettingFieldTimezone String value; bool changed = false; - explicit SettingFieldTimezone(std::string_view str = {}) { validateTimezone(std::string(str)); value = str; } +// explicit SettingFieldTimezone(std::string_view str = {}) { validateTimezone(std::string(str)); value = str; } 
+ explicit SettingFieldTimezone(std::string_view str = {}) { validateTimezone(str); value = str; } explicit SettingFieldTimezone(const String & str) { validateTimezone(str); value = str; } - explicit SettingFieldTimezone(String && str) { validateTimezone(std::string(str)); value = std::move(str); } +// explicit SettingFieldTimezone(String && str) { validateTimezone(std::string(str)); value = std::move(str); } + explicit SettingFieldTimezone(String && str) { validateTimezone(str); value = std::move(str); } explicit SettingFieldTimezone(const char * str) { validateTimezone(str); value = str; } explicit SettingFieldTimezone(const Field & f) { const String & str = f.safeGet(); validateTimezone(str); value = str; } - SettingFieldTimezone & operator =(std::string_view str) { validateTimezone(std::string(str)); value = str; changed = true; return *this; } +// SettingFieldTimezone & operator =(std::string_view str) { validateTimezone(std::string(str)); value = str; changed = true; return *this; } + SettingFieldTimezone & operator =(std::string_view str) { validateTimezone(str); value = str; changed = true; return *this; } SettingFieldTimezone & operator =(const String & str) { *this = std::string_view{str}; return *this; } SettingFieldTimezone & operator =(String && str) { validateTimezone(str); value = std::move(str); changed = true; return *this; } SettingFieldTimezone & operator =(const char * str) { *this = std::string_view{str}; return *this; } @@ -575,12 +578,13 @@ struct SettingFieldTimezone void readBinary(ReadBuffer & in); private: - cctz::time_zone validated_tz; - void validateTimezone(const std::string & tz_str) - { - if (!tz_str.empty() && !cctz::load_time_zone(tz_str, &validated_tz)) - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Invalid time zone: {}", tz_str); - } +// cctz::time_zone validated_tz; +// void validateTimezone(const std::string & str) +// { +// if (!str.empty() && !cctz::load_time_zone(str, &validated_tz)) +// throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Invalid time zone: {}", str); +// } + static void validateTimezone(std::string_view str); }; /// Can keep a value of any type. Used for user-defined settings. 
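The per-target CMakeLists additions in this patch (the bridges above, the utils below, and keeper in patch 0069) all exist to satisfy one link-time contract created by the list-based validation. A sketch of that contract; the extern declaration is taken from the diff, while the wrapper function and its name are illustrative:

```cpp
#include <string_view>

// Referenced by SettingsFields.cpp but defined only in the generated file
// src/Storages/System/StorageSystemTimeZones.generated.cpp, so every
// standalone binary that links SettingsFields.cpp must compile that file too.
extern const char * auto_time_zones[];

bool isKnownTimeZone(std::string_view name)  // illustrative helper
{
    for (auto * it = auto_time_zones; *it; ++it)
        if (name == *it)
            return true;
    return false;
}
```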
diff --git a/utils/check-marks/CMakeLists.txt b/utils/check-marks/CMakeLists.txt index 05546a2989b..456fb3d7112 100644 --- a/utils/check-marks/CMakeLists.txt +++ b/utils/check-marks/CMakeLists.txt @@ -1,2 +1,6 @@ -clickhouse_add_executable (check-marks main.cpp) +set(CHECK_MARKS_EXTERNAL_SOURCES + ${CMAKE_CURRENT_BINARY_DIR}/../../src/Storages/System/StorageSystemTimeZones.generated.cpp +) + +clickhouse_add_executable (check-marks ${CHECK_MARKS_EXTERNAL_SOURCES} main.cpp) target_link_libraries(check-marks PRIVATE dbms boost::program_options) diff --git a/utils/keeper-data-dumper/CMakeLists.txt b/utils/keeper-data-dumper/CMakeLists.txt index 1f55e50e68e..a6858a29e8b 100644 --- a/utils/keeper-data-dumper/CMakeLists.txt +++ b/utils/keeper-data-dumper/CMakeLists.txt @@ -1,2 +1,8 @@ -clickhouse_add_executable(keeper-data-dumper main.cpp) +set(KEEPER_DATA_DUMPER_EXTERNAL_SOURCES + ${CMAKE_CURRENT_BINARY_DIR}/../../src/Storages/System/StorageSystemTimeZones.generated.cpp +) + +clickhouse_add_executable(keeper-data-dumper + ${KEEPER_DATA_DUMPER_EXTERNAL_SOURCES} + main.cpp) target_link_libraries(keeper-data-dumper PRIVATE dbms) diff --git a/utils/wal-dump/CMakeLists.txt b/utils/wal-dump/CMakeLists.txt index 3d59e95b4ca..754799a6faf 100644 --- a/utils/wal-dump/CMakeLists.txt +++ b/utils/wal-dump/CMakeLists.txt @@ -1,2 +1,6 @@ -clickhouse_add_executable (wal-dump main.cpp) +set(WAL_DUMP_EXTERNAL_SOURCES + ${CMAKE_CURRENT_BINARY_DIR}/../../src/Storages/System/StorageSystemTimeZones.generated.cpp +) + +clickhouse_add_executable (wal-dump ${WAL_DUMP_EXTERNAL_SOURCES} main.cpp) target_link_libraries(wal-dump PRIVATE dbms boost::program_options) From 24be7203d931b57a35241fec1abe31a9099ba096 Mon Sep 17 00:00:00 2001 From: zvonand Date: Wed, 19 Apr 2023 00:39:08 +0200 Subject: [PATCH 0068/1997] add errorcode reference --- src/Core/SettingsFields.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/SettingsFields.cpp b/src/Core/SettingsFields.cpp index 65720056c8a..e952688a968 100644 --- a/src/Core/SettingsFields.cpp +++ b/src/Core/SettingsFields.cpp @@ -22,6 +22,7 @@ namespace ErrorCodes extern const int SIZE_OF_FIXED_STRING_DOESNT_MATCH; extern const int CANNOT_PARSE_BOOL; extern const int CANNOT_PARSE_NUMBER; + extern const int BAD_ARGUMENTS; } From 542c09cb518988cf54261edbab691c459efa9a88 Mon Sep 17 00:00:00 2001 From: zvonand Date: Wed, 19 Apr 2023 01:35:49 +0200 Subject: [PATCH 0069/1997] fix keeper standalone linking --- programs/keeper/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index 761335fb707..ff2de3f581c 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -107,6 +107,8 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/GraphiteWriter.cpp ${CMAKE_CURRENT_BINARY_DIR}/../../src/Daemon/GitHash.generated.cpp + ${CMAKE_CURRENT_BINARY_DIR}/../../src/Storages/System/StorageSystemTimeZones.generated.cpp + Keeper.cpp clickhouse-keeper.cpp ) From ce7dc8b123502aee8af1578d87133f8283c66a5b Mon Sep 17 00:00:00 2001 From: zvonand Date: Wed, 19 Apr 2023 13:42:07 +0200 Subject: [PATCH 0070/1997] tidy --- src/Core/SettingsFields.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/SettingsFields.cpp b/src/Core/SettingsFields.cpp index e952688a968..6af38586ed8 100644 --- a/src/Core/SettingsFields.cpp +++ b/src/Core/SettingsFields.cpp @@ -475,7 +475,7 @@ void SettingFieldTimezone::readBinary(ReadBuffer & in) void 
SettingFieldTimezone::validateTimezone(std::string_view str) { - if (str != "" && !checkIsExitingTimeZone(str)) + if (!str.empty() && !checkIsExitingTimeZone(str)) throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Invalid time zone: {}", str); } From daae5025e8586156a016687672100a8ec0db6016 Mon Sep 17 00:00:00 2001 From: zvonand Date: Wed, 19 Apr 2023 14:45:51 +0200 Subject: [PATCH 0071/1997] small updates due to review --- docs/en/operations/settings/settings.md | 1 + docs/en/sql-reference/functions/date-time-functions.md | 2 +- docs/ru/operations/settings/settings.md | 1 + programs/client/Client.cpp | 2 +- src/Client/ClientBase.cpp | 5 ----- 5 files changed, 4 insertions(+), 7 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index dd81b07b9c0..2010f763c84 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -4011,6 +4011,7 @@ Use this setting only for backward compatibility if your use cases depend on old ## session_timezone {#session_timezone} If specified, sets an implicit timezone (instead of server-default). All DateTime/DateTime64 values (and/or functions results) that have no explicit timezone specified are treated as having this timezone instead of default. +Setting this to `''` (empty string) effectively resets implicit timezone to server timezone. Examples: ```clickhouse diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index c1b8d201745..265ce676ef7 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -51,7 +51,7 @@ If it is executed in the context of a distributed table, then it generates a nor **Syntax** ``` sql -timeZone() +serverTimeZone() ``` Alias: `ServerTimezone`, `servertimezone`. diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index fd4d1e11df7..8180f5435b8 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -4078,6 +4078,7 @@ SELECT sum(number) FROM numbers(10000000000) SETTINGS partial_result_on_first_ca ## session_timezone {#session_timezone} Задаёт значение часового пояса (session_timezone) по умолчанию для текущей сессии вместо часового пояса сервера. То есть, все значения DateTime/DateTime64, для которых явно не задан параметр timezone, будут интерпретированы как относящиеся к указанной зоне. +При значении настройки `''` (пустая строка), будет совпадать с часовым поясом сервера. Примеры: ```clickhouse diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 528c504e555..b760efc21d1 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -324,7 +324,7 @@ try { // All that just to set DB::CurrentThread::get().getGlobalContext() - // which is required for client timezone (pushed as from server) to work. + // which is required for client timezone (pushed from server) to work. 
auto thread_group = std::make_shared(); thread_status.attachToGroup(thread_group, false); } diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 5126777fa1e..6df86db886b 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -78,11 +78,6 @@ namespace fs = std::filesystem; using namespace std::literals; -namespace -{ -using namespace DB; -} - namespace CurrentMetrics { extern const Metric MemoryTracking; From 61f55930ceee99ce23cdab794ce77945f9a6ee1c Mon Sep 17 00:00:00 2001 From: Andrey Zvonov <32552679+zvonand@users.noreply.github.com> Date: Wed, 19 Apr 2023 15:46:17 +0300 Subject: [PATCH 0072/1997] Update docs/en/operations/settings/settings.md Co-authored-by: Vasily Nemkov --- docs/en/operations/settings/settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 2010f763c84..366e7de8d28 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -4034,7 +4034,7 @@ SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Europe/Zuric Possible values: -- Any valid timezone in `Region/Place` notation, e.g. `Europe/Berlin` +- Any timezone name from `system.time_zones`, e.g. `Europe/Berlin`, `UTC` or `Zulu` Default value: `''`. From b281ceacbb4cc8f66a56b9bc12d5ab521098ce08 Mon Sep 17 00:00:00 2001 From: zvonand Date: Wed, 19 Apr 2023 14:47:57 +0200 Subject: [PATCH 0073/1997] Update docs/ru/operations/settings/settings.md --- docs/ru/operations/settings/settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 8180f5435b8..f0f497b6254 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -4101,6 +4101,6 @@ SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Europe/Zuric Возможные значения: -- Строка вида `Регион/Город`, например `Europe/Zurich` +- Любая зона из `system.time_zones`, например `Europe/Berlin`, `UTC` или `Zulu` Значение по умолчанию: `''`. 
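For context on why review feedback keeps touching `ClientBase`: the setting is round-tripped over the native protocol, not just applied server-side. A condensed sketch assembled from hunks earlier in the series (`TCPHandler::sendTimezone`, `ClientBase::onTimezoneUpdate`); these are fragments rather than a self-contained program, and the payload write is an assumption, as it is not visible in the hunks above:

```cpp
// Server (TCPHandler::sendTimezone): announce the effective session timezone
// to clients that are new enough to understand the packet.
if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_TIMEZONE_UPDATES)
{
    const String & tz = query_context->getSettingsRef().session_timezone.toString();
    writeVarUInt(Protocol::Server::TimezoneUpdate, *out);
    writeStringBinary(tz, *out);  // assumed: payload write not shown in the diff
}

// Client (ClientBase::onTimezoneUpdate): fold the pushed value back into the
// client settings so later DateTime output renders in the session timezone.
Settings settings;
settings.session_timezone = tz;
global_context->applySettingsChanges(settings.changes());
```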
From b06d7355d597abdf6692b5c232fb12449d57aa5b Mon Sep 17 00:00:00 2001 From: Andrey Zvonov <32552679+zvonand@users.noreply.github.com> Date: Wed, 19 Apr 2023 17:39:40 +0300 Subject: [PATCH 0074/1997] Update src/Core/SettingsFields.h Co-authored-by: Vasily Nemkov --- src/Core/SettingsFields.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Core/SettingsFields.h b/src/Core/SettingsFields.h index e78fef9f455..8bd7370c980 100644 --- a/src/Core/SettingsFields.h +++ b/src/Core/SettingsFields.h @@ -2,7 +2,6 @@ #include #include -//#include #include #include #include From b81ce64fa23cf2d05edd488eeb1adbf981784a54 Mon Sep 17 00:00:00 2001 From: Andrey Zvonov <32552679+zvonand@users.noreply.github.com> Date: Wed, 19 Apr 2023 17:39:52 +0300 Subject: [PATCH 0075/1997] Update src/Client/ClientBase.cpp Co-authored-by: Vasily Nemkov --- src/Client/ClientBase.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 6df86db886b..bd83246871b 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1531,9 +1531,9 @@ void ClientBase::receiveLogsAndProfileEvents(ASTPtr parsed_query) { auto packet_type = connection->checkPacket(0); - while (packet_type && (*packet_type == Protocol::Server::Log || - *packet_type == Protocol::Server::ProfileEvents || - *packet_type == Protocol::Server::TimezoneUpdate)) + while (packet_type && (*packet_type == Protocol::Server::Log + || *packet_type == Protocol::Server::ProfileEvents + || *packet_type == Protocol::Server::TimezoneUpdate)) { receiveAndProcessPacket(parsed_query, false); packet_type = connection->checkPacket(0); From 21d5846cabd0717184f44d98b8480fefc683e807 Mon Sep 17 00:00:00 2001 From: Aleksei Golub Date: Tue, 18 Apr 2023 18:12:11 +0300 Subject: [PATCH 0076/1997] Fix test --- .../02707_clickhouse_local_implicit_file_table_function.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/02707_clickhouse_local_implicit_file_table_function.sh b/tests/queries/0_stateless/02707_clickhouse_local_implicit_file_table_function.sh index eea1e47ba7f..24de0ad579c 100755 --- a/tests/queries/0_stateless/02707_clickhouse_local_implicit_file_table_function.sh +++ b/tests/queries/0_stateless/02707_clickhouse_local_implicit_file_table_function.sh @@ -7,6 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) dir=${CLICKHOUSE_TEST_UNIQUE_NAME} [[ -d $dir ]] && rm -rd $dir mkdir $dir + # Create temporary csv file for tests echo '"id","str","int","text"' > $dir/tmp.csv echo '1,"abc",123,"abacaba"' >> $dir/tmp.csv @@ -40,4 +41,5 @@ SHOW DATABASES; DROP DATABASE test02707; """ | grep "test02707" +# Remove temporary dir with files rm -rd $dir From e997b1393ce12ba639049147afdedb13e338af38 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 19 Apr 2023 22:40:13 +0200 Subject: [PATCH 0077/1997] Play with MMAP_THRESHOLD (set it to 128MiB) Signed-off-by: Azat Khuzhin --- src/Common/Allocator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/Allocator.cpp b/src/Common/Allocator.cpp index c02210f2ece..0fb90e5a47e 100644 --- a/src/Common/Allocator.cpp +++ b/src/Common/Allocator.cpp @@ -8,7 +8,7 @@ * See also: https://gcc.gnu.org/legacy-ml/gcc-help/2017-12/msg00021.html */ #ifdef NDEBUG - __attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 256 * (1ULL << 20); + __attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 128 * (1ULL << 20); #else /** * In debug build, use small mmap threshold to reproduce more memory 
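A note on the allocator patch just above: `MMAP_THRESHOLD` is the size at which `Allocator` switches from malloc-backed memory to a direct `mmap`, so halving it from 256 MiB to 128 MiB widens the range of buffers that are mapped (and returned to the OS via `munmap` on free). A rough illustration of the boundary; the predicate is a simplification of the real allocator logic:

```cpp
#include <cstddef>

constexpr size_t MiB = 1ULL << 20;

// Simplified: does an allocation of `size` bytes go through mmap?
constexpr bool usesMmap(size_t size, size_t mmap_threshold) { return size >= mmap_threshold; }

static_assert(!usesMmap(200 * MiB, 256 * MiB), "before: served by malloc");
static_assert( usesMmap(200 * MiB, 128 * MiB), "after: mmap'ed directly");
```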
From bf55f43e1933fdcbbc2ec85e5b0823c6a7e3eb5e Mon Sep 17 00:00:00 2001
From: zvonand
Date: Thu, 20 Apr 2023 02:53:42 +0200
Subject: [PATCH 0078/1997] update cmakelists

---
 src/CMakeLists.txt          | 5 +++++
 src/Core/SettingsFields.cpp | 4 ++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 76e5ef83e41..5ac3f6e1654 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -191,6 +191,11 @@ list (APPEND dbms_headers
     Dictionaries/DictionaryStructure.h
     Dictionaries/getDictionaryConfigurationFromAST.h)
 
+# Required for validation of Timezone in session_timezone setting.
+# This way we don't need to create timezone via cctz each time, but check against pregenerated char**
+list (APPEND dbms_sources
+    Storages/System/StorageSystemTimeZones.generated.cpp)
+
 if (NOT ENABLE_SSL)
     list (REMOVE_ITEM clickhouse_common_io_sources Common/OpenSSLHelpers.cpp)
     list (REMOVE_ITEM clickhouse_common_io_headers Common/OpenSSLHelpers.h)
diff --git a/src/Core/SettingsFields.cpp b/src/Core/SettingsFields.cpp
index 6af38586ed8..c0556519563 100644
--- a/src/Core/SettingsFields.cpp
+++ b/src/Core/SettingsFields.cpp
@@ -28,7 +28,7 @@ namespace ErrorCodes
 
 namespace
 {
-    bool checkIsExitingTimeZone(const std::string_view timezone)
+    bool checkIsExistingTimeZone(const std::string_view timezone)
     {
         for (auto * it = auto_time_zones; *it; ++it)
             if (timezone == *it)
@@ -475,7 +475,7 @@ void SettingFieldTimezone::readBinary(ReadBuffer & in)
 
 void SettingFieldTimezone::validateTimezone(std::string_view str)
 {
-    if (!str.empty() && !checkIsExitingTimeZone(str))
+    if (!str.empty() && !checkIsExistingTimeZone(str))
         throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Invalid time zone: {}", str);
 }

From f4af76ab8baee97c06cf1e53346da6107c7ccbbd Mon Sep 17 00:00:00 2001
From: zvonand
Date: Thu, 20 Apr 2023 11:58:51 +0200
Subject: [PATCH 0079/1997] cleanup cmakelists

---
 programs/library-bridge/CMakeLists.txt  | 7 +------
 programs/odbc-bridge/CMakeLists.txt     | 6 +-----
 utils/check-marks/CMakeLists.txt        | 6 +-----
 utils/keeper-data-dumper/CMakeLists.txt | 8 +-------
 utils/wal-dump/CMakeLists.txt           | 6 +-----
 5 files changed, 5 insertions(+), 28 deletions(-)

diff --git a/programs/library-bridge/CMakeLists.txt b/programs/library-bridge/CMakeLists.txt
index 97af7c3b22e..79497d5fb2e 100644
--- a/programs/library-bridge/CMakeLists.txt
+++ b/programs/library-bridge/CMakeLists.txt
@@ -13,17 +13,12 @@ set (CLICKHOUSE_LIBRARY_BRIDGE_SOURCES
     library-bridge.cpp
 )
 
-set(CLICKHOUSE_LIBRARY_BRIDGE_EXTERNAL_SOURCES
-    ${CMAKE_CURRENT_BINARY_DIR}/../../src/Storages/System/StorageSystemTimeZones.generated.cpp
-)
-
 if (OS_LINUX)
     set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic")
 endif ()
 
-clickhouse_add_executable(clickhouse-library-bridge
-    ${CLICKHOUSE_LIBRARY_BRIDGE_SOURCES}
-    ${CLICKHOUSE_LIBRARY_BRIDGE_EXTERNAL_SOURCES})
+clickhouse_add_executable(clickhouse-library-bridge ${CLICKHOUSE_LIBRARY_BRIDGE_SOURCES})
 
 target_link_libraries(clickhouse-library-bridge PRIVATE
     daemon
diff --git a/programs/odbc-bridge/CMakeLists.txt b/programs/odbc-bridge/CMakeLists.txt
index bf1b42df026..118610e4dcd 100644
--- a/programs/odbc-bridge/CMakeLists.txt
+++ b/programs/odbc-bridge/CMakeLists.txt
@@ -15,17 +15,13 @@ set (CLICKHOUSE_ODBC_BRIDGE_SOURCES
     validateODBCConnectionString.cpp
 )
 
-set(ODBC_BRIDGE_EXTERNAL_SOURCES
-    ${CMAKE_CURRENT_BINARY_DIR}/../../src/Storages/System/StorageSystemTimeZones.generated.cpp
-)
-
 if (OS_LINUX)
     # clickhouse-odbc-bridge is always a separate binary.
     # Reason: it must not export symbols from SSL, mariadb-client, etc. to not break ABI compatibility with ODBC drivers.
     set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic")
 endif ()
 
-clickhouse_add_executable(clickhouse-odbc-bridge ${CLICKHOUSE_ODBC_BRIDGE_SOURCES} ${ODBC_BRIDGE_EXTERNAL_SOURCES})
+clickhouse_add_executable(clickhouse-odbc-bridge ${CLICKHOUSE_ODBC_BRIDGE_SOURCES})
 
 target_link_libraries(clickhouse-odbc-bridge PRIVATE
     daemon
diff --git a/utils/check-marks/CMakeLists.txt b/utils/check-marks/CMakeLists.txt
index 456fb3d7112..05546a2989b 100644
--- a/utils/check-marks/CMakeLists.txt
+++ b/utils/check-marks/CMakeLists.txt
@@ -1,6 +1,2 @@
-set(CHECK_MARKS_EXTERNAL_SOURCES
-    ${CMAKE_CURRENT_BINARY_DIR}/../../src/Storages/System/StorageSystemTimeZones.generated.cpp
-)
-
-clickhouse_add_executable (check-marks ${CHECK_MARKS_EXTERNAL_SOURCES} main.cpp)
+clickhouse_add_executable (check-marks main.cpp)
 target_link_libraries(check-marks PRIVATE dbms boost::program_options)
diff --git a/utils/keeper-data-dumper/CMakeLists.txt b/utils/keeper-data-dumper/CMakeLists.txt
index a6858a29e8b..1f55e50e68e 100644
--- a/utils/keeper-data-dumper/CMakeLists.txt
+++ b/utils/keeper-data-dumper/CMakeLists.txt
@@ -1,8 +1,2 @@
-set(KEEPER_DATA_DUMPER_EXTERNAL_SOURCES
-    ${CMAKE_CURRENT_BINARY_DIR}/../../src/Storages/System/StorageSystemTimeZones.generated.cpp
-)
-
-clickhouse_add_executable(keeper-data-dumper
-    ${KEEPER_DATA_DUMPER_EXTERNAL_SOURCES}
-    main.cpp)
+clickhouse_add_executable(keeper-data-dumper main.cpp)
 target_link_libraries(keeper-data-dumper PRIVATE dbms)
diff --git a/utils/wal-dump/CMakeLists.txt b/utils/wal-dump/CMakeLists.txt
index 754799a6faf..3d59e95b4ca 100644
--- a/utils/wal-dump/CMakeLists.txt
+++ b/utils/wal-dump/CMakeLists.txt
@@ -1,6 +1,2 @@
-set(WAL_DUMP_EXTERNAL_SOURCES
-    ${CMAKE_CURRENT_BINARY_DIR}/../../src/Storages/System/StorageSystemTimeZones.generated.cpp
-)
-
-clickhouse_add_executable (wal-dump ${WAL_DUMP_EXTERNAL_SOURCES} main.cpp)
+clickhouse_add_executable (wal-dump main.cpp)
 target_link_libraries(wal-dump PRIVATE dbms boost::program_options)

From e37745811cd6000348655c7c42cdc25436a3090e Mon Sep 17 00:00:00 2001
From: zvonand
Date: Thu, 20 Apr 2023 12:04:12 +0200
Subject: [PATCH 0080/1997] style

---
 programs/library-bridge/CMakeLists.txt | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/programs/library-bridge/CMakeLists.txt b/programs/library-bridge/CMakeLists.txt
index 79497d5fb2e..1cacc391ca5 100644
--- a/programs/library-bridge/CMakeLists.txt
+++ b/programs/library-bridge/CMakeLists.txt
@@ -17,8 +17,7 @@ if (OS_LINUX)
     set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic")
 endif ()
 
-clickhouse_add_executable(clickhouse-library-bridge
-    ${CLICKHOUSE_LIBRARY_BRIDGE_SOURCES})
+clickhouse_add_executable(clickhouse-library-bridge ${CLICKHOUSE_LIBRARY_BRIDGE_SOURCES})
 
 target_link_libraries(clickhouse-library-bridge PRIVATE
     daemon

From 65d23af611f28fbe0f2baf1dac6cd1aa317676b5 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Mon, 24 Apr 2023 19:35:49 +0000
Subject: [PATCH 0081/1997] Refactor PreparedSets [1]

---
 src/Interpreters/PreparedSets.cpp | 101 +++++++++++--------
 src/Interpreters/PreparedSets.h   | 157 ++++++++++++++++++++++++++----
 2 files changed, 200 insertions(+), 58 deletions(-)

diff --git a/src/Interpreters/PreparedSets.cpp b/src/Interpreters/PreparedSets.cpp
index 7b0efddae87..11af41cae8f 100644
--- a/src/Interpreters/PreparedSets.cpp
+++ b/src/Interpreters/PreparedSets.cpp
@@ -5,6 +5,8 @@
 #include
 #include
 #include
+#include "Common/logger_useful.h"
+#include "Processors/QueryPlan/CreatingSetsStep.h"
 
 namespace DB
 {
@@ -66,30 +68,30 @@ String PreparedSetKey::toString() const
     return buf.str();
 }
 
-SubqueryForSet & PreparedSets::createOrGetSubquery(const String & subquery_id, const PreparedSetKey & key,
-                                                   SizeLimits set_size_limit, bool transform_null_in)
-{
-    SubqueryForSet & subquery = subqueries[subquery_id];
+// SubqueryForSet & PreparedSets::createOrGetSubquery(const String & subquery_id, const PreparedSetKey & key,
+//                                                    SizeLimits set_size_limit, bool transform_null_in)
+// {
+//     SubqueryForSet & subquery = subqueries[subquery_id];
 
-    /// If you already created a Set with the same subquery / table for another ast
-    /// In that case several PreparedSetKey would share same subquery and set
-    /// Not sure if it's really possible case (maybe for distributed query when set was filled by external table?)
-    if (subquery.set.isValid())
-        sets[key] = subquery.set;
-    else
-    {
-        subquery.set_in_progress = std::make_shared(set_size_limit, false, transform_null_in);
-        sets[key] = FutureSet(subquery.promise_to_fill_set.get_future());
-    }
+//     /// If you already created a Set with the same subquery / table for another ast
+//     /// In that case several PreparedSetKey would share same subquery and set
+//     /// Not sure if it's really possible case (maybe for distributed query when set was filled by external table?)
+//     if (subquery.set.isValid())
+//         sets[key] = subquery.set;
+//     else
+//     {
+//         subquery.set_in_progress = std::make_shared(set_size_limit, false, transform_null_in);
+//         sets[key] = FutureSet(subquery.promise_to_fill_set.get_future());
+//     }
 
-    if (!subquery.set_in_progress)
-    {
-        subquery.key = key.toString();
-        subquery.set_in_progress = std::make_shared(set_size_limit, false, transform_null_in);
-    }
+//     if (!subquery.set_in_progress)
+//     {
+//         subquery.key = key.toString();
+//         subquery.set_in_progress = std::make_shared(set_size_limit, false, transform_null_in);
+//     }
 
-    return subquery;
-}
+//     return subquery;
+// }
 
 /// If the subquery is not associated with any set, create default-constructed SubqueryForSet.
 /// It's aimed to fill external table passed to SubqueryForSet::createSource.
@@ -154,26 +156,6 @@ QueryPlanPtr SubqueryForSet::detachSource()
 }
 
-FutureSet::FutureSet(SetPtr set)
-{
-    std::promise promise;
-    promise.set_value(set);
-    *this = FutureSet(promise.get_future());
-}
-
-
-bool FutureSet::isReady() const
-{
-    return future_set.valid() &&
-        future_set.wait_for(std::chrono::seconds(0)) == std::future_status::ready;
-}
-
-bool FutureSet::isCreated() const
-{
-    return isReady() && get() != nullptr && get()->isCreated();
-}
-
-
 std::variant, SharedSet> PreparedSetsCache::findOrPromiseToBuild(const String & key)
 {
     std::lock_guard lock(cache_mutex);
@@ -194,4 +176,41 @@ std::variant, SharedSet> PreparedSetsCache::findOrPromiseTo
     return promise_to_fill_set;
 }
 
+std::unique_ptr FutureSetFromSubquery::buildPlan(const ContextPtr & context, bool create_ordered_set)
+{
+    if (set)
+        return nullptr;
+
+    auto set_cache = context->getPreparedSetsCache();
+    if (set_cache)
+    {
+        auto from_cache = set_cache->findOrPromiseToBuild(subquery.key);
+        if (from_cache.index() == 0)
+        {
+            subquery.promise_to_fill_set = std::move(std::get<0>(from_cache));
+        }
+        else
+        {
+            LOG_TRACE(&Poco::Logger::get("FutureSetFromSubquery"), "Waiting for set, key: {}", subquery.key);
+            set = std::get<1>(from_cache).get();
+            return nullptr;
+        }
+    }
+
+    subquery.set = set = std::make_shared(size_limits, create_ordered_set, transform_null_in);
+
+    auto plan = subquery.detachSource();
+
+    const Settings & settings = context->getSettingsRef();
+    auto creating_set = std::make_unique(
+        plan->getCurrentDataStream(),
+        subquery_id,
+        std::move(subquery),
+        SizeLimits(settings.max_rows_to_transfer, settings.max_bytes_to_transfer, settings.transfer_overflow_mode),
+        context);
+    creating_set->setStepDescription("Create set for subquery");
+    plan->addStep(std::move(creating_set));
+    return plan;
+}
+
 };
diff --git a/src/Interpreters/PreparedSets.h b/src/Interpreters/PreparedSets.h
index 4a7d1c3de46..a4898fe5ec6 100644
--- a/src/Interpreters/PreparedSets.h
+++ b/src/Interpreters/PreparedSets.h
@@ -10,6 +10,14 @@
 #include
 #include
 #include
+#include "Core/Block.h"
+#include "Interpreters/Context.h"
+#include "Interpreters/Set.h"
+#include "Processors/Executors/CompletedPipelineExecutor.h"
+#include "Processors/QueryPlan/BuildQueryPipelineSettings.h"
+#include "Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h"
+#include "Processors/Sinks/NullSink.h"
+#include
 
 namespace DB
 {
@@ -25,30 +33,83 @@ class InterpreterSelectWithUnionQuery;
 /// At analysis stage the FutureSets are created but not necessarily filled. Then for non-constant sets there
 /// must be an explicit step to build them before they can be used.
 /// FutureSet objects can be stored in PreparedSets and are not intended to be used from multiple threads.
-class FutureSet final
+// class FutureSet final
+// {
+// public:
+//     FutureSet() = default;
+
+//     /// Create FutureSet from an object that will be created in the future.
+//     explicit FutureSet(const std::shared_future & future_set_) : future_set(future_set_) {}
+
+//     /// Create FutureSet from a ready set.
+//     explicit FutureSet(SetPtr readySet);
+
+//     /// The set object will be ready in the future, as opposed to 'null' object when FutureSet is default constructed.
+//     bool isValid() const { return future_set.valid(); }
+
+//     /// The value of SetPtr is ready, but the set object might not have been filled yet.
+//     bool isReady() const;
+
+//     /// The set object is ready and filled.
+//     bool isCreated() const;
+
+//     SetPtr get() const { chassert(isReady()); return future_set.get(); }
+
+// private:
+//     std::shared_future future_set;
+// };
+
+class FutureSet
 {
 public:
-    FutureSet() = default;
+    virtual ~FutureSet() = default;
 
-    /// Create FutureSet from an object that will be created in the future.
-    explicit FutureSet(const std::shared_future & future_set_) : future_set(future_set_) {}
+    virtual bool isReady() const = 0;
+    virtual SetPtr get() const = 0;
 
-    /// Create FutureSet from a ready set.
-    explicit FutureSet(SetPtr readySet);
+    virtual SetPtr buildOrderedSetInplace(const ContextPtr & context) = 0;
+    virtual std::unique_ptr build(const ContextPtr & context) = 0;
+};
 
-    /// The set object will be ready in the future, as opposed to 'null' object when FutureSet is default constructed.
-    bool isValid() const { return future_set.valid(); }
+using FutureSetPtr = std::unique_ptr;
 
-    /// The value of SetPtr is ready, but the set object might not have been filled yet.
-    bool isReady() const;
+class FutureSetFromTuple final : public FutureSet
+{
+public:
+    FutureSetFromTuple(Block block_, const SizeLimits & size_limits_, bool transform_null_in_);
 
-    /// The set object is ready and filled.
-    bool isCreated() const;
+    bool isReady() const override { return set != nullptr; }
+    SetPtr get() const override { return set; }
 
-    SetPtr get() const { chassert(isReady()); return future_set.get(); }
+    SetPtr buildOrderedSetInplace(const ContextPtr &) override
+    {
+        fill(true);
+        return set;
+    }
+
+    std::unique_ptr build(const ContextPtr &) override
+    {
+        fill(false);
+        return nullptr;
+    }
 
 private:
-    std::shared_future future_set;
+    Block block;
+    SizeLimits size_limits;
+    bool transform_null_in;
+
+    SetPtr set;
+
+    void fill(bool create_ordered_set)
+    {
+        if (set)
+            return;
+
+        set = std::make_shared(size_limits, create_ordered_set, transform_null_in);
+        set->setHeader(block.cloneEmpty().getColumnsWithTypeAndName());
+        set->insertFromBlock(block.getColumnsWithTypeAndName());
+        set->finishInsert();
+    }
 };
 
 /// Information on how to build set for the [GLOBAL] IN section.
@@ -66,11 +127,12 @@ public:
     /// Build this set from the result of the subquery.
     String key;
 
-    SetPtr set_in_progress;
+    SetPtr set;
     /// After set_in_progress is finished it will be put into promise_to_fill_set and thus all FutureSet's
     /// that are referencing this set will be filled.
+
     std::promise promise_to_fill_set;
-    FutureSet set = FutureSet{promise_to_fill_set.get_future()};
+    // FutureSet set = FutureSet{promise_to_fill_set.get_future()};
 
     /// If set, put the result into the table.
     /// This is a temporary table for transferring to remote servers for distributed query processing.
@@ -80,6 +142,67 @@ public:
     std::unique_ptr source;
 };
 
+class FutureSetFromSubquery : public FutureSet
+{
+public:
+    FutureSetFromSubquery(SubqueryForSet subquery_, String subquery_id, SizeLimits set_size_limit_, bool transform_null_in_);
+
+    bool isReady() const override { return set != nullptr; }
+    SetPtr get() const override { return set; }
+
+    SetPtr buildOrderedSetInplace(const ContextPtr & context) override
+    {
+        auto plan = buildPlan(context, true);
+
+        auto builder = plan->buildQueryPipeline(QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context));
+        auto pipeline = QueryPipelineBuilder::getPipeline(std::move(*builder));
+        pipeline.complete(std::make_shared(Block()));
+
+        CompletedPipelineExecutor executor(pipeline);
+        executor.execute();
+
+        return set;
+    }
+
+    std::unique_ptr build(const ContextPtr & context) override
+    {
+        return buildPlan(context, false);
+    }
+
+private:
+    SetPtr set;
+    SubqueryForSet subquery;
+    String subquery_id;
+    SizeLimits size_limits;
+    bool transform_null_in;
+
+    std::unique_ptr buildPlan(const ContextPtr & context, bool create_ordered_set);
+};
+
+// class FutureSetFromFuture : public FutureSet
+// {
+// public:
+//     FutureSetFromFuture(std::shared_future future_set_);
+
+//     bool isReady() const override { return future_set.wait_for(std::chrono::seconds(0)) == std::future_status::ready; }
+//     SetPtr get() const override { return future_set.get(); }
+
+//     SetPtr buildOrderedSetInplace(const ContextPtr &) override
+//     {
+//         fill(true);
+//         return set;
+//     }
+
+//     std::unique_ptr build(const ContextPtr &) override
+//     {
+//         fill(false);
+//         return nullptr;
+//     }
+
+// private:
+//     std::shared_future future_set;
+// }
+
 struct PreparedSetKey
 {
     /// Prepared sets for tuple literals are indexed by the hash of the tree contents and by the desired
@@ -132,7 +255,7 @@ private:
     std::unordered_map sets;
 
     /// This is the information required for building sets
-    SubqueriesForSets subqueries;
+    // SubqueriesForSets subqueries;
 };
 
 using PreparedSetsPtr = std::shared_ptr;

From 80a2f30a0cc651db608735391d5173452f7b41ff Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Tue, 25 Apr 2023 18:14:08 +0000
Subject: [PATCH 0082/1997] Refactor PreparedSets [2]

---
 src/Columns/ColumnSet.h                    |   4 +-
 src/Interpreters/ActionsVisitor.cpp        |  56 +++---
 src/Interpreters/ActionsVisitor.h          |   7 +-
 src/Interpreters/ExpressionAnalyzer.cpp    | 188 ++++++++++-----------
 src/Interpreters/ExpressionAnalyzer.h      |   4 +-
 src/Interpreters/GlobalSubqueriesVisitor.h |  24 +--
 src/Interpreters/PreparedSets.cpp          | 110 +++++++++---
 src/Interpreters/PreparedSets.h            |  75 +++++---
 src/Planner/PlannerContext.h               |  20 +--
 9 files changed, 283 insertions(+), 205 deletions(-)

diff --git a/src/Columns/ColumnSet.h b/src/Columns/ColumnSet.h
index 3f5cf4ad280..ccd9aa19896 100644
--- a/src/Columns/ColumnSet.h
+++ b/src/Columns/ColumnSet.h
@@ -21,7 +21,7 @@ class ColumnSet final : public COWHelper
 private:
     friend class COWHelper;
 
-    ColumnSet(size_t s_, FutureSet data_) : data(std::move(data_)) { s = s_; }
+    ColumnSet(size_t s_, FutureSetPtr data_) : data(std::move(data_)) { s = s_; }
     ColumnSet(const ColumnSet &) = default;
 
 public:
@@ -35,7 +35,7 @@ public:
     Field operator[](size_t) const override { return {}; }
 
 private:
-    FutureSet data;
+    FutureSetPtr data;
 };
 
 }
diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp
index 3bb3ea67e29..00feecb44f5 100644
--- a/src/Interpreters/ActionsVisitor.cpp
+++ b/src/Interpreters/ActionsVisitor.cpp
@@ -422,9 @@ Block createBlockForSet(
 }
 
-SetPtr makeExplicitSet(
-    const ASTFunction * node, const ActionsDAG & actions, bool create_ordered_set,
-    ContextPtr context, const SizeLimits & size_limits, PreparedSets & prepared_sets)
+FutureSetPtr makeExplicitSet(
+    const ASTFunction * node, const ActionsDAG & actions, ContextPtr context, PreparedSets & prepared_sets)
 {
     const IAST & args = *node->arguments;
@@ -448,7 +447,7 @@ SetPtr makeExplicitSet(
         element_type = low_cardinality_type->getDictionaryType();
 
     auto set_key = PreparedSetKey::forLiteral(*right_arg, set_element_types);
-    if (auto set = prepared_sets.get(set_key))
+    if (auto set = prepared_sets.getFuture(set_key))
         return set; /// Already prepared.
 
     Block block;
@@ -458,14 +457,7 @@ SetPtr makeExplicitSet(
     else
         block = createBlockForSet(left_arg_type, right_arg, set_element_types, context);
 
-    SetPtr set
-        = std::make_shared(size_limits, create_ordered_set, context->getSettingsRef().transform_null_in);
-    set->setHeader(block.cloneEmpty().getColumnsWithTypeAndName());
-    set->insertFromBlock(block.getColumnsWithTypeAndName());
-    set->finishInsert();
-
-    prepared_sets.set(set_key, set);
-    return set;
+    return prepared_sets.addFromTuple(set_key, block);
 }
 
 class ScopeStack::Index
@@ -952,7 +944,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
         return;
     }
 
-    FutureSet prepared_set;
+    FutureSetPtr prepared_set;
     if (checkFunctionIsInOrGlobalInOperator(node))
     {
         /// Let's find the type of the first argument (then getActionsImpl will be called again and will not affect anything).
@@ -961,7 +953,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
         if (!data.no_makeset && !(data.is_create_parameterized_view && !analyzeReceiveQueryParams(ast).empty()))
             prepared_set = makeSet(node, data, data.no_subqueries);
 
-        if (prepared_set.isValid())
+        if (prepared_set)
        {
             /// Transform tuple or subquery into a set.
         }
@@ -1174,14 +1166,14 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
                 num_arguments += columns.size() - 1;
                 arg += columns.size() - 1;
             }
-            else if (checkFunctionIsInOrGlobalInOperator(node) && arg == 1 && prepared_set.isValid())
+            else if (checkFunctionIsInOrGlobalInOperator(node) && arg == 1 && prepared_set)
             {
                 ColumnWithTypeAndName column;
                 column.type = std::make_shared();
 
                 /// If the argument is a set given by an enumeration of values (so, the set was already built), give it a unique name,
                 /// so that sets with the same literal representation do not fuse together (they can have different types).
-                const bool is_constant_set = prepared_set.isCreated();
+                const bool is_constant_set = typeid_cast(prepared_set.get()) == nullptr;
                 if (is_constant_set)
                     column.name = data.getUniqueName("__set");
                 else
@@ -1373,7 +1365,7 @@ void ActionsMatcher::visit(const ASTLiteral & literal, const ASTPtr & /* ast */,
     data.addColumn(std::move(column));
 }
 
-FutureSet ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_subqueries)
+FutureSetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_subqueries)
 {
     if (!data.prepared_sets)
         return {};
@@ -1394,11 +1386,8 @@ FutureSet ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no
             return {};
 
         auto set_key = PreparedSetKey::forSubquery(*right_in_operand);
-        {
-            auto set = data.prepared_sets->getFuture(set_key);
-            if (set.isValid())
-                return set;
-        }
+        if (auto set = data.prepared_sets->getFuture(set_key))
+            return set;
 
         /// A special case is if the name of the table is specified on the right side of the IN statement,
        /// and the table has the type Set (a previously prepared set).
@@ -1409,20 +1398,16 @@ FutureSet ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no
 
             if (table)
             {
-                StorageSet * storage_set = dynamic_cast(table.get());
-                if (storage_set)
-                {
-                    SetPtr set = storage_set->getSet();
-                    data.prepared_sets->set(set_key, set);
-                    return FutureSet(set);
-                }
+                if (StorageSet * storage_set = dynamic_cast(table.get()))
+                    return data.prepared_sets->addFromStorage(set_key, storage_set->getSet());
             }
         }
 
         /// We get the stream of blocks for the subquery. Create Set and put it in place of the subquery.
-        String set_id = right_in_operand->getColumnName();
-        bool transform_null_in = data.getContext()->getSettingsRef().transform_null_in;
-        SubqueryForSet & subquery_for_set = data.prepared_sets->createOrGetSubquery(set_id, set_key, data.set_size_limit, transform_null_in);
+        // String set_id = right_in_operand->getColumnName();
+        //bool transform_null_in = data.getContext()->getSettingsRef().transform_null_in;
+        SubqueryForSet subquery_for_set; // = data.prepared_sets->createOrGetSubquery(set_id, set_key, data.set_size_limit, transform_null_in);
+        subquery_for_set.key = right_in_operand->getColumnName();
 
         /** The following happens for GLOBAL INs or INs:
           * - in the addExternalStorage function, the IN (SELECT ...) subquery is replaced with IN _data1,
          * ...
          * In case that we have HAVING with IN subquery, we have to force creating set for it.
          * Also it doesn't make sense if it is GLOBAL IN or ordinary IN.
          */
-        if (data.create_source_for_in && !subquery_for_set.hasSource())
+        if (data.create_source_for_in)
         {
             auto interpreter = interpretSubquery(right_in_operand, data.getContext(), data.subquery_depth, {});
             subquery_for_set.createSource(*interpreter);
         }
 
-        return subquery_for_set.set;
+        return data.prepared_sets->addFromSubquery(set_key, std::move(subquery_for_set));
     }
     else
     {
@@ -1446,8 +1431,7 @@ FutureSet ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no
         const auto & index = data.actions_stack.getLastActionsIndex();
 
         if (data.prepared_sets && index.contains(left_in_operand->getColumnName()))
             /// An explicit enumeration of values in parentheses.
-            return FutureSet(
-                makeExplicitSet(&node, last_actions, false, data.getContext(), data.set_size_limit, *data.prepared_sets));
+            return makeExplicitSet(&node, last_actions, data.getContext(), *data.prepared_sets);
         else
             return {};
     }
diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h
index 260fd5ab2c0..71d57620196 100644
--- a/src/Interpreters/ActionsVisitor.h
+++ b/src/Interpreters/ActionsVisitor.h
@@ -25,9 +25,8 @@ class IFunctionOverloadResolver;
 using FunctionOverloadResolverPtr = std::shared_ptr;
 
 /// The case of an explicit enumeration of values.
-SetPtr makeExplicitSet(
-    const ASTFunction * node, const ActionsDAG & actions, bool create_ordered_set,
-    ContextPtr context, const SizeLimits & limits, PreparedSets & prepared_sets);
+FutureSetPtr makeExplicitSet(
+    const ASTFunction * node, const ActionsDAG & actions, ContextPtr context, PreparedSets & prepared_sets);
 
 /** For ActionsVisitor
   * A stack of ExpressionActions corresponding to nested lambda expressions.
@@ -219,7 +218,7 @@ private:
     static void visit(const ASTLiteral & literal, const ASTPtr & ast, Data & data);
     static void visit(ASTExpressionList & expression_list, const ASTPtr & ast, Data & data);
 
-    static FutureSet makeSet(const ASTFunction & node, Data & data, bool no_subqueries);
+    static FutureSetPtr makeSet(const ASTFunction & node, Data & data, bool no_subqueries);
     static ASTs doUntuple(const ASTFunction * function, ActionsMatcher::Data & data);
     static std::optional getNameAndTypeFromAST(const ASTPtr & ast, Data & data);
 };
diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp
index cc54e7620f6..7dece81734c 100644
--- a/src/Interpreters/ExpressionAnalyzer.cpp
+++ b/src/Interpreters/ExpressionAnalyzer.cpp
@@ -450,76 +450,76 @@ void ExpressionAnalyzer::initGlobalSubqueriesAndExternalTables(bool do_global, b
 }
 
-void ExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name, const SelectQueryOptions & query_options)
-{
-    if (!prepared_sets)
-        return;
+// void ExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name, const SelectQueryOptions & query_options)
+// {
+//     if (!prepared_sets)
+//         return;
 
-    auto set_key = PreparedSetKey::forSubquery(*subquery_or_table_name);
+//     auto set_key = PreparedSetKey::forSubquery(*subquery_or_table_name);
 
-    if (prepared_sets->getFuture(set_key).isValid())
-        return; /// Already prepared.
+//     if (prepared_sets->getFuture(set_key).isValid())
+//         return; /// Already prepared.
-    if (auto set_ptr_from_storage_set = isPlainStorageSetInSubquery(subquery_or_table_name))
-    {
-        prepared_sets->set(set_key, set_ptr_from_storage_set);
-        return;
-    }
+//     if (auto set_ptr_from_storage_set = isPlainStorageSetInSubquery(subquery_or_table_name))
+//     {
+//         prepared_sets->set(set_key, set_ptr_from_storage_set);
+//         return;
+//     }
 
-    auto build_set = [&] () -> SetPtr
-    {
-        LOG_TRACE(getLogger(), "Building set, key: {}", set_key.toString());
+//     auto build_set = [&] () -> SetPtr
+//     {
+//         LOG_TRACE(getLogger(), "Building set, key: {}", set_key.toString());
 
-        auto interpreter_subquery = interpretSubquery(subquery_or_table_name, getContext(), {}, query_options);
-        auto io = interpreter_subquery->execute();
-        PullingAsyncPipelineExecutor executor(io.pipeline);
+//         auto interpreter_subquery = interpretSubquery(subquery_or_table_name, getContext(), {}, query_options);
+//         auto io = interpreter_subquery->execute();
+//         PullingAsyncPipelineExecutor executor(io.pipeline);
 
-        SetPtr set = std::make_shared(settings.size_limits_for_set_used_with_index, true, getContext()->getSettingsRef().transform_null_in);
-        set->setHeader(executor.getHeader().getColumnsWithTypeAndName());
+//         SetPtr set = std::make_shared(settings.size_limits_for_set_used_with_index, true, getContext()->getSettingsRef().transform_null_in);
+//         set->setHeader(executor.getHeader().getColumnsWithTypeAndName());
 
-        Block block;
-        while (executor.pull(block))
-        {
-            if (block.rows() == 0)
-                continue;
+//         Block block;
+//         while (executor.pull(block))
+//         {
+//             if (block.rows() == 0)
+//                 continue;
 
-            /// If the limits have been exceeded, give up and let the default subquery processing actions take place.
-            if (!set->insertFromBlock(block.getColumnsWithTypeAndName()))
-                return nullptr;
-        }
+//             /// If the limits have been exceeded, give up and let the default subquery processing actions take place.
+//             if (!set->insertFromBlock(block.getColumnsWithTypeAndName()))
+//                 return nullptr;
+//         }
 
-        set->finishInsert();
+//         set->finishInsert();
 
-        return set;
-    };
+//         return set;
+//     };
 
-    SetPtr set;
+//     SetPtr set;
 
-    auto set_cache = getContext()->getPreparedSetsCache();
-    if (set_cache)
-    {
-        auto from_cache = set_cache->findOrPromiseToBuild(set_key.toString());
-        if (from_cache.index() == 0)
-        {
-            set = build_set();
-            std::get<0>(from_cache).set_value(set);
-        }
-        else
-        {
-            LOG_TRACE(getLogger(), "Waiting for set, key: {}", set_key.toString());
-            set = std::get<1>(from_cache).get();
-        }
-    }
-    else
-    {
-        set = build_set();
-    }
+//     auto set_cache = getContext()->getPreparedSetsCache();
+//     if (set_cache)
+//     {
+//         auto from_cache = set_cache->findOrPromiseToBuild(set_key.toString());
+//         if (from_cache.index() == 0)
+//         {
+//             set = build_set();
+//             std::get<0>(from_cache).set_value(set);
+//         }
+//         else
+//         {
+//             LOG_TRACE(getLogger(), "Waiting for set, key: {}", set_key.toString());
+//             set = std::get<1>(from_cache).get();
+//         }
+//     }
+//     else
+//     {
+//         set = build_set();
+//     }
 
-    if (!set)
-        return;
+//     if (!set)
+//         return;
 
-    prepared_sets->set(set_key, std::move(set));
-}
+//     prepared_sets->set(set_key, std::move(set));
+// }
 
 SetPtr ExpressionAnalyzer::isPlainStorageSetInSubquery(const ASTPtr & subquery_or_table_name)
 {
@@ -536,50 +536,50 @@ SetPtr ExpressionAnalyzer::isPlainStorageSetInSubquery(const ASTPtr & subquery_o
 
 /// Performance optimization for IN() if storage supports it.
-void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node)
-{
-    if (!node || !storage() || !storage()->supportsIndexForIn())
-        return;
+// void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node)
+// {
+//     if (!node || !storage() || !storage()->supportsIndexForIn())
+//         return;
 
-    for (auto & child : node->children)
-    {
-        /// Don't descend into subqueries.
-        if (child->as())
-            continue;
+//     for (auto & child : node->children)
+//     {
+//         /// Don't descend into subqueries.
+//         if (child->as())
+//             continue;
 
-        /// Don't descend into lambda functions
-        const auto * func = child->as();
-        if (func && func->name == "lambda")
-            continue;
+//         /// Don't descend into lambda functions
+//         const auto * func = child->as();
+//         if (func && func->name == "lambda")
+//             continue;
 
-        makeSetsForIndex(child);
-    }
+//         makeSetsForIndex(child);
+//     }
 
-    const auto * func = node->as();
-    if (func && functionIsInOrGlobalInOperator(func->name))
-    {
-        const IAST & args = *func->arguments;
-        const ASTPtr & left_in_operand = args.children.at(0);
+//     const auto * func = node->as();
+//     if (func && functionIsInOrGlobalInOperator(func->name))
+//     {
+//         const IAST & args = *func->arguments;
+//         const ASTPtr & left_in_operand = args.children.at(0);
 
-        if (storage()->mayBenefitFromIndexForIn(left_in_operand, getContext(), metadata_snapshot))
-        {
-            const ASTPtr & arg = args.children.at(1);
-            if (arg->as() || arg->as())
-            {
-                if (settings.use_index_for_in_with_subqueries)
-                    tryMakeSetForIndexFromSubquery(arg, query_options);
-            }
-            else
-            {
-                auto temp_actions = std::make_shared(columns_after_join);
-                getRootActions(left_in_operand, true, temp_actions);
+//         if (storage()->mayBenefitFromIndexForIn(left_in_operand, getContext(), metadata_snapshot))
+//         {
+//             const ASTPtr & arg = args.children.at(1);
+//             if (arg->as() || arg->as())
+//             {
+//                 if (settings.use_index_for_in_with_subqueries)
+//                     tryMakeSetForIndexFromSubquery(arg, query_options);
+//             }
+//             else
+//             {
+//                 auto temp_actions = std::make_shared(columns_after_join);
+//                 getRootActions(left_in_operand, true, temp_actions);
 
-                if (prepared_sets && temp_actions->tryFindInOutputs(left_in_operand->getColumnName()))
-                    makeExplicitSet(func, *temp_actions, true, getContext(), settings.size_limits_for_set, *prepared_sets);
-            }
-        }
-    }
-}
+//                 if (prepared_sets && temp_actions->tryFindInOutputs(left_in_operand->getColumnName()))
+//                     makeExplicitSet(func, *temp_actions, true, getContext(), settings.size_limits_for_set, *prepared_sets);
+//             }
+//         }
+//     }
+// }
 
 void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts)
diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h
index 1b6e8e24091..e4926b80625 100644
--- a/src/Interpreters/ExpressionAnalyzer.h
+++ b/src/Interpreters/ExpressionAnalyzer.h
@@ -144,7 +144,7 @@ public:
     /** Create Set from a subquery or a table expression in the query. The created set is suitable for using the index.
       * The set will not be created if its size hits the limit.
       */
-    void tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name, const SelectQueryOptions & query_options = {});
+    // void tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name, const SelectQueryOptions & query_options = {});
 
     /** Checks if subquery is not a plain StorageSet.
       * Because while making set we will read data from StorageSet which is not allowed.
@@ -364,7 +364,7 @@ public:
     ActionsDAGPtr appendProjectResult(ExpressionActionsChain & chain) const;
 
     /// Create Set-s that we make from IN section to use index on them.
-    void makeSetsForIndex(const ASTPtr & node);
+    // void makeSetsForIndex(const ASTPtr & node);
 
 private:
     StorageMetadataPtr metadata_snapshot;
diff --git a/src/Interpreters/GlobalSubqueriesVisitor.h b/src/Interpreters/GlobalSubqueriesVisitor.h
index b105cae31c6..9fb2c02bd58 100644
--- a/src/Interpreters/GlobalSubqueriesVisitor.h
+++ b/src/Interpreters/GlobalSubqueriesVisitor.h
@@ -167,19 +167,21 @@ public:
         {
             /// Do not materialize external tables if it's explain statement.
         }
-        else if (getContext()->getSettingsRef().use_index_for_in_with_subqueries)
-        {
-            auto external_table = external_storage_holder->getTable();
-            auto table_out = external_table->write({}, external_table->getInMemoryMetadataPtr(), getContext());
-            auto io = interpreter->execute();
-            io.pipeline.complete(std::move(table_out));
-            CompletedPipelineExecutor executor(io.pipeline);
-            executor.execute();
-        }
+        // else if (getContext()->getSettingsRef().use_index_for_in_with_subqueries)
+        // {
+        //     auto external_table = external_storage_holder->getTable();
+        //     auto table_out = external_table->write({}, external_table->getInMemoryMetadataPtr(), getContext());
+        //     auto io = interpreter->execute();
+        //     io.pipeline.complete(std::move(table_out));
+        //     CompletedPipelineExecutor executor(io.pipeline);
+        //     executor.execute();
+        // }
         else
         {
-            auto & subquery_for_set = prepared_sets->getSubquery(external_table_name);
-            subquery_for_set.createSource(*interpreter, external_storage);
+            // auto & subquery_for_set = prepared_sets->getSubquery(external_table_name);
+            // subquery_for_set.createSource(*interpreter, external_storage);
+            auto key = subquery_or_table_name->getColumnName();
+            prepared_sets->addStorageToSubquery(key, std::move(external_storage));
         }
 
         /** NOTE If it was written IN tmp_table - the existing temporary (but not external) table,
diff --git a/src/Interpreters/PreparedSets.cpp b/src/Interpreters/PreparedSets.cpp
index 11af41cae8f..300ef2aadb6 100644
--- a/src/Interpreters/PreparedSets.cpp
+++ b/src/Interpreters/PreparedSets.cpp
@@ -95,11 +95,51 @@ String PreparedSetKey::toString() const
 
 /// If the subquery is not associated with any set, create default-constructed SubqueryForSet.
 /// It's aimed to fill external table passed to SubqueryForSet::createSource.
-SubqueryForSet & PreparedSets::getSubquery(const String & subquery_id) { return subqueries[subquery_id]; }
+void PreparedSets::addStorageToSubquery(const String & subquery_id, StoragePtr storage)
+{
+    auto it = subqueries.find(subquery_id);
+    if (it == subqueries.end())
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot find subquery {}", subquery_id);
 
-void PreparedSets::set(const PreparedSetKey & key, SetPtr set_) { sets[key] = FutureSet(set_); }
+    it->second->addStorage(std::move(storage));
+}
 
-FutureSet PreparedSets::getFuture(const PreparedSetKey & key) const
+FutureSetPtr PreparedSets::addFromStorage(const PreparedSetKey & key, SetPtr set_)
+{
+    auto from_storage = std::make_shared(std::move(set_));
+    auto [it, inserted] = sets.emplace(key, std::move(from_storage));
+
+    if (!inserted)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Duplicate set: {}", key.toString());
+
+    return it->second;
+}
+
+FutureSetPtr PreparedSets::addFromTuple(const PreparedSetKey & key, Block block)
+{
+    auto from_tuple = std::make_shared(std::move(block));
+    auto [it, inserted] = sets.emplace(key, std::move(from_tuple));
+
+    if (!inserted)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Duplicate set: {}", key.toString());
+
+    return it->second;
+}
+
+FutureSetPtr PreparedSets::addFromSubquery(const PreparedSetKey & key, SubqueryForSet subquery)
+{
+    auto id = subquery.key;
+    auto from_subquery = std::make_shared(std::move(subquery));
+    auto [it, inserted] = sets.emplace(key, from_subquery);
+
+    if (!inserted)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Duplicate set: {}", key.toString());
+
+    subqueries.emplace(id, std::move(from_subquery));
+    return it->second;
+}
+
+FutureSetPtr PreparedSets::getFuture(const PreparedSetKey & key) const
 {
     auto it = sets.find(key);
     if (it == sets.end())
@@ -107,24 +147,24 @@ FutureSet PreparedSets::getFuture(const PreparedSetKey & key) const
     return it->second;
 }
 
-SetPtr PreparedSets::get(const PreparedSetKey & key) const
-{
-    auto it = sets.find(key);
-    if (it == sets.end() || !it->second.isReady())
-        return nullptr;
-    return it->second.get();
-}
+// SetPtr PreparedSets::get(const PreparedSetKey & key) const
+// {
+//     auto it = sets.find(key);
+//     if (it == sets.end() || !it->second.isReady())
+//         return nullptr;
+//     return it->second.get();
+// }
 
-std::vector PreparedSets::getByTreeHash(IAST::Hash ast_hash) const
-{
-    std::vector res;
-    for (const auto & it : this->sets)
-    {
-        if (it.first.ast_hash == ast_hash)
-            res.push_back(it.second);
-    }
-    return res;
-}
+// std::vector PreparedSets::getByTreeHash(IAST::Hash ast_hash) const
+// {
+//     std::vector res;
+//     for (const auto & it : this->sets)
+//     {
+//         if (it.first.ast_hash == ast_hash)
+//             res.push_back(it.second);
+//     }
+//     return res;
+// }
 
 PreparedSets::SubqueriesForSets PreparedSets::detachSubqueries()
 {
@@ -194,14 +237,18 @@ std::unique_ptr FutureSetFromSubquery::buildPlan(const ContextPtr & c
         }
     }
 
-    subquery.set = set = std::make_shared(size_limits, create_ordered_set, transform_null_in);
+
+    const auto & settings = context->getSettingsRef();
+    auto size_limits = getSizeLimitsForSet(settings, create_ordered_set);
+
+    subquery.set = set = std::make_shared(size_limits, create_ordered_set, settings.transform_null_in);
 
     auto plan = subquery.detachSource();
+    auto description = subquery.key;
 
-    const Settings & settings = context->getSettingsRef();
     auto creating_set = std::make_unique(
         plan->getCurrentDataStream(),
-        subquery_id,
+        description,
        std::move(subquery),
         SizeLimits(settings.max_rows_to_transfer, settings.max_bytes_to_transfer, settings.transfer_overflow_mode),
         context);
@@ -213,4 +257,24 @@ std::unique_ptr FutureSetFromSubquery::buildPlan(const ContextPtr & c
     return plan;
 }
 
+
+static SizeLimits getSizeLimitsForUnorderedSet(const Settings & settings)
+{
+    return SizeLimits(settings.max_rows_in_set, settings.max_bytes_in_set, settings.set_overflow_mode);
+}
+
+static SizeLimits getSizeLimitsForOrderedSet(const Settings & settings)
+{
+    if (settings.use_index_for_in_with_subqueries_max_values &&
+        settings.use_index_for_in_with_subqueries_max_values < settings.max_rows_in_set)
+        return getSizeLimitsForUnorderedSet(settings);
+
+    return SizeLimits(settings.use_index_for_in_with_subqueries_max_values, settings.max_bytes_in_set, OverflowMode::BREAK);
+}
+
+SizeLimits FutureSet::getSizeLimitsForSet(const Settings & settings, bool ordered_set)
+{
+    return ordered_set ? getSizeLimitsForOrderedSet(settings) : getSizeLimitsForUnorderedSet(settings);
+}
+
 };
diff --git a/src/Interpreters/PreparedSets.h b/src/Interpreters/PreparedSets.h
index a4898fe5ec6..8ebabc32b0a 100644
--- a/src/Interpreters/PreparedSets.h
+++ b/src/Interpreters/PreparedSets.h
@@ -69,43 +69,46 @@ public:
     virtual SetPtr buildOrderedSetInplace(const ContextPtr & context) = 0;
     virtual std::unique_ptr build(const ContextPtr & context) = 0;
+
+    static SizeLimits getSizeLimitsForSet(const Settings & settings, bool ordered_set);
 };
 
-using FutureSetPtr = std::unique_ptr;
+using FutureSetPtr = std::shared_ptr;
 
 class FutureSetFromTuple final : public FutureSet
 {
 public:
-    FutureSetFromTuple(Block block_, const SizeLimits & size_limits_, bool transform_null_in_);
+    FutureSetFromTuple(Block block_);
 
     bool isReady() const override { return set != nullptr; }
     SetPtr get() const override { return set; }
 
-    SetPtr buildOrderedSetInplace(const ContextPtr &) override
+    SetPtr buildOrderedSetInplace(const ContextPtr & context) override
     {
-        fill(true);
+        fill(context, true);
         return set;
     }
 
-    std::unique_ptr build(const ContextPtr &) override
+    std::unique_ptr build(const ContextPtr & context) override
     {
-        fill(false);
+        fill(context, false);
         return nullptr;
     }
 
 private:
     Block block;
-    SizeLimits size_limits;
-    bool transform_null_in;
 
     SetPtr set;
 
-    void fill(bool create_ordered_set)
+    void fill(const ContextPtr & context, bool create_ordered_set)
     {
         if (set)
             return;
 
-        set = std::make_shared(size_limits, create_ordered_set, transform_null_in);
+        const auto & settings = context->getSettingsRef();
+        auto size_limits = getSizeLimitsForSet(settings, create_ordered_set);
+
+        set = std::make_shared(size_limits, create_ordered_set, settings.transform_null_in);
         set->setHeader(block.cloneEmpty().getColumnsWithTypeAndName());
         set->insertFromBlock(block.getColumnsWithTypeAndName());
         set->finishInsert();
@@ -145,13 +148,16 @@ public:
 class FutureSetFromSubquery : public FutureSet
 {
 public:
-    FutureSetFromSubquery(SubqueryForSet subquery_, String subquery_id, SizeLimits set_size_limit_, bool transform_null_in_);
+    FutureSetFromSubquery(SubqueryForSet subquery_);
 
     bool isReady() const override { return set != nullptr; }
     SetPtr get() const override { return set; }
 
     SetPtr buildOrderedSetInplace(const ContextPtr & context) override
     {
+        if (!context->getSettingsRef().use_index_for_in_with_subqueries)
+            return nullptr;
+
         auto plan = buildPlan(context, true);
 
         auto builder = plan->buildQueryPipeline(QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context));
@@ -169,16 +175,34 @@ public:
         return buildPlan(context, false);
     }
 
+
+    void addStorage(StoragePtr storage) { subquery.table = std::move(storage); }
+
 private:
     SetPtr set;
     SubqueryForSet subquery;
-    String subquery_id;
-    SizeLimits size_limits;
-    bool transform_null_in;
 
     std::unique_ptr buildPlan(const ContextPtr & context, bool create_ordered_set);
 };
 
+class FutureSetFromStorage : public FutureSet
+{
+public:
+    FutureSetFromStorage(SetPtr set_); // : set(std::move(set_) {}
+
+    bool isReady() const override { return set != nullptr; }
+    SetPtr get() const override { return set; }
+
+    SetPtr buildOrderedSetInplace(const ContextPtr &) override
+    {
+        return set->hasExplicitSetElements() ? set : nullptr;
+    }
+
+    std::unique_ptr build(const ContextPtr &) override { return nullptr; }
+
+private:
+    SetPtr set;
+};
+
 // class FutureSetFromFuture : public FutureSet
 // {
 // public:
@@ -230,15 +254,20 @@ struct PreparedSetKey
 class PreparedSets
 {
 public:
-    using SubqueriesForSets = std::unordered_map;
+    using SubqueriesForSets = std::unordered_map>;
 
-    SubqueryForSet & createOrGetSubquery(const String & subquery_id, const PreparedSetKey & key,
-                                         SizeLimits set_size_limit, bool transform_null_in);
-    SubqueryForSet & getSubquery(const String & subquery_id);
+    // SubqueryForSet & createOrGetSubquery(const String & subquery_id, const PreparedSetKey & key,
+    //                                      SizeLimits set_size_limit, bool transform_null_in);
 
-    void set(const PreparedSetKey & key, SetPtr set_);
-    FutureSet getFuture(const PreparedSetKey & key) const;
-    SetPtr get(const PreparedSetKey & key) const;
+    FutureSetPtr addFromStorage(const PreparedSetKey & key, SetPtr set_);
+    FutureSetPtr addFromTuple(const PreparedSetKey & key, Block block);
+    FutureSetPtr addFromSubquery(const PreparedSetKey & key, SubqueryForSet subquery);
+
+    void addStorageToSubquery(const String & subquery_id, StoragePtr external_storage);
+
+    FutureSetPtr getFuture(const PreparedSetKey & key) const;
+    //SubqueryForSet & getSubquery(const String & subquery_id);
+    // SetPtr get(const PreparedSetKey & key) const;
 
     /// Get subqueries and clear them.
     /// We need to build a plan for subqueries just once. That's why we can clear them after accessing them.
@@ -252,10 +281,10 @@ public:
     bool empty() const;
 
 private:
-    std::unordered_map sets;
+    std::unordered_map sets;
 
     /// This is the information required for building sets
-    // SubqueriesForSets subqueries;
+    SubqueriesForSets subqueries;
 };
 
 using PreparedSetsPtr = std::shared_ptr;
diff --git a/src/Planner/PlannerContext.h b/src/Planner/PlannerContext.h
index ccc4ab43638..9ecfdb6117a 100644
--- a/src/Planner/PlannerContext.h
+++ b/src/Planner/PlannerContext.h
@@ -57,18 +57,18 @@ class PlannerSet
 {
 public:
     /// Construct planner set that is ready for execution
-    explicit PlannerSet(FutureSet set_)
+    explicit PlannerSet(FutureSetPtr set_)
         : set(std::move(set_))
     {}
 
     /// Construct planner set with set and subquery node
     explicit PlannerSet(QueryTreeNodePtr subquery_node_)
-        : set(promise_to_build_set.get_future())
-        , subquery_node(std::move(subquery_node_))
+        //: set(promise_to_build_set.get_future())
+        : subquery_node(std::move(subquery_node_))
     {}
 
     /// Get a reference to a set that might be not built yet
-    const FutureSet & getSet() const
+    const FutureSetPtr & getSet() const
     {
         return set;
     }
@@ -80,14 +80,14 @@ public:
     }
 
     /// This promise will be fulfilled when set is built and all FutureSet objects will become ready
-    std::promise extractPromiseToBuildSet()
-    {
-        return std::move(promise_to_build_set);
-    }
+    // std::promise extractPromiseToBuildSet()
+    // {
+    //     return std::move(promise_to_build_set);
+    // }
 
 private:
-    std::promise promise_to_build_set;
-    FutureSet set;
+    //std::promise promise_to_build_set;
+    FutureSetPtr set;
 
     QueryTreeNodePtr subquery_node;
 };

From 125e5c50b891038740a2a75e2570a201999c09f3 Mon Sep 17 00:00:00 2001
From: Anton Popov
Date: Tue, 25 Apr 2023 21:30:03 +0000
Subject: [PATCH 0083/1997] allow to flush async insert queue

---
 programs/server/Server.cpp                    | 11 ++-
 src/Access/Common/AccessType.h                |  1 +
 src/Core/Settings.h                           |  2 +-
 src/Interpreters/AsynchronousInsertQueue.cpp  | 71 +++++++++++++++----
 src/Interpreters/AsynchronousInsertQueue.h    | 11 ++-
 src/Interpreters/InterpreterSystemQuery.cpp   | 17 +++++
 src/Parsers/ASTSystemQuery.h                  |  1 +
 .../02726_async_insert_flush_queue.reference  |  5 ++
 .../02726_async_insert_flush_queue.sql        | 28 ++++++++
 9 files changed, 128 insertions(+), 19 deletions(-)
 create mode 100644 tests/queries/0_stateless/02726_async_insert_flush_queue.reference
 create mode 100644 tests/queries/0_stateless/02726_async_insert_flush_queue.sql

diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index 8c0d50bae55..cd08de126c9 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -1461,16 +1461,21 @@ try
     /// Load global settings from default_profile and system_profile.
     global_context->setDefaultProfiles(config());
-    const Settings & settings = global_context->getSettingsRef();
 
     /// Initialize background executors after we load default_profile config.
     /// This is needed to load proper values of background_pool_size etc.
     global_context->initializeBackgroundExecutorsIfNeeded();
 
-    if (settings.async_insert_threads)
+    size_t async_insert_threads = config().getUInt("async_insert_threads", 16);
+    bool async_insert_queue_flush_on_shutdown = config().getBool("async_insert_queue_flush_on_shutdown", false);
+
+    if (async_insert_threads)
+    {
         global_context->setAsynchronousInsertQueue(std::make_shared(
             global_context,
-            settings.async_insert_threads));
+            async_insert_threads,
+            async_insert_queue_flush_on_shutdown));
+    }
 
     size_t mark_cache_size = server_settings.mark_cache_size;
     String mark_cache_policy = server_settings.mark_cache_policy;
diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h
index 57fa75dc67b..ae7e7ab5bf0 100644
--- a/src/Access/Common/AccessType.h
+++ b/src/Access/Common/AccessType.h
@@ -182,6 +182,7 @@ enum class AccessType
     M(SYSTEM_SYNC_FILE_CACHE, "SYNC FILE CACHE", GLOBAL, SYSTEM) \
     M(SYSTEM_FLUSH_DISTRIBUTED, "FLUSH DISTRIBUTED", TABLE, SYSTEM_FLUSH) \
     M(SYSTEM_FLUSH_LOGS, "FLUSH LOGS", GLOBAL, SYSTEM_FLUSH) \
+    M(SYSTEM_FLUSH_ASYNC_INSERT_QUEUE, "FLUSH ASYNC INSERT QUEUE", GLOBAL, SYSTEM_FLUSH) \
     M(SYSTEM_FLUSH, "", GROUP, SYSTEM) \
     M(SYSTEM_THREAD_FUZZER, "SYSTEM START THREAD FUZZER, SYSTEM STOP THREAD FUZZER, START THREAD FUZZER, STOP THREAD FUZZER", GLOBAL, SYSTEM) \
     M(SYSTEM_UNFREEZE, "SYSTEM UNFREEZE", GLOBAL, SYSTEM) \
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 101f6f1f934..96dbe26f820 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -646,7 +646,6 @@ class IColumn;
     M(UInt64, merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem, (24 * 10 * 1024 * 1024), "If at least as many bytes are read from one file, the reading can be parallelized, when reading from remote filesystem.", 0) \
     M(UInt64, remote_read_min_bytes_for_seek, 4 * DBMS_DEFAULT_BUFFER_SIZE, "Min bytes required for remote read (url, s3) to do seek, instead of read with ignore.", 0) \
     \
-    M(UInt64, async_insert_threads, 16, "Maximum number of threads to actually parse and insert data in background. Zero means asynchronous mode is disabled", 0) \
     M(Bool, async_insert, false, "If true, data from INSERT query is stored in queue and later flushed to table in background. Makes sense only for inserts via HTTP protocol. If wait_for_async_insert is false, INSERT query is processed almost instantly, otherwise client will wait until data will be flushed to table", 0) \
     M(Bool, wait_for_async_insert, true, "If true wait for processing of asynchronous insertion", 0) \
     M(Seconds, wait_for_async_insert_timeout, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC, "Timeout for waiting for processing asynchronous insertion", 0) \
@@ -783,6 +782,7 @@ class IColumn;
     MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, background_distributed_schedule_pool_size, 16) \
     MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, max_remote_read_network_bandwidth_for_server, 0) \
     MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, max_remote_write_network_bandwidth_for_server, 0) \
+    MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, async_insert_threads, 16) \
     /* ---- */ \
     MAKE_OBSOLETE(M, DefaultDatabaseEngine, default_database_engine, DefaultDatabaseEngine::Atomic) \
     MAKE_OBSOLETE(M, UInt64, max_pipeline_depth, 0) \
diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp
index b8de0246ae2..0a817995eb4 100644
--- a/src/Interpreters/AsynchronousInsertQueue.cpp
+++ b/src/Interpreters/AsynchronousInsertQueue.cpp
@@ -128,9 +128,10 @@ void AsynchronousInsertQueue::InsertData::Entry::finish(std::exception_ptr excep
     }
 }
 
-AsynchronousInsertQueue::AsynchronousInsertQueue(ContextPtr context_, size_t pool_size_)
+AsynchronousInsertQueue::AsynchronousInsertQueue(ContextPtr context_, size_t pool_size_, bool flush_on_shutdown_)
     : WithContext(context_)
     , pool_size(pool_size_)
+    , flush_on_shutdown(flush_on_shutdown_)
     , queue_shards(pool_size)
     , pool(CurrentMetrics::AsynchronousInsertThreads, CurrentMetrics::AsynchronousInsertThreadsActive, pool_size)
 {
@@ -143,8 +144,6 @@ AsynchronousInsertQueue::AsynchronousInsertQueue(ContextPtr context_, size_t poo
 
 AsynchronousInsertQueue::~AsynchronousInsertQueue()
 {
-    /// TODO: add a setting for graceful shutdown.
-
     LOG_TRACE(log, "Shutting down the asynchronous insertion queue");
     shutdown = true;
 
@@ -156,17 +155,18 @@ AsynchronousInsertQueue::~AsynchronousInsertQueue()
         assert(dump_by_first_update_threads[i].joinable());
         dump_by_first_update_threads[i].join();
 
+        if (flush_on_shutdown)
+        {
+            for (auto & [_, elem] : shard.queue)
+                scheduleDataProcessingJob(elem.key, std::move(elem.data), getContext());
+        }
+        else
         {
-            std::lock_guard lock(shard.mutex);
             for (auto & [_, elem] : shard.queue)
-            {
                 for (const auto & entry : elem.data->entries)
-                {
                     entry->finish(std::make_exception_ptr(Exception(
                         ErrorCodes::TIMEOUT_EXCEEDED, "Wait for async insert timeout exceeded")));
-                }
-            }
         }
     }
 
@@ -210,7 +210,9 @@ AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context)
     /// to avoid buffering of huge amount of data in memory.
     auto read_buf = getReadBufferFromASTInsertQuery(query);
 
-    LimitReadBuffer limit_buf(*read_buf, settings.async_insert_max_data_size, /* trow_exception */ false, /* exact_limit */ {});
+    LimitReadBuffer limit_buf(
+        *read_buf, settings.async_insert_max_data_size,
+        /*throw_exception=*/ false, /*exact_limit=*/ {});
 
     WriteBufferFromString write_buf(bytes);
     copyData(limit_buf, write_buf);
@@ -262,18 +264,19 @@ AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context)
         assert(data);
         data->size_in_bytes += entry_data_size;
-        ++data->query_number;
         data->entries.emplace_back(entry);
         insert_future = entry->getFuture();
 
         LOG_TRACE(log, "Have {} pending inserts with total {} bytes of data for query '{}'",
             data->entries.size(), data->size_in_bytes, key.query_str);
 
+        bool has_enough_bytes = data->size_in_bytes >= key.settings.async_insert_max_data_size;
+        bool has_enough_queries = data->entries.size() >= key.settings.async_insert_max_query_number && key.settings.async_insert_deduplicate;
+
         /// Here we check whether we hit the limit on maximum data size in the buffer.
         /// And use setting from query context.
         /// It works, because queries with the same set of settings are already grouped together.
-        if (data->size_in_bytes >= key.settings.async_insert_max_data_size
-            || (data->query_number >= key.settings.async_insert_max_query_number && key.settings.async_insert_deduplicate))
+        if (!flush_stopped && (has_enough_bytes || has_enough_queries))
         {
             data_to_process = std::move(data);
             shard.iterators.erase(it);
@@ -297,6 +300,47 @@ AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context)
     };
 }
 
+void AsynchronousInsertQueue::flushAll()
+{
+    std::lock_guard flush_lock(flush_mutex);
+
+    LOG_DEBUG(log, "Requested to flush asynchronous insert queue");
+
+    flush_stopped = true;
+    std::vector queues_to_flush(pool_size);
+
+    for (size_t i = 0; i < pool_size; ++i)
+    {
+        std::lock_guard lock(queue_shards[i].mutex);
+        queues_to_flush[i] = std::move(queue_shards[i].queue);
+        queue_shards[i].iterators.clear();
+    }
+
+    size_t total_queries = 0;
+    size_t total_bytes = 0;
+    size_t total_entries = 0;
+
+    for (auto & queue : queues_to_flush)
+    {
+        total_queries += queue.size();
+        for (auto & [_, entry] : queue)
+        {
+            total_bytes += entry.data->size_in_bytes;
+            total_entries += entry.data->entries.size();
+            scheduleDataProcessingJob(entry.key, std::move(entry.data), getContext());
+        }
+    }
+
+    LOG_DEBUG(log,
+        "Will wait for finishing of {} flushing jobs (about {} inserts, {} bytes, {} distinct queries)",
+        pool.active(), total_entries, total_bytes, total_queries);
+
+    pool.wait();
+
+    LOG_DEBUG(log, "Finished flushing of asynchronous insert queue");
+    flush_stopped = false;
+}
+
 void AsynchronousInsertQueue::processBatchDeadlines(size_t shard_num)
 {
     auto & shard = queue_shards[shard_num];
@@ -322,6 +366,9 @@ void AsynchronousInsertQueue::processBatchDeadlines(size_t shard_num)
         if (shutdown)
             return;
 
+        if (flush_stopped)
+            continue;
+
         const auto now = std::chrono::steady_clock::now();
 
         while (true)
diff --git a/src/Interpreters/AsynchronousInsertQueue.h b/src/Interpreters/AsynchronousInsertQueue.h
index 23a2860364d..97294d70ead 100644
--- a/src/Interpreters/AsynchronousInsertQueue.h
+++ b/src/Interpreters/AsynchronousInsertQueue.h
@@ -16,7 +16,7 @@ class AsynchronousInsertQueue : public WithContext
 public:
     using Milliseconds = std::chrono::milliseconds;
 
-    AsynchronousInsertQueue(ContextPtr context_, size_t pool_size_);
+    AsynchronousInsertQueue(ContextPtr context_, size_t pool_size_, bool flush_on_shutdown_);
~AsynchronousInsertQueue(); struct PushResult @@ -37,6 +37,7 @@ public: std::unique_ptr insert_data_buffer; }; + void flushAll(); PushResult push(ASTPtr query, ContextPtr query_context); size_t getPoolSize() const { return pool_size; } @@ -82,9 +83,7 @@ private: using EntryPtr = std::shared_ptr; std::list entries; - size_t size_in_bytes = 0; - size_t query_number = 0; }; using InsertDataPtr = std::unique_ptr; @@ -112,6 +111,8 @@ private: }; const size_t pool_size; + const bool flush_on_shutdown; + std::vector queue_shards; /// Logic and events behind queue are as follows: @@ -123,6 +124,10 @@ private: /// (async_insert_max_data_size setting). If so, then again we dump the data. std::atomic shutdown{false}; + std::atomic flush_stopped{false}; + + /// A mutex that prevents concurrent forced flushes of queue. + mutable std::mutex flush_mutex; /// Dump the data only inside this pool. ThreadPool pool; diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 36cb57c3678..f73429913b3 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -564,6 +565,17 @@ BlockIO InterpreterSystemQuery::execute() ); break; } + case Type::FLUSH_ASYNC_INSERT_QUEUE: + { + getContext()->checkAccess(AccessType::SYSTEM_FLUSH_ASYNC_INSERT_QUEUE); + auto * queue = getContext()->getAsynchronousInsertQueue(); + if (!queue) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Cannot flush asynchronous insert queue because it is not initialized"); + + queue->flushAll(); + break; + } case Type::STOP_LISTEN_QUERIES: case Type::START_LISTEN_QUERIES: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not supported yet", query.type); @@ -1156,6 +1168,11 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() required_access.emplace_back(AccessType::SYSTEM_FLUSH_LOGS); break; } + case Type::FLUSH_ASYNC_INSERT_QUEUE: + { + required_access.emplace_back(AccessType::SYSTEM_FLUSH_ASYNC_INSERT_QUEUE); + break; + } case Type::RESTART_DISK: { required_access.emplace_back(AccessType::SYSTEM_RESTART_DISK); diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index dfe2389edb7..9e2dca8bb23 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -72,6 +72,7 @@ public: START_REPLICATION_QUEUES, FLUSH_LOGS, FLUSH_DISTRIBUTED, + FLUSH_ASYNC_INSERT_QUEUE, STOP_DISTRIBUTED_SENDS, START_DISTRIBUTED_SENDS, START_THREAD_FUZZER, diff --git a/tests/queries/0_stateless/02726_async_insert_flush_queue.reference b/tests/queries/0_stateless/02726_async_insert_flush_queue.reference new file mode 100644 index 00000000000..b94888d227e --- /dev/null +++ b/tests/queries/0_stateless/02726_async_insert_flush_queue.reference @@ -0,0 +1,5 @@ +JSONEachRow 3 +Values 2 +0 +0 +9 diff --git a/tests/queries/0_stateless/02726_async_insert_flush_queue.sql b/tests/queries/0_stateless/02726_async_insert_flush_queue.sql new file mode 100644 index 00000000000..33f40eef14e --- /dev/null +++ b/tests/queries/0_stateless/02726_async_insert_flush_queue.sql @@ -0,0 +1,28 @@ +DROP TABLE IF EXISTS t_async_inserts_flush; + +CREATE TABLE t_async_inserts_flush (a UInt64) ENGINE = Memory; + +SET async_insert = 1; +SET wait_for_async_insert = 0; +SET async_insert_busy_timeout_ms = 1000000; + +INSERT INTO t_async_inserts_flush VALUES (1) (2); +INSERT INTO t_async_inserts_flush FORMAT JSONEachRow {"a": 10} {"a": 20}; +INSERT INTO 
t_async_inserts_flush FORMAT JSONEachRow {"a": "str"} +INSERT INTO t_async_inserts_flush FORMAT JSONEachRow {"a": 100} {"a": 200} +INSERT INTO t_async_inserts_flush VALUES (3) (4) (5); + +SELECT sleep(1) FORMAT Null; + +SELECT format, length(entries.query_id) FROM system.asynchronous_inserts +WHERE database = currentDatabase() AND table = 't_async_inserts_flush' +ORDER BY format; + +SELECT count() FROM t_async_inserts_flush; + +SYSTEM FLUSH ASYNC INSERT QUEUE; + +SELECT count() FROM system.asynchronous_inserts; +SELECT count() FROM t_async_inserts_flush; + +DROP TABLE t_async_inserts_flush; From 2d2483d695f39fd8488e3667d77faaaa4177bd92 Mon Sep 17 00:00:00 2001 From: Aleksei Golub Date: Mon, 24 Apr 2023 21:50:40 +0300 Subject: [PATCH 0084/1997] Rename DatabaseFileSystem to DatabaseFilesystem --- src/Databases/DatabaseFactory.cpp | 16 +++++++------- ...eFileSystem.cpp => DatabaseFilesystem.cpp} | 22 +++++++++---------- ...abaseFileSystem.h => DatabaseFilesystem.h} | 8 +++---- src/Databases/DatabasesOverlay.cpp | 4 ++-- ...cal_implicit_file_table_function.reference | 4 ++-- ...ouse_local_implicit_file_table_function.sh | 8 +++---- 6 files changed, 31 insertions(+), 31 deletions(-) rename src/Databases/{DatabaseFileSystem.cpp => DatabaseFilesystem.cpp} (82%) rename src/Databases/{DatabaseFileSystem.h => DatabaseFilesystem.h} (83%) diff --git a/src/Databases/DatabaseFactory.cpp b/src/Databases/DatabaseFactory.cpp index 3356689d892..9950ab5bf45 100644 --- a/src/Databases/DatabaseFactory.cpp +++ b/src/Databases/DatabaseFactory.cpp @@ -3,11 +3,11 @@ #include #include #include +#include #include #include #include #include -#include #include #include #include @@ -15,10 +15,10 @@ #include #include #include -#include -#include #include +#include #include +#include #include "config.h" @@ -133,13 +133,13 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String static const std::unordered_set database_engines{"Ordinary", "Atomic", "Memory", "Dictionary", "Lazy", "Replicated", "MySQL", "MaterializeMySQL", "MaterializedMySQL", - "PostgreSQL", "MaterializedPostgreSQL", "SQLite", "FileSystem"}; + "PostgreSQL", "MaterializedPostgreSQL", "SQLite", "Filesystem"}; if (!database_engines.contains(engine_name)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Database engine name `{}` does not exist", engine_name); static const std::unordered_set engines_with_arguments{"MySQL", "MaterializeMySQL", "MaterializedMySQL", - "Lazy", "Replicated", "PostgreSQL", "MaterializedPostgreSQL", "SQLite", "FileSystem"}; + "Lazy", "Replicated", "PostgreSQL", "MaterializedPostgreSQL", "SQLite", "Filesystem"}; static const std::unordered_set engines_with_table_overrides{"MaterializeMySQL", "MaterializedMySQL", "MaterializedPostgreSQL"}; bool engine_may_have_arguments = engines_with_arguments.contains(engine_name); @@ -433,7 +433,7 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String return std::make_shared(context, engine_define, create.attach, database_path); } #endif - else if (engine_name == "FileSystem") + else if (engine_name == "Filesystem") { const ASTFunction * engine = engine_define->engine; @@ -443,13 +443,13 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String if (engine->arguments && !engine->arguments->children.empty()) { if (engine->arguments->children.size() != 1) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "FileSystem database requires at most 1 argument: file_system_path"); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Filesystem 
database requires at most 1 argument: filesystem_path"); const auto & arguments = engine->arguments->children; init_path = safeGetLiteralValue(arguments[0], engine_name); } - return std::make_shared(database_name, init_path, context); + return std::make_shared(database_name, init_path, context); } throw Exception(ErrorCodes::UNKNOWN_DATABASE_ENGINE, "Unknown database engine: {}", engine_name); diff --git a/src/Databases/DatabaseFileSystem.cpp b/src/Databases/DatabaseFilesystem.cpp similarity index 82% rename from src/Databases/DatabaseFileSystem.cpp rename to src/Databases/DatabaseFilesystem.cpp index 8b92ad8080a..177b4717716 100644 --- a/src/Databases/DatabaseFileSystem.cpp +++ b/src/Databases/DatabaseFilesystem.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include @@ -17,25 +17,25 @@ namespace DB { -DatabaseFileSystem::DatabaseFileSystem(const String & name_, const String & path_, ContextPtr context_) +DatabaseFilesystem::DatabaseFilesystem(const String & name_, const String & path_, ContextPtr context_) : IDatabase(name_), WithContext(context_->getGlobalContext()), path(path_), log(&Poco::Logger::get("DatabaseFileSystem(" + name_ + ")")) { if (path.empty()) path = Poco::Path::current(); } -std::string DatabaseFileSystem::getTablePath(const std::string& table_name) const +std::string DatabaseFilesystem::getTablePath(const std::string& table_name) const { return Poco::Path(path, table_name).toString(); } -void DatabaseFileSystem::addTable(const std::string& table_name, StoragePtr table_storage) const +void DatabaseFilesystem::addTable(const std::string& table_name, StoragePtr table_storage) const { std::lock_guard lock(mutex); loaded_tables.emplace(table_name, table_storage); } -bool DatabaseFileSystem::isTableExist(const String & name, ContextPtr) const +bool DatabaseFilesystem::isTableExist(const String & name, ContextPtr) const { { std::lock_guard lock(mutex); @@ -47,7 +47,7 @@ bool DatabaseFileSystem::isTableExist(const String & name, ContextPtr) const return table_file.exists() && table_file.isFile(); } -StoragePtr DatabaseFileSystem::tryGetTable(const String & name, ContextPtr context_) const +StoragePtr DatabaseFilesystem::tryGetTable(const String & name, ContextPtr context_) const { // Check if the table exists in the loaded tables map { @@ -85,12 +85,12 @@ StoragePtr DatabaseFileSystem::tryGetTable(const String & name, ContextPtr conte } } -ASTPtr DatabaseFileSystem::getCreateDatabaseQuery() const +ASTPtr DatabaseFilesystem::getCreateDatabaseQuery() const { auto settings = getContext()->getSettingsRef(); ParserCreateQuery parser; - String query = "CREATE DATABASE " + backQuoteIfNeed(getDatabaseName()) + " ENGINE = FileSystem(" + backQuoteIfNeed(path) + ")"; + const String query = fmt::format("CREATE DATABASE {} ENGINE = Filesystem({})", backQuoteIfNeed(getDatabaseName()), backQuoteIfNeed(path)); ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(), "", 0, settings.max_parser_depth); if (const auto database_comment = getDatabaseComment(); !database_comment.empty()) @@ -102,7 +102,7 @@ ASTPtr DatabaseFileSystem::getCreateDatabaseQuery() const return ast; } -void DatabaseFileSystem::shutdown() +void DatabaseFilesystem::shutdown() { Tables tables_snapshot; { @@ -123,7 +123,7 @@ void DatabaseFileSystem::shutdown() /** * Returns an empty vector because the database is read-only and no tables can be backed up. 
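 * (Writes are also rejected one layer up: DatabasesOverlay, reworked later in
 * this series, consults isReadOnly() and skips this database when it
 * dispatches CREATE TABLE and ALTER TABLE.)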
*/ -std::vector> DatabaseFileSystem::getTablesForBackup(const FilterByNameFunction&, const ContextPtr&) const +std::vector> DatabaseFilesystem::getTablesForBackup(const FilterByNameFunction&, const ContextPtr&) const { return {}; } @@ -133,7 +133,7 @@ std::vector> DatabaseFileSystem::getTablesForBacku * Returns an empty iterator because the database does not have its own tables * But only caches them for quick access. */ -DatabaseTablesIteratorPtr DatabaseFileSystem::getTablesIterator(ContextPtr, const FilterByNameFunction&) const +DatabaseTablesIteratorPtr DatabaseFilesystem::getTablesIterator(ContextPtr, const FilterByNameFunction&) const { return std::make_unique(Tables{}, getDatabaseName()); } diff --git a/src/Databases/DatabaseFileSystem.h b/src/Databases/DatabaseFilesystem.h similarity index 83% rename from src/Databases/DatabaseFileSystem.h rename to src/Databases/DatabaseFilesystem.h index 474a7e78335..d5fdd528aa5 100644 --- a/src/Databases/DatabaseFileSystem.h +++ b/src/Databases/DatabaseFilesystem.h @@ -12,18 +12,18 @@ namespace DB class Context; /** - * DatabaseFileSystem allows to interact with files stored on the file system + * DatabaseFilesystem allows to interact with files stored on the file system * Uses TableFunctionFile to implicitly load file when a user requests the table, and provides read-only access to the data in the file * Tables are cached inside the database for quick access * * Used in clickhouse-local to access local files */ -class DatabaseFileSystem : public IDatabase, protected WithContext +class DatabaseFilesystem : public IDatabase, protected WithContext { public: - DatabaseFileSystem(const String & name, const String & path, ContextPtr context); + DatabaseFilesystem(const String & name, const String & path, ContextPtr context); - String getEngineName() const override { return "FileSystem"; } + String getEngineName() const override { return "Filesystem"; } bool isTableExist(const String & name, ContextPtr context) const override; diff --git a/src/Databases/DatabasesOverlay.cpp b/src/Databases/DatabasesOverlay.cpp index da26f9282a0..3563fa715a6 100644 --- a/src/Databases/DatabasesOverlay.cpp +++ b/src/Databases/DatabasesOverlay.cpp @@ -4,8 +4,8 @@ #include #include +#include #include -#include #include @@ -258,7 +258,7 @@ DatabaseTablesIteratorPtr DatabasesOverlay::getTablesIterator(ContextPtr context DatabasePtr CreateClickHouseLocalDatabaseOverlay(const String & name_, ContextPtr context_) { auto databaseCombiner = std::make_shared(name_, context_); - databaseCombiner->registerNextDatabase(std::make_shared(name_, "", context_)); + databaseCombiner->registerNextDatabase(std::make_shared(name_, "", context_)); databaseCombiner->registerNextDatabase(std::make_shared(name_, context_)); return databaseCombiner; } diff --git a/tests/queries/0_stateless/02707_clickhouse_local_implicit_file_table_function.reference b/tests/queries/0_stateless/02707_clickhouse_local_implicit_file_table_function.reference index 0fcd843e737..ccc02ad4f34 100644 --- a/tests/queries/0_stateless/02707_clickhouse_local_implicit_file_table_function.reference +++ b/tests/queries/0_stateless/02707_clickhouse_local_implicit_file_table_function.reference @@ -3,7 +3,7 @@ explicit: 4 implicit: 4 -Test 2: check FileSystem database +Test 2: check Filesystem database 4 -Test 3: check show database with FileSystem +Test 3: check show database with Filesystem test02707 diff --git a/tests/queries/0_stateless/02707_clickhouse_local_implicit_file_table_function.sh 
b/tests/queries/0_stateless/02707_clickhouse_local_implicit_file_table_function.sh index 24de0ad579c..7c9095b3d8b 100755 --- a/tests/queries/0_stateless/02707_clickhouse_local_implicit_file_table_function.sh +++ b/tests/queries/0_stateless/02707_clickhouse_local_implicit_file_table_function.sh @@ -24,19 +24,19 @@ echo "implicit:" $CLICKHOUSE_LOCAL -q "SELECT COUNT(*) FROM \"${dir}/tmp.csv\"" ################# -echo "Test 2: check FileSystem database" +echo "Test 2: check Filesystem database" $CLICKHOUSE_LOCAL --multiline --multiquery -q """ DROP DATABASE IF EXISTS test; -CREATE DATABASE test ENGINE = FileSystem('${dir}'); +CREATE DATABASE test ENGINE = Filesystem('${dir}'); SELECT COUNT(*) FROM test.\`tmp.csv\`; DROP DATABASE test; """ ################# -echo "Test 3: check show database with FileSystem" +echo "Test 3: check show database with Filesystem" $CLICKHOUSE_LOCAL --multiline --multiquery -q """ DROP DATABASE IF EXISTS test02707; -CREATE DATABASE test02707 ENGINE = FileSystem('${dir}'); +CREATE DATABASE test02707 ENGINE = Filesystem('${dir}'); SHOW DATABASES; DROP DATABASE test02707; """ | grep "test02707" From 79ca39d920fbc52e92f6bbc9496bde2cc1afec42 Mon Sep 17 00:00:00 2001 From: Aleksei Golub Date: Mon, 24 Apr 2023 22:26:16 +0300 Subject: [PATCH 0085/1997] Fixed exception messages --- src/Databases/DatabasesOverlay.cpp | 42 +++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/src/Databases/DatabasesOverlay.cpp b/src/Databases/DatabasesOverlay.cpp index 3563fa715a6..c3af6d9305e 100644 --- a/src/Databases/DatabasesOverlay.cpp +++ b/src/Databases/DatabasesOverlay.cpp @@ -65,7 +65,12 @@ void DatabasesOverlay::createTable(ContextPtr context_, const String & table_nam continue; } } - throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no databases for CREATE TABLE {} query in Database{}", table_name, getEngineName()); + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "There is no databases for CREATE TABLE `{}` query in database `{}` (engine {})", + table_name, + getDatabaseName(), + getEngineName()); } void DatabasesOverlay::dropTable(ContextPtr context_, const String & table_name, bool sync) @@ -82,7 +87,12 @@ void DatabasesOverlay::dropTable(ContextPtr context_, const String & table_name, continue; } } - throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no databases for DROP TABLE {} query in Database{}", table_name, getEngineName()); + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "There is no databases for DROP TABLE `{}` query in database `{}` (engine {})", + table_name, + getDatabaseName(), + getEngineName()); } void DatabasesOverlay::attachTable( @@ -100,7 +110,12 @@ void DatabasesOverlay::attachTable( continue; } } - throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no databases for ATTACH TABLE query in Database{}", getEngineName()); + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "There is no databases for ATTACH TABLE `{}` query in database `{}` (engine {})", + table_name, + getDatabaseName(), + getEngineName()); } StoragePtr DatabasesOverlay::detachTable(ContextPtr context_, const String & table_name) @@ -119,7 +134,12 @@ StoragePtr DatabasesOverlay::detachTable(ContextPtr context_, const String & tab continue; } } - throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no databases for DETACH TABLE {} query in Database{}", table_name, getEngineName()); + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "There is no databases for DETACH TABLE `{}` query in database `{}` (engine {})", + table_name, + getDatabaseName(), + 
getEngineName()); } ASTPtr DatabasesOverlay::getCreateTableQueryImpl(const String & name, ContextPtr context_, bool throw_on_error) const @@ -132,7 +152,12 @@ ASTPtr DatabasesOverlay::getCreateTableQueryImpl(const String & name, ContextPtr break; } if (!result && throw_on_error) - throw Exception(ErrorCodes::CANNOT_GET_CREATE_TABLE_QUERY, "There is no metadata of table {} in Database{}", name, getEngineName()); + throw Exception( + ErrorCodes::CANNOT_GET_CREATE_TABLE_QUERY, + "There is no metadata of table `{}` in database `{}` (engine {})", + name, + getDatabaseName(), + getEngineName()); return result; } @@ -201,7 +226,12 @@ void DatabasesOverlay::alterTable(ContextPtr local_context, const StorageID & ta continue; } } - throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no databases for alterTable in Database{}", getEngineName()); + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "There is no databases for ALTER TABLE `{}` query in database `{}` (engine {})", + table_id.table_name, + getDatabaseName(), + getEngineName()); } std::vector> From c9f8dd8bfd3d4123a0a7111f19d8863b19729d9a Mon Sep 17 00:00:00 2001 From: Aleksei Golub Date: Mon, 24 Apr 2023 22:53:32 +0300 Subject: [PATCH 0086/1997] Replaced Poco::File with std::filesystem --- src/Databases/DatabaseFactory.cpp | 2 +- src/Databases/DatabaseFilesystem.cpp | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/Databases/DatabaseFactory.cpp b/src/Databases/DatabaseFactory.cpp index 9950ab5bf45..8a50c31efc8 100644 --- a/src/Databases/DatabaseFactory.cpp +++ b/src/Databases/DatabaseFactory.cpp @@ -437,7 +437,7 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String { const ASTFunction * engine = engine_define->engine; - // If init_path is empty, then the current path from Poco will be used + /// If init_path is empty, then the current path will be used std::string init_path; if (engine->arguments && !engine->arguments->children.empty()) diff --git a/src/Databases/DatabaseFilesystem.cpp b/src/Databases/DatabaseFilesystem.cpp index 177b4717716..1decb273ae1 100644 --- a/src/Databases/DatabaseFilesystem.cpp +++ b/src/Databases/DatabaseFilesystem.cpp @@ -8,11 +8,12 @@ #include #include #include -#include -#include #include #include +#include + +namespace fs = std::filesystem; namespace DB { @@ -21,12 +22,12 @@ DatabaseFilesystem::DatabaseFilesystem(const String & name_, const String & path : IDatabase(name_), WithContext(context_->getGlobalContext()), path(path_), log(&Poco::Logger::get("DatabaseFileSystem(" + name_ + ")")) { if (path.empty()) - path = Poco::Path::current(); + path = fs::current_path(); } std::string DatabaseFilesystem::getTablePath(const std::string& table_name) const { - return Poco::Path(path, table_name).toString(); + return fs::path(path) / table_name; } void DatabaseFilesystem::addTable(const std::string& table_name, StoragePtr table_storage) const @@ -43,8 +44,8 @@ bool DatabaseFilesystem::isTableExist(const String & name, ContextPtr) const return true; } - Poco::File table_file(getTablePath(name)); - return table_file.exists() && table_file.isFile(); + fs::path table_file_path(getTablePath(name)); + return fs::exists(table_file_path) && fs::is_regular_file(table_file_path); } StoragePtr DatabaseFilesystem::tryGetTable(const String & name, ContextPtr context_) const @@ -62,8 +63,7 @@ StoragePtr DatabaseFilesystem::tryGetTable(const String & name, ContextPtr conte try { // If the table doesn't exist in the tables map, check if the corresponding file exists 
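        // The Poco -> std::filesystem replacements made by this commit, side
        // by side (an illustrative summary of the hunks above and below):
        //
        //     Poco::Path::current()            -> fs::current_path()
        //     Poco::Path(dir, name).toString() -> (fs::path(dir) / name).string()
        //     Poco::File(p).exists()           -> fs::exists(p)
        //     Poco::File(p).isFile()           -> fs::is_regular_file(p)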
- Poco::File table_file(table_path); - if (!table_file.exists()) + if (!fs::exists(table_path) || !fs::is_regular_file(table_path)) return nullptr; // If the file exists, create a new table using TableFunctionFile and return it. From 26812f36fb73ca8a3f1c16a0db54dd4327f7dc6c Mon Sep 17 00:00:00 2001 From: Aleksei Golub Date: Wed, 26 Apr 2023 01:13:29 +0300 Subject: [PATCH 0087/1997] Added read-only database setting; Fixed error messages for filesystem database; added tests --- src/Databases/DatabaseFilesystem.cpp | 46 ++++++++------- src/Databases/DatabaseFilesystem.h | 8 +++ src/Databases/DatabasesOverlay.cpp | 13 ++--- src/Databases/IDatabase.h | 4 +- src/Interpreters/DatabaseCatalog.cpp | 14 ++++- .../02722_database_filesystem.reference | 12 ++++ .../0_stateless/02722_database_filesystem.sh | 58 +++++++++++++++++++ 7 files changed, 124 insertions(+), 31 deletions(-) create mode 100644 tests/queries/0_stateless/02722_database_filesystem.reference create mode 100755 tests/queries/0_stateless/02722_database_filesystem.sh diff --git a/src/Databases/DatabaseFilesystem.cpp b/src/Databases/DatabaseFilesystem.cpp index 1decb273ae1..106885e7c3e 100644 --- a/src/Databases/DatabaseFilesystem.cpp +++ b/src/Databases/DatabaseFilesystem.cpp @@ -21,8 +21,7 @@ namespace DB DatabaseFilesystem::DatabaseFilesystem(const String & name_, const String & path_, ContextPtr context_) : IDatabase(name_), WithContext(context_->getGlobalContext()), path(path_), log(&Poco::Logger::get("DatabaseFileSystem(" + name_ + ")")) { - if (path.empty()) - path = fs::current_path(); + path = fs::path(path).lexically_normal().string(); } std::string DatabaseFilesystem::getTablePath(const std::string& table_name) const @@ -48,7 +47,7 @@ bool DatabaseFilesystem::isTableExist(const String & name, ContextPtr) const return fs::exists(table_file_path) && fs::is_regular_file(table_file_path); } -StoragePtr DatabaseFilesystem::tryGetTable(const String & name, ContextPtr context_) const +StoragePtr DatabaseFilesystem::getTableImpl(const String & name, ContextPtr context_) const { // Check if the table exists in the loaded tables map { @@ -60,24 +59,31 @@ StoragePtr DatabaseFilesystem::tryGetTable(const String & name, ContextPtr conte auto table_path = getTablePath(name); + // If the file exists, create a new table using TableFunctionFile and return it. + auto args = makeASTFunction("file", std::make_shared(table_path)); + + auto table_function = TableFunctionFactory::instance().get(args, context_); + if (!table_function) + return nullptr; + + auto table_storage = table_function->execute(args, context_, name); + if (table_storage) + addTable(name, table_storage); + + return table_storage; +} + +StoragePtr DatabaseFilesystem::getTable(const String & name, ContextPtr context_) const +{ + if (auto storage = getTableImpl(name, context_)) + return storage; + throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {}.{} doesn't exist", backQuoteIfNeed(getDatabaseName()), backQuoteIfNeed(name)); +} + +StoragePtr DatabaseFilesystem::tryGetTable(const String & name, ContextPtr context_) const { try { - // If the table doesn't exist in the tables map, check if the corresponding file exists - if (!fs::exists(table_path) || !fs::is_regular_file(table_path)) - return nullptr; - - // If the file exists, create a new table using TableFunctionFile and return it. 
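        // tryGetTable() is the no-throw twin of getTable(): any failure to
        // materialize the file-backed table surfaces to the caller as
        // "no such table" (nullptr) rather than as an exception.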
- auto args = makeASTFunction("file", std::make_shared(table_path)); - - auto table_function = TableFunctionFactory::instance().get(args, context_); - if (!table_function) - return nullptr; - - auto table_storage = table_function->execute(args, context_, name); - if (table_storage) - addTable(name, table_storage); - - return table_storage; + return getTable(name, context_); } catch (...) { @@ -90,7 +96,7 @@ ASTPtr DatabaseFilesystem::getCreateDatabaseQuery() const auto settings = getContext()->getSettingsRef(); ParserCreateQuery parser; - const String query = fmt::format("CREATE DATABASE {} ENGINE = Filesystem({})", backQuoteIfNeed(getDatabaseName()), backQuoteIfNeed(path)); + const String query = fmt::format("CREATE DATABASE {} ENGINE = Filesystem('{}')", backQuoteIfNeed(getDatabaseName()), path); ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(), "", 0, settings.max_parser_depth); if (const auto database_comment = getDatabaseComment(); !database_comment.empty()) diff --git a/src/Databases/DatabaseFilesystem.h b/src/Databases/DatabaseFilesystem.h index d5fdd528aa5..697511ac5b3 100644 --- a/src/Databases/DatabaseFilesystem.h +++ b/src/Databases/DatabaseFilesystem.h @@ -27,10 +27,14 @@ public: bool isTableExist(const String & name, ContextPtr context) const override; + StoragePtr getTable(const String & name, ContextPtr context) const override; + StoragePtr tryGetTable(const String & name, ContextPtr context) const override; bool empty() const override { return true; } + bool isReadOnly() const override { return true; } + ASTPtr getCreateDatabaseQuery() const override; void shutdown() override; @@ -39,9 +43,13 @@ public: DatabaseTablesIteratorPtr getTablesIterator(ContextPtr, const FilterByNameFunction &) const override; protected: + StoragePtr getTableImpl(const String & name, ContextPtr context) const; + std::string getTablePath(const std::string & table_name) const; + void addTable(const std::string & table_name, StoragePtr table_storage) const; + private: String path; mutable Tables loaded_tables TSA_GUARDED_BY(mutex); diff --git a/src/Databases/DatabasesOverlay.cpp b/src/Databases/DatabasesOverlay.cpp index c3af6d9305e..5a6a4fe5cc6 100644 --- a/src/Databases/DatabasesOverlay.cpp +++ b/src/Databases/DatabasesOverlay.cpp @@ -55,15 +55,11 @@ void DatabasesOverlay::createTable(ContextPtr context_, const String & table_nam { for (auto & db : databases) { - try + if (!db->isReadOnly()) { db->createTable(context_, table_name, table, query); return; } - catch (...) - { - continue; - } } throw Exception( ErrorCodes::LOGICAL_ERROR, @@ -218,8 +214,11 @@ void DatabasesOverlay::alterTable(ContextPtr local_context, const StorageID & ta { try { - db->alterTable(local_context, table_id, metadata); - return; + if (!db->isReadOnly()) + { + db->alterTable(local_context, table_id, metadata); + return; + } } catch (...) { diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h index 53a2f372814..6508e2ce060 100644 --- a/src/Databases/IDatabase.h +++ b/src/Databases/IDatabase.h @@ -170,7 +170,7 @@ public: /// Get the table for work. Return nullptr if there is no table. virtual StoragePtr tryGetTable(const String & name, ContextPtr context) const = 0; - StoragePtr getTable(const String & name, ContextPtr context) const; + virtual StoragePtr getTable(const String & name, ContextPtr context) const; virtual UUID tryGetTableUUID(const String & /*table_name*/) const { return UUIDHelpers::Nil; } @@ -183,6 +183,8 @@ public: /// Is the database empty. 
virtual bool empty() const = 0; + virtual bool isReadOnly() const { return false; } + /// Add the table to the database. Record its presence in the metadata. virtual void createTable( ContextPtr /*context*/, diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 8d3fa91a7fe..f9e74fadcbd 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -338,9 +338,17 @@ DatabaseAndTable DatabaseCatalog::getTableImpl( database = it->second; } - auto table = database->tryGetTable(table_id.table_name, context_); - if (!table && exception) - exception->emplace(Exception(ErrorCodes::UNKNOWN_TABLE, "Table {} doesn't exist", table_id.getNameForLogs())); + StoragePtr table = nullptr; + try + { + table = database->getTable(table_id.table_name, context_); + } + catch (const Exception & e) + { + if (exception) + exception->emplace(*e.clone()); + } + if (!table) database = nullptr; diff --git a/tests/queries/0_stateless/02722_database_filesystem.reference b/tests/queries/0_stateless/02722_database_filesystem.reference new file mode 100644 index 00000000000..a583f1e2e3c --- /dev/null +++ b/tests/queries/0_stateless/02722_database_filesystem.reference @@ -0,0 +1,12 @@ +Test 1: create filesystem database and check implicit calls +0 +test1 +4 +4 +4 +Test 2: check DatabaseFilesystem access rights on server +OK +OK +OK +OK +OK diff --git a/tests/queries/0_stateless/02722_database_filesystem.sh b/tests/queries/0_stateless/02722_database_filesystem.sh new file mode 100755 index 00000000000..0adeface438 --- /dev/null +++ b/tests/queries/0_stateless/02722_database_filesystem.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash +# Tags: no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# see 01658_read_file_to_stringcolumn.sh +CLICKHOUSE_USER_FILES_PATH=$(clickhouse-client --query "select _path, _file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') + +# Prepare data +mkdir -p ${CLICKHOUSE_USER_FILES_PATH}/tmp/ +echo '"id","str","int","text"' > ${CLICKHOUSE_USER_FILES_PATH}/tmp.csv +echo '1,"abc",123,"abacaba"' >> ${CLICKHOUSE_USER_FILES_PATH}/tmp.csv +echo '2,"def",456,"bacabaa"' >> ${CLICKHOUSE_USER_FILES_PATH}/tmp.csv +echo '3,"story",78912,"acabaab"' >> ${CLICKHOUSE_USER_FILES_PATH}/tmp.csv +echo '4,"history",21321321,"cabaaba"' >> ${CLICKHOUSE_USER_FILES_PATH}/tmp.csv + +tmp_dir=${CLICKHOUSE_TEST_UNIQUE_NAME} +[[ -d $tmp_dir ]] && rm -rd $tmp_dir +mkdir $tmp_dir +cp ${CLICKHOUSE_USER_FILES_PATH}/tmp.csv ${tmp_dir}/tmp.csv +cp ${CLICKHOUSE_USER_FILES_PATH}/tmp.csv ${CLICKHOUSE_USER_FILES_PATH}/tmp/tmp.csv + +################# +echo "Test 1: create filesystem database and check implicit calls" +${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ +DROP DATABASE IF EXISTS test1; +CREATE DATABASE test1 ENGINE = Filesystem; +""" +echo $? 
+${CLICKHOUSE_CLIENT} --query "SHOW DATABASES" | grep "test1" +${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`tmp.csv\`;" +${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`tmp/tmp.csv\`;" +${CLICKHOUSE_LOCAL} -q "SELECT COUNT(*) FROM \"${tmp_dir}/tmp.csv\"" + +################# +echo "Test 2: check DatabaseFilesystem access rights on server" +# Allows list files only inside user_files +${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`../tmp.csv\`;" 2>&1| grep -F "Code: 291" > /dev/null && echo "OK" +${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`/tmp/tmp.csv\`;" 2>&1| grep -F "Code: 291" > /dev/null && echo "OK" + +${CLICKHOUSE_CLIENT} --multiline --multiquery --query """ +USE test1; +SELECT COUNT(*) FROM \"../${tmp_dir}/tmp.csv\"; +""" 2>&1| grep -F "Code: 291" > /dev/null && echo "OK" +${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`../../../../../../tmp.csv\`;" 2>&1| grep -F "Code: 291" > /dev/null && echo "OK" +${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ +DROP DATABASE IF EXISTS test2; +CREATE DATABASE test2 ENGINE = Filesystem('/tmp'); +SELECT COUNT(*) FROM test2.\`tmp.csv\`; +""" 2>&1| grep -F "Code: 291" > /dev/null && echo "OK" + +# Clean +${CLICKHOUSE_CLIENT} --query "DROP DATABASE test1;" +${CLICKHOUSE_CLIENT} --query "DROP DATABASE test2;" +rm -rd $tmp_dir +rm -rd $CLICKHOUSE_USER_FILES_PATH From 4606e660683992b630f9db952beda9b261f82d76 Mon Sep 17 00:00:00 2001 From: Aleksei Golub Date: Wed, 26 Apr 2023 11:06:01 +0300 Subject: [PATCH 0088/1997] Fix style --- src/Databases/DatabaseFilesystem.cpp | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/Databases/DatabaseFilesystem.cpp b/src/Databases/DatabaseFilesystem.cpp index 106885e7c3e..16aed185669 100644 --- a/src/Databases/DatabaseFilesystem.cpp +++ b/src/Databases/DatabaseFilesystem.cpp @@ -18,18 +18,23 @@ namespace fs = std::filesystem; namespace DB { +namespace ErrorCodes +{ + extern const int UNKNOWN_TABLE; +} + DatabaseFilesystem::DatabaseFilesystem(const String & name_, const String & path_, ContextPtr context_) : IDatabase(name_), WithContext(context_->getGlobalContext()), path(path_), log(&Poco::Logger::get("DatabaseFileSystem(" + name_ + ")")) { path = fs::path(path).lexically_normal().string(); } -std::string DatabaseFilesystem::getTablePath(const std::string& table_name) const +std::string DatabaseFilesystem::getTablePath(const std::string & table_name) const { return fs::path(path) / table_name; } -void DatabaseFilesystem::addTable(const std::string& table_name, StoragePtr table_storage) const +void DatabaseFilesystem::addTable(const std::string & table_name, StoragePtr table_storage) const { std::lock_guard lock(mutex); loaded_tables.emplace(table_name, table_storage); @@ -80,7 +85,8 @@ StoragePtr DatabaseFilesystem::getTable(const String & name, ContextPtr context_ throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {}.{} doesn't exist", backQuoteIfNeed(getDatabaseName()), backQuoteIfNeed(name)); } -StoragePtr DatabaseFilesystem::tryGetTable(const String & name, ContextPtr context_) const { +StoragePtr DatabaseFilesystem::tryGetTable(const String & name, ContextPtr context_) const +{ try { return getTable(name, context_); @@ -127,9 +133,9 @@ void DatabaseFilesystem::shutdown() } /** - * Returns an empty vector because the database is read-only and no tables can be backed up. 
+ * Returns an empty vector because the database is read-only and no tables can be backed up */ -std::vector> DatabaseFilesystem::getTablesForBackup(const FilterByNameFunction&, const ContextPtr&) const +std::vector> DatabaseFilesystem::getTablesForBackup(const FilterByNameFunction &, const ContextPtr &) const { return {}; } @@ -137,9 +143,9 @@ std::vector> DatabaseFilesystem::getTablesForBacku /** * * Returns an empty iterator because the database does not have its own tables - * But only caches them for quick access. + * But only caches them for quick access */ -DatabaseTablesIteratorPtr DatabaseFilesystem::getTablesIterator(ContextPtr, const FilterByNameFunction&) const +DatabaseTablesIteratorPtr DatabaseFilesystem::getTablesIterator(ContextPtr, const FilterByNameFunction &) const { return std::make_unique(Tables{}, getDatabaseName()); } From ca1501aeb4e9c7a1db131f4c24255bd24bd99059 Mon Sep 17 00:00:00 2001 From: Aleksei Golub Date: Wed, 26 Apr 2023 13:05:56 +0300 Subject: [PATCH 0089/1997] retrigger checks From 1f90e9bde8ab740ae5fda958ca93f9c4abab6008 Mon Sep 17 00:00:00 2001 From: Aleksei Golub Date: Wed, 26 Apr 2023 14:37:41 +0300 Subject: [PATCH 0090/1997] retrigger checks From e20f92ce0f6ef4d06813932025cdea10a361631c Mon Sep 17 00:00:00 2001 From: Aleksei Golub Date: Thu, 27 Apr 2023 21:26:36 +0300 Subject: [PATCH 0091/1997] Fixed exceptions handling; Fixed style; --- programs/local/LocalServer.cpp | 10 +++++- src/Databases/DatabaseFactory.cpp | 2 +- src/Databases/DatabaseFilesystem.cpp | 48 +++++++++++++++++++++++---- src/Databases/DatabasesOverlay.cpp | 49 ++++++---------------------- src/Databases/DatabasesOverlay.h | 2 -- 5 files changed, 62 insertions(+), 49 deletions(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 566d11791ca..4939997b323 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -149,6 +150,13 @@ static DatabasePtr createMemoryDatabaseIfNotExists(ContextPtr context, const Str return system_database; } +static DatabasePtr createClickHouseLocalDatabaseOverlay(const String & name_, ContextPtr context_) +{ + auto databaseCombiner = std::make_shared(name_, context_); + databaseCombiner->registerNextDatabase(std::make_shared(name_, "", context_)); + databaseCombiner->registerNextDatabase(std::make_shared(name_, context_)); + return databaseCombiner; +} /// If path is specified and not empty, will try to setup server environment and load existing metadata void LocalServer::tryInitPath() @@ -648,7 +656,7 @@ void LocalServer::processConfig() * if such tables will not be dropped, clickhouse-server will not be able to load them due to security reasons. 
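 * The overlay registered below resolves table names through
 * DatabaseFilesystem first and falls back to DatabaseMemory, so file-backed
 * tables and ordinary in-memory tables share the default database's
 * namespace.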
*/ std::string default_database = config().getString("default_database", "_local"); - DatabaseCatalog::instance().attachDatabase(default_database, CreateClickHouseLocalDatabaseOverlay(default_database, global_context)); + DatabaseCatalog::instance().attachDatabase(default_database, createClickHouseLocalDatabaseOverlay(default_database, global_context)); global_context->setCurrentDatabase(default_database); applyCmdOptions(global_context); diff --git a/src/Databases/DatabaseFactory.cpp b/src/Databases/DatabaseFactory.cpp index 8a50c31efc8..1be0d5dd7b2 100644 --- a/src/Databases/DatabaseFactory.cpp +++ b/src/Databases/DatabaseFactory.cpp @@ -443,7 +443,7 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String if (engine->arguments && !engine->arguments->children.empty()) { if (engine->arguments->children.size() != 1) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Filesystem database requires at most 1 argument: filesystem_path"); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Filesystem database requires exactly 1 argument: filesystem_path"); const auto & arguments = engine->arguments->children; init_path = safeGetLiteralValue(arguments[0], engine_name); diff --git a/src/Databases/DatabaseFilesystem.cpp b/src/Databases/DatabaseFilesystem.cpp index 16aed185669..8275bdf6151 100644 --- a/src/Databases/DatabaseFilesystem.cpp +++ b/src/Databases/DatabaseFilesystem.cpp @@ -20,24 +20,42 @@ namespace DB namespace ErrorCodes { + extern const int LOGICAL_ERROR; extern const int UNKNOWN_TABLE; + extern const int DATABASE_ACCESS_DENIED; + extern const int BAD_ARGUMENTS; } DatabaseFilesystem::DatabaseFilesystem(const String & name_, const String & path_, ContextPtr context_) : IDatabase(name_), WithContext(context_->getGlobalContext()), path(path_), log(&Poco::Logger::get("DatabaseFileSystem(" + name_ + ")")) { - path = fs::path(path).lexically_normal().string(); + fs::path user_files_path; + if (context_->getApplicationType() != Context::ApplicationType::LOCAL) + user_files_path = fs::canonical(fs::path(getContext()->getUserFilesPath())); + + if (fs::path(path).is_relative()) + path = user_files_path / path; + + path = fs::absolute(path).lexically_normal().string(); } std::string DatabaseFilesystem::getTablePath(const std::string & table_name) const { - return fs::path(path) / table_name; + fs::path table_path = fs::path(path) / table_name; + return table_path.lexically_normal().string(); } void DatabaseFilesystem::addTable(const std::string & table_name, StoragePtr table_storage) const { std::lock_guard lock(mutex); - loaded_tables.emplace(table_name, table_storage); + auto [_, inserted] = loaded_tables.emplace(table_name, table_storage); + if (!inserted) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Table with name `{}` already exists in database `{}` (engine {})", + table_name, + getDatabaseName(), + getEngineName()); } bool DatabaseFilesystem::isTableExist(const String & name, ContextPtr) const @@ -62,8 +80,20 @@ StoragePtr DatabaseFilesystem::getTableImpl(const String & name, ContextPtr cont return it->second; } + // If run in Local mode, no need for path checking. 
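+        // Access control is a prefix test on normalized absolute paths: the
+        // constructor canonicalizes user_files and the database root, and
+        // getTablePath() applies lexically_normal(), so "../" segments cannot
+        // step outside. A sketch of the predicate used just below (underRoot
+        // is an illustrative name):
+        //
+        //     bool underRoot(const std::string & file, const std::string & root)
+        //     {
+        //         auto f = fs::path(file).lexically_normal().string();
+        //         auto r = fs::canonical(fs::path(root)).string();
+        //         return f.rfind(r, 0) == 0; // f must start with r
+        //     }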
+ bool need_check_path = context_->getApplicationType() != Context::ApplicationType::LOCAL; + std::string user_files_path = fs::canonical(fs::path(context_->getUserFilesPath())).string(); + auto table_path = getTablePath(name); + // Check access for file before checking its existence + if (need_check_path && table_path.find(user_files_path) != 0) + throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "File is not inside {}", user_files_path); + + // If the table doesn't exist in the tables map, check if the corresponding file exists + if (!fs::exists(table_path) || !fs::is_regular_file(table_path)) + return nullptr; + // If the file exists, create a new table using TableFunctionFile and return it. auto args = makeASTFunction("file", std::make_shared(table_path)); @@ -89,11 +119,17 @@ StoragePtr DatabaseFilesystem::tryGetTable(const String & name, ContextPtr conte { try { - return getTable(name, context_); + return getTableImpl(name, context_); } - catch (...) + catch (const Exception & e) { - return nullptr; + // Ignore exceptions thrown by TableFunctionFile and which indicate that there is no table + if (e.code() == ErrorCodes::BAD_ARGUMENTS) + return nullptr; + if (e.code() == ErrorCodes::DATABASE_ACCESS_DENIED) + return nullptr; + + throw; } } diff --git a/src/Databases/DatabasesOverlay.cpp b/src/Databases/DatabasesOverlay.cpp index 5a6a4fe5cc6..b44a9798072 100644 --- a/src/Databases/DatabasesOverlay.cpp +++ b/src/Databases/DatabasesOverlay.cpp @@ -1,11 +1,9 @@ #include +#include #include #include -#include - -#include -#include +#include #include @@ -73,15 +71,11 @@ void DatabasesOverlay::dropTable(ContextPtr context_, const String & table_name, { for (auto & db : databases) { - try + if (db->isTableExist(table_name, context_)) { db->dropTable(context_, table_name, sync); return; } - catch (...) - { - continue; - } } throw Exception( ErrorCodes::LOGICAL_ERROR, @@ -119,16 +113,8 @@ StoragePtr DatabasesOverlay::detachTable(ContextPtr context_, const String & tab StoragePtr result = nullptr; for (auto & db : databases) { - try - { - result = db->detachTable(context_, table_name); - if (result) - return result; - } - catch (...) - { - continue; - } + if (db->isTableExist(table_name, context_)) + return db->detachTable(context_, table_name); } throw Exception( ErrorCodes::LOGICAL_ERROR, @@ -212,17 +198,10 @@ void DatabasesOverlay::alterTable(ContextPtr local_context, const StorageID & ta { for (auto & db : databases) { - try + if (!db->isReadOnly() && db->isTableExist(table_id.table_name, local_context)) { - if (!db->isReadOnly()) - { - db->alterTable(local_context, table_id, metadata); - return; - } - } - catch (...) 
- { - continue; + db->alterTable(local_context, table_id, metadata); + return; } } throw Exception( @@ -239,8 +218,8 @@ DatabasesOverlay::getTablesForBackup(const FilterByNameFunction & filter, const std::vector> result; for (const auto & db : databases) { - auto dbBackup = db->getTablesForBackup(filter, local_context); - result.insert(result.end(), std::make_move_iterator(dbBackup.begin()), std::make_move_iterator(dbBackup.end())); + auto db_backup = db->getTablesForBackup(filter, local_context); + result.insert(result.end(), std::make_move_iterator(db_backup.begin()), std::make_move_iterator(db_backup.end())); } return result; } @@ -284,12 +263,4 @@ DatabaseTablesIteratorPtr DatabasesOverlay::getTablesIterator(ContextPtr context return std::make_unique(std::move(tables), getDatabaseName()); } -DatabasePtr CreateClickHouseLocalDatabaseOverlay(const String & name_, ContextPtr context_) -{ - auto databaseCombiner = std::make_shared(name_, context_); - databaseCombiner->registerNextDatabase(std::make_shared(name_, "", context_)); - databaseCombiner->registerNextDatabase(std::make_shared(name_, context_)); - return databaseCombiner; -} - } diff --git a/src/Databases/DatabasesOverlay.h b/src/Databases/DatabasesOverlay.h index 77f0085161b..0f31bbd6a47 100644 --- a/src/Databases/DatabasesOverlay.h +++ b/src/Databases/DatabasesOverlay.h @@ -63,6 +63,4 @@ protected: Poco::Logger * log; }; -DatabasePtr CreateClickHouseLocalDatabaseOverlay(const String & name_, ContextPtr context_); - } From 491c26fb0aa08dd75adf46699225658fd9a45d5d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Apr 2023 18:55:19 +0200 Subject: [PATCH 0092/1997] Slight improvement in Disks interface --- src/Disks/DiskEncrypted.cpp | 2 +- src/Disks/DiskEncrypted.h | 6 +-- src/Disks/DiskLocal.cpp | 44 +++++++++++++------ src/Disks/DiskLocal.h | 8 ++-- src/Disks/IDisk.h | 8 ++-- src/Disks/IVolume.cpp | 4 +- src/Disks/IVolume.h | 2 +- .../ObjectStorages/DiskObjectStorage.cpp | 17 ++++--- src/Disks/ObjectStorages/DiskObjectStorage.h | 12 +++-- src/Disks/StoragePolicy.cpp | 27 ++++++++++-- src/Disks/VolumeJBOD.cpp | 22 +++++++--- src/Disks/VolumeJBOD.h | 4 +- src/Functions/filesystem.cpp | 6 +-- .../ServerAsynchronousMetrics.cpp | 23 ++++++---- .../MergeTree/MergeTreePartsMover.cpp | 12 +++-- src/Storages/System/StorageSystemDisks.cpp | 6 +-- 16 files changed, 130 insertions(+), 73 deletions(-) diff --git a/src/Disks/DiskEncrypted.cpp b/src/Disks/DiskEncrypted.cpp index db18e9652e7..1f8d75dbeb8 100644 --- a/src/Disks/DiskEncrypted.cpp +++ b/src/Disks/DiskEncrypted.cpp @@ -184,7 +184,7 @@ public: } UInt64 getSize() const override { return reservation->getSize(); } - UInt64 getUnreservedSpace() const override { return reservation->getUnreservedSpace(); } + std::optional getUnreservedSpace() const override { return reservation->getUnreservedSpace(); } DiskPtr getDisk(size_t i) const override { diff --git a/src/Disks/DiskEncrypted.h b/src/Disks/DiskEncrypted.h index 8e824a1f7e5..5d04558792e 100644 --- a/src/Disks/DiskEncrypted.h +++ b/src/Disks/DiskEncrypted.h @@ -256,17 +256,17 @@ public: return std::make_shared(*this); } - UInt64 getTotalSpace() const override + std::optional getTotalSpace() const override { return delegate->getTotalSpace(); } - UInt64 getAvailableSpace() const override + std::optional getAvailableSpace() const override { return delegate->getAvailableSpace(); } - UInt64 getUnreservedSpace() const override + std::optional getUnreservedSpace() const override { return delegate->getUnreservedSpace(); } diff --git 
a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index 49f28a19b31..af9d4ffd19c 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -97,7 +97,8 @@ static void loadDiskLocalConfig(const String & name, tmp_path = context->getPath(); // Create tmp disk for getting total disk space. - keep_free_space_bytes = static_cast(DiskLocal("tmp", tmp_path, 0).getTotalSpace() * ratio); + auto total_space_of_local_disk = DiskLocal("tmp", tmp_path, 0).getTotalSpace(); + keep_free_space_bytes = total_space_of_local_disk ? static_cast(*total_space_of_local_disk * ratio) : 0; } } @@ -128,7 +129,7 @@ public: {} UInt64 getSize() const override { return size; } - UInt64 getUnreservedSpace() const override { return unreserved_space; } + std::optional getUnreservedSpace() const override { return unreserved_space; } DiskPtr getDisk(size_t i) const override { @@ -225,8 +226,11 @@ std::optional DiskLocal::tryReserve(UInt64 bytes) { std::lock_guard lock(DiskLocal::reservation_mutex); - UInt64 available_space = getAvailableSpace(); - UInt64 unreserved_space = available_space - std::min(available_space, reserved_bytes); + auto available_space = getAvailableSpace(); + + UInt64 unreserved_space = available_space + ? *available_space - std::min(*available_space, reserved_bytes) + : std::numeric_limits::max(); if (bytes == 0) { @@ -237,12 +241,24 @@ std::optional DiskLocal::tryReserve(UInt64 bytes) if (unreserved_space >= bytes) { - LOG_TRACE( - logger, - "Reserved {} on local disk {}, having unreserved {}.", - ReadableSize(bytes), - backQuote(name), - ReadableSize(unreserved_space)); + if (available_space) + { + LOG_TRACE( + logger, + "Reserved {} on local disk {}, having unreserved {}.", + ReadableSize(bytes), + backQuote(name), + ReadableSize(unreserved_space)); + } + else + { + LOG_TRACE( + logger, + "Reserved {} on local disk {}.", + ReadableSize(bytes), + backQuote(name)); + } + ++reservation_count; reserved_bytes += bytes; return {unreserved_space - bytes}; @@ -268,14 +284,14 @@ static UInt64 getTotalSpaceByName(const String & name, const String & disk_path, return total_size - keep_free_space_bytes; } -UInt64 DiskLocal::getTotalSpace() const +std::optional DiskLocal::getTotalSpace() const { if (broken || readonly) return 0; return getTotalSpaceByName(name, disk_path, keep_free_space_bytes); } -UInt64 DiskLocal::getAvailableSpace() const +std::optional DiskLocal::getAvailableSpace() const { if (broken || readonly) return 0; @@ -292,10 +308,10 @@ UInt64 DiskLocal::getAvailableSpace() const return total_size - keep_free_space_bytes; } -UInt64 DiskLocal::getUnreservedSpace() const +std::optional DiskLocal::getUnreservedSpace() const { std::lock_guard lock(DiskLocal::reservation_mutex); - auto available_space = getAvailableSpace(); + auto available_space = *getAvailableSpace(); available_space -= std::min(available_space, reserved_bytes); return available_space; } diff --git a/src/Disks/DiskLocal.h b/src/Disks/DiskLocal.h index 7ea2c04704c..6da62332726 100644 --- a/src/Disks/DiskLocal.h +++ b/src/Disks/DiskLocal.h @@ -31,11 +31,9 @@ public: ReservationPtr reserve(UInt64 bytes) override; - UInt64 getTotalSpace() const override; - - UInt64 getAvailableSpace() const override; - - UInt64 getUnreservedSpace() const override; + std::optional getTotalSpace() const override; + std::optional getAvailableSpace() const override; + std::optional getUnreservedSpace() const override; UInt64 getKeepingFreeSpace() const override { return keep_free_space_bytes; } diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h 
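The IDisk change below is the core of this commit: the three space getters now
return std::optional<UInt64>, and object-storage disks report std::nullopt
("unbounded") instead of the old sentinel std::numeric_limits<UInt64>::max().
Callers pick their own fallback, roughly like this (an illustrative sketch,
not a quote from the diff):

    std::optional<UInt64> space = disk->getAvailableSpace();
    UInt64 for_display = space.value_or(std::numeric_limits<UInt64>::max());
    bool definitely_full = space.has_value() && *space < required_bytes;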
index 68798047cfd..7202d1f5cfc 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -127,13 +127,13 @@ public: const String & getName() const override { return name; } /// Total available space on the disk. - virtual UInt64 getTotalSpace() const = 0; + virtual std::optional getTotalSpace() const = 0; /// Space currently available on the disk. - virtual UInt64 getAvailableSpace() const = 0; + virtual std::optional getAvailableSpace() const = 0; /// Space available for reservation (available space minus reserved space). - virtual UInt64 getUnreservedSpace() const = 0; + virtual std::optional getUnreservedSpace() const = 0; /// Amount of bytes which should be kept free on the disk. virtual UInt64 getKeepingFreeSpace() const { return 0; } @@ -463,7 +463,7 @@ public: /// Space available for reservation /// (with this reservation already take into account). - virtual UInt64 getUnreservedSpace() const = 0; + virtual std::optional getUnreservedSpace() const = 0; /// Get i-th disk where reservation take place. virtual DiskPtr getDisk(size_t i = 0) const = 0; /// NOLINT diff --git a/src/Disks/IVolume.cpp b/src/Disks/IVolume.cpp index eb474f12ad2..15b52acb422 100644 --- a/src/Disks/IVolume.cpp +++ b/src/Disks/IVolume.cpp @@ -49,9 +49,9 @@ IVolume::IVolume( throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Volume must contain at least one disk"); } -UInt64 IVolume::getMaxUnreservedFreeSpace() const +std::optional IVolume::getMaxUnreservedFreeSpace() const { - UInt64 res = 0; + std::optional res = 0; for (const auto & disk : disks) res = std::max(res, disk->getUnreservedSpace()); return res; diff --git a/src/Disks/IVolume.h b/src/Disks/IVolume.h index ada28caa960..f40d4dcba60 100644 --- a/src/Disks/IVolume.h +++ b/src/Disks/IVolume.h @@ -74,7 +74,7 @@ public: virtual VolumeType getType() const = 0; /// Return biggest unreserved space across all disks - UInt64 getMaxUnreservedFreeSpace() const; + std::optional getMaxUnreservedFreeSpace() const; DiskPtr getDisk() const { return getDisk(0); } virtual DiskPtr getDisk(size_t i) const { return disks[i]; } diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index bf5d0ab829d..2f4e0db070f 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -469,18 +469,25 @@ void DiskObjectStorage::removeSharedRecursive( transaction->commit(); } -std::optional DiskObjectStorage::tryReserve(UInt64 bytes) +bool DiskObjectStorage::tryReserve(UInt64 bytes) { std::lock_guard lock(reservation_mutex); auto available_space = getAvailableSpace(); - UInt64 unreserved_space = available_space - std::min(available_space, reserved_bytes); + if (!available_space) + { + ++reservation_count; + reserved_bytes += bytes; + return true; + } + + UInt64 unreserved_space = *available_space - std::min(*available_space, reserved_bytes); if (bytes == 0) { LOG_TRACE(log, "Reserved 0 bytes on remote disk {}", backQuote(name)); ++reservation_count; - return {unreserved_space}; + return true; } if (unreserved_space >= bytes) @@ -493,14 +500,14 @@ std::optional DiskObjectStorage::tryReserve(UInt64 bytes) ReadableSize(unreserved_space)); ++reservation_count; reserved_bytes += bytes; - return {unreserved_space - bytes}; + return true; } else { LOG_TRACE(log, "Could not reserve {} on remote disk {}. 
Not enough unreserved space", ReadableSize(bytes), backQuote(name)); } - return {}; + return false; } bool DiskObjectStorage::supportsCache() const diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.h b/src/Disks/ObjectStorages/DiskObjectStorage.h index 4372bc75950..2c544e01ca9 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.h +++ b/src/Disks/ObjectStorages/DiskObjectStorage.h @@ -53,11 +53,9 @@ public: const std::string & getCacheName() const override { return object_storage->getCacheName(); } - UInt64 getTotalSpace() const override { return std::numeric_limits::max(); } - - UInt64 getAvailableSpace() const override { return std::numeric_limits::max(); } - - UInt64 getUnreservedSpace() const override { return std::numeric_limits::max(); } + std::optional getTotalSpace() const override { return {}; } + std::optional getAvailableSpace() const override { return {}; } + std::optional getUnreservedSpace() const override { return {}; } UInt64 getKeepingFreeSpace() const override { return 0; } @@ -223,7 +221,7 @@ private: UInt64 reservation_count = 0; std::mutex reservation_mutex; - std::optional tryReserve(UInt64 bytes); + bool tryReserve(UInt64 bytes); const bool send_metadata; size_t threadpool_size; @@ -244,7 +242,7 @@ public: UInt64 getSize() const override { return size; } - UInt64 getUnreservedSpace() const override { return unreserved_space; } + std::optional getUnreservedSpace() const override { return unreserved_space; } DiskPtr getDisk(size_t i) const override; diff --git a/src/Disks/StoragePolicy.cpp b/src/Disks/StoragePolicy.cpp index f4be8b8fe86..92cca23ca76 100644 --- a/src/Disks/StoragePolicy.cpp +++ b/src/Disks/StoragePolicy.cpp @@ -211,7 +211,11 @@ UInt64 StoragePolicy::getMaxUnreservedFreeSpace() const { UInt64 res = 0; for (const auto & volume : volumes) - res = std::max(res, volume->getMaxUnreservedFreeSpace()); + { + auto max_unreserved_for_volume = volume->getMaxUnreservedFreeSpace(); + if (max_unreserved_for_volume) + res = std::max(res, *max_unreserved_for_volume); + } return res; } @@ -248,22 +252,37 @@ ReservationPtr StoragePolicy::reserveAndCheck(UInt64 bytes) const ReservationPtr StoragePolicy::makeEmptyReservationOnLargestDisk() const { UInt64 max_space = 0; + bool found_bottomless_disk = false; DiskPtr max_disk; + for (const auto & volume : volumes) { for (const auto & disk : volume->getDisks()) { - auto avail_space = disk->getAvailableSpace(); - if (avail_space > max_space) + auto available_space = disk->getAvailableSpace(); + + if (!available_space) { - max_space = avail_space; + max_disk = disk; + found_bottomless_disk = true; + break; + } + + if (*available_space > max_space) + { + max_space = *available_space; max_disk = disk; } } + + if (found_bottomless_disk) + break; } + if (!max_disk) throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "There is no space on any disk in storage policy: {}. 
" "It's likely all disks are broken", name); + auto reservation = max_disk->reserve(0); if (!reservation) { diff --git a/src/Disks/VolumeJBOD.cpp b/src/Disks/VolumeJBOD.cpp index 64bd2619665..885b1d56b0d 100644 --- a/src/Disks/VolumeJBOD.cpp +++ b/src/Disks/VolumeJBOD.cpp @@ -40,20 +40,28 @@ VolumeJBOD::VolumeJBOD( auto ratio = config.getDouble(config_prefix + ".max_data_part_size_ratio"); if (ratio < 0) throw Exception(ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG, "'max_data_part_size_ratio' have to be not less then 0."); + UInt64 sum_size = 0; std::vector sizes; for (const auto & disk : disks) { - sizes.push_back(disk->getTotalSpace()); - sum_size += sizes.back(); + auto size = disk->getTotalSpace(); + sizes.push_back(*size); + if (size) + sum_size += *size; + else + break; } - max_data_part_size = static_cast(sum_size * ratio / disks.size()); - for (size_t i = 0; i < disks.size(); ++i) + if (sizes.size() == disks.size()) { - if (sizes[i] < max_data_part_size) + max_data_part_size = static_cast(sum_size * ratio / disks.size()); + for (size_t i = 0; i < disks.size(); ++i) { - LOG_WARNING(logger, "Disk {} on volume {} have not enough space ({}) for containing part the size of max_data_part_size ({})", - backQuote(disks[i]->getName()), backQuote(config_prefix), ReadableSize(sizes[i]), ReadableSize(max_data_part_size)); + if (sizes[i] < max_data_part_size) + { + LOG_WARNING(logger, "Disk {} on volume {} have not enough space ({}) for containing part the size of max_data_part_size ({})", + backQuote(disks[i]->getName()), backQuote(config_prefix), ReadableSize(sizes[i]), ReadableSize(max_data_part_size)); + } } } } diff --git a/src/Disks/VolumeJBOD.h b/src/Disks/VolumeJBOD.h index ef6f215bf18..8d270a6c71c 100644 --- a/src/Disks/VolumeJBOD.h +++ b/src/Disks/VolumeJBOD.h @@ -68,7 +68,7 @@ private: struct DiskWithSize { DiskPtr disk; - uint64_t free_size = 0; + std::optional free_size = 0; DiskWithSize(DiskPtr disk_) : disk(disk_) @@ -80,7 +80,7 @@ private: return free_size < rhs.free_size; } - ReservationPtr reserve(uint64_t bytes) + ReservationPtr reserve(UInt64 bytes) { ReservationPtr reservation = disk->reserve(bytes); if (!reservation) diff --git a/src/Functions/filesystem.cpp b/src/Functions/filesystem.cpp index 1eb1c27211c..9fbf9b0cbe7 100644 --- a/src/Functions/filesystem.cpp +++ b/src/Functions/filesystem.cpp @@ -22,19 +22,19 @@ namespace struct FilesystemAvailable { static constexpr auto name = "filesystemAvailable"; - static std::uintmax_t get(const DiskPtr & disk) { return disk->getAvailableSpace(); } + static UInt64 get(const DiskPtr & disk) { return disk->getAvailableSpace().value_or(std::numeric_limits::max()); } }; struct FilesystemUnreserved { static constexpr auto name = "filesystemUnreserved"; - static std::uintmax_t get(const DiskPtr & disk) { return disk->getUnreservedSpace(); } + static UInt64 get(const DiskPtr & disk) { return disk->getUnreservedSpace().value_or(std::numeric_limits::max()); } }; struct FilesystemCapacity { static constexpr auto name = "filesystemCapacity"; - static std::uintmax_t get(const DiskPtr & disk) { return disk->getTotalSpace(); } + static UInt64 get(const DiskPtr & disk) { return disk->getTotalSpace().value_or(std::numeric_limits::max()); } }; template diff --git a/src/Interpreters/ServerAsynchronousMetrics.cpp b/src/Interpreters/ServerAsynchronousMetrics.cpp index e6e1a03f11c..0fbcfc9e6a1 100644 --- a/src/Interpreters/ServerAsynchronousMetrics.cpp +++ b/src/Interpreters/ServerAsynchronousMetrics.cpp @@ -191,14 +191,21 @@ void 
diff --git a/src/Interpreters/ServerAsynchronousMetrics.cpp b/src/Interpreters/ServerAsynchronousMetrics.cpp
index e6e1a03f11c..0fbcfc9e6a1 100644
--- a/src/Interpreters/ServerAsynchronousMetrics.cpp
+++ b/src/Interpreters/ServerAsynchronousMetrics.cpp
@@ -191,14 +191,21 @@ void ServerAsynchronousMetrics::updateImpl(AsynchronousMetricValues & new_values
     auto available = disk->getAvailableSpace();
     auto unreserved = disk->getUnreservedSpace();

-    new_values[fmt::format("DiskTotal_{}", name)] = { total,
-        "The total size in bytes of the disk (virtual filesystem). Remote filesystems can show a large value like 16 EiB." };
-    new_values[fmt::format("DiskUsed_{}", name)] = { total - available,
-        "Used bytes on the disk (virtual filesystem). Remote filesystems do not always provide this information." };
-    new_values[fmt::format("DiskAvailable_{}", name)] = { available,
-        "Available bytes on the disk (virtual filesystem). Remote filesystems can show a large value like 16 EiB." };
-    new_values[fmt::format("DiskUnreserved_{}", name)] = { unreserved,
-        "Available bytes on the disk (virtual filesystem) without the reservations for merges, fetches, and moves. Remote filesystems can show a large value like 16 EiB." };
+    new_values[fmt::format("DiskTotal_{}", name)] = { *total,
+        "The total size in bytes of the disk (virtual filesystem). Remote filesystems may not provide this information." };
+
+    if (available)
+    {
+        new_values[fmt::format("DiskUsed_{}", name)] = { *total - *available,
+            "Used bytes on the disk (virtual filesystem). Remote filesystems do not always provide this information." };
+
+        new_values[fmt::format("DiskAvailable_{}", name)] = { *available,
+            "Available bytes on the disk (virtual filesystem). Remote filesystems may not provide this information." };
+    }
+
+    if (unreserved)
+        new_values[fmt::format("DiskUnreserved_{}", name)] = { *unreserved,
+            "Available bytes on the disk (virtual filesystem) without the reservations for merges, fetches, and moves. Remote filesystems may not provide this information." };
 }
 }
diff --git a/src/Storages/MergeTree/MergeTreePartsMover.cpp b/src/Storages/MergeTree/MergeTreePartsMover.cpp
index e1da57744b3..391b04573d7 100644
--- a/src/Storages/MergeTree/MergeTreePartsMover.cpp
+++ b/src/Storages/MergeTree/MergeTreePartsMover.cpp
@@ -111,11 +111,15 @@ bool MergeTreePartsMover::selectPartsForMove(
     {
         for (const auto & disk : volumes[i]->getDisks())
         {
-            UInt64 required_maximum_available_space = static_cast<UInt64>(disk->getTotalSpace() * policy->getMoveFactor());
-            UInt64 unreserved_space = disk->getUnreservedSpace();
+            auto total_space = disk->getTotalSpace();
+            auto unreserved_space = disk->getUnreservedSpace();
+            if (total_space && unreserved_space)
+            {
+                UInt64 required_maximum_available_space = static_cast<UInt64>(*total_space * policy->getMoveFactor());

-            if (unreserved_space < required_maximum_available_space && !disk->isBroken())
-                need_to_move.emplace(disk, required_maximum_available_space - unreserved_space);
+                if (*unreserved_space < required_maximum_available_space && !disk->isBroken())
+                    need_to_move.emplace(disk, required_maximum_available_space - *unreserved_space);
+            }
         }
     }
 }
diff --git a/src/Storages/System/StorageSystemDisks.cpp b/src/Storages/System/StorageSystemDisks.cpp
index 002da7abd14..23a00cc7ae5 100644
--- a/src/Storages/System/StorageSystemDisks.cpp
+++ b/src/Storages/System/StorageSystemDisks.cpp
@@ -64,9 +64,9 @@ Pipe StorageSystemDisks::read(
     {
         col_name->insert(disk_name);
         col_path->insert(disk_ptr->getPath());
-        col_free->insert(disk_ptr->getAvailableSpace());
-        col_total->insert(disk_ptr->getTotalSpace());
-        col_unreserved->insert(disk_ptr->getUnreservedSpace());
+        col_free->insert(disk_ptr->getAvailableSpace().value_or(std::numeric_limits::max()));
+
col_total->insert(disk_ptr->getTotalSpace().value_or(std::numeric_limits::max())); + col_unreserved->insert(disk_ptr->getUnreservedSpace().value_or(std::numeric_limits::max())); col_keep->insert(disk_ptr->getKeepingFreeSpace()); auto data_source_description = disk_ptr->getDataSourceDescription(); col_type->insert(toString(data_source_description.type)); From c9e30d3cf5f5f0ac9f35e2e08df429bacbe4cd25 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Apr 2023 19:04:20 +0200 Subject: [PATCH 0093/1997] Properly check the limit for `sleepEachRow` function. Add a setting `function_sleep_max_microseconds_per_block` --- src/Core/Settings.h | 3 ++- src/Functions/sleep.h | 21 +++++++++++++++---- .../02725_sleep_max_time.reference | 0 .../0_stateless/02725_sleep_max_time.sql | 1 + 4 files changed, 20 insertions(+), 5 deletions(-) create mode 100644 tests/queries/0_stateless/02725_sleep_max_time.reference create mode 100644 tests/queries/0_stateless/02725_sleep_max_time.sql diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 7f1fe838b80..5aa054d43b0 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -633,7 +633,8 @@ class IColumn; M(UInt64, limit, 0, "Limit on read rows from the most 'end' result for select query, default 0 means no limit length", 0) \ M(UInt64, offset, 0, "Offset on read rows from the most 'end' result for select query", 0) \ \ - M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function 'range' per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 'min_insert_block_size_rows'). It is a safety threshold.", 0) \ + M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function `range` per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 'min_insert_block_size_rows'). It is a safety threshold.", 0) \ + M(UInt64, function_sleep_max_microseconds_per_block, 3000, "Maximum number of microseconds the function `sleep` is allowed to sleep for each block. If a user called it with a larger value, it throws an exception. It is a safety threshold.", 0) \ M(ShortCircuitFunctionEvaluation, short_circuit_function_evaluation, ShortCircuitFunctionEvaluation::ENABLE, "Setting for short-circuit function evaluation configuration. Possible values: 'enable' - use short-circuit function evaluation for functions that are suitable for it, 'disable' - disable short-circuit function evaluation, 'force_enable' - use short-circuit function evaluation for all functions.", 0) \ \ M(LocalFSReadMethod, storage_file_read_method, LocalFSReadMethod::mmap, "Method of reading data from storage file, one of: read, pread, mmap.", 0) \ diff --git a/src/Functions/sleep.h b/src/Functions/sleep.h index d1960860308..93525c3f310 100644 --- a/src/Functions/sleep.h +++ b/src/Functions/sleep.h @@ -9,7 +9,8 @@ #include #include #include -#include +#include + namespace ProfileEvents { @@ -40,11 +41,17 @@ enum class FunctionSleepVariant template class FunctionSleep : public IFunction { +private: + UInt64 max_microseconds; public: static constexpr auto name = variant == FunctionSleepVariant::PerBlock ? 
"sleep" : "sleepEachRow"; - static FunctionPtr create(ContextPtr) + static FunctionPtr create(ContextPtr context) + { + return std::make_shared>(context->getSettingsRef().function_sleep_max_microseconds_per_block); + } + + FunctionSleep(UInt64 max_microseconds_) : max_microseconds(max_microseconds_) { - return std::make_shared>(); } /// Get the name of the function. @@ -105,13 +112,19 @@ public: if (size > 0) { /// When sleeping, the query cannot be cancelled. For ability to cancel query, we limit sleep time. - if (seconds > 3.0) /// The choice is arbitrary + if (seconds * 1e6 > max_microseconds) throw Exception(ErrorCodes::TOO_SLOW, "The maximum sleep time is 3 seconds. Requested: {}", toString(seconds)); if (!dry_run) { UInt64 count = (variant == FunctionSleepVariant::PerBlock ? 1 : size); UInt64 microseconds = static_cast(seconds * count * 1e6); + + if (microseconds > max_microseconds) + throw Exception(ErrorCodes::TOO_SLOW, + "The maximum sleep time is 3 seconds. Requested: {} microseconds per block (of size {})", + microseconds, size); + sleepForMicroseconds(microseconds); ProfileEvents::increment(ProfileEvents::SleepFunctionCalls, count); ProfileEvents::increment(ProfileEvents::SleepFunctionMicroseconds, microseconds); diff --git a/tests/queries/0_stateless/02725_sleep_max_time.reference b/tests/queries/0_stateless/02725_sleep_max_time.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02725_sleep_max_time.sql b/tests/queries/0_stateless/02725_sleep_max_time.sql new file mode 100644 index 00000000000..b8378aee17e --- /dev/null +++ b/tests/queries/0_stateless/02725_sleep_max_time.sql @@ -0,0 +1 @@ +SELECT * FROM system.numbers WHERE sleepEachRow(0.05) LIMIT 10; -- { serverError TOO_SLOW } From 3de0c319c2d6b6206196ece48b228f72f3a9aecd Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Apr 2023 19:08:52 +0200 Subject: [PATCH 0094/1997] Add compatibility --- src/Core/Settings.h | 2 +- src/Core/SettingsChangesHistory.h | 1 + src/Functions/sleep.h | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 5aa054d43b0..2ab4fe9b32a 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -634,7 +634,7 @@ class IColumn; M(UInt64, offset, 0, "Offset on read rows from the most 'end' result for select query", 0) \ \ M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function `range` per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 'min_insert_block_size_rows'). It is a safety threshold.", 0) \ - M(UInt64, function_sleep_max_microseconds_per_block, 3000, "Maximum number of microseconds the function `sleep` is allowed to sleep for each block. If a user called it with a larger value, it throws an exception. It is a safety threshold.", 0) \ + M(UInt64, function_sleep_max_microseconds_per_block, 3000000, "Maximum number of microseconds the function `sleep` is allowed to sleep for each block. If a user called it with a larger value, it throws an exception. It is a safety threshold.", 0) \ M(ShortCircuitFunctionEvaluation, short_circuit_function_evaluation, ShortCircuitFunctionEvaluation::ENABLE, "Setting for short-circuit function evaluation configuration. 
Possible values: 'enable' - use short-circuit function evaluation for functions that are suitable for it, 'disable' - disable short-circuit function evaluation, 'force_enable' - use short-circuit function evaluation for all functions.", 0) \ \ M(LocalFSReadMethod, storage_file_read_method, LocalFSReadMethod::mmap, "Method of reading data from storage file, one of: read, pread, mmap.", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 266d14f645b..33010dc6b3b 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -80,6 +80,7 @@ namespace SettingsChangesHistory /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) static std::map settings_changes_history = { + {"23.5", {{"function_sleep_max_microseconds_per_block", 0, 3000000, "In previous versions, the maximim sleep time of 3 seconds was applied only for `sleep`, but not for `sleepEachRow` function. In the new version, we introduce this setting. If you set compatibility with the previous versions, we will disable the limit altogether."}}}, {"23.4", {{"allow_suspicious_indices", true, false, "If true, index can defined with identical expressions"}}}, {"23.4", {{"connect_timeout_with_failover_ms", 50, 1000, "Increase default connect timeout because of async connect"}, {"connect_timeout_with_failover_secure_ms", 100, 1000, "Increase default secure connect timeout because of async connect"}, diff --git a/src/Functions/sleep.h b/src/Functions/sleep.h index 93525c3f310..db4f0e7dd3e 100644 --- a/src/Functions/sleep.h +++ b/src/Functions/sleep.h @@ -112,7 +112,7 @@ public: if (size > 0) { /// When sleeping, the query cannot be cancelled. For ability to cancel query, we limit sleep time. - if (seconds * 1e6 > max_microseconds) + if (max_microseconds && seconds * 1e6 > max_microseconds) throw Exception(ErrorCodes::TOO_SLOW, "The maximum sleep time is 3 seconds. Requested: {}", toString(seconds)); if (!dry_run) @@ -120,7 +120,7 @@ public: UInt64 count = (variant == FunctionSleepVariant::PerBlock ? 1 : size); UInt64 microseconds = static_cast(seconds * count * 1e6); - if (microseconds > max_microseconds) + if (max_microseconds && microseconds > max_microseconds) throw Exception(ErrorCodes::TOO_SLOW, "The maximum sleep time is 3 seconds. 
Requested: {} microseconds per block (of size {})", microseconds, size); From 582cf2ca8427c572a83c0bc249275c22fae6de5c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Apr 2023 19:48:33 +0200 Subject: [PATCH 0095/1997] Update tests --- src/Functions/sleep.h | 6 +++--- tests/queries/0_stateless/00956_sensitive_data_masking.sh | 1 + tests/queries/0_stateless/01107_atomic_db_detach_attach.sh | 4 ++-- tests/queries/0_stateless/01114_database_atomic.sh | 6 +++--- .../queries/0_stateless/01192_rename_database_zookeeper.sh | 4 ++-- tests/queries/0_stateless/01238_http_memory_tracking.sh | 2 +- tests/queries/0_stateless/01246_buffer_flush.sql | 2 ++ tests/queries/0_stateless/01338_long_select_and_alter.sh | 2 +- .../0_stateless/01338_long_select_and_alter_zookeeper.sh | 2 +- .../01532_execute_merges_on_single_replica_long.sql | 1 + .../01715_background_checker_blather_zookeeper_long.sql | 1 + .../01737_clickhouse_server_wait_server_pool_long.sh | 2 +- 12 files changed, 19 insertions(+), 14 deletions(-) diff --git a/src/Functions/sleep.h b/src/Functions/sleep.h index db4f0e7dd3e..fba8293e5ff 100644 --- a/src/Functions/sleep.h +++ b/src/Functions/sleep.h @@ -113,7 +113,7 @@ public: { /// When sleeping, the query cannot be cancelled. For ability to cancel query, we limit sleep time. if (max_microseconds && seconds * 1e6 > max_microseconds) - throw Exception(ErrorCodes::TOO_SLOW, "The maximum sleep time is 3 seconds. Requested: {}", toString(seconds)); + throw Exception(ErrorCodes::TOO_SLOW, "The maximum sleep time is {} microseconds. Requested: {}", max_microseconds, seconds); if (!dry_run) { @@ -122,8 +122,8 @@ public: if (max_microseconds && microseconds > max_microseconds) throw Exception(ErrorCodes::TOO_SLOW, - "The maximum sleep time is 3 seconds. Requested: {} microseconds per block (of size {})", - microseconds, size); + "The maximum sleep time is {} microseconds. 
Requested: {} microseconds per block (of size {})", + max_microseconds, microseconds, size); sleepForMicroseconds(microseconds); ProfileEvents::increment(ProfileEvents::SleepFunctionCalls, count); diff --git a/tests/queries/0_stateless/00956_sensitive_data_masking.sh b/tests/queries/0_stateless/00956_sensitive_data_masking.sh index ccd9bbcf10e..a31a71ce381 100755 --- a/tests/queries/0_stateless/00956_sensitive_data_masking.sh +++ b/tests/queries/0_stateless/00956_sensitive_data_masking.sh @@ -65,6 +65,7 @@ echo 5 # run in background rm -f "$tmp_file2" >/dev/null 2>&1 bash -c "$CLICKHOUSE_CLIENT \ + --function_sleep_max_microseconds_per_block 60 \ --query=\"select sleepEachRow(1) from numbers(10) where ignore('find_me_TOPSECRET=TOPSECRET')=0 and ignore('fwerkh_that_magic_string_make_me_unique') = 0 FORMAT Null\" \ --log_queries=1 --ignore-error --multiquery |& grep -v '^(query: ' > $tmp_file2" & diff --git a/tests/queries/0_stateless/01107_atomic_db_detach_attach.sh b/tests/queries/0_stateless/01107_atomic_db_detach_attach.sh index e4dad56bc29..e2a23258584 100755 --- a/tests/queries/0_stateless/01107_atomic_db_detach_attach.sh +++ b/tests/queries/0_stateless/01107_atomic_db_detach_attach.sh @@ -9,7 +9,7 @@ $CLICKHOUSE_CLIENT -q "DROP DATABASE IF EXISTS test_01107" $CLICKHOUSE_CLIENT -q "CREATE DATABASE test_01107 ENGINE=Atomic" $CLICKHOUSE_CLIENT -q "CREATE TABLE test_01107.mt (n UInt64) ENGINE=MergeTree() ORDER BY tuple()" -$CLICKHOUSE_CLIENT -q "INSERT INTO test_01107.mt SELECT number + sleepEachRow(3) FROM numbers(5)" & +$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 60 -q "INSERT INTO test_01107.mt SELECT number + sleepEachRow(3) FROM numbers(5)" & sleep 1 $CLICKHOUSE_CLIENT -q "DETACH TABLE test_01107.mt" --database_atomic_wait_for_drop_and_detach_synchronously=0 @@ -23,7 +23,7 @@ $CLICKHOUSE_CLIENT -q "DETACH DATABASE test_01107" --database_atomic_wait_for_dr $CLICKHOUSE_CLIENT -q "ATTACH DATABASE test_01107" $CLICKHOUSE_CLIENT -q "SELECT count(n), sum(n) FROM test_01107.mt" -$CLICKHOUSE_CLIENT -q "INSERT INTO test_01107.mt SELECT number + sleepEachRow(1) FROM numbers(5)" && echo "end" & +$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 60 -q "INSERT INTO test_01107.mt SELECT number + sleepEachRow(1) FROM numbers(5)" && echo "end" & sleep 1 $CLICKHOUSE_CLIENT -q "DROP DATABASE test_01107" --database_atomic_wait_for_drop_and_detach_synchronously=0 && sleep 1 && echo "dropped" wait diff --git a/tests/queries/0_stateless/01114_database_atomic.sh b/tests/queries/0_stateless/01114_database_atomic.sh index 4a3d35e48b7..634b19a7624 100755 --- a/tests/queries/0_stateless/01114_database_atomic.sh +++ b/tests/queries/0_stateless/01114_database_atomic.sh @@ -49,8 +49,8 @@ $CLICKHOUSE_CLIENT --show_table_uuid_in_table_create_query_if_not_nil=1 -q "SHOW $CLICKHOUSE_CLIENT -q "SELECT name, uuid, create_table_query FROM system.tables WHERE database='test_01114_2'" | sed "s/$explicit_uuid/00001114-0000-4000-8000-000000000002/g" -$CLICKHOUSE_CLIENT -q "SELECT count(col), sum(col) FROM (SELECT n + sleepEachRow(1.5) AS col FROM test_01114_1.mt)" & # 33s (1.5s * 22 rows per partition), result: 110, 5995 -$CLICKHOUSE_CLIENT -q "INSERT INTO test_01114_2.mt SELECT number + sleepEachRow(1.5) FROM numbers(30)" & # 45s (1.5s * 30 rows) +$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 60 -q "SELECT count(col), sum(col) FROM (SELECT n + sleepEachRow(1.5) AS col FROM test_01114_1.mt)" & # 33s (1.5s * 22 rows per partition), result: 110, 5995 +$CLICKHOUSE_CLIENT 
--function_sleep_max_microseconds_per_block 60 -q "INSERT INTO test_01114_2.mt SELECT number + sleepEachRow(1.5) FROM numbers(30)" & # 45s (1.5s * 30 rows) sleep 1 # SELECT and INSERT should start before the following RENAMEs $CLICKHOUSE_CLIENT -nm -q " @@ -74,7 +74,7 @@ INSERT INTO test_01114_1.mt SELECT 's' || toString(number) FROM numbers(5); SELECT count() FROM test_01114_1.mt " # result: 5 -$CLICKHOUSE_CLIENT -q "SELECT tuple(s, sleepEachRow(3)) FROM test_01114_1.mt" > /dev/null & # 15s (3s * 5 rows) +$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 60 -q "SELECT tuple(s, sleepEachRow(3)) FROM test_01114_1.mt" > /dev/null & # 15s (3s * 5 rows) sleep 1 $CLICKHOUSE_CLIENT -q "DROP DATABASE test_01114_1" --database_atomic_wait_for_drop_and_detach_synchronously=0 && echo "dropped" diff --git a/tests/queries/0_stateless/01192_rename_database_zookeeper.sh b/tests/queries/0_stateless/01192_rename_database_zookeeper.sh index dec1276111a..ac516e83c84 100755 --- a/tests/queries/0_stateless/01192_rename_database_zookeeper.sh +++ b/tests/queries/0_stateless/01192_rename_database_zookeeper.sh @@ -20,7 +20,7 @@ $CLICKHOUSE_CLIENT -q "SELECT engine, splitByChar('/', data_path)[-2], uuid, spl # 3. check RENAME don't wait for INSERT $CLICKHOUSE_CLIENT -q "CREATE TABLE test_01192.mt (n UInt64) ENGINE=MergeTree ORDER BY n" -$CLICKHOUSE_CLIENT -q "INSERT INTO test_01192.mt SELECT number + sleepEachRow(1.5) FROM numbers(10)" && echo "inserted" & +$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 15 -q "INSERT INTO test_01192.mt SELECT number + sleepEachRow(1.5) FROM numbers(10)" && echo "inserted" & sleep 1 $CLICKHOUSE_CLIENT -q "RENAME DATABASE test_01192 TO default" 2>&1| grep -F "already exists" > /dev/null && echo "ok" @@ -60,7 +60,7 @@ $CLICKHOUSE_CLIENT -q "SELECT database, name, status, origin FROM system.diction $CLICKHOUSE_CLIENT -q "SELECT dictGet('test_01192_atomic.dict', '_part', toUInt64(1))" # 8. check RENAME don't wait for INSERT -$CLICKHOUSE_CLIENT -q "INSERT INTO test_01192_atomic.mt SELECT number + sleepEachRow(1) + 10 FROM numbers(10)" && echo "inserted" & +$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 10 -q "INSERT INTO test_01192_atomic.mt SELECT number + sleepEachRow(1) + 10 FROM numbers(10)" && echo "inserted" & sleep 1 $CLICKHOUSE_CLIENT --check_table_dependencies=0 -q "RENAME DATABASE test_01192 TO test_01192_renamed" 2>&1| grep -F "not supported" > /dev/null && echo "ok" diff --git a/tests/queries/0_stateless/01238_http_memory_tracking.sh b/tests/queries/0_stateless/01238_http_memory_tracking.sh index 9b0fe875416..eb42159ce15 100755 --- a/tests/queries/0_stateless/01238_http_memory_tracking.sh +++ b/tests/queries/0_stateless/01238_http_memory_tracking.sh @@ -10,7 +10,7 @@ set -o pipefail # This is needed to keep at least one running query for user for the time of test. 
# (1k http queries takes ~1 second, let's run for 5x more to avoid flaps) -${CLICKHOUSE_CLIENT} --format Null -n <<<'SELECT sleepEachRow(1) FROM numbers(5)' & +${CLICKHOUSE_CLIENT} --function_sleep_max_microseconds_per_block 5 --format Null -n <<<'SELECT sleepEachRow(1) FROM numbers(5)' & # ignore "yes: standard output: Broken pipe" yes 'SELECT 1' 2>/dev/null | { diff --git a/tests/queries/0_stateless/01246_buffer_flush.sql b/tests/queries/0_stateless/01246_buffer_flush.sql index ac507d94b69..36bcaae383f 100644 --- a/tests/queries/0_stateless/01246_buffer_flush.sql +++ b/tests/queries/0_stateless/01246_buffer_flush.sql @@ -1,5 +1,7 @@ -- Tags: no-fasttest +SET function_sleep_max_microseconds_per_block = 4000000; + drop table if exists data_01256; drop table if exists buffer_01256; diff --git a/tests/queries/0_stateless/01338_long_select_and_alter.sh b/tests/queries/0_stateless/01338_long_select_and_alter.sh index 2e3080e9cfc..04a10cfe55e 100755 --- a/tests/queries/0_stateless/01338_long_select_and_alter.sh +++ b/tests/queries/0_stateless/01338_long_select_and_alter.sh @@ -11,7 +11,7 @@ $CLICKHOUSE_CLIENT --query "CREATE TABLE alter_mt (key UInt64, value String) ENG $CLICKHOUSE_CLIENT --query "INSERT INTO alter_mt SELECT number, toString(number) FROM numbers(5)" -$CLICKHOUSE_CLIENT --query "SELECT count(distinct concat(value, '_')) FROM alter_mt WHERE not sleepEachRow(2)" & +$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 10 --query "SELECT count(distinct concat(value, '_')) FROM alter_mt WHERE not sleepEachRow(2)" & # to be sure that select took all required locks sleep 2 diff --git a/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh b/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh index 12bc3b09472..829352110f6 100755 --- a/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh +++ b/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh @@ -11,7 +11,7 @@ $CLICKHOUSE_CLIENT --query "CREATE TABLE alter_mt (key UInt64, value String) ENG $CLICKHOUSE_CLIENT --query "INSERT INTO alter_mt SELECT number, toString(number) FROM numbers(5)" -$CLICKHOUSE_CLIENT --query "SELECT count(distinct concat(value, '_')) FROM alter_mt WHERE not sleepEachRow(2)" & +$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 10 --query "SELECT count(distinct concat(value, '_')) FROM alter_mt WHERE not sleepEachRow(2)" & # to be sure that select took all required locks sleep 2 diff --git a/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql b/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql index f217b6094b2..d39ffdc4049 100644 --- a/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql +++ b/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql @@ -44,6 +44,7 @@ SYSTEM STOP REPLICATION QUEUES execute_on_single_replica_r2; OPTIMIZE TABLE execute_on_single_replica_r1 FINAL SETTINGS replication_alter_partitions_sync=0; /* if we will check immediately we can find the log entry unchecked */ +SET function_sleep_max_microseconds_per_block = 4000000; SELECT * FROM numbers(4) where sleepEachRow(1); SELECT '****************************'; diff --git a/tests/queries/0_stateless/01715_background_checker_blather_zookeeper_long.sql b/tests/queries/0_stateless/01715_background_checker_blather_zookeeper_long.sql index 87e1a039488..32481be1bcd 100644 --- a/tests/queries/0_stateless/01715_background_checker_blather_zookeeper_long.sql +++ 
b/tests/queries/0_stateless/01715_background_checker_blather_zookeeper_long.sql @@ -18,6 +18,7 @@ DETACH TABLE i20203_1; ATTACH TABLE i20203_2; -- sleep 10 seconds +SET function_sleep_max_microseconds_per_block = 10000000; SELECT number from numbers(10) where sleepEachRow(1) Format Null; SELECT num_tries < 50 diff --git a/tests/queries/0_stateless/01737_clickhouse_server_wait_server_pool_long.sh b/tests/queries/0_stateless/01737_clickhouse_server_wait_server_pool_long.sh index d83656e0e8c..adab3906e5b 100755 --- a/tests/queries/0_stateless/01737_clickhouse_server_wait_server_pool_long.sh +++ b/tests/queries/0_stateless/01737_clickhouse_server_wait_server_pool_long.sh @@ -54,7 +54,7 @@ if ! $CLICKHOUSE_CLIENT_BINARY --host 127.1 --port "$server_port" --format Null fi query_id="$CLICKHOUSE_DATABASE-$SECONDS" -$CLICKHOUSE_CLIENT_BINARY --query_id "$query_id" --host 127.1 --port "$server_port" --format Null -q 'select sleepEachRow(1) from numbers(10)' 2>/dev/null & +$CLICKHOUSE_CLIENT_BINARY --query_id "$query_id" --host 127.1 --port "$server_port" --format Null --function_sleep_max_microseconds_per_block 0 -q 'select sleepEachRow(1) from numbers(10)' 2>/dev/null & client_pid=$! # wait until the query will appear in processlist (max 10 second) From 95caa02cbc053f672ffa83a6dbe1a96259ea4d25 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Apr 2023 23:28:08 +0200 Subject: [PATCH 0096/1997] Update test --- ...02494_zero_copy_and_projection_and_mutation_work_together.sql | 1 + .../02572_query_views_log_background_thread.reference | 1 + .../0_stateless/02572_query_views_log_background_thread.sql | 1 + 3 files changed, 3 insertions(+) diff --git a/tests/queries/0_stateless/02494_zero_copy_and_projection_and_mutation_work_together.sql b/tests/queries/0_stateless/02494_zero_copy_and_projection_and_mutation_work_together.sql index 7a51d86dd30..b6ab9b7d0c3 100644 --- a/tests/queries/0_stateless/02494_zero_copy_and_projection_and_mutation_work_together.sql +++ b/tests/queries/0_stateless/02494_zero_copy_and_projection_and_mutation_work_together.sql @@ -70,6 +70,7 @@ SYSTEM SYNC REPLICA wikistat2; -- it doesn't make test flaky, rarely we will not delete the parts because of cleanup thread was slow. -- Such condition will lead to successful queries. 
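+-- numbers(5) arrives as a single block of 5 rows, so sleepEachRow(1) sleeps
+-- 5 seconds = 5000000 microseconds per block; the budget below must be at least that large.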
+SET function_sleep_max_microseconds_per_block = 5000000; SELECT 0 FROM numbers(5) WHERE sleepEachRow(1) = 1; select sum(hits), count() from wikistat1 GROUP BY project, subproject, path settings allow_experimental_projection_optimization = 1, force_optimize_projection = 1; diff --git a/tests/queries/0_stateless/02572_query_views_log_background_thread.reference b/tests/queries/0_stateless/02572_query_views_log_background_thread.reference index eeba62c5dc8..22dfaf93781 100644 --- a/tests/queries/0_stateless/02572_query_views_log_background_thread.reference +++ b/tests/queries/0_stateless/02572_query_views_log_background_thread.reference @@ -4,6 +4,7 @@ insert into buffer_02572 values (1); select * from data_02572; select * from copy_02572; -- we cannot use OPTIMIZE, this will attach query context, so let's wait +SET function_sleep_max_microseconds_per_block = 6000000; select sleepEachRow(1) from numbers(3*2) format Null; select * from data_02572; 1 diff --git a/tests/queries/0_stateless/02572_query_views_log_background_thread.sql b/tests/queries/0_stateless/02572_query_views_log_background_thread.sql index dc229412b13..939c189c5fe 100644 --- a/tests/queries/0_stateless/02572_query_views_log_background_thread.sql +++ b/tests/queries/0_stateless/02572_query_views_log_background_thread.sql @@ -22,6 +22,7 @@ insert into buffer_02572 values (1); select * from data_02572; select * from copy_02572; -- we cannot use OPTIMIZE, this will attach query context, so let's wait +SET function_sleep_max_microseconds_per_block = 6000000; select sleepEachRow(1) from numbers(3*2) format Null; select * from data_02572; select * from copy_02572; From 748a21b791f5846a4f9f1d49d38fc077c7f9d3d1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Apr 2023 01:44:03 +0200 Subject: [PATCH 0097/1997] Fix typo --- src/Core/SettingsChangesHistory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 33010dc6b3b..e0d23d139f3 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -80,7 +80,7 @@ namespace SettingsChangesHistory /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) static std::map settings_changes_history = { - {"23.5", {{"function_sleep_max_microseconds_per_block", 0, 3000000, "In previous versions, the maximim sleep time of 3 seconds was applied only for `sleep`, but not for `sleepEachRow` function. In the new version, we introduce this setting. If you set compatibility with the previous versions, we will disable the limit altogether."}}}, + {"23.5", {{"function_sleep_max_microseconds_per_block", 0, 3000000, "In previous versions, the maximum sleep time of 3 seconds was applied only for `sleep`, but not for `sleepEachRow` function. In the new version, we introduce this setting. 
If you set compatibility with the previous versions, we will disable the limit altogether."}}}, {"23.4", {{"allow_suspicious_indices", true, false, "If true, index can defined with identical expressions"}}}, {"23.4", {{"connect_timeout_with_failover_ms", 50, 1000, "Increase default connect timeout because of async connect"}, {"connect_timeout_with_failover_secure_ms", 100, 1000, "Increase default secure connect timeout because of async connect"}, From 57d852a60e804da746ce5e4cde2d56222afe677e Mon Sep 17 00:00:00 2001 From: Aleksei Golub Date: Sun, 30 Apr 2023 14:46:11 +0300 Subject: [PATCH 0098/1997] Fixed table existence checking --- src/Databases/DatabaseFactory.cpp | 2 +- src/Databases/DatabaseFilesystem.cpp | 60 ++++++++++++++----- src/Databases/DatabaseFilesystem.h | 2 + .../02722_database_filesystem.reference | 5 +- .../0_stateless/02722_database_filesystem.sh | 24 ++++++-- 5 files changed, 70 insertions(+), 23 deletions(-) diff --git a/src/Databases/DatabaseFactory.cpp b/src/Databases/DatabaseFactory.cpp index 1be0d5dd7b2..8a50c31efc8 100644 --- a/src/Databases/DatabaseFactory.cpp +++ b/src/Databases/DatabaseFactory.cpp @@ -443,7 +443,7 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String if (engine->arguments && !engine->arguments->children.empty()) { if (engine->arguments->children.size() != 1) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Filesystem database requires exactly 1 argument: filesystem_path"); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Filesystem database requires at most 1 argument: filesystem_path"); const auto & arguments = engine->arguments->children; init_path = safeGetLiteralValue(arguments[0], engine_name); diff --git a/src/Databases/DatabaseFilesystem.cpp b/src/Databases/DatabaseFilesystem.cpp index 8275bdf6151..7f22b8a16a0 100644 --- a/src/Databases/DatabaseFilesystem.cpp +++ b/src/Databases/DatabaseFilesystem.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -24,19 +25,27 @@ namespace ErrorCodes extern const int UNKNOWN_TABLE; extern const int DATABASE_ACCESS_DENIED; extern const int BAD_ARGUMENTS; + extern const int FILE_DOESNT_EXIST; } DatabaseFilesystem::DatabaseFilesystem(const String & name_, const String & path_, ContextPtr context_) : IDatabase(name_), WithContext(context_->getGlobalContext()), path(path_), log(&Poco::Logger::get("DatabaseFileSystem(" + name_ + ")")) { fs::path user_files_path; - if (context_->getApplicationType() != Context::ApplicationType::LOCAL) + const auto & application_type = context_->getApplicationType(); + + if (application_type != Context::ApplicationType::LOCAL) user_files_path = fs::canonical(fs::path(getContext()->getUserFilesPath())); if (fs::path(path).is_relative()) path = user_files_path / path; + else if (application_type != Context::ApplicationType::LOCAL && !pathStartsWith(fs::path(path), user_files_path)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path must be inside user-files path ({})", user_files_path.string()); path = fs::absolute(path).lexically_normal().string(); + + if (!fs::exists(path)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path does not exist ({})", path); } std::string DatabaseFilesystem::getTablePath(const std::string & table_name) const @@ -58,7 +67,32 @@ void DatabaseFilesystem::addTable(const std::string & table_name, StoragePtr tab getEngineName()); } -bool DatabaseFilesystem::isTableExist(const String & name, ContextPtr) const +bool DatabaseFilesystem::checkTableFilePath(const std::string & table_path, ContextPtr context_, bool 
throw_on_error) const { + // If run in Local mode, no need for path checking. + bool need_check_path = context_->getApplicationType() != Context::ApplicationType::LOCAL; + std::string user_files_path = fs::canonical(fs::path(context_->getUserFilesPath())).string(); + + // Check access for file before checking its existence + if (need_check_path && !fileOrSymlinkPathStartsWith(table_path, user_files_path)) + { + if (throw_on_error) + throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "File is not inside {}", user_files_path); + else + return false; + } + + // Check if the corresponding file exists + if (!fs::exists(table_path) || !fs::is_regular_file(table_path)) { + if (throw_on_error) + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "File does not exist ({})", table_path); + else + return false; + } + + return true; +} + +bool DatabaseFilesystem::isTableExist(const String & name, ContextPtr context_) const { { std::lock_guard lock(mutex); @@ -67,7 +101,8 @@ bool DatabaseFilesystem::isTableExist(const String & name, ContextPtr) const } fs::path table_file_path(getTablePath(name)); - return fs::exists(table_file_path) && fs::is_regular_file(table_file_path); + + return checkTableFilePath(table_file_path, context_, false); } StoragePtr DatabaseFilesystem::getTableImpl(const String & name, ContextPtr context_) const @@ -80,19 +115,9 @@ StoragePtr DatabaseFilesystem::getTableImpl(const String & name, ContextPtr cont return it->second; } - // If run in Local mode, no need for path checking. - bool need_check_path = context_->getApplicationType() != Context::ApplicationType::LOCAL; - std::string user_files_path = fs::canonical(fs::path(context_->getUserFilesPath())).string(); - auto table_path = getTablePath(name); - // Check access for file before checking its existence - if (need_check_path && table_path.find(user_files_path) != 0) - throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "File is not inside {}", user_files_path); - - // If the table doesn't exist in the tables map, check if the corresponding file exists - if (!fs::exists(table_path) || !fs::is_regular_file(table_path)) - return nullptr; + checkTableFilePath(table_path, context_, true); // If the file exists, create a new table using TableFunctionFile and return it. 
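// (In effect this resolves the table function call file('<table_path>'): the AST built
// below goes through TableFunctionFactory, and execute() materializes the storage,
// which is then cached via addTable() for subsequent lookups.)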
auto args = makeASTFunction("file", std::make_shared(table_path)); @@ -101,6 +126,7 @@ StoragePtr DatabaseFilesystem::getTableImpl(const String & name, ContextPtr cont if (!table_function) return nullptr; + // TableFunctionFile throws exceptions, if table cannot be created auto table_storage = table_function->execute(args, context_, name); if (table_storage) addTable(name, table_storage); @@ -110,6 +136,7 @@ StoragePtr DatabaseFilesystem::getTableImpl(const String & name, ContextPtr cont StoragePtr DatabaseFilesystem::getTable(const String & name, ContextPtr context_) const { + // rethrow all exceptions from TableFunctionFile to show correct error to user if (auto storage = getTableImpl(name, context_)) return storage; throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {}.{} doesn't exist", backQuoteIfNeed(getDatabaseName()), backQuoteIfNeed(name)); @@ -123,11 +150,14 @@ StoragePtr DatabaseFilesystem::tryGetTable(const String & name, ContextPtr conte } catch (const Exception & e) { - // Ignore exceptions thrown by TableFunctionFile and which indicate that there is no table + // Ignore exceptions thrown by TableFunctionFile, which indicate that there is no table + // see tests/02722_database_filesystem.sh for more details if (e.code() == ErrorCodes::BAD_ARGUMENTS) return nullptr; if (e.code() == ErrorCodes::DATABASE_ACCESS_DENIED) return nullptr; + if (e.code() == ErrorCodes::FILE_DOESNT_EXIST) + return nullptr; throw; } diff --git a/src/Databases/DatabaseFilesystem.h b/src/Databases/DatabaseFilesystem.h index 697511ac5b3..3d2ad695cc6 100644 --- a/src/Databases/DatabaseFilesystem.h +++ b/src/Databases/DatabaseFilesystem.h @@ -49,6 +49,8 @@ protected: void addTable(const std::string & table_name, StoragePtr table_storage) const; + bool checkTableFilePath(const std::string & table_path, ContextPtr context_, bool throw_on_error) const; + private: String path; diff --git a/tests/queries/0_stateless/02722_database_filesystem.reference b/tests/queries/0_stateless/02722_database_filesystem.reference index a583f1e2e3c..c65dda7933a 100644 --- a/tests/queries/0_stateless/02722_database_filesystem.reference +++ b/tests/queries/0_stateless/02722_database_filesystem.reference @@ -4,7 +4,10 @@ test1 4 4 4 -Test 2: check DatabaseFilesystem access rights on server +Test 2: check DatabaseFilesystem access rights and errors handling on server +OK +OK +OK OK OK OK diff --git a/tests/queries/0_stateless/02722_database_filesystem.sh b/tests/queries/0_stateless/02722_database_filesystem.sh index 0adeface438..80f97af693e 100755 --- a/tests/queries/0_stateless/02722_database_filesystem.sh +++ b/tests/queries/0_stateless/02722_database_filesystem.sh @@ -21,6 +21,7 @@ tmp_dir=${CLICKHOUSE_TEST_UNIQUE_NAME} mkdir $tmp_dir cp ${CLICKHOUSE_USER_FILES_PATH}/tmp.csv ${tmp_dir}/tmp.csv cp ${CLICKHOUSE_USER_FILES_PATH}/tmp.csv ${CLICKHOUSE_USER_FILES_PATH}/tmp/tmp.csv +cp ${CLICKHOUSE_USER_FILES_PATH}/tmp.csv ${CLICKHOUSE_USER_FILES_PATH}/tmp.myext ################# echo "Test 1: create filesystem database and check implicit calls" @@ -35,24 +36,35 @@ ${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`tmp/tmp.csv\`;" ${CLICKHOUSE_LOCAL} -q "SELECT COUNT(*) FROM \"${tmp_dir}/tmp.csv\"" ################# -echo "Test 2: check DatabaseFilesystem access rights on server" -# Allows list files only inside user_files +echo "Test 2: check DatabaseFilesystem access rights and errors handling on server" +# DATABASE_ACCESS_DENIED: Allows list files only inside user_files ${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM 
${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`../tmp.csv\`;" 2>&1| grep -F "Code: 291" > /dev/null && echo "OK"
${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`/tmp/tmp.csv\`;" 2>&1| grep -F "Code: 291" > /dev/null && echo "OK"
-
${CLICKHOUSE_CLIENT} --multiline --multiquery --query """
USE test1;
SELECT COUNT(*) FROM \"../${tmp_dir}/tmp.csv\";
""" 2>&1| grep -F "Code: 291" > /dev/null && echo "OK"
${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`../../../../../../tmp.csv\`;" 2>&1| grep -F "Code: 291" > /dev/null && echo "OK"
+
+# BAD_ARGUMENTS: path should be inside user_files
${CLICKHOUSE_CLIENT} --multiline --multiquery -q """
DROP DATABASE IF EXISTS test2;
CREATE DATABASE test2 ENGINE = Filesystem('/tmp');
-SELECT COUNT(*) FROM test2.\`tmp.csv\`;
-""" 2>&1| grep -F "Code: 291" > /dev/null && echo "OK"
+""" 2>&1| grep -F "Code: 36" > /dev/null && echo "OK"
+
+# BAD_ARGUMENTS: .../user_files/relative_unknown_dir does not exist
+${CLICKHOUSE_CLIENT} --multiline --multiquery -q """
+DROP DATABASE IF EXISTS test2;
+CREATE DATABASE test2 ENGINE = Filesystem('relative_unknown_dir');
+""" 2>&1| grep -F "Code: 36" > /dev/null && echo "OK"
+
+# FILE_DOESNT_EXIST: unknown file
+${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`tmp2.csv\`;" 2>&1| grep -F "Code: 107" > /dev/null && echo "OK"
+
+# BAD_ARGUMENTS: Cannot determine the file format by its extension
+${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`tmp.myext\`;" 2>&1| grep -F "Code: 36" > /dev/null && echo "OK"

 # Clean
 ${CLICKHOUSE_CLIENT} --query "DROP DATABASE test1;"
-${CLICKHOUSE_CLIENT} --query "DROP DATABASE test2;"
 rm -rd $tmp_dir
 rm -rd $CLICKHOUSE_USER_FILES_PATH
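For orientation, this is the user-facing shape of the engine the tests above drive. A minimal sketch; the database name and directory are hypothetical, and on a server the directory must resolve under user_files:

CREATE DATABASE fs_db ENGINE = Filesystem('dir_under_user_files');
SELECT count() FROM fs_db.`tmp.csv`;  -- implicitly wraps the file() table function, read-only
DROP DATABASE fs_db;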
From 6831eb20013aadeec451ac8fb94d894abbfccef9 Mon Sep 17 00:00:00 2001
From: Aleksei Golub
Date: Sun, 30 Apr 2023 14:51:04 +0300
Subject: [PATCH 0099/1997] fix style

---
 src/Databases/DatabaseFilesystem.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/Databases/DatabaseFilesystem.cpp b/src/Databases/DatabaseFilesystem.cpp
index 7f22b8a16a0..8de609f0ca2 100644
--- a/src/Databases/DatabaseFilesystem.cpp
+++ b/src/Databases/DatabaseFilesystem.cpp
@@ -1,6 +1,5 @@
 #include

-#include
 #include
 #include
 #include
@@ -11,6 +10,7 @@
 #include
 #include
 #include
+#include

 #include
@@ -67,7 +67,8 @@ void DatabaseFilesystem::addTable(const std::string & table_name, StoragePtr tab
         getEngineName());
 }

-bool DatabaseFilesystem::checkTableFilePath(const std::string & table_path, ContextPtr context_, bool throw_on_error) const {
+bool DatabaseFilesystem::checkTableFilePath(const std::string & table_path, ContextPtr context_, bool throw_on_error) const
+{
     // If run in Local mode, no need for path checking.
     bool need_check_path = context_->getApplicationType() != Context::ApplicationType::LOCAL;
     std::string user_files_path = fs::canonical(fs::path(context_->getUserFilesPath())).string();
@@ -82,7 +83,8 @@ bool DatabaseFilesystem::checkTableFilePath(const std::string & table_path, Cont
     }

     // Check if the corresponding file exists
-    if (!fs::exists(table_path) || !fs::is_regular_file(table_path)) {
+    if (!fs::exists(table_path) || !fs::is_regular_file(table_path))
+    {
         if (throw_on_error)
             throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "File does not exist ({})", table_path);
         else

From 1846b76982828ed3223b25e2e5d6f5c8cee937eb Mon Sep 17 00:00:00 2001
From: Aleksei Golub
Date: Sun, 30 Apr 2023 23:13:42 +0300
Subject: [PATCH 0100/1997] Added DatabaseS3 with test

---
 programs/local/LocalServer.cpp                |   9 +
 src/Databases/DatabaseFactory.cpp             |  29 ++-
 src/Databases/DatabaseS3.cpp                  | 199 ++++++++++++++++++
 src/Databases/DatabaseS3.h                    |  63 ++++++
 .../0_stateless/02724_database_s3.reference   |  18 ++
 .../queries/0_stateless/02724_database_s3.sh  |  51 +++++
 6 files changed, 367 insertions(+), 2 deletions(-)
 create mode 100644 src/Databases/DatabaseS3.cpp
 create mode 100644 src/Databases/DatabaseS3.h
 create mode 100644 tests/queries/0_stateless/02724_database_s3.reference
 create mode 100755 tests/queries/0_stateless/02724_database_s3.sh

diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp
index 4939997b323..215a92e1944 100644
--- a/programs/local/LocalServer.cpp
+++ b/programs/local/LocalServer.cpp
@@ -51,6 +51,8 @@
 #include
 #include

+#include "config.h"
+
 #if defined(FUZZING_MODE)
 #include
 #endif
@@ -59,6 +61,10 @@
 # include
 #endif

+#if USE_AWS_S3
+#include
+#endif
+

 namespace fs = std::filesystem;

@@ -155,6 +161,9 @@ static DatabasePtr createClickHouseLocalDatabaseOverlay(const String & name_, Co
     auto databaseCombiner = std::make_shared(name_, context_);
     databaseCombiner->registerNextDatabase(std::make_shared(name_, "", context_));
     databaseCombiner->registerNextDatabase(std::make_shared(name_, context_));
+#if USE_AWS_S3
+    databaseCombiner->registerNextDatabase(std::make_shared<DatabaseS3>(name_, "", "", context_));
+#endif
     return databaseCombiner;
 }

diff --git a/src/Databases/DatabaseFactory.cpp b/src/Databases/DatabaseFactory.cpp
index 8a50c31efc8..b21435527a5 100644
--- a/src/Databases/DatabaseFactory.cpp
+++ b/src/Databases/DatabaseFactory.cpp
@@ -49,6 +49,10 @@
 #include
 #endif

+#if USE_AWS_S3
+#include
+#endif
+
 namespace fs = std::filesystem;

 namespace DB
@@ -133,13 +137,13 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String

     static const std::unordered_set database_engines{"Ordinary", "Atomic", "Memory",
         "Dictionary", "Lazy", "Replicated", "MySQL", "MaterializeMySQL", "MaterializedMySQL",
-        "PostgreSQL", "MaterializedPostgreSQL", "SQLite", "Filesystem"};
+        "PostgreSQL", "MaterializedPostgreSQL", "SQLite", "Filesystem", "S3"};

     if (!database_engines.contains(engine_name))
         throw Exception(ErrorCodes::BAD_ARGUMENTS, "Database engine name `{}` does not exist", engine_name);

     static const std::unordered_set engines_with_arguments{"MySQL", "MaterializeMySQL", "MaterializedMySQL",
-        "Lazy", "Replicated", "PostgreSQL", "MaterializedPostgreSQL", "SQLite", "Filesystem"};
+        "Lazy", "Replicated", "PostgreSQL", "MaterializedPostgreSQL", "SQLite", "Filesystem", "S3"};

     static const std::unordered_set engines_with_table_overrides{"MaterializeMySQL", "MaterializedMySQL", "MaterializedPostgreSQL"};
     bool engine_may_have_arguments = engines_with_arguments.contains(engine_name);
@@ -451,6 +455,27 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
         return std::make_shared(database_name, init_path, context);
     }
+#if USE_AWS_S3
+    else if (engine_name == "S3")
+    {
+        const ASTFunction * engine = engine_define->engine;
+
+        std::string key_id;
+        std::string secret_key;
+
+        if (engine->arguments && !engine->arguments->children.empty())
+        {
+            if (engine->arguments->children.size() != 2)
+                throw Exception(ErrorCodes::BAD_ARGUMENTS, "S3 database requires 0 or 2 arguments: [access_key_id, secret_access_key]");
+
+            const auto & arguments = engine->arguments->children;
+            key_id = safeGetLiteralValue(arguments[0], engine_name);
+            secret_key = safeGetLiteralValue(arguments[1], engine_name);
+        }
+
+        return std::make_shared<DatabaseS3>(database_name, key_id, secret_key, context);
+    }
+#endif

     throw Exception(ErrorCodes::UNKNOWN_DATABASE_ENGINE, "Unknown database engine: {}", engine_name);
 }
diff --git a/src/Databases/DatabaseS3.cpp b/src/Databases/DatabaseS3.cpp
new file mode 100644
index 00000000000..d4412ba7973
--- /dev/null
+++ b/src/Databases/DatabaseS3.cpp
@@ -0,0 +1,199 @@
+#include "config.h"
+
+#if USE_AWS_S3
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int LOGICAL_ERROR;
+    extern const int UNKNOWN_TABLE;
+    extern const int BAD_ARGUMENTS;
+    extern const int FILE_DOESNT_EXIST;
+    extern const int UNACCEPTABLE_URL;
+    extern const int S3_ERROR;
+}
+
+DatabaseS3::DatabaseS3(const String & name_, const String & key_id, const String & secret_key, ContextPtr context_)
+    : IDatabase(name_)
+    , WithContext(context_->getGlobalContext())
+    , access_key_id(key_id)
+    , secret_access_key(secret_key)
+    , log(&Poco::Logger::get("DatabaseS3(" + name_ + ")"))
+{
+}
+
+void DatabaseS3::addTable(const std::string & table_name, StoragePtr table_storage) const
+{
+    std::lock_guard lock(mutex);
+    auto [_, inserted] = loaded_tables.emplace(table_name, table_storage);
+    if (!inserted)
+        throw Exception(
+            ErrorCodes::LOGICAL_ERROR,
+            "Table with name `{}` already exists in database `{}` (engine {})",
+            table_name,
+            getDatabaseName(),
+            getEngineName());
+}
+
+bool DatabaseS3::checkUrl(const std::string & url, ContextPtr context_, bool throw_on_error) const
+{
+    try
+    {
+        S3::URI uri(url);
+        context_->getGlobalContext()->getRemoteHostFilter().checkURL(uri.uri);
+    }
+    catch (...)
+ { + if (throw_on_error) + throw; + return false; + } + return true; +} + +bool DatabaseS3::isTableExist(const String & name, ContextPtr context_) const +{ + std::lock_guard lock(mutex); + if (loaded_tables.find(name) != loaded_tables.end()) + return true; + + return checkUrl(name, context_, false); +} + +StoragePtr DatabaseS3::getTableImpl(const String & url, ContextPtr context_) const +{ + // Check if the table exists in the loaded tables map + { + std::lock_guard lock(mutex); + auto it = loaded_tables.find(url); + if (it != loaded_tables.end()) + return it->second; + } + + checkUrl(url, context_, true); + + // call TableFunctionS3 + auto args = makeASTFunction( + "s3", + std::make_shared(url), + std::make_shared(access_key_id), + std::make_shared(secret_access_key)); + + auto table_function = TableFunctionFactory::instance().get(args, context_); + if (!table_function) + return nullptr; + + // TableFunctionS3 throws exceptions, if table cannot be created + auto table_storage = table_function->execute(args, context_, url); + if (table_storage) + addTable(url, table_storage); + + return table_storage; +} + +StoragePtr DatabaseS3::getTable(const String & name, ContextPtr context_) const +{ + // rethrow all exceptions from TableFunctionS3 to show correct error to user + if (auto storage = getTableImpl(name, context_)) + return storage; + throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {}.{} doesn't exist", backQuoteIfNeed(getDatabaseName()), backQuoteIfNeed(name)); +} + +StoragePtr DatabaseS3::tryGetTable(const String & name, ContextPtr context_) const +{ + try + { + return getTableImpl(name, context_); + } + catch (const Exception & e) + { + // Ignore exceptions thrown by TableFunctionS3, which indicate that there is no table + if (e.code() == ErrorCodes::BAD_ARGUMENTS) + return nullptr; + if (e.code() == ErrorCodes::S3_ERROR) + return nullptr; + if (e.code() == ErrorCodes::FILE_DOESNT_EXIST) + return nullptr; + if (e.code() == ErrorCodes::UNACCEPTABLE_URL) + return nullptr; + throw; + } + catch (const Poco::URISyntaxException &) + { + return nullptr; + } +} + +ASTPtr DatabaseS3::getCreateDatabaseQuery() const +{ + auto settings = getContext()->getSettingsRef(); + ParserCreateQuery parser; + + const String query = fmt::format("CREATE DATABASE {} ENGINE = S3('{}', '{}')", + backQuoteIfNeed(getDatabaseName()), + access_key_id, + secret_access_key); + ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(), "", 0, settings.max_parser_depth); + + if (const auto database_comment = getDatabaseComment(); !database_comment.empty()) + { + auto & ast_create_query = ast->as(); + ast_create_query.set(ast_create_query.comment, std::make_shared(database_comment)); + } + + return ast; +} + +void DatabaseS3::shutdown() +{ + Tables tables_snapshot; + { + std::lock_guard lock(mutex); + tables_snapshot = loaded_tables; + } + + for (const auto & kv : tables_snapshot) + { + auto table_id = kv.second->getStorageID(); + kv.second->flushAndShutdown(); + } + + std::lock_guard lock(mutex); + loaded_tables.clear(); +} + +/** + * Returns an empty vector because the database is read-only and no tables can be backed up + */ +std::vector> DatabaseS3::getTablesForBackup(const FilterByNameFunction &, const ContextPtr &) const +{ + return {}; +} + +/** + * + * Returns an empty iterator because the database does not have its own tables + * But only caches them for quick access + */ +DatabaseTablesIteratorPtr DatabaseS3::getTablesIterator(ContextPtr, const FilterByNameFunction &) const +{ + return 
std::make_unique(Tables{}, getDatabaseName()); +} + +} // DB + +#endif diff --git a/src/Databases/DatabaseS3.h b/src/Databases/DatabaseS3.h new file mode 100644 index 00000000000..d5269e57f5a --- /dev/null +++ b/src/Databases/DatabaseS3.h @@ -0,0 +1,63 @@ +#pragma once + +#include "config.h" + +#if USE_AWS_S3 + +#include +#include +#include +#include +#include + +namespace DB +{ + +class Context; + +/** + * DatabaseS3 provides access to data stored in S3 + * Uses TableFunctionS3 to implicitly load file when a user requests the table, and provides read-only access to the data in the file + * Tables are cached inside the database for quick access + */ +class DatabaseS3 : public IDatabase, protected WithContext +{ +public: + DatabaseS3(const String & name, const String & key_id, const String & secret_key, ContextPtr context); + + String getEngineName() const override { return "S3"; } + + bool isTableExist(const String & name, ContextPtr context) const override; + + StoragePtr getTable(const String & name, ContextPtr context) const override; + + StoragePtr tryGetTable(const String & name, ContextPtr context) const override; + + bool empty() const override { return true; } + + bool isReadOnly() const override { return true; } + + ASTPtr getCreateDatabaseQuery() const override; + + void shutdown() override; + + std::vector> getTablesForBackup(const FilterByNameFunction &, const ContextPtr &) const override; + DatabaseTablesIteratorPtr getTablesIterator(ContextPtr, const FilterByNameFunction &) const override; + +protected: + StoragePtr getTableImpl(const String & url, ContextPtr context) const; + + void addTable(const std::string & table_name, StoragePtr table_storage) const; + + bool checkUrl(const std::string & url, ContextPtr context_, bool throw_on_error) const; + +private: + const String access_key_id; + const String secret_access_key; + mutable Tables loaded_tables TSA_GUARDED_BY(mutex); + Poco::Logger * log; +}; + +} // DB + +#endif diff --git a/tests/queries/0_stateless/02724_database_s3.reference b/tests/queries/0_stateless/02724_database_s3.reference new file mode 100644 index 00000000000..8a985913ff9 --- /dev/null +++ b/tests/queries/0_stateless/02724_database_s3.reference @@ -0,0 +1,18 @@ +Test 1: select from s3 +1 2 3 +4 5 6 +7 8 9 +0 0 0 +test1 +10 11 12 +13 14 15 +16 17 18 +0 0 0 +20 21 22 +23 24 25 +26 27 28 +0 0 0 +Test 2: check exceptions +OK +OK +OK diff --git a/tests/queries/0_stateless/02724_database_s3.sh b/tests/queries/0_stateless/02724_database_s3.sh new file mode 100755 index 00000000000..4f9df402040 --- /dev/null +++ b/tests/queries/0_stateless/02724_database_s3.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash +# Tags: no-fasttest +# Tag no-fasttest: Depends on AWS + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +################# +echo "Test 1: select from s3" +${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ +DROP DATABASE IF EXISTS test1; +CREATE DATABASE test1 ENGINE = S3; +USE test1; +SELECT * FROM \"http://localhost:11111/test/a.tsv\" +""" +${CLICKHOUSE_CLIENT} -q "SHOW DATABASES;" | grep test1 +${CLICKHOUSE_CLIENT} -q "DROP DATABASE test1;" + +${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ +DROP DATABASE IF EXISTS test2; +CREATE DATABASE test2 ENGINE = S3('test', 'testtest'); +USE test2; +SELECT * FROM \"http://localhost:11111/test/b.tsv\" +""" +${CLICKHOUSE_CLIENT} -q "DROP DATABASE test2;" + +${CLICKHOUSE_LOCAL} --query "SELECT * FROM \"http://localhost:11111/test/c.tsv\"" + +################# +echo "Test 2: check exceptions" +${CLICKHOUSE_LOCAL} --query "SELECT * FROM \"http://localhost:11111/test/c.myext\"" 2>&1| grep -F "UNKNOWN_TABLE" > /dev/null && echo "OK" + +${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ +DROP DATABASE IF EXISTS test3; +CREATE DATABASE test3 ENGINE = S3; +USE test3; +SELECT * FROM \"http://localhost:11111/test/a.myext\" +""" 2>&1| grep -F "BAD_ARGUMENTS" > /dev/null && echo "OK" + +${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ +USE test3; +SELECT * FROM \"abacaba\" +""" 2>&1| grep -F "BAD_ARGUMENTS" > /dev/null && echo "OK" + +# Cleanup +${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ +DROP DATABASE IF EXISTS test1; +DROP DATABASE IF EXISTS test2; +DROP DATABASE IF EXISTS test3; +""" From 3d1affbddb3de6c464f05459c1e9e5f34b6ff957 Mon Sep 17 00:00:00 2001 From: Aleksei Golub Date: Mon, 1 May 2023 12:17:10 +0300 Subject: [PATCH 0101/1997] retrigger checks From c1c69553741af4789170590f8a669d17f2dffbeb Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Wed, 26 Apr 2023 14:06:22 +0200 Subject: [PATCH 0102/1997] Deprecate delete-on-destroy.txt, do not create it any more --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 17 +---------------- src/Storages/MergeTree/IMergeTreeDataPart.h | 7 +++++-- src/Storages/MergeTree/MergeTreeData.cpp | 7 +++---- 3 files changed, 9 insertions(+), 22 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 148cbf93948..d7f2f3ca7c7 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -953,24 +953,9 @@ void IMergeTreeDataPart::writeVersionMetadata(const VersionMetadata & version_, } } -void IMergeTreeDataPart::writeDeleteOnDestroyMarker() -{ - static constexpr auto marker_path = "delete-on-destroy.txt"; - - try - { - getDataPartStorage().createFile(marker_path); - } - catch (Poco::Exception & e) - { - LOG_ERROR(storage.log, "{} (while creating DeleteOnDestroy marker: {})", - e.what(), (fs::path(getDataPartStorage().getFullPath()) / marker_path).string()); - } -} - void IMergeTreeDataPart::removeDeleteOnDestroyMarker() { - getDataPartStorage().removeFileIfExists("delete-on-destroy.txt"); + getDataPartStorage().removeFileIfExists(DELETE_ON_DESTROY_MARKER_FILE_NAME_DEPRECATED); } void IMergeTreeDataPart::removeVersionMetadata() diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index a36634d2cf9..f7bcaa263d6 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -381,7 +381,8 @@ public: /// default will be stored in this file. 
static inline constexpr auto DEFAULT_COMPRESSION_CODEC_FILE_NAME = "default_compression_codec.txt";
-    static inline constexpr auto DELETE_ON_DESTROY_MARKER_FILE_NAME = "delete-on-destroy.txt";
+    /// "delete-on-destroy.txt" is deprecated. It is no longer created; it is only removed.
+    static inline constexpr auto DELETE_ON_DESTROY_MARKER_FILE_NAME_DEPRECATED = "delete-on-destroy.txt";
     static inline constexpr auto UUID_FILE_NAME = "uuid.txt";
@@ -456,8 +457,10 @@ public:
     void writeChecksums(const MergeTreeDataPartChecksums & checksums_, const WriteSettings & settings);
-    void writeDeleteOnDestroyMarker();
+    /// "delete-on-destroy.txt" is deprecated. It is no longer created; it is only removed.
+    /// TODO: remove this method after some time.
     void removeDeleteOnDestroyMarker();
+
     /// It may look like a stupid joke, but these two methods are absolutely unrelated.
     /// This one is about removing the file with metadata about the part version (for transactions)
     void removeVersionMetadata();

diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index 83f5c0d359c..2def6fb08d3 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -1214,7 +1214,7 @@ MergeTreeData::LoadPartResult MergeTreeData::loadDataPart(
         .build();
     String part_path = fs::path(relative_data_path) / part_name;
-    String marker_path = fs::path(part_path) / IMergeTreeDataPart::DELETE_ON_DESTROY_MARKER_FILE_NAME;
+    String marker_path = fs::path(part_path) / IMergeTreeDataPart::DELETE_ON_DESTROY_MARKER_FILE_NAME_DEPRECATED;
     if (part_disk_ptr->exists(marker_path))
     {
@@ -4410,7 +4410,6 @@ void MergeTreeData::swapActivePart(MergeTreeData::DataPartPtr part_copy)
             /// All other locks are taken in StorageReplicatedMergeTree
             lockSharedData(*part_copy);
-            asMutableDeletingPart(original_active_part)->writeDeleteOnDestroyMarker();
             return;
         }
     }
@@ -7174,7 +7173,7 @@ std::pair MergeTreeData::cloneAn
         for (auto it = src_part->getDataPartStorage().iterate(); it->isValid(); it->next())
         {
             if (!files_to_copy_instead_of_hardlinks.contains(it->name())
-                && it->name() != IMergeTreeDataPart::DELETE_ON_DESTROY_MARKER_FILE_NAME
+                && it->name() != IMergeTreeDataPart::DELETE_ON_DESTROY_MARKER_FILE_NAME_DEPRECATED
                 && it->name() != IMergeTreeDataPart::TXN_VERSION_METADATA_FILE_NAME)
             {
                 hardlinked_files->hardlinks_from_source_part.insert(it->name());
@@ -7189,7 +7188,7 @@ std::pair MergeTreeData::cloneAn
         {
             auto file_name_with_projection_prefix = fs::path(projection_storage.getPartDirectory()) / it->name();
             if (!files_to_copy_instead_of_hardlinks.contains(file_name_with_projection_prefix)
-                && it->name() != IMergeTreeDataPart::DELETE_ON_DESTROY_MARKER_FILE_NAME
+                && it->name() != IMergeTreeDataPart::DELETE_ON_DESTROY_MARKER_FILE_NAME_DEPRECATED
                 && it->name() != IMergeTreeDataPart::TXN_VERSION_METADATA_FILE_NAME)
             {
                 hardlinked_files->hardlinks_from_source_part.insert(file_name_with_projection_prefix);

From 64d232f1aa584b3eba5abf9fe02bfa9b0535701c Mon Sep 17 00:00:00 2001
From: alekseygolub
Date: Mon, 1 May 2023 18:00:26 +0000
Subject: [PATCH 0103/1997] Fix memory leak

---
 src/Interpreters/DatabaseCatalog.cpp                  | 2 +-
 tests/queries/0_stateless/02724_database_s3.reference | 6 +++---
 tests/queries/0_stateless/02724_database_s3.sh        | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp
index f9e74fadcbd..129323cd6b3 100644
--- a/src/Interpreters/DatabaseCatalog.cpp
+++
b/src/Interpreters/DatabaseCatalog.cpp @@ -346,7 +346,7 @@ DatabaseAndTable DatabaseCatalog::getTableImpl( catch (const Exception & e) { if (exception) - exception->emplace(*e.clone()); + exception->emplace(e); } if (!table) diff --git a/tests/queries/0_stateless/02724_database_s3.reference b/tests/queries/0_stateless/02724_database_s3.reference index 8a985913ff9..b3800a27305 100644 --- a/tests/queries/0_stateless/02724_database_s3.reference +++ b/tests/queries/0_stateless/02724_database_s3.reference @@ -8,9 +8,9 @@ test1 13 14 15 16 17 18 0 0 0 -20 21 22 -23 24 25 -26 27 28 +10 11 12 +13 14 15 +16 17 18 0 0 0 Test 2: check exceptions OK diff --git a/tests/queries/0_stateless/02724_database_s3.sh b/tests/queries/0_stateless/02724_database_s3.sh index 4f9df402040..9b539407884 100755 --- a/tests/queries/0_stateless/02724_database_s3.sh +++ b/tests/queries/0_stateless/02724_database_s3.sh @@ -25,7 +25,7 @@ SELECT * FROM \"http://localhost:11111/test/b.tsv\" """ ${CLICKHOUSE_CLIENT} -q "DROP DATABASE test2;" -${CLICKHOUSE_LOCAL} --query "SELECT * FROM \"http://localhost:11111/test/c.tsv\"" +${CLICKHOUSE_LOCAL} --query "SELECT * FROM \"http://localhost:11111/test/b.tsv\"" ################# echo "Test 2: check exceptions" From 95522ad7a6486bdbe5861c4f65c3a0ffe9610372 Mon Sep 17 00:00:00 2001 From: alekseygolub Date: Mon, 1 May 2023 21:46:17 +0000 Subject: [PATCH 0104/1997] Added DatabaseHDFS --- programs/local/LocalServer.cpp | 7 + src/Databases/DatabaseFactory.cpp | 31 ++- src/Databases/DatabaseHDFS.cpp | 228 ++++++++++++++++++ src/Databases/DatabaseHDFS.h | 65 +++++ .../0_stateless/02725_database_hdfs.reference | 16 ++ .../0_stateless/02725_database_hdfs.sh | 66 +++++ 6 files changed, 411 insertions(+), 2 deletions(-) create mode 100644 src/Databases/DatabaseHDFS.cpp create mode 100644 src/Databases/DatabaseHDFS.h create mode 100644 tests/queries/0_stateless/02725_database_hdfs.reference create mode 100755 tests/queries/0_stateless/02725_database_hdfs.sh diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 215a92e1944..0cf94892171 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -65,6 +65,10 @@ #include #endif +#if USE_HDFS +#include +#endif + namespace fs = std::filesystem; @@ -163,6 +167,9 @@ static DatabasePtr createClickHouseLocalDatabaseOverlay(const String & name_, Co databaseCombiner->registerNextDatabase(std::make_shared(name_, context_)); #if USE_AWS_S3 databaseCombiner->registerNextDatabase(std::make_shared(name_, "", "", context_)); +#endif +#if USE_HDFS + databaseCombiner->registerNextDatabase(std::make_shared(name_, "", context_)); #endif return databaseCombiner; } diff --git a/src/Databases/DatabaseFactory.cpp b/src/Databases/DatabaseFactory.cpp index b21435527a5..5c4256c8a9f 100644 --- a/src/Databases/DatabaseFactory.cpp +++ b/src/Databases/DatabaseFactory.cpp @@ -53,6 +53,10 @@ #include #endif +#if USE_HDFS +#include +#endif + namespace fs = std::filesystem; namespace DB @@ -137,13 +141,13 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String static const std::unordered_set database_engines{"Ordinary", "Atomic", "Memory", "Dictionary", "Lazy", "Replicated", "MySQL", "MaterializeMySQL", "MaterializedMySQL", - "PostgreSQL", "MaterializedPostgreSQL", "SQLite", "Filesystem", "S3"}; + "PostgreSQL", "MaterializedPostgreSQL", "SQLite", "Filesystem", "S3", "HDFS"}; if (!database_engines.contains(engine_name)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Database engine name `{}` does not 
exist", engine_name); static const std::unordered_set engines_with_arguments{"MySQL", "MaterializeMySQL", "MaterializedMySQL", - "Lazy", "Replicated", "PostgreSQL", "MaterializedPostgreSQL", "SQLite", "Filesystem", "S3"}; + "Lazy", "Replicated", "PostgreSQL", "MaterializedPostgreSQL", "SQLite", "Filesystem", "S3", "HDFS"}; static const std::unordered_set engines_with_table_overrides{"MaterializeMySQL", "MaterializedMySQL", "MaterializedPostgreSQL"}; bool engine_may_have_arguments = engines_with_arguments.contains(engine_name); @@ -437,6 +441,7 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String return std::make_shared(context, engine_define, create.attach, database_path); } #endif + else if (engine_name == "Filesystem") { const ASTFunction * engine = engine_define->engine; @@ -455,6 +460,7 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String return std::make_shared(database_name, init_path, context); } + #if USE_AWS_S3 else if (engine_name == "S3") { @@ -477,6 +483,27 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String } #endif +#if USE_HDFS + else if (engine_name == "HDFS") + { + const ASTFunction * engine = engine_define->engine; + + /// If source_url is empty, then table name must contain full url + std::string source_url; + + if (engine->arguments && !engine->arguments->children.empty()) + { + if (engine->arguments->children.size() != 1) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "HDFS database requires at most 1 argument: source_url"); + + const auto & arguments = engine->arguments->children; + source_url = safeGetLiteralValue(arguments[0], engine_name); + } + + return std::make_shared(database_name, source_url, context); + } +#endif + throw Exception(ErrorCodes::UNKNOWN_DATABASE_ENGINE, "Unknown database engine: {}", engine_name); } diff --git a/src/Databases/DatabaseHDFS.cpp b/src/Databases/DatabaseHDFS.cpp new file mode 100644 index 00000000000..39c3f955bf5 --- /dev/null +++ b/src/Databases/DatabaseHDFS.cpp @@ -0,0 +1,228 @@ +#include "config.h" + +#if USE_HDFS + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +namespace fs = std::filesystem; + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int UNKNOWN_TABLE; + extern const int BAD_ARGUMENTS; + extern const int FILE_DOESNT_EXIST; + extern const int UNACCEPTABLE_URL; + extern const int ACCESS_DENIED; + extern const int DATABASE_ACCESS_DENIED; + extern const int HDFS_ERROR; + extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; +} + +static constexpr std::string_view HDFS_HOST_REGEXP = "^hdfs://[^/]*"; + + +DatabaseHDFS::DatabaseHDFS(const String & name_, const String & source_url, ContextPtr context_) + : IDatabase(name_) + , WithContext(context_->getGlobalContext()) + , source(source_url) + , log(&Poco::Logger::get("DatabaseHDFS(" + name_ + ")")) +{ + if (!source.empty()) + { + if (!re2::RE2::FullMatch(source, std::string(HDFS_HOST_REGEXP))) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad hdfs host: {}. 
It should have structure 'hdfs://:'", source); + context_->getGlobalContext()->getRemoteHostFilter().checkURL(Poco::URI(source)); + } +} + +void DatabaseHDFS::addTable(const std::string & table_name, StoragePtr table_storage) const +{ + std::lock_guard lock(mutex); + auto [_, inserted] = loaded_tables.emplace(table_name, table_storage); + if (!inserted) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Table with name `{}` already exists in database `{}` (engine {})", + table_name, + getDatabaseName(), + getEngineName()); +} + +std::string DatabaseHDFS::getTablePath(const std::string & table_name) const +{ + if (table_name.starts_with("hdfs://")) + return table_name; + if (source.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad hdfs url: {}. It should have structure 'hdfs://:/path'", table_name); + return (fs::path(source) / table_name).string(); +} + +bool DatabaseHDFS::checkUrl(const std::string & url, ContextPtr context_, bool throw_on_error) const +{ + try + { + checkHDFSURL(url); + context_->getGlobalContext()->getRemoteHostFilter().checkURL(Poco::URI(url)); + } + catch (...) + { + if (throw_on_error) + throw; + return false; + } + + return true; +} + +bool DatabaseHDFS::isTableExist(const String & name, ContextPtr context_) const +{ + std::lock_guard lock(mutex); + if (loaded_tables.find(name) != loaded_tables.end()) + return true; + + return checkUrl(name, context_, false); +} + +StoragePtr DatabaseHDFS::getTableImpl(const String & name, ContextPtr context_) const +{ + // Check if the table exists in the loaded tables map + { + std::lock_guard lock(mutex); + auto it = loaded_tables.find(name); + if (it != loaded_tables.end()) + return it->second; + } + + auto url = getTablePath(name); + + checkUrl(url, context_, true); + + // call TableFunctionHDFS + auto args = makeASTFunction("hdfs", std::make_shared(url)); + + auto table_function = TableFunctionFactory::instance().get(args, context_); + if (!table_function) + return nullptr; + + // TableFunctionHDFS throws exceptions, if table cannot be created + auto table_storage = table_function->execute(args, context_, name); + if (table_storage) + addTable(name, table_storage); + + return table_storage; +} + +StoragePtr DatabaseHDFS::getTable(const String & name, ContextPtr context_) const +{ + // rethrow all exceptions from TableFunctionHDFS to show correct error to user + if (auto storage = getTableImpl(name, context_)) + return storage; + throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {}.{} doesn't exist", backQuoteIfNeed(getDatabaseName()), backQuoteIfNeed(name)); +} + +StoragePtr DatabaseHDFS::tryGetTable(const String & name, ContextPtr context_) const +{ + try + { + return getTableImpl(name, context_); + } + catch (const Exception & e) + { + // Ignore exceptions thrown by TableFunctionHDFS, which indicate that there is no table + if (e.code() == ErrorCodes::BAD_ARGUMENTS) + return nullptr; + if (e.code() == ErrorCodes::ACCESS_DENIED) + return nullptr; + if (e.code() == ErrorCodes::DATABASE_ACCESS_DENIED) + return nullptr; + if (e.code() == ErrorCodes::FILE_DOESNT_EXIST) + return nullptr; + if (e.code() == ErrorCodes::UNACCEPTABLE_URL) + return nullptr; + if (e.code() == ErrorCodes::HDFS_ERROR) + return nullptr; + if (e.code() == ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE) + return nullptr; + throw; + } + catch (const Poco::URISyntaxException &) + { + return nullptr; + } +} + +ASTPtr DatabaseHDFS::getCreateDatabaseQuery() const +{ + auto settings = getContext()->getSettingsRef(); + ParserCreateQuery parser; + + const 
String query = fmt::format("CREATE DATABASE {} ENGINE = HDFS('{}')", backQuoteIfNeed(getDatabaseName()), source);
+    ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(), "", 0, settings.max_parser_depth);
+
+    if (const auto database_comment = getDatabaseComment(); !database_comment.empty())
+    {
+        auto & ast_create_query = ast->as();
+        ast_create_query.set(ast_create_query.comment, std::make_shared(database_comment));
+    }
+
+    return ast;
+}
+
+void DatabaseHDFS::shutdown()
+{
+    Tables tables_snapshot;
+    {
+        std::lock_guard lock(mutex);
+        tables_snapshot = loaded_tables;
+    }
+
+    for (const auto & kv : tables_snapshot)
+    {
+        auto table_id = kv.second->getStorageID();
+        kv.second->flushAndShutdown();
+    }
+
+    std::lock_guard lock(mutex);
+    loaded_tables.clear();
+}
+
+/**
+ * Returns an empty vector because the database is read-only and no tables can be backed up
+ */
+std::vector> DatabaseHDFS::getTablesForBackup(const FilterByNameFunction &, const ContextPtr &) const
+{
+    return {};
+}
+
+/**
+ * Returns an empty iterator because the database does not have its own tables,
+ * but only caches them for quick access
+ */
+DatabaseTablesIteratorPtr DatabaseHDFS::getTablesIterator(ContextPtr, const FilterByNameFunction &) const
+{
+    return std::make_unique(Tables{}, getDatabaseName());
+}
+
+} // DB
+
+#endif

diff --git a/src/Databases/DatabaseHDFS.h b/src/Databases/DatabaseHDFS.h
new file mode 100644
index 00000000000..4e2b8578fcd
--- /dev/null
+++ b/src/Databases/DatabaseHDFS.h
@@ -0,0 +1,65 @@
+#pragma once
+
+#include "config.h"
+
+#if USE_HDFS
+
+#include
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+
+class Context;
+
+/**
+ * DatabaseHDFS allows to interact with files stored on HDFS
+ * Uses TableFunctionHDFS to implicitly load the file when a user requests the table, and provides read-only access to the data in the file
+ * Tables are cached inside the database for quick access
+ */
+class DatabaseHDFS : public IDatabase, protected WithContext
+{
+public:
+    DatabaseHDFS(const String & name, const String & source_url, ContextPtr context);
+
+    String getEngineName() const override { return "HDFS"; }
+
+    bool isTableExist(const String & name, ContextPtr context) const override;
+
+    StoragePtr getTable(const String & name, ContextPtr context) const override;
+
+    StoragePtr tryGetTable(const String & name, ContextPtr context) const override;
+
+    bool empty() const override { return true; }
+
+    bool isReadOnly() const override { return true; }
+
+    ASTPtr getCreateDatabaseQuery() const override;
+
+    void shutdown() override;
+
+    std::vector> getTablesForBackup(const FilterByNameFunction &, const ContextPtr &) const override;
+    DatabaseTablesIteratorPtr getTablesIterator(ContextPtr, const FilterByNameFunction &) const override;
+
+protected:
+    StoragePtr getTableImpl(const String & url, ContextPtr context) const;
+
+    void addTable(const std::string & table_name, StoragePtr table_storage) const;
+
+    bool checkUrl(const std::string & name, ContextPtr context_, bool throw_on_error) const;
+
+    std::string getTablePath(const std::string & table_name) const;
+
+private:
+    const String source;
+
+    mutable Tables loaded_tables TSA_GUARDED_BY(mutex);
+    Poco::Logger * log;
+};
+
+} // DB
+
+#endif

diff --git a/tests/queries/0_stateless/02725_database_hdfs.reference b/tests/queries/0_stateless/02725_database_hdfs.reference
new file mode 100644
index 00000000000..2a2e6c20aaa
--- /dev/null
+++ b/tests/queries/0_stateless/02725_database_hdfs.reference
@@ -0,0 +1,16 @@
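The expected output of the new stateless test follows. For orientation, a minimal usage sketch of the engine it exercises; the endpoint and file name are the ones used in 02725_database_hdfs.sh further below, while the database name is illustrative:

    -- Table names resolve to paths under the engine URL; the file is loaded
    -- implicitly through TableFunctionHDFS and cached inside the database.
    CREATE DATABASE hdfs_db ENGINE = HDFS('hdfs://localhost:12222');
    USE hdfs_db;
    SELECT * FROM "test_02725_1.tsv";
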
+Test 1: select from hdfs database +1 2 3 +test1 +1 2 3 +test2 +4 5 6 +Test 2: check exceptions +OK0 +OK1 +OK2 +OK3 +OK4 +OK5 +OK6 +OK7 +OK8 diff --git a/tests/queries/0_stateless/02725_database_hdfs.sh b/tests/queries/0_stateless/02725_database_hdfs.sh new file mode 100755 index 00000000000..ea16dd4024c --- /dev/null +++ b/tests/queries/0_stateless/02725_database_hdfs.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, use-hdfs + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# Prepare data +${CLICKHOUSE_CLIENT} -q "insert into table function hdfs('hdfs://localhost:12222/test_02725_1.tsv', 'TSV', 'column1 UInt32, column2 UInt32, column3 UInt32') select 1, 2, 3 settings hdfs_truncate_on_insert=1;" +${CLICKHOUSE_CLIENT} -q "insert into table function hdfs('hdfs://localhost:12222/test_02725_2.tsv', 'TSV', 'column1 UInt32, column2 UInt32, column3 UInt32') select 4, 5, 6 settings hdfs_truncate_on_insert=1;" + +################# +echo "Test 1: select from hdfs database" + +# Database without specific host +${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ +DROP DATABASE IF EXISTS test1; +CREATE DATABASE test1 ENGINE = HDFS; +USE test1; +SELECT * FROM \"hdfs://localhost:12222/test_02725_1.tsv\" +""" +${CLICKHOUSE_CLIENT} -q "SHOW DATABASES;" | grep test1 + +# Database with host +${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ +DROP DATABASE IF EXISTS test2; +CREATE DATABASE test2 ENGINE = HDFS('hdfs://localhost:12222'); +USE test2; +SELECT * FROM \"test_02725_1.tsv\" +""" +${CLICKHOUSE_CLIENT} -q "SHOW DATABASES;" | grep test2 + +# Check implicit call in clickhouse-local +${CLICKHOUSE_LOCAL} --query "SELECT * FROM \"hdfs://localhost:12222/test_02725_2.tsv\"" + +################# +echo "Test 2: check exceptions" +${CLICKHOUSE_LOCAL} --query "SELECT * FROM \"hdfs://localhost:12222/file.myext\"" 2>&1| grep -F "UNKNOWN_TABLE" > /dev/null && echo "OK0" +${CLICKHOUSE_LOCAL} --query "SELECT * FROM \"hdfs://localhost:12222/test_02725_3.tsv\"" 2>&1| grep -F "UNKNOWN_TABLE" > /dev/null && echo "OK1" +${CLICKHOUSE_LOCAL} --query "SELECT * FROM \"hdfs://localhost:12222\"" 2>&1| grep -F "UNKNOWN_TABLE" > /dev/null && echo "OK2" + +${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ +DROP DATABASE IF EXISTS test3; +CREATE DATABASE test3 ENGINE = HDFS('abacaba'); +""" 2>&1| grep -F "BAD_ARGUMENTS" > /dev/null && echo "OK3" + +${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ +DROP DATABASE IF EXISTS test4; +CREATE DATABASE test4 ENGINE = HDFS; +USE test4; +SELECT * FROM \"abacaba/file.tsv\" +""" 2>&1| grep -F "BAD_ARGUMENTS" > /dev/null && echo "OK4" + +${CLICKHOUSE_CLIENT} -q "SELECT * FROM test4.\`http://localhost:11111/test/a.tsv\`" 2>&1| grep -F "BAD_ARGUMENTS" > /dev/null && echo "OK5" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222/file.myext\`" 2>&1| grep -F "BAD_ARGUMENTS" > /dev/null && echo "OK6" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222/test_02725_3.tsv\`" 2>&1| grep -F "CANNOT_EXTRACT_TABLE_STRUCTURE" > /dev/null && echo "OK7" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222\`" 2>&1| grep -F "BAD_ARGUMENTS" > /dev/null && echo "OK8" + + +# Cleanup +${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ +DROP DATABASE IF EXISTS test1; +DROP DATABASE IF EXISTS test2; +DROP DATABASE IF EXISTS test3; +DROP DATABASE IF EXISTS test4; +""" \ No newline at end of file From 82bb1e8bf2a3183179938629cc8f6aab3d876e87 Mon 
Sep 17 00:00:00 2001 From: alekseygolub Date: Tue, 2 May 2023 18:51:35 +0000 Subject: [PATCH 0105/1997] Fix build and try fix tests --- src/Databases/DatabaseHDFS.h | 4 ++-- tests/queries/0_stateless/02724_database_s3.sh | 2 +- tests/queries/0_stateless/02725_database_hdfs.sh | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Databases/DatabaseHDFS.h b/src/Databases/DatabaseHDFS.h index 4e2b8578fcd..9a506c5c8ac 100644 --- a/src/Databases/DatabaseHDFS.h +++ b/src/Databases/DatabaseHDFS.h @@ -45,11 +45,11 @@ public: DatabaseTablesIteratorPtr getTablesIterator(ContextPtr, const FilterByNameFunction &) const override; protected: - StoragePtr getTableImpl(const String & url, ContextPtr context) const; + StoragePtr getTableImpl(const String & name, ContextPtr context) const; void addTable(const std::string & table_name, StoragePtr table_storage) const; - bool checkUrl(const std::string & name, ContextPtr context_, bool throw_on_error) const; + bool checkUrl(const std::string & url, ContextPtr context_, bool throw_on_error) const; std::string getTablePath(const std::string & table_name) const; diff --git a/tests/queries/0_stateless/02724_database_s3.sh b/tests/queries/0_stateless/02724_database_s3.sh index 9b539407884..af858d140d7 100755 --- a/tests/queries/0_stateless/02724_database_s3.sh +++ b/tests/queries/0_stateless/02724_database_s3.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest +# Tags: no-fasttest, no-parallel # Tag no-fasttest: Depends on AWS CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) diff --git a/tests/queries/0_stateless/02725_database_hdfs.sh b/tests/queries/0_stateless/02725_database_hdfs.sh index ea16dd4024c..8d4e982504a 100755 --- a/tests/queries/0_stateless/02725_database_hdfs.sh +++ b/tests/queries/0_stateless/02725_database_hdfs.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, use-hdfs +# Tags: no-fasttest, use-hdfs, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From 58cb6c7837872ae4eb46eed84d5aa0d75607d661 Mon Sep 17 00:00:00 2001 From: alekseygolub Date: Tue, 2 May 2023 19:57:36 +0000 Subject: [PATCH 0106/1997] S3, HDFS only for explicit creation --- programs/local/LocalServer.cpp | 14 -------------- .../0_stateless/02724_database_s3.reference | 5 ----- tests/queries/0_stateless/02724_database_s3.sh | 4 ---- .../0_stateless/02725_database_hdfs.reference | 4 ---- .../queries/0_stateless/02725_database_hdfs.sh | 18 ++++++------------ 5 files changed, 6 insertions(+), 39 deletions(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 0cf94892171..b413483686a 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -61,14 +61,6 @@ # include #endif -#if USE_AWS_S3 -#include -#endif - -#if USE_HDFS -#include -#endif - namespace fs = std::filesystem; @@ -165,12 +157,6 @@ static DatabasePtr createClickHouseLocalDatabaseOverlay(const String & name_, Co auto databaseCombiner = std::make_shared(name_, context_); databaseCombiner->registerNextDatabase(std::make_shared(name_, "", context_)); databaseCombiner->registerNextDatabase(std::make_shared(name_, context_)); -#if USE_AWS_S3 - databaseCombiner->registerNextDatabase(std::make_shared(name_, "", "", context_)); -#endif -#if USE_HDFS - databaseCombiner->registerNextDatabase(std::make_shared(name_, "", context_)); -#endif return databaseCombiner; } diff --git a/tests/queries/0_stateless/02724_database_s3.reference 
b/tests/queries/0_stateless/02724_database_s3.reference index b3800a27305..72ba0e240b1 100644 --- a/tests/queries/0_stateless/02724_database_s3.reference +++ b/tests/queries/0_stateless/02724_database_s3.reference @@ -8,11 +8,6 @@ test1 13 14 15 16 17 18 0 0 0 -10 11 12 -13 14 15 -16 17 18 -0 0 0 Test 2: check exceptions OK OK -OK diff --git a/tests/queries/0_stateless/02724_database_s3.sh b/tests/queries/0_stateless/02724_database_s3.sh index af858d140d7..2758580a355 100755 --- a/tests/queries/0_stateless/02724_database_s3.sh +++ b/tests/queries/0_stateless/02724_database_s3.sh @@ -25,12 +25,8 @@ SELECT * FROM \"http://localhost:11111/test/b.tsv\" """ ${CLICKHOUSE_CLIENT} -q "DROP DATABASE test2;" -${CLICKHOUSE_LOCAL} --query "SELECT * FROM \"http://localhost:11111/test/b.tsv\"" - ################# echo "Test 2: check exceptions" -${CLICKHOUSE_LOCAL} --query "SELECT * FROM \"http://localhost:11111/test/c.myext\"" 2>&1| grep -F "UNKNOWN_TABLE" > /dev/null && echo "OK" - ${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ DROP DATABASE IF EXISTS test3; CREATE DATABASE test3 ENGINE = S3; diff --git a/tests/queries/0_stateless/02725_database_hdfs.reference b/tests/queries/0_stateless/02725_database_hdfs.reference index 2a2e6c20aaa..ef8adae2bbc 100644 --- a/tests/queries/0_stateless/02725_database_hdfs.reference +++ b/tests/queries/0_stateless/02725_database_hdfs.reference @@ -3,7 +3,6 @@ Test 1: select from hdfs database test1 1 2 3 test2 -4 5 6 Test 2: check exceptions OK0 OK1 @@ -11,6 +10,3 @@ OK2 OK3 OK4 OK5 -OK6 -OK7 -OK8 diff --git a/tests/queries/0_stateless/02725_database_hdfs.sh b/tests/queries/0_stateless/02725_database_hdfs.sh index 8d4e982504a..a78f3e6bbdc 100755 --- a/tests/queries/0_stateless/02725_database_hdfs.sh +++ b/tests/queries/0_stateless/02725_database_hdfs.sh @@ -30,31 +30,25 @@ SELECT * FROM \"test_02725_1.tsv\" """ ${CLICKHOUSE_CLIENT} -q "SHOW DATABASES;" | grep test2 -# Check implicit call in clickhouse-local -${CLICKHOUSE_LOCAL} --query "SELECT * FROM \"hdfs://localhost:12222/test_02725_2.tsv\"" - ################# echo "Test 2: check exceptions" -${CLICKHOUSE_LOCAL} --query "SELECT * FROM \"hdfs://localhost:12222/file.myext\"" 2>&1| grep -F "UNKNOWN_TABLE" > /dev/null && echo "OK0" -${CLICKHOUSE_LOCAL} --query "SELECT * FROM \"hdfs://localhost:12222/test_02725_3.tsv\"" 2>&1| grep -F "UNKNOWN_TABLE" > /dev/null && echo "OK1" -${CLICKHOUSE_LOCAL} --query "SELECT * FROM \"hdfs://localhost:12222\"" 2>&1| grep -F "UNKNOWN_TABLE" > /dev/null && echo "OK2" ${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ DROP DATABASE IF EXISTS test3; CREATE DATABASE test3 ENGINE = HDFS('abacaba'); -""" 2>&1| grep -F "BAD_ARGUMENTS" > /dev/null && echo "OK3" +""" 2>&1| grep -F "BAD_ARGUMENTS" > /dev/null && echo "OK0" ${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ DROP DATABASE IF EXISTS test4; CREATE DATABASE test4 ENGINE = HDFS; USE test4; SELECT * FROM \"abacaba/file.tsv\" -""" 2>&1| grep -F "BAD_ARGUMENTS" > /dev/null && echo "OK4" +""" 2>&1| grep -F "BAD_ARGUMENTS" > /dev/null && echo "OK1" -${CLICKHOUSE_CLIENT} -q "SELECT * FROM test4.\`http://localhost:11111/test/a.tsv\`" 2>&1| grep -F "BAD_ARGUMENTS" > /dev/null && echo "OK5" -${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222/file.myext\`" 2>&1| grep -F "BAD_ARGUMENTS" > /dev/null && echo "OK6" -${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222/test_02725_3.tsv\`" 2>&1| grep -F "CANNOT_EXTRACT_TABLE_STRUCTURE" > /dev/null && echo "OK7" -${CLICKHOUSE_CLIENT} --query 
"SELECT * FROM test4.\`hdfs://localhost:12222\`" 2>&1| grep -F "BAD_ARGUMENTS" > /dev/null && echo "OK8" +${CLICKHOUSE_CLIENT} -q "SELECT * FROM test4.\`http://localhost:11111/test/a.tsv\`" 2>&1| grep -F "BAD_ARGUMENTS" > /dev/null && echo "OK2" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222/file.myext\`" 2>&1| grep -F "BAD_ARGUMENTS" > /dev/null && echo "OK3" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222/test_02725_3.tsv\`" 2>&1| grep -F "CANNOT_EXTRACT_TABLE_STRUCTURE" > /dev/null && echo "OK4" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222\`" 2>&1| grep -F "BAD_ARGUMENTS" > /dev/null && echo "OK5" # Cleanup From f43e916d42db350545d330bfb16ea57a9bd5e0d4 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 2 May 2023 22:07:30 +0000 Subject: [PATCH 0107/1997] impl --- .../hdfs_configs/bootstrap.sh | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/integration/test_storage_kerberized_hdfs/hdfs_configs/bootstrap.sh b/tests/integration/test_storage_kerberized_hdfs/hdfs_configs/bootstrap.sh index 687ddd8fb46..db6921bc1c8 100755 --- a/tests/integration/test_storage_kerberized_hdfs/hdfs_configs/bootstrap.sh +++ b/tests/integration/test_storage_kerberized_hdfs/hdfs_configs/bootstrap.sh @@ -111,6 +111,23 @@ cat > /usr/local/hadoop/etc/hadoop/hdfs-site.xml << EOF dfs.datanode.http.address 0.0.0.0:1006 + + + dfs.datanode.ipc.address + 0.0.0.0:0 + + + dfs.namenode.secondary.http-address + 0.0.0.0:0 + + + dfs.namenode.backup.address + 0.0.0.0:0 + + + dfs.namenode.backup.http-address + 0.0.0.0:0 + cache @@ -60,54 +20,6 @@ 0 100 - - cache - s3_disk_2 - s3_cache_2/ - 128Mi - 0 - 100Mi - 100 - - - cache - s3_disk_3 - s3_disk_3_cache/ - 128Mi - 22548578304 - 1 - 1 - 0 - 100 - - - cache - s3_disk_4 - s3_cache_4/ - 128Mi - 1 - 1 - 0 - 100 - - - cache - s3_disk_5 - s3_cache_5/ - 128Mi - 0 - 100 - - - cache - s3_disk_6 - s3_cache_6/ - 128Mi - 0 - 1 - 100 - 100 - cache s3_disk_6 @@ -116,16 +28,6 @@ 1 100 - - cache - s3_disk_6 - s3_cache_small_segment_size/ - 128Mi - 10Ki - 0 - 1 - 100 - local_blob_storage @@ -193,34 +95,6 @@ - - -
-                        <disk>s3_cache_2</disk>
-                    </main>
-                </volumes>
-            </s3_cache_2>
-            <s3_cache_3>
-                <volumes>
-                    <main>
-                        <disk>s3_cache_3</disk>
-                    </main>
-                </volumes>
-            </s3_cache_3>
-            <s3_cache_4>
-                <volumes>
-                    <main>
-                        <disk>s3_cache_4</disk>
-                    </main>
-                </volumes>
-            </s3_cache_4>
-            <s3_cache_6>
-                <volumes>
-                    <main>
-                        <disk>s3_cache_6</disk>
-                    </main>
-                </volumes>
-            </s3_cache_6>
@@ -256,13 +130,6 @@
-            <s3_cache_small_segment_size>
-                <volumes>
-                    <main>
-                        <disk>s3_cache_small_segment_size</disk>
-                    </main>
-                </volumes>
-            </s3_cache_small_segment_size>
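The policies deleted above were consumed by tests through storage_policy settings; the test changes below switch to defining a cache disk inline, directly in CREATE TABLE. A condensed sketch of that pattern (table name, size and cache path are illustrative; the nested 's3_disk' is the underlying disk being cached):

    CREATE TABLE t (key UInt32) ENGINE = MergeTree ORDER BY key
    SETTINGS disk = disk(
        type = cache,
        max_size = '128Mi',
        path = '/var/lib/clickhouse/t_cache/',
        disk = 's3_disk');
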
diff --git a/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql b/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql index f6671b82291..ae2cd1b8cd1 100644 --- a/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql +++ b/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql @@ -6,7 +6,24 @@ SYSTEM DROP FILESYSTEM CACHE; SET enable_filesystem_cache_on_write_operations=0; DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_6', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; +CREATE TABLE test (key UInt32, value String) +Engine=MergeTree() +ORDER BY key +SETTINGS min_bytes_for_wide_part = 10485760, + compress_marks=false, + compress_primary_key=false, + disk = disk( + type = cache, + max_size = '128Mi', + path = '/var/lib/clickhouse/${CLICKHOUSE_TEST_UNIQUE_NAME}_cache', + enable_bypass_cache_with_threashold = 1, + bypass_cache_threashold = 100, + cache_on_write_operations = 1, + enable_filesystem_query_cache_limit = 1, + do_not_evict_index_and_mark_files = 0, + delayed_cleanup_interval_ms = 100, + disk = 's3_disk'); + INSERT INTO test SELECT number, toString(number) FROM numbers(100); SELECT * FROM test FORMAT Null; diff --git a/tests/queries/0_stateless/02240_filesystem_query_cache.sql b/tests/queries/0_stateless/02240_filesystem_query_cache.sql index 94eb4bc5ccd..f2664de5c63 100644 --- a/tests/queries/0_stateless/02240_filesystem_query_cache.sql +++ b/tests/queries/0_stateless/02240_filesystem_query_cache.sql @@ -8,7 +8,23 @@ SET skip_download_if_exceeds_query_cache=1; SET filesystem_cache_max_download_size=128; DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_4', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; + +CREATE TABLE test (key UInt32, value String) +Engine=MergeTree() +ORDER BY key +SETTINGS min_bytes_for_wide_part = 10485760, + compress_marks=false, + compress_primary_key=false, + disk = disk( + type = cache, + max_size = '128Mi', + path = '/var/lib/clickhouse/${CLICKHOUSE_TEST_UNIQUE_NAME}_cache', + cache_on_write_operations= 1, + enable_filesystem_query_cache_limit = 1, + do_not_evict_index_and_mark_files = 0, + delayed_cleanup_interval_ms = 100, + disk = 's3_disk'); + INSERT INTO test SELECT number, toString(number) FROM numbers(100); SELECT * FROM test FORMAT Null; diff --git a/tests/queries/0_stateless/02344_describe_cache.reference b/tests/queries/0_stateless/02344_describe_cache.reference index 7561b32bae1..4302b05e136 100644 --- a/tests/queries/0_stateless/02344_describe_cache.reference +++ b/tests/queries/0_stateless/02344_describe_cache.reference @@ -1,2 +1 @@ 134217728 1048576 104857600 1 0 0 0 /var/lib/clickhouse/caches/s3_cache/ 0 -134217728 1048576 104857600 0 0 0 0 /var/lib/clickhouse/caches/s3_cache_2/ 0 diff --git a/tests/queries/0_stateless/02344_describe_cache.sql b/tests/queries/0_stateless/02344_describe_cache.sql index a687ad01394..9c5c5c10952 100644 --- a/tests/queries/0_stateless/02344_describe_cache.sql +++ b/tests/queries/0_stateless/02344_describe_cache.sql @@ -1,7 +1,4 @@ -- Tags: no-fasttest, no-parallel SYSTEM DROP FILESYSTEM CACHE 's3_cache'; -SYSTEM DROP FILESYSTEM CACHE 's3_cache_2'; - DESCRIBE FILESYSTEM CACHE 's3_cache'; -DESCRIBE FILESYSTEM CACHE 's3_cache_2'; diff --git 
a/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh b/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh index ed66c36b823..2c526d10cc9 100755 --- a/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh +++ b/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh @@ -13,7 +13,22 @@ function random { ${CLICKHOUSE_CLIENT} --multiline --multiquery -q " drop table if exists ttt; -create table ttt (id Int32, value String) engine=MergeTree() order by tuple() settings storage_policy='s3_cache_small_segment_size', min_bytes_for_wide_part=0; + +CREATE TABLE ttt (id Int32, value String) +Engine=MergeTree() +ORDER BY tuple() +SETTINGS min_bytes_for_wide_part = 0, + disk = disk( + type = cache, + max_size = '128Mi', + max_file_segment_size = '10Ki', + path = '/var/lib/clickhouse/${CLICKHOUSE_TEST_UNIQUE_NAME}_cache', + cache_on_write_operations = 1, + enable_filesystem_query_cache_limit = 1, + do_not_evict_index_and_mark_files = 0, + delayed_cleanup_interval_ms = 100, + disk = 's3_disk'); + insert into ttt select number, toString(number) from numbers(100000) settings throw_on_error_from_cache_on_write_operations = 1; " From 2b08801ae9bc1ca456247282ebfe060a9df0bce4 Mon Sep 17 00:00:00 2001 From: zvonand Date: Fri, 5 May 2023 15:50:19 +0200 Subject: [PATCH 0115/1997] add timezone param --- src/IO/WriteHelpers.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index 9ee11d3cc9f..8a7cd72f79a 100644 --- a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -883,7 +883,7 @@ inline void writeText(is_enum auto x, WriteBuffer & buf) { writeText(magic_enum: inline void writeText(std::string_view x, WriteBuffer & buf) { writeString(x.data(), x.size(), buf); } -inline void writeText(const DayNum & x, WriteBuffer & buf) { writeDateText(LocalDate(x), buf); } +inline void writeText(const DayNum & x, WriteBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance()) { writeDateText(LocalDate(x, time_zone), buf); } inline void writeText(const LocalDate & x, WriteBuffer & buf) { writeDateText(x, buf); } inline void writeText(const LocalDateTime & x, WriteBuffer & buf) { writeDateTimeText(x, buf); } inline void writeText(const UUID & x, WriteBuffer & buf) { writeUUIDText(x, buf); } From 431b2e94a936f5021b6d834994614d8d03ddafcb Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Fri, 5 May 2023 19:10:12 +0200 Subject: [PATCH 0116/1997] Update storage_conf.xml --- tests/config/config.d/storage_conf.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/config/config.d/storage_conf.xml b/tests/config/config.d/storage_conf.xml index 98798d5746e..923240d5a91 100644 --- a/tests/config/config.d/storage_conf.xml +++ b/tests/config/config.d/storage_conf.xml @@ -22,7 +22,7 @@ cache - s3_disk_6 + s3_disk s3_cache_small/ 1000 1 From 54ef6769f1131e49c87235c6948a39951adeae49 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 5 May 2023 18:30:08 +0000 Subject: [PATCH 0117/1997] Refactor PreparedSets [4] --- src/Interpreters/PreparedSets.cpp | 8 +- src/Interpreters/PreparedSets.h | 3 +- src/Planner/PlannerActionsVisitor.cpp | 20 +- .../optimizePrimaryKeyCondition.cpp | 2 + .../QueryPlan/ReadFromMergeTree.cpp | 173 ++++++++++++------ src/Processors/QueryPlan/ReadFromMergeTree.h | 11 +- .../QueryPlan/SourceStepWithFilter.h | 2 + .../Transforms/CreatingSetsTransform.cpp | 1 + 
src/Storages/MergeTree/KeyCondition.cpp | 10 +- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 4 +- 10 files changed, 160 insertions(+), 74 deletions(-) diff --git a/src/Interpreters/PreparedSets.cpp b/src/Interpreters/PreparedSets.cpp index 733eb1c24bb..5d9a0f27496 100644 --- a/src/Interpreters/PreparedSets.cpp +++ b/src/Interpreters/PreparedSets.cpp @@ -221,7 +221,7 @@ std::unique_ptr FutureSetFromSubquery::buildPlan(const ContextPtr & c if (set) return nullptr; - std::cerr << StackTrace().toString() << std::endl; + // std::cerr << StackTrace().toString() << std::endl; auto set_cache = context->getPreparedSetsCache(); if (set_cache) @@ -248,6 +248,10 @@ std::unique_ptr FutureSetFromSubquery::buildPlan(const ContextPtr & c auto plan = subquery.detachSource(); auto description = subquery.key; + // WriteBufferFromOwnString buf; + // plan->explainPlan(buf, {.header=true}); + // std::cerr << buf.str() << std::endl; + auto creating_set = std::make_unique( plan->getCurrentDataStream(), description, @@ -279,7 +283,7 @@ SizeLimits FutureSet::getSizeLimitsForSet(const Settings & settings, bool ordere return ordered_set ? getSizeLimitsForOrderedSet(settings) : getSizeLimitsForUnorderedSet(settings); } -FutureSetFromTuple::FutureSetFromTuple(Block block_) : block(std::move(block_)) {} +FutureSetFromTuple::FutureSetFromTuple(Block block_) : block(std::move(block_)) { std::cerr << block.dumpStructure() << std::endl; } FutureSetFromSubquery::FutureSetFromSubquery(SubqueryForSet subquery_) : subquery(std::move(subquery_)) {} diff --git a/src/Interpreters/PreparedSets.h b/src/Interpreters/PreparedSets.h index 8661d81a96a..b4d01754ea8 100644 --- a/src/Interpreters/PreparedSets.h +++ b/src/Interpreters/PreparedSets.h @@ -16,6 +16,7 @@ #include "Processors/Executors/CompletedPipelineExecutor.h" #include "Processors/QueryPlan/BuildQueryPipelineSettings.h" #include "Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h" +#include "Processors/Sinks/EmptySink.h" #include "Processors/Sinks/NullSink.h" #include @@ -171,7 +172,7 @@ public: auto builder = plan->buildQueryPipeline(QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context)); auto pipeline = QueryPipelineBuilder::getPipeline(std::move(*builder)); - pipeline.complete(std::make_shared(Block())); + pipeline.complete(std::make_shared(Block())); CompletedPipelineExecutor executor(pipeline); executor.execute(); diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index 453b02a2f8f..e0844a6d2b1 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -629,14 +629,20 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::ma //auto set_key = planner_context->createSetKey(in_second_argument); - DataTypes set_element_types = {in_first_argument->getResultType()}; - const auto * left_tuple_type = typeid_cast(set_element_types.front().get()); - if (left_tuple_type && left_tuple_type->getElements().size() != 1) - set_element_types = left_tuple_type->getElements(); + DataTypes set_element_types; - for (auto & element_type : set_element_types) - if (const auto * low_cardinality_type = typeid_cast(element_type.get())) - element_type = low_cardinality_type->getDictionaryType(); + auto in_second_argument_node_type = in_second_argument->getNodeType(); + if (!(in_second_argument_node_type == QueryTreeNodeType::QUERY || in_second_argument_node_type == QueryTreeNodeType::UNION)) + { + set_element_types = 
{in_first_argument->getResultType()}; + const auto * left_tuple_type = typeid_cast(set_element_types.front().get()); + if (left_tuple_type && left_tuple_type->getElements().size() != 1) + set_element_types = left_tuple_type->getElements(); + + for (auto & element_type : set_element_types) + if (const auto * low_cardinality_type = typeid_cast(element_type.get())) + element_type = low_cardinality_type->getDictionaryType(); + } auto set_key = PreparedSetKey::forLiteral(in_second_argument->getTreeHash(), set_element_types); diff --git a/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyCondition.cpp b/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyCondition.cpp index e98386a6ee9..5ef786ff975 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyCondition.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyCondition.cpp @@ -28,6 +28,8 @@ void optimizePrimaryKeyCondition(const Stack & stack) else break; } + + source_step_with_filter->onAddFilterFinish(); } } diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 185ec9bace8..ad63b486c7c 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -247,7 +247,7 @@ ReadFromMergeTree::ReadFromMergeTree( { /// build sort description for output stream SortDescription sort_description; - const Names & sorting_key_columns = storage_snapshot->getMetadataForQuery()->getSortingKeyColumns(); + const Names & sorting_key_columns = metadata_for_reading->getSortingKeyColumns(); const Block & header = output_stream->header; const int sort_direction = getSortDirection(); for (const auto & column_name : sorting_key_columns) @@ -1118,7 +1118,7 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToRead(Merge prewhere_info, filter_nodes, storage_snapshot->metadata, - storage_snapshot->getMetadataForQuery(), + metadata_for_reading, query_info, context, requested_num_streams, @@ -1126,7 +1126,90 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToRead(Merge data, real_column_names, sample_factor_column_queried, - log); + log, + key_condition); +} + +static ActionsDAGPtr buildFilterDAG( + const ContextPtr & context, + const PrewhereInfoPtr & prewhere_info, + const ActionDAGNodes & added_filter_nodes, + const SelectQueryInfo & query_info) +{ + const auto & settings = context->getSettingsRef(); + ActionsDAG::NodeRawConstPtrs nodes; + + if (prewhere_info) + { + { + const auto & node = prewhere_info->prewhere_actions->findInOutputs(prewhere_info->prewhere_column_name); + nodes.push_back(&node); + } + + if (prewhere_info->row_level_filter) + { + const auto & node = prewhere_info->row_level_filter->findInOutputs(prewhere_info->row_level_column_name); + nodes.push_back(&node); + } + } + + for (const auto & node : added_filter_nodes.nodes) + nodes.push_back(node); + + std::unordered_map node_name_to_input_node_column; + + if (settings.allow_experimental_analyzer && query_info.planner_context) + { + const auto & table_expression_data = query_info.planner_context->getTableExpressionDataOrThrow(query_info.table_expression); + for (const auto & [column_identifier, column_name] : table_expression_data.getColumnIdentifierToColumnName()) + { + const auto & column = table_expression_data.getColumnOrThrow(column_name); + node_name_to_input_node_column.emplace(column_identifier, ColumnWithTypeAndName(column.type, column_name)); + } + } + + return ActionsDAG::buildFilterActionsDAG(nodes, 
node_name_to_input_node_column, context); +} + +static void buildKeyCondition( + std::optional & key_condition, + ActionsDAGPtr filter_actions_dag, + const ContextPtr & context, + const SelectQueryInfo & query_info, + const StorageMetadataPtr & metadata_snapshot) +{ + key_condition.reset(); + + // Build and check if primary key is used when necessary + const auto & primary_key = metadata_snapshot->getPrimaryKey(); + const Names & primary_key_column_names = primary_key.column_names; + + const auto & settings = context->getSettingsRef(); + if (settings.query_plan_optimize_primary_key) + { + NameSet array_join_name_set; + if (query_info.syntax_analyzer_result) + array_join_name_set = query_info.syntax_analyzer_result->getArrayJoinSourceNameSet(); + + key_condition.emplace(filter_actions_dag, + context, + primary_key_column_names, + primary_key.expression, + array_join_name_set); + } + else + { + key_condition.emplace(query_info, context, primary_key_column_names, primary_key.expression); + } +} + +void ReadFromMergeTree::onAddFilterFinish() +{ + if (!filter_nodes.nodes.empty()) + { + auto filter_actions_dag = buildFilterDAG(context, prewhere_info, filter_nodes, query_info); + buildKeyCondition(key_condition, filter_actions_dag, context, query_info, metadata_for_reading); + } } MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToRead( @@ -1142,44 +1225,14 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToRead( const MergeTreeData & data, const Names & real_column_names, bool sample_factor_column_queried, - Poco::Logger * log) + Poco::Logger * log, + std::optional & key_condition) { const auto & settings = context->getSettingsRef(); if (settings.allow_experimental_analyzer || settings.query_plan_optimize_primary_key) { - ActionsDAG::NodeRawConstPtrs nodes; - - if (prewhere_info) - { - { - const auto & node = prewhere_info->prewhere_actions->findInOutputs(prewhere_info->prewhere_column_name); - nodes.push_back(&node); - } - - if (prewhere_info->row_level_filter) - { - const auto & node = prewhere_info->row_level_filter->findInOutputs(prewhere_info->row_level_column_name); - nodes.push_back(&node); - } - } - - for (const auto & node : added_filter_nodes.nodes) - nodes.push_back(node); - - std::unordered_map node_name_to_input_node_column; - - if (settings.allow_experimental_analyzer && query_info.planner_context) - { - const auto & table_expression_data = query_info.planner_context->getTableExpressionDataOrThrow(query_info.table_expression); - for (const auto & [column_identifier, column_name] : table_expression_data.getColumnIdentifierToColumnName()) - { - const auto & column = table_expression_data.getColumnOrThrow(column_name); - node_name_to_input_node_column.emplace(column_identifier, ColumnWithTypeAndName(column.type, column_name)); - } - } - auto updated_query_info_with_filter_dag = query_info; - updated_query_info_with_filter_dag.filter_actions_dag = ActionsDAG::buildFilterActionsDAG(nodes, node_name_to_input_node_column, context); + updated_query_info_with_filter_dag.filter_actions_dag = buildFilterDAG(context, prewhere_info, added_filter_nodes, query_info); return selectRangesToReadImpl( parts, @@ -1192,7 +1245,8 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToRead( data, real_column_names, sample_factor_column_queried, - log); + log, + key_condition); } return selectRangesToReadImpl( @@ -1206,7 +1260,8 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToRead( data, real_column_names, 
sample_factor_column_queried, - log); + log, + key_condition); } MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( @@ -1220,7 +1275,8 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( const MergeTreeData & data, const Names & real_column_names, bool sample_factor_column_queried, - Poco::Logger * log) + Poco::Logger * log, + std::optional & key_condition) { AnalysisResult result; const auto & settings = context->getSettingsRef(); @@ -1246,24 +1302,29 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( // Build and check if primary key is used when necessary const auto & primary_key = metadata_snapshot->getPrimaryKey(); const Names & primary_key_column_names = primary_key.column_names; - std::optional key_condition; - if (settings.query_plan_optimize_primary_key) - { - NameSet array_join_name_set; - if (query_info.syntax_analyzer_result) - array_join_name_set = query_info.syntax_analyzer_result->getArrayJoinSourceNameSet(); + // if (!key_condition) + // { + // if (settings.query_plan_optimize_primary_key) + // { + // NameSet array_join_name_set; + // if (query_info.syntax_analyzer_result) + // array_join_name_set = query_info.syntax_analyzer_result->getArrayJoinSourceNameSet(); - key_condition.emplace(query_info.filter_actions_dag, - context, - primary_key_column_names, - primary_key.expression, - array_join_name_set); - } - else - { - key_condition.emplace(query_info, context, primary_key_column_names, primary_key.expression); - } + // key_condition.emplace(query_info.filter_actions_dag, + // context, + // primary_key_column_names, + // primary_key.expression, + // array_join_name_set); + // } + // else + // { + // key_condition.emplace(query_info, context, primary_key_column_names, primary_key.expression); + // } + // } + + if (!key_condition) + buildKeyCondition(key_condition, query_info.filter_actions_dag, context, query_info, metadata_snapshot); if (settings.force_primary_key && key_condition->alwaysUnknownOrTrue()) { @@ -1395,7 +1456,7 @@ bool ReadFromMergeTree::requestReadingInOrder(size_t prefix_size, int direction, /// update sort info for output stream SortDescription sort_description; - const Names & sorting_key_columns = storage_snapshot->getMetadataForQuery()->getSortingKeyColumns(); + const Names & sorting_key_columns = metadata_for_reading->getSortingKeyColumns(); const Block & header = output_stream->header; const int sort_direction = getSortDirection(); for (const auto & column_name : sorting_key_columns) diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index 5e4ba117967..121970f2ca9 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -145,7 +145,8 @@ public: const MergeTreeData & data, const Names & real_column_names, bool sample_factor_column_queried, - Poco::Logger * log); + Poco::Logger * log, + std::optional & key_condition); MergeTreeDataSelectAnalysisResultPtr selectRangesToRead(MergeTreeData::DataPartsVector parts) const; @@ -177,6 +178,8 @@ public: size_t getNumStreams() const { return requested_num_streams; } bool isParallelReadingEnabled() const { return read_task_callback != std::nullopt; } + void onAddFilterFinish() override; + private: static MergeTreeDataSelectAnalysisResultPtr selectRangesToReadImpl( MergeTreeData::DataPartsVector parts, @@ -189,7 +192,8 @@ private: const MergeTreeData & data, const Names & real_column_names, bool 
sample_factor_column_queried,
-        Poco::Logger * log);
+        Poco::Logger * log,
+        std::optional & key_condition);
     int getSortDirection() const
     {
@@ -228,6 +232,9 @@ private:
     std::shared_ptr max_block_numbers_to_read;
+    /// Pre-computed value, needed to trigger sets creation for PK
+    mutable std::optional key_condition;
+
     Poco::Logger * log;
     UInt64 selected_parts = 0;
     UInt64 selected_rows = 0;

diff --git a/src/Processors/QueryPlan/SourceStepWithFilter.h b/src/Processors/QueryPlan/SourceStepWithFilter.h
index a363451fff2..34b6e3c6a7b 100644
--- a/src/Processors/QueryPlan/SourceStepWithFilter.h
+++ b/src/Processors/QueryPlan/SourceStepWithFilter.h
@@ -37,6 +37,8 @@ public:
         filter_dags.push_back(std::move(filter_dag));
     }
+    virtual void onAddFilterFinish() {}
+
 protected:
     std::vector filter_dags;
     ActionDAGNodes filter_nodes;

diff --git a/src/Processors/Transforms/CreatingSetsTransform.cpp b/src/Processors/Transforms/CreatingSetsTransform.cpp
index de10be599c8..6626d4b9795 100644
--- a/src/Processors/Transforms/CreatingSetsTransform.cpp
+++ b/src/Processors/Transforms/CreatingSetsTransform.cpp
@@ -133,6 +133,7 @@ void CreatingSetsTransform::init()
     if (subquery.set)
     {
+        //std::cerr << "=========== " << getInputPort().getHeader().dumpStructure() << std::endl;
         subquery.set->setHeader(getInputPort().getHeader().getColumnsWithTypeAndName());
     }

diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp
index 7a1c3b10c8a..efeb9e40dd4 100644
--- a/src/Storages/MergeTree/KeyCondition.cpp
+++ b/src/Storages/MergeTree/KeyCondition.cpp
@@ -1204,17 +1204,17 @@ bool KeyCondition::tryPrepareSetIndex(
     const auto right_arg = func.getArgumentAt(1);
-    LOG_TRACE(&Poco::Logger::get("KK"), "Trying to get set for {}", right_arg.getColumnName());
+    // LOG_TRACE(&Poco::Logger::get("KK"), "Trying to get set for {}", right_arg.getColumnName());
     auto future_set = right_arg.tryGetPreparedSet(indexes_mapping, data_types);
     if (!future_set)
         return false;
-    LOG_TRACE(&Poco::Logger::get("KK"), "Found set for {}", right_arg.getColumnName());
+    // LOG_TRACE(&Poco::Logger::get("KK"), "Found set for {}", right_arg.getColumnName());
     if (!future_set->isReady())
     {
-        LOG_TRACE(&Poco::Logger::get("KK"), "Building set inplace for {}", right_arg.getColumnName());
+        // LOG_TRACE(&Poco::Logger::get("KK"), "Building set inplace for {}", right_arg.getColumnName());
         future_set->buildOrderedSetInplace(right_arg.getTreeContext().getQueryContext());
     }
@@ -1222,13 +1222,13 @@ bool KeyCondition::tryPrepareSetIndex(
     if (!prepared_set)
         return false;
-    LOG_TRACE(&Poco::Logger::get("KK"), "Set if ready for {}", right_arg.getColumnName());
+    // LOG_TRACE(&Poco::Logger::get("KK"), "Set if ready for {}", right_arg.getColumnName());
     /// The index can be prepared if the elements of the set were saved in advance.
    if (!prepared_set->hasExplicitSetElements())
         return false;
-    LOG_TRACE(&Poco::Logger::get("KK"), "Has explicit elements for {}", right_arg.getColumnName());
+    // LOG_TRACE(&Poco::Logger::get("KK"), "Has explicit elements for {}", right_arg.getColumnName());
     prepared_set->checkColumnsNumber(left_args_count);
     for (size_t i = 0; i < indexes_mapping.size(); ++i)

diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
index 22df8f298c4..aae8f843a3c 100644
--- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
@@ -1291,6 +1291,7 @@ MergeTreeDataSelectAnalysisResultPtr MergeTreeDataSelectExecutor::estimateNumMar
     selectColumnNames(column_names_to_return, data, real_column_names, virt_column_names, sample_factor_column_queried);
+    std::optional key_condition;
     return ReadFromMergeTree::selectRangesToRead(
         std::move(parts),
         prewhere_info,
@@ -1304,7 +1305,8 @@ MergeTreeDataSelectAnalysisResultPtr MergeTreeDataSelectExecutor::estimateNumMar
         data,
         real_column_names,
         sample_factor_column_queried,
-        log);
+        log,
+        key_condition);
 }

 QueryPlanStepPtr MergeTreeDataSelectExecutor::readFromParts(

From 18d1a4356d2ba1e7502d0ba207e6ac8f53fc3e02 Mon Sep 17 00:00:00 2001
From: tpanetti
Date: Fri, 5 May 2023 12:17:35 -0700
Subject: [PATCH 0119/1997] Change SHOW COLUMNS query to display MySQL types
 in MySQL Compatibility mode

This updates the SHOW COLUMNS SQL query to display MySQL types when the
query is issued by a client connected via the MySQL compatibility port
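A hedged sketch of the resulting behavior, assuming a session over the MySQL compatibility port (9004 by default) and a hypothetical table; the reported type names follow the CASE mapping in the diff below:

    CREATE TABLE tab (id UInt64, s Nullable(String), d DateTime) ENGINE = MergeTree ORDER BY id;
    -- Issued through the MySQL protocol, SHOW COLUMNS now reports MySQL type
    -- names, e.g. 'text' for Nullable(String); non-Nullable types get an
    -- analogous mapping.
    SHOW COLUMNS FROM tab;
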
+26,17 @@ String InterpreterShowColumnsQuery::getRewrittenQuery() WriteBufferFromOwnString rewritten_query; - rewritten_query << "SELECT name AS field, type AS type, startsWith(type, 'Nullable') AS null, trim(concatWithSeparator(' ', if(is_in_primary_key, 'PRI', ''), if (is_in_sorting_key, 'SOR', ''))) AS key, if(default_kind IN ('ALIAS', 'DEFAULT', 'MATERIALIZED'), default_expression, NULL) AS default, '' AS extra "; - // TODO Interpret query.extended. It is supposed to show internal/virtual columns. Need to fetch virtual column names, see // IStorage::getVirtuals(). We can't easily do that via SQL. + // If connected via MySQL Compatibility mode, convert ClickHouse types to MySQL + if (getContext()->getClientInfo().interface == DB::ClientInfo::Interface::MYSQL) + { + rewritten_query << getMySQLQuery(); + } + else { + rewritten_query << "SELECT name AS field, type AS type, startsWith(type, 'Nullable') AS null, trim(concatWithSeparator(' ', if(is_in_primary_key, 'PRI', ''), if (is_in_sorting_key, 'SOR', ''))) AS key, if(default_kind IN ('ALIAS', 'DEFAULT', 'MATERIALIZED'), default_expression, NULL) AS default, '' AS extra "; + } if (query.full) { /// "Full" mode is mostly for MySQL compat @@ -93,6 +99,74 @@ String InterpreterShowColumnsQuery::getRewrittenQuery() } +String InterpreterShowColumnsQuery::getMySQLQuery() +{ + WriteBufferFromOwnString mysql_specific_query; + + mysql_specific_query << "SELECT name AS field, " + << "CASE " + << " WHEN startsWith(type, 'Nullable') THEN " + << " CASE " + << " WHEN substring(type, 10, length(type) - 10) IN ('UInt8', 'Int8') THEN 'tinyint' " + << " WHEN substring(type, 10, length(type) - 10) IN ('UInt16', 'Int16') THEN 'smallint' " + << " WHEN substring(type, 10, length(type) - 10) IN ('UInt32', 'Int32') THEN 'int' " + << " WHEN substring(type, 10, length(type) - 10) IN ('UInt64', 'Int64', 'UInt128', 'Int128', 'UInt256', 'Int256') THEN 'bigint' " + << " WHEN substring(type, 10, length(type) - 10) = 'Float32' THEN 'float' " + << " WHEN substring(type, 10, length(type) - 10) = 'Float64' THEN 'double' " + << " WHEN substring(type, 10, length(type) - 10) LIKE 'Decimal%' THEN 'decimal' " + << " WHEN substring(type, 10, length(type) - 10) = 'Boolean' THEN 'tinyint' " + << " WHEN substring(type, 10, length(type) - 10) = 'String' THEN 'text' " + << " WHEN substring(type, 10, length(type) - 10) LIKE 'FixedString%' THEN 'text' " + << " WHEN substring(type, 10, length(type) - 10) LIKE 'Date%' THEN 'date' " + << " WHEN substring(type, 10, length(type) - 10) LIKE 'DateTime%' THEN 'datetime' " + << " WHEN substring(type, 10, length(type) - 10) = 'JSON' THEN 'json' " + << " WHEN substring(type, 10, length(type) - 10) = 'UUID' THEN 'binary' " + << " WHEN substring(type, 10, length(type) - 10) LIKE 'Enum%' THEN 'enum' " + << " WHEN substring(type, 10, length(type) - 10) LIKE 'LowCardinality%' THEN 'text' " + << " WHEN substring(type, 10, length(type) - 10) LIKE 'Array%' THEN 'json' " + << " WHEN substring(type, 10, length(type) - 10) LIKE 'Map%' THEN 'json' " + << " WHEN substring(type, 10, length(type) - 10) IN ('SimpleAggregateFunction', 'AggregateFunction') THEN 'text' " + << " WHEN substring(type, 10, length(type) - 10) = 'Nested' THEN 'json' " + << " WHEN substring(type, 10, length(type) - 10) LIKE 'Tuple%' THEN 'json' " + << " WHEN substring(type, 10, length(type) - 10) LIKE 'IPv%' THEN 'text' " + << " WHEN substring(type, 10, length(type) - 10) IN ('Expression', 'Set', 'Nothing', 'Interval') THEN 'text' " + << " ELSE substring(type, 10, length(type) - 10) " + << 
" END " + << " ELSE " + << " CASE " + << " WHEN type IN ('UInt8', 'Int8') THEN 'tinyint' " + << " WHEN type IN ('UInt16', 'Int16') THEN 'smallint' " + << " WHEN type IN ('UInt32', 'Int32') THEN 'int' " + << " WHEN type IN ('UInt64', 'Int64', 'UInt128', 'Int128', 'UInt256', 'Int256') THEN 'bigint' " + << " WHEN type = 'Float32' THEN 'float' " + << " WHEN type = 'Float64' THEN 'double' " + << " WHEN type LIKE 'Decimal%' THEN 'decimal' " + << " WHEN type = 'Boolean' THEN 'tinyint' " + << " WHEN type = 'String' THEN 'text' " + << " WHEN type LIKE 'FixedString%' THEN 'text' " + << " WHEN type LIKE 'Date%' THEN 'date' " + << " WHEN type LIKE 'DateTime%' THEN 'datetime' " + << " WHEN type = 'JSON' THEN 'json' " + << " WHEN type = 'UUID' THEN 'binary' " + << " WHEN type LIKE 'Enum%' THEN 'enum' " + << " WHEN type LIKE 'LowCardinality%' THEN 'text' " + << " WHEN type LIKE 'Array%' THEN 'json' " + << " WHEN type LIKE 'Map%' THEN 'json' " + << " WHEN type IN ('SimpleAggregateFunction', 'AggregateFunction') THEN 'text' " + << " WHEN type = 'Nested' THEN 'json' " + << " WHEN type LIKE 'Tuple%' THEN 'json' " + << " WHEN type LIKE 'IPv%' THEN 'text' " + << " WHEN type IN ('Expression', 'Set', 'Nothing', 'Interval') THEN 'text' " + << " ELSE type " + << " END " + << "END AS type, " + << "startsWith(type, 'Nullable') AS null, " + << "trim(concatWithSeparator(' ', if(is_in_primary_key, 'PRI', ''), if (is_in_sorting_key, 'SOR', ''))) AS key, " + << "if(default_kind IN ('ALIAS', 'DEFAULT', 'MATERIALIZED'), default_expression, NULL) AS default, " + << "'' AS extra "; + + return mysql_specific_query.str(); +} BlockIO InterpreterShowColumnsQuery::execute() { diff --git a/src/Interpreters/InterpreterShowColumnsQuery.h b/src/Interpreters/InterpreterShowColumnsQuery.h index ee6dcabd97b..b843a163978 100644 --- a/src/Interpreters/InterpreterShowColumnsQuery.h +++ b/src/Interpreters/InterpreterShowColumnsQuery.h @@ -26,6 +26,7 @@ private: ASTPtr query_ptr; String getRewrittenQuery(); + String getMySQLQuery(); }; diff --git a/tests/queries/0_stateless/02726_show_columns_mysql_compatibility.reference b/tests/queries/0_stateless/02726_show_columns_mysql_compatibility.reference new file mode 100644 index 00000000000..c9ad94a34c4 --- /dev/null +++ b/tests/queries/0_stateless/02726_show_columns_mysql_compatibility.reference @@ -0,0 +1,213 @@ +Drop tables if they exist +Create tab table +Create pseudo-random database name +Create tab duplicate table +Run MySQL test +field type null key default extra +array_value json 0 NULL +boolean_value tinyint 0 NULL +date32_value date 0 NULL +date_value date 0 NULL +datetime64_value date 0 NULL +datetime_value date 0 NULL +decimal_value decimal 0 NULL +enum_value enum 0 NULL +fixed_string_value text 0 NULL +float32 float 0 NULL +float64 double 0 NULL +int32 int 0 NULL +ipv4_value text 0 NULL +ipv6_value text 0 NULL +json_value text 0 NULL +low_cardinality text 0 NULL +map_value json 0 NULL +nested.nested_int json 0 NULL +nested.nested_string json 0 NULL +nullable_value int 0 NULL +string_value text 0 NULL +tuple_value json 0 NULL +uint64 bigint 0 PRI SOR NULL +uuid_value binary 0 NULL +field type null key default extra +array_value json 0 NULL +boolean_value tinyint 0 NULL +date32_value date 0 NULL +date_value date 0 NULL +datetime64_value date 0 NULL +datetime_value date 0 NULL +decimal_value decimal 0 NULL +enum_value enum 0 NULL +fixed_string_value text 0 NULL +float32 float 0 NULL +float64 double 0 NULL +int32 int 0 NULL +ipv4_value text 0 NULL +ipv6_value text 0 NULL +json_value 
text 0 NULL +low_cardinality text 0 NULL +map_value json 0 NULL +nested.nested_int json 0 NULL +nested.nested_string json 0 NULL +nullable_value int 0 NULL +string_value text 0 NULL +tuple_value json 0 NULL +uint64 bigint 0 PRI SOR NULL +uuid_value binary 0 NULL +field type null key default extra collation comment privileges +array_value json 0 NULL NULL +boolean_value tinyint 0 NULL NULL +date32_value date 0 NULL NULL +date_value date 0 NULL NULL +datetime64_value date 0 NULL NULL +datetime_value date 0 NULL NULL +decimal_value decimal 0 NULL NULL +enum_value enum 0 NULL NULL +fixed_string_value text 0 NULL NULL +float32 float 0 NULL NULL +float64 double 0 NULL NULL +int32 int 0 NULL NULL +ipv4_value text 0 NULL NULL +ipv6_value text 0 NULL NULL +json_value text 0 NULL NULL +low_cardinality text 0 NULL NULL +map_value json 0 NULL NULL +nested.nested_int json 0 NULL NULL +nested.nested_string json 0 NULL NULL +nullable_value int 0 NULL NULL +string_value text 0 NULL NULL +tuple_value json 0 NULL NULL +uint64 bigint 0 PRI SOR NULL NULL +uuid_value binary 0 NULL NULL +field type null key default extra +int32 int 0 NULL +nested.nested_int json 0 NULL +uint64 bigint 0 PRI SOR NULL +field type null key default extra +array_value json 0 NULL +boolean_value tinyint 0 NULL +date32_value date 0 NULL +date_value date 0 NULL +datetime64_value date 0 NULL +datetime_value date 0 NULL +decimal_value decimal 0 NULL +enum_value enum 0 NULL +fixed_string_value text 0 NULL +float32 float 0 NULL +float64 double 0 NULL +ipv4_value text 0 NULL +ipv6_value text 0 NULL +json_value text 0 NULL +low_cardinality text 0 NULL +map_value json 0 NULL +nested.nested_string json 0 NULL +nullable_value int 0 NULL +string_value text 0 NULL +tuple_value json 0 NULL +uuid_value binary 0 NULL +field type null key default extra +int32 int 0 NULL +nested.nested_int json 0 NULL +uint64 bigint 0 PRI SOR NULL +field type null key default extra +array_value json 0 NULL +boolean_value tinyint 0 NULL +date32_value date 0 NULL +date_value date 0 NULL +datetime64_value date 0 NULL +datetime_value date 0 NULL +decimal_value decimal 0 NULL +enum_value enum 0 NULL +fixed_string_value text 0 NULL +float32 float 0 NULL +float64 double 0 NULL +ipv4_value text 0 NULL +ipv6_value text 0 NULL +json_value text 0 NULL +low_cardinality text 0 NULL +map_value json 0 NULL +nested.nested_string json 0 NULL +nullable_value int 0 NULL +string_value text 0 NULL +tuple_value json 0 NULL +uuid_value binary 0 NULL +field type null key default extra +int32 int 0 NULL +nested.nested_int json 0 NULL +uint64 bigint 0 PRI SOR NULL +field type null key default extra +array_value json 0 NULL +field type null key default extra +array_value json 0 NULL +boolean_value tinyint 0 NULL +date32_value date 0 NULL +date_value date 0 NULL +datetime64_value date 0 NULL +datetime_value date 0 NULL +decimal_value decimal 0 NULL +enum_value enum 0 NULL +fixed_string_value text 0 NULL +float32 float 0 NULL +float64 double 0 NULL +int32 int 0 NULL +ipv4_value text 0 NULL +ipv6_value text 0 NULL +json_value text 0 NULL +low_cardinality text 0 NULL +map_value json 0 NULL +nested.nested_int json 0 NULL +nested.nested_string json 0 NULL +nullable_value int 0 NULL +string_value text 0 NULL +tuple_value json 0 NULL +uint64 bigint 0 PRI SOR NULL +uuid_value binary 0 NULL +field type null key default extra +array_value json 0 NULL +boolean_value tinyint 0 NULL +date32_value date 0 NULL +date_value date 0 NULL +datetime64_value date 0 NULL +datetime_value date 0 NULL +decimal_value 
decimal 0 NULL +enum_value enum 0 NULL +fixed_string_value text 0 NULL +float32 float 0 NULL +float64 double 0 NULL +int32 int 0 NULL +ipv4_value text 0 NULL +ipv6_value text 0 NULL +json_value text 0 NULL +low_cardinality text 0 NULL +map_value json 0 NULL +nested.nested_int json 0 NULL +nested.nested_string json 0 NULL +nullable_value int 0 NULL +string_value text 0 NULL +tuple_value json 0 NULL +uint64 bigint 0 PRI SOR NULL +uuid_value binary 0 NULL +field type null key default extra +array_value json 0 NULL +boolean_value tinyint 0 NULL +date32_value date 0 NULL +date_value date 0 NULL +datetime64_value date 0 NULL +datetime_value date 0 NULL +decimal_value decimal 0 NULL +enum_value enum 0 NULL +fixed_string_value text 0 NULL +float32 float 0 NULL +float64 double 0 NULL +int32 int 0 NULL +ipv4_value text 0 NULL +ipv6_value text 0 NULL +json_value text 0 NULL +low_cardinality text 0 NULL +map_value json 0 NULL +nested.nested_int json 0 NULL +nested.nested_string json 0 NULL +nullable_value int 0 NULL +string_value text 0 NULL +tuple_value json 0 NULL +uint64 bigint 0 PRI SOR NULL +uuid_value binary 0 NULL diff --git a/tests/queries/0_stateless/02726_show_columns_mysql_compatibility.sh b/tests/queries/0_stateless/02726_show_columns_mysql_compatibility.sh new file mode 100755 index 00000000000..5324496edd3 --- /dev/null +++ b/tests/queries/0_stateless/02726_show_columns_mysql_compatibility.sh @@ -0,0 +1,115 @@ +#!/bin/bash + +# This script tests the MySQL compatibility of the SHOW COLUMNS command in ClickHouse +USER="default" +PASSWORD="" +HOST="127.0.0.1" +PORT=9004 + +# First run the clickhouse test to create the ClickHouse Tables + +echo "Drop tables if they exist" +${CLICKHOUSE_LOCAL} --query "DROP TABLE IF EXISTS tab" +${CLICKHOUSE_LOCAL} --query "DROP TABLE IF EXISTS database_123456789abcde" +${CLICKHOUSE_LOCAL} --query "DROP TABLE IF EXISTS database_123456789abcde.tab" + +echo "Create tab table " +${CLICKHOUSE_LOCAL} --query " + CREATE TABLE tab + ( + uint64 UInt64, + int32 Nullable(Int32), + float32 Float32, + float64 Float64, + decimal_value Decimal(10, 2), + boolean_value UInt8, -- Use 0 for false, 1 for true + string_value String, + fixed_string_value FixedString(10), + date_value Date, + date32_value Date32, + datetime_value DateTime, + datetime64_value DateTime64(3), + json_value String, -- Store JSON as a string + uuid_value UUID, + enum_value Enum8('apple' = 1, 'banana' = 2, 'orange' = 3), + low_cardinality LowCardinality(String), + array_value Array(Int32), + map_value Map(String, Int32), + tuple_value Tuple(Int32, String), + nullable_value Nullable(Int32), + ipv4_value IPv4, + ipv6_value IPv6, + nested Nested + ( + nested_int Int32, + nested_string String + ) + ) ENGINE = MergeTree + ORDER BY uint64; + " + + +echo "Create pseudo-random database name" +${CLICKHOUSE_LOCAL} --query "CREATE DATABASE database_123456789abcde;" + +echo "Create tab duplicate table" +${CLICKHOUSE_LOCAL} --query " + CREATE TABLE database_123456789abcde.tab + ( + uint64 UInt64, + int32 Nullable(Int32), + float32 Float32, + float64 Float64, + decimal_value Decimal(10, 2), + boolean_value UInt8, -- Use 0 for false, 1 for true + string_value String, + fixed_string_value FixedString(10), + date_value Date, + date32_value Date32, + datetime_value DateTime, + datetime64_value DateTime64(3), + json_value String, -- Store JSON as a string + uuid_value UUID, + enum_value Enum8('apple' = 1, 'banana' = 2, 'orange' = 3), + low_cardinality LowCardinality(String), + array_value Array(Int32), + map_value 
Map(String, Int32), + tuple_value Tuple(Int32, String), + nullable_value Nullable(Int32), + ipv4_value IPv4, + ipv6_value IPv6, + nested Nested + ( + nested_int Int32, + nested_string String + ) + ) ENGINE = MergeTree + ORDER BY uint64; + "
+
+# Write sql to temp file
+TEMP_FILE=$(mktemp)
+
+cat <<EOT > $TEMP_FILE
+SHOW COLUMNS FROM tab;
+SHOW EXTENDED COLUMNS FROM tab;
+SHOW FULL COLUMNS FROM tab;
+SHOW COLUMNS FROM tab LIKE '%int%';
+SHOW COLUMNS FROM tab NOT LIKE '%int%';
+SHOW COLUMNS FROM tab ILIKE '%INT%';
+SHOW COLUMNS FROM tab NOT ILIKE '%INT%';
+SHOW COLUMNS FROM tab WHERE field LIKE '%int%';
+SHOW COLUMNS FROM tab LIMIT 1;
+SHOW COLUMNS FROM tab;
+SHOW COLUMNS FROM tab FROM database_123456789abcde;
+SHOW COLUMNS FROM database_123456789abcde.tab;
+DROP DATABASE database_123456789abcde;
+DROP TABLE tab;
+EOT
+
+# Now run the MySQL test script on the ClickHouse DB
+echo "Run MySQL test"
+mysql --user="$USER" --password="$PASSWORD" --host="$HOST" --port="$PORT" < $TEMP_FILE
+
+# Clean up the temp file
+rm $TEMP_FILE
From b2c36fc3e5a968cd3223261585cb00f89be2c783 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 5 May 2023 20:51:38 +0000 Subject: [PATCH 0120/1997] Fixing style.
--- src/Interpreters/PreparedSets.cpp | 7 ++++++- src/Processors/QueryPlan/ReadFromMergeTree.h | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/src/Interpreters/PreparedSets.cpp b/src/Interpreters/PreparedSets.cpp index 5d9a0f27496..1d7d90432b0 100644 --- a/src/Interpreters/PreparedSets.cpp +++ b/src/Interpreters/PreparedSets.cpp @@ -11,6 +11,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + PreparedSetKey PreparedSetKey::forLiteral(Hash hash, DataTypes types_) { /// Remove LowCardinality types from type list because Set doesn't support LowCardinality keys now, @@ -283,7 +288,7 @@ SizeLimits FutureSet::getSizeLimitsForSet(const Settings & settings, bool ordere return ordered_set ?
getSizeLimitsForOrderedSet(settings) : getSizeLimitsForUnorderedSet(settings); } -FutureSetFromTuple::FutureSetFromTuple(Block block_) : block(std::move(block_)) { std::cerr << block.dumpStructure() << std::endl; } +FutureSetFromTuple::FutureSetFromTuple(Block block_) : block(std::move(block_)) {} FutureSetFromSubquery::FutureSetFromSubquery(SubqueryForSet subquery_) : subquery(std::move(subquery_)) {} diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index 121970f2ca9..f13f75bfebc 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -232,7 +232,7 @@ private: std::shared_ptr max_block_numbers_to_read; - /// Pre-computed value, needed to trigger sets creatin for PK + /// Pre-computed value, needed to trigger sets creating for PK mutable std::optional key_condition; Poco::Logger * log; From 8c0b634a644ac85832658cc6ca863909ef455795 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Sun, 7 May 2023 14:38:27 +0200 Subject: [PATCH 0121/1997] Update storage_conf.xml --- tests/config/config.d/storage_conf.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/config/config.d/storage_conf.xml b/tests/config/config.d/storage_conf.xml index 923240d5a91..aad93a017c2 100644 --- a/tests/config/config.d/storage_conf.xml +++ b/tests/config/config.d/storage_conf.xml @@ -72,7 +72,7 @@ cache - s3_cache_5 + s3_cache s3_cache_multi/ 22548578304 0 From 726222f1ea69018115642156a06c64ec546244d0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 7 May 2023 19:33:11 +0200 Subject: [PATCH 0122/1997] Fix tests --- tests/queries/0_stateless/00956_sensitive_data_masking.sh | 2 +- tests/queries/0_stateless/01107_atomic_db_detach_attach.sh | 4 ++-- tests/queries/0_stateless/01114_database_atomic.sh | 6 +++--- .../queries/0_stateless/01192_rename_database_zookeeper.sh | 4 ++-- tests/queries/0_stateless/01238_http_memory_tracking.sh | 2 +- tests/queries/0_stateless/01338_long_select_and_alter.sh | 2 +- .../0_stateless/01338_long_select_and_alter_zookeeper.sh | 2 +- 7 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/queries/0_stateless/00956_sensitive_data_masking.sh b/tests/queries/0_stateless/00956_sensitive_data_masking.sh index a31a71ce381..926557e4ba6 100755 --- a/tests/queries/0_stateless/00956_sensitive_data_masking.sh +++ b/tests/queries/0_stateless/00956_sensitive_data_masking.sh @@ -65,7 +65,7 @@ echo 5 # run in background rm -f "$tmp_file2" >/dev/null 2>&1 bash -c "$CLICKHOUSE_CLIENT \ - --function_sleep_max_microseconds_per_block 60 \ + --function_sleep_max_microseconds_per_block 60000000 \ --query=\"select sleepEachRow(1) from numbers(10) where ignore('find_me_TOPSECRET=TOPSECRET')=0 and ignore('fwerkh_that_magic_string_make_me_unique') = 0 FORMAT Null\" \ --log_queries=1 --ignore-error --multiquery |& grep -v '^(query: ' > $tmp_file2" & diff --git a/tests/queries/0_stateless/01107_atomic_db_detach_attach.sh b/tests/queries/0_stateless/01107_atomic_db_detach_attach.sh index e2a23258584..bcaa70abbb5 100755 --- a/tests/queries/0_stateless/01107_atomic_db_detach_attach.sh +++ b/tests/queries/0_stateless/01107_atomic_db_detach_attach.sh @@ -9,7 +9,7 @@ $CLICKHOUSE_CLIENT -q "DROP DATABASE IF EXISTS test_01107" $CLICKHOUSE_CLIENT -q "CREATE DATABASE test_01107 ENGINE=Atomic" $CLICKHOUSE_CLIENT -q "CREATE TABLE test_01107.mt (n UInt64) ENGINE=MergeTree() ORDER BY tuple()" -$CLICKHOUSE_CLIENT 
--function_sleep_max_microseconds_per_block 60 -q "INSERT INTO test_01107.mt SELECT number + sleepEachRow(3) FROM numbers(5)" & +$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 60000000 -q "INSERT INTO test_01107.mt SELECT number + sleepEachRow(3) FROM numbers(5)" & sleep 1 $CLICKHOUSE_CLIENT -q "DETACH TABLE test_01107.mt" --database_atomic_wait_for_drop_and_detach_synchronously=0 @@ -23,7 +23,7 @@ $CLICKHOUSE_CLIENT -q "DETACH DATABASE test_01107" --database_atomic_wait_for_dr $CLICKHOUSE_CLIENT -q "ATTACH DATABASE test_01107" $CLICKHOUSE_CLIENT -q "SELECT count(n), sum(n) FROM test_01107.mt" -$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 60 -q "INSERT INTO test_01107.mt SELECT number + sleepEachRow(1) FROM numbers(5)" && echo "end" & +$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 60000000 -q "INSERT INTO test_01107.mt SELECT number + sleepEachRow(1) FROM numbers(5)" && echo "end" & sleep 1 $CLICKHOUSE_CLIENT -q "DROP DATABASE test_01107" --database_atomic_wait_for_drop_and_detach_synchronously=0 && sleep 1 && echo "dropped" wait diff --git a/tests/queries/0_stateless/01114_database_atomic.sh b/tests/queries/0_stateless/01114_database_atomic.sh index 634b19a7624..decbe136fc4 100755 --- a/tests/queries/0_stateless/01114_database_atomic.sh +++ b/tests/queries/0_stateless/01114_database_atomic.sh @@ -49,8 +49,8 @@ $CLICKHOUSE_CLIENT --show_table_uuid_in_table_create_query_if_not_nil=1 -q "SHOW $CLICKHOUSE_CLIENT -q "SELECT name, uuid, create_table_query FROM system.tables WHERE database='test_01114_2'" | sed "s/$explicit_uuid/00001114-0000-4000-8000-000000000002/g" -$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 60 -q "SELECT count(col), sum(col) FROM (SELECT n + sleepEachRow(1.5) AS col FROM test_01114_1.mt)" & # 33s (1.5s * 22 rows per partition), result: 110, 5995 -$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 60 -q "INSERT INTO test_01114_2.mt SELECT number + sleepEachRow(1.5) FROM numbers(30)" & # 45s (1.5s * 30 rows) +$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 60000000 -q "SELECT count(col), sum(col) FROM (SELECT n + sleepEachRow(1.5) AS col FROM test_01114_1.mt)" & # 33s (1.5s * 22 rows per partition), result: 110, 5995 +$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 60000000 -q "INSERT INTO test_01114_2.mt SELECT number + sleepEachRow(1.5) FROM numbers(30)" & # 45s (1.5s * 30 rows) sleep 1 # SELECT and INSERT should start before the following RENAMEs $CLICKHOUSE_CLIENT -nm -q " @@ -74,7 +74,7 @@ INSERT INTO test_01114_1.mt SELECT 's' || toString(number) FROM numbers(5); SELECT count() FROM test_01114_1.mt " # result: 5 -$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 60 -q "SELECT tuple(s, sleepEachRow(3)) FROM test_01114_1.mt" > /dev/null & # 15s (3s * 5 rows) +$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 60000000 -q "SELECT tuple(s, sleepEachRow(3)) FROM test_01114_1.mt" > /dev/null & # 15s (3s * 5 rows) sleep 1 $CLICKHOUSE_CLIENT -q "DROP DATABASE test_01114_1" --database_atomic_wait_for_drop_and_detach_synchronously=0 && echo "dropped" diff --git a/tests/queries/0_stateless/01192_rename_database_zookeeper.sh b/tests/queries/0_stateless/01192_rename_database_zookeeper.sh index ac516e83c84..6dd7ff3cdc8 100755 --- a/tests/queries/0_stateless/01192_rename_database_zookeeper.sh +++ b/tests/queries/0_stateless/01192_rename_database_zookeeper.sh @@ -20,7 +20,7 @@ $CLICKHOUSE_CLIENT -q "SELECT engine, splitByChar('/', data_path)[-2], 
uuid, spl # 3. check RENAME don't wait for INSERT $CLICKHOUSE_CLIENT -q "CREATE TABLE test_01192.mt (n UInt64) ENGINE=MergeTree ORDER BY n" -$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 15 -q "INSERT INTO test_01192.mt SELECT number + sleepEachRow(1.5) FROM numbers(10)" && echo "inserted" & +$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 15000000 -q "INSERT INTO test_01192.mt SELECT number + sleepEachRow(1.5) FROM numbers(10)" && echo "inserted" & sleep 1 $CLICKHOUSE_CLIENT -q "RENAME DATABASE test_01192 TO default" 2>&1| grep -F "already exists" > /dev/null && echo "ok" @@ -60,7 +60,7 @@ $CLICKHOUSE_CLIENT -q "SELECT database, name, status, origin FROM system.diction $CLICKHOUSE_CLIENT -q "SELECT dictGet('test_01192_atomic.dict', '_part', toUInt64(1))" # 8. check RENAME don't wait for INSERT -$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 10 -q "INSERT INTO test_01192_atomic.mt SELECT number + sleepEachRow(1) + 10 FROM numbers(10)" && echo "inserted" & +$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 10000000 -q "INSERT INTO test_01192_atomic.mt SELECT number + sleepEachRow(1) + 10 FROM numbers(10)" && echo "inserted" & sleep 1 $CLICKHOUSE_CLIENT --check_table_dependencies=0 -q "RENAME DATABASE test_01192 TO test_01192_renamed" 2>&1| grep -F "not supported" > /dev/null && echo "ok" diff --git a/tests/queries/0_stateless/01238_http_memory_tracking.sh b/tests/queries/0_stateless/01238_http_memory_tracking.sh index eb42159ce15..26d3dd8acd4 100755 --- a/tests/queries/0_stateless/01238_http_memory_tracking.sh +++ b/tests/queries/0_stateless/01238_http_memory_tracking.sh @@ -10,7 +10,7 @@ set -o pipefail # This is needed to keep at least one running query for user for the time of test. # (1k http queries takes ~1 second, let's run for 5x more to avoid flaps) -${CLICKHOUSE_CLIENT} --function_sleep_max_microseconds_per_block 5 --format Null -n <<<'SELECT sleepEachRow(1) FROM numbers(5)' & +${CLICKHOUSE_CLIENT} --function_sleep_max_microseconds_per_block 5000000 --format Null -n <<<'SELECT sleepEachRow(1) FROM numbers(5)' & # ignore "yes: standard output: Broken pipe" yes 'SELECT 1' 2>/dev/null | { diff --git a/tests/queries/0_stateless/01338_long_select_and_alter.sh b/tests/queries/0_stateless/01338_long_select_and_alter.sh index 04a10cfe55e..fcdfa2dec82 100755 --- a/tests/queries/0_stateless/01338_long_select_and_alter.sh +++ b/tests/queries/0_stateless/01338_long_select_and_alter.sh @@ -11,7 +11,7 @@ $CLICKHOUSE_CLIENT --query "CREATE TABLE alter_mt (key UInt64, value String) ENG $CLICKHOUSE_CLIENT --query "INSERT INTO alter_mt SELECT number, toString(number) FROM numbers(5)" -$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 10 --query "SELECT count(distinct concat(value, '_')) FROM alter_mt WHERE not sleepEachRow(2)" & +$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 10000000 --query "SELECT count(distinct concat(value, '_')) FROM alter_mt WHERE not sleepEachRow(2)" & # to be sure that select took all required locks sleep 2 diff --git a/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh b/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh index 829352110f6..50ade3fad45 100755 --- a/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh +++ b/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh @@ -11,7 +11,7 @@ $CLICKHOUSE_CLIENT --query "CREATE TABLE alter_mt (key UInt64, value String) ENG $CLICKHOUSE_CLIENT --query "INSERT INTO alter_mt 
SELECT number, toString(number) FROM numbers(5)" -$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 10 --query "SELECT count(distinct concat(value, '_')) FROM alter_mt WHERE not sleepEachRow(2)" & +$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 10000000 --query "SELECT count(distinct concat(value, '_')) FROM alter_mt WHERE not sleepEachRow(2)" & # to be sure that select took all required locks sleep 2 From e159ee84e918c587f873a27665ca346cb3b4f7db Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 7 May 2023 19:38:30 +0200 Subject: [PATCH 0123/1997] Fix tests --- .../0_stateless/01098_temporary_and_external_tables.sh | 2 +- .../01532_execute_merges_on_single_replica_long.sql | 2 +- tests/queries/0_stateless/02473_optimize_old_parts.sh | 2 +- tests/queries/0_stateless/02530_dictionaries_update_field.sh | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/01098_temporary_and_external_tables.sh b/tests/queries/0_stateless/01098_temporary_and_external_tables.sh index 860529a26e5..9ed78fd9f81 100755 --- a/tests/queries/0_stateless/01098_temporary_and_external_tables.sh +++ b/tests/queries/0_stateless/01098_temporary_and_external_tables.sh @@ -25,7 +25,7 @@ echo "SELECT COUNT() FROM $internal_table_name" | ${CLICKHOUSE_CURL} -m 60 -sSgk echo -ne '0\n1\n' | ${CLICKHOUSE_CURL} -m 30 -sSkF 'file=@-' "$url&file_format=CSV&file_types=UInt64&query=SELECT+sum((number+GLOBAL+IN+(SELECT+number+AS+n+FROM+remote('127.0.0.2',+numbers(5))+WHERE+n+GLOBAL+IN+(SELECT+*+FROM+tmp_table)+AND+n+GLOBAL+NOT+IN+(SELECT+*+FROM+file)+))+AS+res),+sum(number*res)+FROM+remote('127.0.0.2',+numbers(10))" -echo -ne '0\n1\n' | ${CLICKHOUSE_CURL} -m 30 -sSkF 'file=@-' "$url&file_format=CSV&file_types=UInt64&query=SELECT+_1%2BsleepEachRow(3)+FROM+file" & +echo -ne '0\n1\n' | ${CLICKHOUSE_CURL} -m 30 -sSkF 'file=@-' "$url&function_sleep_max_microseconds_per_block=0&file_format=CSV&file_types=UInt64&query=SELECT+_1%2BsleepEachRow(3)+FROM+file" & wait ${CLICKHOUSE_CURL} -m 30 -sSk "$url" --data "DROP TEMPORARY TABLE tmp_table" diff --git a/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql b/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql index 4bd5e79d1b3..30beb29251e 100644 --- a/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql +++ b/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql @@ -44,7 +44,7 @@ SYSTEM STOP REPLICATION QUEUES execute_on_single_replica_r2; OPTIMIZE TABLE execute_on_single_replica_r1 FINAL SETTINGS replication_alter_partitions_sync=0; /* if we will check immediately we can find the log entry unchecked */ -SET function_sleep_max_microseconds_per_block = 4000000; +SET function_sleep_max_microseconds_per_block = 10000000; SELECT * FROM numbers(4) where sleepEachRow(1); SELECT '****************************'; diff --git a/tests/queries/0_stateless/02473_optimize_old_parts.sh b/tests/queries/0_stateless/02473_optimize_old_parts.sh index 0c2dd04d024..b563bc31b39 100755 --- a/tests/queries/0_stateless/02473_optimize_old_parts.sh +++ b/tests/queries/0_stateless/02473_optimize_old_parts.sh @@ -61,7 +61,7 @@ INSERT INTO test_with_merge SELECT 3;" wait_for_number_of_parts 'test_with_merge' 1 100 $CLICKHOUSE_CLIENT -nmq " -SELECT sleepEachRow(1) FROM numbers(9) FORMAT Null; -- Sleep for 9 seconds and verify that we keep the old part because it's the only one +SELECT sleepEachRow(1) FROM numbers(9) SETTINGS function_sleep_max_microseconds_per_block 
= 10000000 FORMAT Null; -- Sleep for 9 seconds and verify that we keep the old part because it's the only one SELECT (now() - modification_time) > 5 FROM system.parts WHERE database = currentDatabase() AND table='test_with_merge' AND active; DROP TABLE test_with_merge;" diff --git a/tests/queries/0_stateless/02530_dictionaries_update_field.sh b/tests/queries/0_stateless/02530_dictionaries_update_field.sh index 569466fe606..44000e5d2cd 100755 --- a/tests/queries/0_stateless/02530_dictionaries_update_field.sh +++ b/tests/queries/0_stateless/02530_dictionaries_update_field.sh @@ -53,13 +53,13 @@ for layout in "${layouts[@]}"; do SELECT key, value FROM $dictionary_name ORDER BY key ASC; INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); - SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; + SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM $dictionary_name ORDER BY key ASC; INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); - SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; + SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM $dictionary_name ORDER BY key ASC; -- { echoOff } From fbda7974a5424b79a952fa30b16b7cd3c390bdc8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 7 May 2023 19:39:23 +0200 Subject: [PATCH 0124/1997] Fix tests --- .../queries/0_stateless/02676_optimize_old_parts_replicated.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02676_optimize_old_parts_replicated.sh b/tests/queries/0_stateless/02676_optimize_old_parts_replicated.sh index 2202a349c56..c1f28f9f079 100755 --- a/tests/queries/0_stateless/02676_optimize_old_parts_replicated.sh +++ b/tests/queries/0_stateless/02676_optimize_old_parts_replicated.sh @@ -61,7 +61,7 @@ INSERT INTO test_replicated SELECT 3;" wait_for_number_of_parts 'test_replicated' 1 100 $CLICKHOUSE_CLIENT -nmq " -SELECT sleepEachRow(1) FROM numbers(9) FORMAT Null; -- Sleep for 9 seconds and verify that we keep the old part because it's the only one +SELECT sleepEachRow(1) FROM numbers(9) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; -- Sleep for 9 seconds and verify that we keep the old part because it's the only one SELECT (now() - modification_time) > 5 FROM system.parts WHERE database = currentDatabase() AND table='test_replicated' AND active; DROP TABLE test_replicated;" From 08a9d97de74a27bd28d7cc387d7f5cdba707d6cb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 7 May 2023 19:40:43 +0200 Subject: [PATCH 0125/1997] Fix tests --- tests/queries/0_stateless/02352_rwlock.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02352_rwlock.sh b/tests/queries/0_stateless/02352_rwlock.sh index 7a0b9ef8911..7505a03a382 100755 --- a/tests/queries/0_stateless/02352_rwlock.sh +++ b/tests/queries/0_stateless/02352_rwlock.sh @@ -51,7 +51,7 @@ while :; do insert_query_id="insert-$(random_str 10)" # 20 seconds sleep - $CLICKHOUSE_CLIENT --query_id "$insert_query_id" -q "INSERT INTO ${CLICKHOUSE_DATABASE}_ordinary.data_02352 SELECT sleepEachRow(1) FROM numbers(20) GROUP BY number" & + $CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 20000000 --query_id "$insert_query_id" -q "INSERT INTO 
${CLICKHOUSE_DATABASE}_ordinary.data_02352 SELECT sleepEachRow(1) FROM numbers(20) GROUP BY number" & if ! wait_query_by_id_started "$insert_query_id"; then wait continue From 0818092ae8d49f2e7f87fed6c8703374384719fc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 7 May 2023 19:45:57 +0200 Subject: [PATCH 0126/1997] Enable Sparse columns by default --- src/Storages/MergeTree/MergeTreeSettings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 5416b77a97e..27f482d79ba 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -37,7 +37,7 @@ struct Settings; M(UInt64, min_rows_for_compact_part, 0, "Experimental. Minimal number of rows to create part in compact format instead of saving it in RAM", 0) \ M(Bool, in_memory_parts_enable_wal, true, "Whether to write blocks in Native format to write-ahead-log before creation in-memory part", 0) \ M(UInt64, write_ahead_log_max_bytes, 1024 * 1024 * 1024, "Rotate WAL, if it exceeds that amount of bytes", 0) \ - M(Float, ratio_of_defaults_for_sparse_serialization, 1.0, "Minimal ratio of number of default values to number of all values in column to store it in sparse serializations. If >= 1, columns will be always written in full serialization.", 0) \ + M(Float, ratio_of_defaults_for_sparse_serialization, 0.95, "Minimal ratio of number of default values to number of all values in column to store it in sparse serializations. If >= 1, columns will be always written in full serialization.", 0) \ \ /** Merge settings. */ \ M(UInt64, merge_max_block_size, 8192, "How many rows in blocks should be formed for merge operations. By default has the same value as `index_granularity`.", 0) \ From 7ec98205b58ab36eb28b2f46348dfcfe22215a3c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 7 May 2023 22:54:14 +0300 Subject: [PATCH 0127/1997] Update MergeTreeSettings.h --- src/Storages/MergeTree/MergeTreeSettings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 27f482d79ba..caac86c6706 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -37,7 +37,7 @@ struct Settings; M(UInt64, min_rows_for_compact_part, 0, "Experimental. Minimal number of rows to create part in compact format instead of saving it in RAM", 0) \ M(Bool, in_memory_parts_enable_wal, true, "Whether to write blocks in Native format to write-ahead-log before creation in-memory part", 0) \ M(UInt64, write_ahead_log_max_bytes, 1024 * 1024 * 1024, "Rotate WAL, if it exceeds that amount of bytes", 0) \ - M(Float, ratio_of_defaults_for_sparse_serialization, 0.95, "Minimal ratio of number of default values to number of all values in column to store it in sparse serializations. If >= 1, columns will be always written in full serialization.", 0) \ + M(Float, ratio_of_defaults_for_sparse_serialization, 0.9375f, "Minimal ratio of number of default values to number of all values in column to store it in sparse serializations. If >= 1, columns will be always written in full serialization.", 0) \ \ /** Merge settings. */ \ M(UInt64, merge_max_block_size, 8192, "How many rows in blocks should be formed for merge operations. 
By default has the same value as `index_granularity`.", 0) \ From f3f6ccd7733aa4946c339b4973210f85243e44d1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 8 May 2023 00:28:54 +0200 Subject: [PATCH 0128/1997] Update tests --- .../0_stateless/00443_preferred_block_size_bytes.sh | 6 +++--- ...0484_preferred_max_column_in_block_size_bytes.sql | 8 ++++---- .../00804_test_delta_codec_compression.sql | 12 ++++++------ .../0_stateless/00950_test_double_delta_codec.sql | 2 +- ...00961_checksums_in_system_parts_columns_table.sql | 2 +- .../0_stateless/01055_compact_parts_granularity.sh | 2 +- .../queries/0_stateless/01786_explain_merge_tree.sh | 4 ++-- tests/queries/0_stateless/02263_lazy_mark_load.sh | 2 +- .../0_stateless/02293_selected_rows_and_merges.sh | 8 +++----- .../0_stateless/02361_fsync_profile_events.sh | 7 ++++--- .../02381_compress_marks_and_primary_key.sql | 4 ++-- 11 files changed, 28 insertions(+), 29 deletions(-) diff --git a/tests/queries/0_stateless/00443_preferred_block_size_bytes.sh b/tests/queries/0_stateless/00443_preferred_block_size_bytes.sh index c184b58bf53..27b9f5c00c7 100755 --- a/tests/queries/0_stateless/00443_preferred_block_size_bytes.sh +++ b/tests/queries/0_stateless/00443_preferred_block_size_bytes.sh @@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS preferred_block_size_bytes" -$CLICKHOUSE_CLIENT -q "CREATE TABLE preferred_block_size_bytes (p Date, s String) ENGINE = MergeTree PARTITION BY p ORDER BY p SETTINGS index_granularity=1, index_granularity_bytes=0, min_bytes_for_wide_part = 0, min_rows_for_wide_part = 0" +$CLICKHOUSE_CLIENT -q "CREATE TABLE preferred_block_size_bytes (p Date, s String) ENGINE = MergeTree PARTITION BY p ORDER BY p SETTINGS index_granularity=1, index_granularity_bytes=0, min_bytes_for_wide_part = 0, min_rows_for_wide_part = 0, ratio_of_defaults_for_sparse_serialization = 1" $CLICKHOUSE_CLIENT -q "INSERT INTO preferred_block_size_bytes (s) SELECT '16_bytes_-_-_-_' AS s FROM system.numbers LIMIT 10, 90" $CLICKHOUSE_CLIENT -q "OPTIMIZE TABLE preferred_block_size_bytes" $CLICKHOUSE_CLIENT --preferred_block_size_bytes=26 -q "SELECT DISTINCT blockSize(), ignore(p, s) FROM preferred_block_size_bytes" @@ -19,7 +19,7 @@ $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS preferred_block_size_bytes" # PREWHERE using empty column $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS pbs" -$CLICKHOUSE_CLIENT -q "CREATE TABLE pbs (p Date, i UInt64, sa Array(String)) ENGINE = MergeTree PARTITION BY p ORDER BY p SETTINGS index_granularity=100, index_granularity_bytes=0, min_bytes_for_wide_part = 0, min_rows_for_wide_part = 0" +$CLICKHOUSE_CLIENT -q "CREATE TABLE pbs (p Date, i UInt64, sa Array(String)) ENGINE = MergeTree PARTITION BY p ORDER BY p SETTINGS index_granularity=100, index_granularity_bytes=0, min_bytes_for_wide_part = 0, min_rows_for_wide_part = 0, ratio_of_defaults_for_sparse_serialization = 1" $CLICKHOUSE_CLIENT -q "INSERT INTO pbs (p, i, sa) SELECT toDate(i % 30) AS p, number AS i, ['a'] AS sa FROM system.numbers LIMIT 1000" $CLICKHOUSE_CLIENT -q "ALTER TABLE pbs ADD COLUMN s UInt8 DEFAULT 0" $CLICKHOUSE_CLIENT --preferred_block_size_bytes=100000 -q "SELECT count() FROM pbs PREWHERE s = 0" @@ -30,7 +30,7 @@ $CLICKHOUSE_CLIENT -q "DROP TABLE pbs" # Nullable PREWHERE $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS nullable_prewhere" -$CLICKHOUSE_CLIENT -q "CREATE TABLE nullable_prewhere (p Date, f Nullable(UInt64), d UInt64) ENGINE = MergeTree PARTITION BY p 
ORDER BY p SETTINGS index_granularity=8, index_granularity_bytes=0, min_bytes_for_wide_part = 0, min_rows_for_wide_part = 0" +$CLICKHOUSE_CLIENT -q "CREATE TABLE nullable_prewhere (p Date, f Nullable(UInt64), d UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY p SETTINGS index_granularity=8, index_granularity_bytes=0, min_bytes_for_wide_part = 0, min_rows_for_wide_part = 0, ratio_of_defaults_for_sparse_serialization = 1" $CLICKHOUSE_CLIENT -q "INSERT INTO nullable_prewhere SELECT toDate(0) AS p, if(number % 2 = 0, CAST(number AS Nullable(UInt64)), CAST(NULL AS Nullable(UInt64))) AS f, number as d FROM system.numbers LIMIT 1001" $CLICKHOUSE_CLIENT -q "SELECT sum(d), sum(f), max(d) FROM nullable_prewhere PREWHERE NOT isNull(f)" $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS nullable_prewhere" diff --git a/tests/queries/0_stateless/00484_preferred_max_column_in_block_size_bytes.sql b/tests/queries/0_stateless/00484_preferred_max_column_in_block_size_bytes.sql index 470bca70e06..be4af2221a5 100644 --- a/tests/queries/0_stateless/00484_preferred_max_column_in_block_size_bytes.sql +++ b/tests/queries/0_stateless/00484_preferred_max_column_in_block_size_bytes.sql @@ -1,7 +1,7 @@ -- Tags: no-random-settings drop table if exists tab_00484; -create table tab_00484 (date Date, x UInt64, s FixedString(128)) engine = MergeTree PARTITION BY date ORDER BY (date, x) SETTINGS min_bytes_for_wide_part = 0; +create table tab_00484 (date Date, x UInt64, s FixedString(128)) engine = MergeTree PARTITION BY date ORDER BY (date, x) SETTINGS min_bytes_for_wide_part = 0, ratio_of_defaults_for_sparse_serialization = 1; insert into tab_00484 select today(), number, toFixedString('', 128) from system.numbers limit 8192; set preferred_block_size_bytes = 2000000; @@ -17,19 +17,19 @@ set preferred_max_column_in_block_size_bytes = 4194304; select max(blockSize()), min(blockSize()), any(ignore(*)) from tab_00484; drop table if exists tab_00484; -create table tab_00484 (date Date, x UInt64, s FixedString(128)) engine = MergeTree PARTITION BY date ORDER BY (date, x) SETTINGS min_bytes_for_wide_part = 0; +create table tab_00484 (date Date, x UInt64, s FixedString(128)) engine = MergeTree PARTITION BY date ORDER BY (date, x) SETTINGS min_bytes_for_wide_part = 0, ratio_of_defaults_for_sparse_serialization = 1; insert into tab_00484 select today(), number, toFixedString('', 128) from system.numbers limit 47; set preferred_max_column_in_block_size_bytes = 1152; select blockSize(), * from tab_00484 where x = 1 or x > 36 format Null; drop table if exists tab_00484; -create table tab_00484 (date Date, x UInt64, s FixedString(128)) engine = MergeTree PARTITION BY date ORDER BY (date, x) SETTINGS min_bytes_for_wide_part = 0; +create table tab_00484 (date Date, x UInt64, s FixedString(128)) engine = MergeTree PARTITION BY date ORDER BY (date, x) SETTINGS min_bytes_for_wide_part = 0, ratio_of_defaults_for_sparse_serialization = 1; insert into tab_00484 select today(), number, toFixedString('', 128) from system.numbers limit 10; set preferred_max_column_in_block_size_bytes = 128; select s from tab_00484 where s == '' format Null; drop table if exists tab_00484; -create table tab_00484 (date Date, x UInt64, s String) engine = MergeTree PARTITION BY date ORDER BY (date, x) SETTINGS min_bytes_for_wide_part = 0; +create table tab_00484 (date Date, x UInt64, s String) engine = MergeTree PARTITION BY date ORDER BY (date, x) SETTINGS min_bytes_for_wide_part = 0, ratio_of_defaults_for_sparse_serialization = 1; insert into tab_00484 select today(), 
number, 'abc' from system.numbers limit 81920; set preferred_block_size_bytes = 0; select count(*) from tab_00484 prewhere s != 'abc' format Null; diff --git a/tests/queries/0_stateless/00804_test_delta_codec_compression.sql b/tests/queries/0_stateless/00804_test_delta_codec_compression.sql index 25988f6474b..01a2f53bf93 100644 --- a/tests/queries/0_stateless/00804_test_delta_codec_compression.sql +++ b/tests/queries/0_stateless/00804_test_delta_codec_compression.sql @@ -9,12 +9,12 @@ DROP TABLE IF EXISTS default_codec_synthetic; CREATE TABLE delta_codec_synthetic ( id UInt64 Codec(Delta, ZSTD(3)) -) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key=false; +) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key = false, ratio_of_defaults_for_sparse_serialization = 1; CREATE TABLE default_codec_synthetic ( id UInt64 Codec(ZSTD(3)) -) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key=false; +) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key = false, ratio_of_defaults_for_sparse_serialization = 1; INSERT INTO delta_codec_synthetic SELECT number FROM system.numbers LIMIT 5000000; INSERT INTO default_codec_synthetic SELECT number FROM system.numbers LIMIT 5000000; @@ -47,12 +47,12 @@ DROP TABLE IF EXISTS default_codec_float; CREATE TABLE delta_codec_float ( id Float64 Codec(Delta, LZ4HC) -) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key=false; +) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key = false, ratio_of_defaults_for_sparse_serialization = 1; CREATE TABLE default_codec_float ( id Float64 Codec(LZ4HC) -) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key=false; +) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key = false, ratio_of_defaults_for_sparse_serialization = 1; INSERT INTO delta_codec_float SELECT number FROM numbers(1547510400, 500000) WHERE number % 3 == 0 OR number % 5 == 0 OR number % 7 == 0 OR number % 11 == 0; INSERT INTO default_codec_float SELECT * from delta_codec_float; @@ -85,12 +85,12 @@ DROP TABLE IF EXISTS default_codec_string; CREATE TABLE delta_codec_string ( id Float64 Codec(Delta, LZ4) -) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key=false; +) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key = false, ratio_of_defaults_for_sparse_serialization = 1; CREATE TABLE default_codec_string ( id Float64 Codec(LZ4) -) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key=false; +) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key = false, ratio_of_defaults_for_sparse_serialization = 1; INSERT INTO delta_codec_string SELECT concat(toString(number), toString(number % 100)) FROM numbers(1547510400, 500000); INSERT INTO default_codec_string SELECT * from delta_codec_string; diff --git a/tests/queries/0_stateless/00950_test_double_delta_codec.sql 
b/tests/queries/0_stateless/00950_test_double_delta_codec.sql index f6199a6e4ec..58cf35b5248 100644 --- a/tests/queries/0_stateless/00950_test_double_delta_codec.sql +++ b/tests/queries/0_stateless/00950_test_double_delta_codec.sql @@ -24,7 +24,7 @@ CREATE TABLE codecTest ( valueI8 Int8 CODEC(DoubleDelta), valueDT DateTime CODEC(DoubleDelta), valueD Date CODEC(DoubleDelta) -) Engine = MergeTree ORDER BY key SETTINGS min_bytes_for_wide_part = 0; +) Engine = MergeTree ORDER BY key SETTINGS min_bytes_for_wide_part = 0, ratio_of_defaults_for_sparse_serialization = 1; -- checking for overflow diff --git a/tests/queries/0_stateless/00961_checksums_in_system_parts_columns_table.sql b/tests/queries/0_stateless/00961_checksums_in_system_parts_columns_table.sql index 43b7775e816..8df7d728560 100644 --- a/tests/queries/0_stateless/00961_checksums_in_system_parts_columns_table.sql +++ b/tests/queries/0_stateless/00961_checksums_in_system_parts_columns_table.sql @@ -4,7 +4,7 @@ DROP TABLE IF EXISTS test_00961; CREATE TABLE test_00961 (d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = MergeTree PARTITION BY d ORDER BY (a, b) - SETTINGS index_granularity = 111, min_bytes_for_wide_part = 0, compress_marks = 0, compress_primary_key = 0, index_granularity_bytes = '10Mi'; + SETTINGS index_granularity = 111, min_bytes_for_wide_part = 0, compress_marks = 0, compress_primary_key = 0, index_granularity_bytes = '10Mi', ratio_of_defaults_for_sparse_serialization = 1; INSERT INTO test_00961 VALUES ('2000-01-01', 'Hello, world!', 123, 'xxx yyy', -123, 123456789); diff --git a/tests/queries/0_stateless/01055_compact_parts_granularity.sh b/tests/queries/0_stateless/01055_compact_parts_granularity.sh index f3da33f6ccf..3e5da1e6f90 100755 --- a/tests/queries/0_stateless/01055_compact_parts_granularity.sh +++ b/tests/queries/0_stateless/01055_compact_parts_granularity.sh @@ -11,7 +11,7 @@ $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS mt_compact" $CLICKHOUSE_CLIENT -q "CREATE TABLE mt_compact(a Int, s String) ENGINE = MergeTree ORDER BY a SETTINGS min_rows_for_wide_part = 1000, - index_granularity = 14;" + index_granularity = 14, ratio_of_defaults_for_sparse_serialization = 1;" $CLICKHOUSE_CLIENT -q "SYSTEM STOP MERGES mt_compact" diff --git a/tests/queries/0_stateless/01786_explain_merge_tree.sh b/tests/queries/0_stateless/01786_explain_merge_tree.sh index 15f8821d80d..0d4acba338a 100755 --- a/tests/queries/0_stateless/01786_explain_merge_tree.sh +++ b/tests/queries/0_stateless/01786_explain_merge_tree.sh @@ -10,7 +10,7 @@ CLICKHOUSE_CLIENT="$CLICKHOUSE_CLIENT --optimize_move_to_prewhere=1 --convert_qu $CLICKHOUSE_CLIENT -q "drop table if exists test_index" $CLICKHOUSE_CLIENT -q "drop table if exists idx" -$CLICKHOUSE_CLIENT -q "create table test_index (x UInt32, y UInt32, z UInt32, t UInt32, index t_minmax t % 20 TYPE minmax GRANULARITY 2, index t_set t % 19 type set(4) granularity 2) engine = MergeTree order by (x, y) partition by (y, bitAnd(z, 3), intDiv(t, 15)) settings index_granularity = 2, min_bytes_for_wide_part = 0" +$CLICKHOUSE_CLIENT -q "create table test_index (x UInt32, y UInt32, z UInt32, t UInt32, index t_minmax t % 20 TYPE minmax GRANULARITY 2, index t_set t % 19 type set(4) granularity 2) engine = MergeTree order by (x, y) partition by (y, bitAnd(z, 3), intDiv(t, 15)) settings index_granularity = 2, min_bytes_for_wide_part = 0, ratio_of_defaults_for_sparse_serialization = 1" $CLICKHOUSE_CLIENT -q "insert into test_index select number, number > 3 ? 3 : number, number = 1 ? 
1 : 0, number from numbers(20)" $CLICKHOUSE_CLIENT -q " @@ -35,7 +35,7 @@ $CLICKHOUSE_CLIENT -q " explain actions = 1 select x from test_index where x > 15 order by x desc; " | grep -A 100 "ReadFromMergeTree" -$CLICKHOUSE_CLIENT -q "CREATE TABLE idx (x UInt32, y UInt32, z UInt32) ENGINE = MergeTree ORDER BY (x, x + y) settings min_bytes_for_wide_part = 0" +$CLICKHOUSE_CLIENT -q "CREATE TABLE idx (x UInt32, y UInt32, z UInt32) ENGINE = MergeTree ORDER BY (x, x + y) settings min_bytes_for_wide_part = 0, ratio_of_defaults_for_sparse_serialization = 1" $CLICKHOUSE_CLIENT -q "insert into idx select number, number, number from numbers(10)" $CLICKHOUSE_CLIENT -q " diff --git a/tests/queries/0_stateless/02263_lazy_mark_load.sh b/tests/queries/0_stateless/02263_lazy_mark_load.sh index bf37556bfa6..35a1b4a44dd 100755 --- a/tests/queries/0_stateless/02263_lazy_mark_load.sh +++ b/tests/queries/0_stateless/02263_lazy_mark_load.sh @@ -24,7 +24,7 @@ CREATE TABLE lazy_mark_test n9 UInt64 ) ENGINE = MergeTree -ORDER BY n0 SETTINGS min_bytes_for_wide_part = 0; +ORDER BY n0 SETTINGS min_bytes_for_wide_part = 0, ratio_of_defaults_for_sparse_serialization = 1; EOF ${CLICKHOUSE_CLIENT} -q "SYSTEM STOP MERGES lazy_mark_test" diff --git a/tests/queries/0_stateless/02293_selected_rows_and_merges.sh b/tests/queries/0_stateless/02293_selected_rows_and_merges.sh index 9d1483f5bf7..76c562c9744 100755 --- a/tests/queries/0_stateless/02293_selected_rows_and_merges.sh +++ b/tests/queries/0_stateless/02293_selected_rows_and_merges.sh @@ -9,7 +9,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) query_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(reverse(reinterpretAsString(generateUUIDv4()))))") -${CLICKHOUSE_CLIENT} -q "create table tt (x UInt32, y UInt32) engine = MergeTree order by x" +${CLICKHOUSE_CLIENT} -q "create table tt (x UInt32, y UInt32) engine = MergeTree order by x SETTINGS ratio_of_defaults_for_sparse_serialization = 1" ${CLICKHOUSE_CLIENT} -q "insert into tt select number, 0 from numbers(1e6)" ${CLICKHOUSE_CLIENT} -q "insert into tt select number, 1 from numbers(1e6)" @@ -17,13 +17,11 @@ ${CLICKHOUSE_CLIENT} --optimize_throw_if_noop 1 -q "optimize table tt final" "-- # Here SelectRows and SelectBytes should be zero, MergedRows is 2m and MergedUncompressedBytes is 16m ${CLICKHOUSE_CLIENT} -q "system flush logs" -${CLICKHOUSE_CLIENT} -q "select ProfileEvents['SelectedRows'], ProfileEvents['SelecteBytes'], ProfileEvents['MergedRows'], ProfileEvents['MergedUncompressedBytes'] from system.query_log where query_id = '$query_id' and type = 'QueryFinish' and query like 'optimize%' and current_database = currentDatabase()" +${CLICKHOUSE_CLIENT} -q "select ProfileEvents['SelectedRows'], ProfileEvents['SelectedBytes'], ProfileEvents['MergedRows'], ProfileEvents['MergedUncompressedBytes'] from system.query_log where query_id = '$query_id' and type = 'QueryFinish' and query like 'optimize%' and current_database = currentDatabase()" ${CLICKHOUSE_CLIENT} --mutations_sync 1 -q "alter table tt update y = y + 1 where 1" "--query_id=$query_id" ${CLICKHOUSE_CLIENT} -q "system flush logs" # Here for mutation all values are 0, cause mutation is executed async. # It's pretty hard to write a test with total counter. 
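# [Editor's aside; illustrative only, not part of the patch.] The pattern this
# test leans on: run a statement under a known query_id, flush the logs, then read
# that query's counters back from the ProfileEvents map in system.query_log. A
# minimal sketch of the same idea (the query_id value is arbitrary and the table
# name 'tt' is the one created by this test):
#
#   query_id="merge-check-$RANDOM"
#   $CLICKHOUSE_CLIENT --query_id "$query_id" -q "OPTIMIZE TABLE tt FINAL"
#   $CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS"
#   $CLICKHOUSE_CLIENT -q "SELECT ProfileEvents['MergedRows'] FROM system.query_log
#       WHERE query_id = '$query_id' AND type = 'QueryFinish' AND current_database = currentDatabase()"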
-${CLICKHOUSE_CLIENT} -q "select ProfileEvents['SelectedRows'] > 10, ProfileEvents['SelecteBytes'], ProfileEvents['MergedRows'], ProfileEvents['MergedUncompressedBytes'] from system.query_log where query_id = '$query_id' and type = 'QueryFinish' and query like 'alter%' and current_database = currentDatabase()" - - +${CLICKHOUSE_CLIENT} -q "select ProfileEvents['SelectedRows'] > 10, ProfileEvents['SelectedBytes'], ProfileEvents['MergedRows'], ProfileEvents['MergedUncompressedBytes'] from system.query_log where query_id = '$query_id' and type = 'QueryFinish' and query like 'alter%' and current_database = currentDatabase()" diff --git a/tests/queries/0_stateless/02361_fsync_profile_events.sh b/tests/queries/0_stateless/02361_fsync_profile_events.sh index 5b603133f6c..e150d70b896 100755 --- a/tests/queries/0_stateless/02361_fsync_profile_events.sh +++ b/tests/queries/0_stateless/02361_fsync_profile_events.sh @@ -12,9 +12,10 @@ $CLICKHOUSE_CLIENT -nm -q " create table data_fsync_pe (key Int) engine=MergeTree() order by key settings - min_rows_for_wide_part=2, - fsync_after_insert=1, - fsync_part_directory=1; + min_rows_for_wide_part = 2, + fsync_after_insert = 1, + fsync_part_directory = 1, + ratio_of_defaults_for_sparse_serialization = 1; " ret=1 diff --git a/tests/queries/0_stateless/02381_compress_marks_and_primary_key.sql b/tests/queries/0_stateless/02381_compress_marks_and_primary_key.sql index 842e22ba87d..2fe0943745d 100644 --- a/tests/queries/0_stateless/02381_compress_marks_and_primary_key.sql +++ b/tests/queries/0_stateless/02381_compress_marks_and_primary_key.sql @@ -1,12 +1,12 @@ -- Tags: no-upgrade-check, no-random-merge-tree-settings drop table if exists test_02381; -create table test_02381(a UInt64, b UInt64) ENGINE = MergeTree order by (a, b) SETTINGS compress_marks=false, compress_primary_key=false; +create table test_02381(a UInt64, b UInt64) ENGINE = MergeTree order by (a, b) SETTINGS compress_marks = false, compress_primary_key = false, ratio_of_defaults_for_sparse_serialization = 1; insert into test_02381 select number, number * 10 from system.numbers limit 1000000; drop table if exists test_02381_compress; create table test_02381_compress(a UInt64, b UInt64) ENGINE = MergeTree order by (a, b) - SETTINGS compress_marks=true, compress_primary_key=true, marks_compression_codec='ZSTD(3)', primary_key_compression_codec='ZSTD(3)', marks_compress_block_size=65536, primary_key_compress_block_size=65536; + SETTINGS compress_marks = true, compress_primary_key = true, marks_compression_codec = 'ZSTD(3)', primary_key_compression_codec = 'ZSTD(3)', marks_compress_block_size = 65536, primary_key_compress_block_size = 65536, ratio_of_defaults_for_sparse_serialization = 1; insert into test_02381_compress select number, number * 10 from system.numbers limit 1000000; select * from test_02381_compress where a = 1000 limit 1; From e8f7a84ca6c4e00f6f9ddbf282b109f491244c4c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 8 May 2023 00:37:10 +0200 Subject: [PATCH 0129/1997] Update a few tests --- tests/queries/0_stateless/01375_compact_parts_codecs.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/01375_compact_parts_codecs.sql b/tests/queries/0_stateless/01375_compact_parts_codecs.sql index 1dd39e67876..1c89eb09d0b 100644 --- a/tests/queries/0_stateless/01375_compact_parts_codecs.sql +++ b/tests/queries/0_stateless/01375_compact_parts_codecs.sql @@ -4,7 +4,7 @@ DROP TABLE IF EXISTS codecs; CREATE TABLE codecs (id UInt32, val UInt32, 
s String) ENGINE = MergeTree ORDER BY id - SETTINGS min_rows_for_wide_part = 10000; + SETTINGS min_rows_for_wide_part = 10000, ratio_of_defaults_for_sparse_serialization = 1; INSERT INTO codecs SELECT number, number, toString(number) FROM numbers(1000); SELECT sum(data_compressed_bytes), sum(data_uncompressed_bytes) FROM system.parts @@ -21,7 +21,7 @@ DROP TABLE codecs; CREATE TABLE codecs (id UInt32 CODEC(NONE), val UInt32 CODEC(NONE), s String CODEC(NONE)) ENGINE = MergeTree ORDER BY id - SETTINGS min_rows_for_wide_part = 10000; + SETTINGS min_rows_for_wide_part = 10000, ratio_of_defaults_for_sparse_serialization = 1; INSERT INTO codecs SELECT number, number, toString(number) FROM numbers(1000); SELECT sum(data_compressed_bytes), sum(data_uncompressed_bytes) FROM system.parts @@ -38,7 +38,7 @@ DROP TABLE codecs; CREATE TABLE codecs (id UInt32, val UInt32 CODEC(Delta, ZSTD), s String CODEC(ZSTD)) ENGINE = MergeTree ORDER BY id - SETTINGS min_rows_for_wide_part = 10000; + SETTINGS min_rows_for_wide_part = 10000, ratio_of_defaults_for_sparse_serialization = 1; INSERT INTO codecs SELECT number, number, toString(number) FROM numbers(1000); SELECT sum(data_compressed_bytes), sum(data_uncompressed_bytes) FROM system.parts From 7c03801bf7da6803e47f57ab78478c33a9c9a764 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 8 May 2023 00:54:57 +0200 Subject: [PATCH 0130/1997] Update a test --- tests/queries/0_stateless/02725_parquet_preserve_order.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/02725_parquet_preserve_order.sh b/tests/queries/0_stateless/02725_parquet_preserve_order.sh index ea3e4219e35..ac29ef3f361 100755 --- a/tests/queries/0_stateless/02725_parquet_preserve_order.sh +++ b/tests/queries/0_stateless/02725_parquet_preserve_order.sh @@ -10,7 +10,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # It'll be read into two blocks. The first block will sleep 2x longer than the second. # So reordering is very likely if the order-preservation doesn't work. 
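# A back-of-the-envelope check (assuming the larger of the two blocks holds two of the file's rows):
# sleepEachRow(3) sleeps 3 seconds per row, so that block sleeps 2 * 3 s = 6 s = 6000000 us, which is
# exactly the function_sleep_max_microseconds_per_block budget granted below; a smaller cap would
# make the query throw instead of sleeping.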
-$CLICKHOUSE_LOCAL -q "select number+sleepEachRow(3) from file('$CURDIR/data_parquet/02725_data.parquet') settings input_format_parquet_preserve_order=1" +$CLICKHOUSE_LOCAL -q "select number + sleepEachRow(3) from file('$CURDIR/data_parquet/02725_data.parquet') settings input_format_parquet_preserve_order=1, function_sleep_max_microseconds_per_block = 6000000" -$CLICKHOUSE_LOCAL -q "explain pipeline select number+sleepEachRow(3) from file('$CURDIR/data_parquet/02725_data.parquet') settings input_format_parquet_preserve_order=1, max_threads=2" -$CLICKHOUSE_LOCAL -q "explain pipeline select number+sleepEachRow(3) from file('$CURDIR/data_parquet/02725_data.parquet') settings input_format_parquet_preserve_order=0, parallelize_output_from_storages=1, max_threads=2" +$CLICKHOUSE_LOCAL -q "explain pipeline select number + sleepEachRow(3) from file('$CURDIR/data_parquet/02725_data.parquet') settings input_format_parquet_preserve_order=1, max_threads=2" +$CLICKHOUSE_LOCAL -q "explain pipeline select number + sleepEachRow(3) from file('$CURDIR/data_parquet/02725_data.parquet') settings input_format_parquet_preserve_order=0, parallelize_output_from_storages=1, max_threads=2" From a25de5fb4186fbe103f916b07aa8bd89975048b9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 8 May 2023 00:55:44 +0200 Subject: [PATCH 0131/1997] Update a test --- .../02530_dictionaries_update_field.reference | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/queries/0_stateless/02530_dictionaries_update_field.reference b/tests/queries/0_stateless/02530_dictionaries_update_field.reference index 40f2c0ee400..88c910e0313 100644 --- a/tests/queries/0_stateless/02530_dictionaries_update_field.reference +++ b/tests/queries/0_stateless/02530_dictionaries_update_field.reference @@ -4,13 +4,13 @@ flat SELECT key, value FROM dict_flat ORDER BY key ASC; 1 First INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); -SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM dict_flat ORDER BY key ASC; 1 First 2 Second INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM dict_flat ORDER BY key ASC; 1 First 2 SecondUpdated @@ -21,13 +21,13 @@ flat/custom SELECT key, value FROM dict_flat_custom ORDER BY key ASC; 1 First INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); -SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM dict_flat_custom ORDER BY key ASC; 1 First 2 Second INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM dict_flat_custom ORDER BY key ASC; 1 First 2 SecondUpdated @@ -38,13 +38,13 @@ hashed SELECT key, value FROM dict_hashed ORDER BY key ASC; 1 First INSERT INTO 
table_for_update_field_dictionary VALUES (2, 'Second', now()); -SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM dict_hashed ORDER BY key ASC; 1 First 2 Second INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM dict_hashed ORDER BY key ASC; 1 First 2 SecondUpdated @@ -55,13 +55,13 @@ hashed/custom SELECT key, value FROM dict_hashed_custom ORDER BY key ASC; 1 First INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); -SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM dict_hashed_custom ORDER BY key ASC; 1 First 2 Second INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM dict_hashed_custom ORDER BY key ASC; 1 First 2 SecondUpdated @@ -72,13 +72,13 @@ complex_key_hashed SELECT key, value FROM dict_complex_key_hashed ORDER BY key ASC; 1 First INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); -SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM dict_complex_key_hashed ORDER BY key ASC; 1 First 2 Second INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM dict_complex_key_hashed ORDER BY key ASC; 1 First 2 SecondUpdated @@ -89,13 +89,13 @@ complex_key_hashed/custom SELECT key, value FROM dict_complex_key_hashed_custom ORDER BY key ASC; 1 First INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); -SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM dict_complex_key_hashed_custom ORDER BY key ASC; 1 First 2 Second INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM dict_complex_key_hashed_custom ORDER BY key ASC; 1 First 2 SecondUpdated From 63b559df17a07e42768c4425538426e245d829fa Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 8 May 2023 06:49:41 +0200 Subject: [PATCH 0132/1997] Update a test --- .../02530_dictionaries_update_field.reference | 24 +++++++++---------- 1 file changed, 12 
insertions(+), 12 deletions(-) diff --git a/tests/queries/0_stateless/02530_dictionaries_update_field.reference b/tests/queries/0_stateless/02530_dictionaries_update_field.reference index 40f2c0ee400..88c910e0313 100644 --- a/tests/queries/0_stateless/02530_dictionaries_update_field.reference +++ b/tests/queries/0_stateless/02530_dictionaries_update_field.reference @@ -4,13 +4,13 @@ flat SELECT key, value FROM dict_flat ORDER BY key ASC; 1 First INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); -SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM dict_flat ORDER BY key ASC; 1 First 2 Second INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM dict_flat ORDER BY key ASC; 1 First 2 SecondUpdated @@ -21,13 +21,13 @@ flat/custom SELECT key, value FROM dict_flat_custom ORDER BY key ASC; 1 First INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); -SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM dict_flat_custom ORDER BY key ASC; 1 First 2 Second INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM dict_flat_custom ORDER BY key ASC; 1 First 2 SecondUpdated @@ -38,13 +38,13 @@ hashed SELECT key, value FROM dict_hashed ORDER BY key ASC; 1 First INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); -SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM dict_hashed ORDER BY key ASC; 1 First 2 Second INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM dict_hashed ORDER BY key ASC; 1 First 2 SecondUpdated @@ -55,13 +55,13 @@ hashed/custom SELECT key, value FROM dict_hashed_custom ORDER BY key ASC; 1 First INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); -SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM dict_hashed_custom ORDER BY key ASC; 1 First 2 Second INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT 
key, value FROM dict_hashed_custom ORDER BY key ASC; 1 First 2 SecondUpdated @@ -72,13 +72,13 @@ complex_key_hashed SELECT key, value FROM dict_complex_key_hashed ORDER BY key ASC; 1 First INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); -SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM dict_complex_key_hashed ORDER BY key ASC; 1 First 2 Second INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM dict_complex_key_hashed ORDER BY key ASC; 1 First 2 SecondUpdated @@ -89,13 +89,13 @@ complex_key_hashed/custom SELECT key, value FROM dict_complex_key_hashed_custom ORDER BY key ASC; 1 First INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); -SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM dict_complex_key_hashed_custom ORDER BY key ASC; 1 First 2 Second INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM dict_complex_key_hashed_custom ORDER BY key ASC; 1 First 2 SecondUpdated From fc02e9efc9eb2dcb9b7209e41eafb1e50abced7d Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 8 May 2023 17:23:46 +0000 Subject: [PATCH 0133/1997] update fasttest a bit --- tests/ci/fast_test_check.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/ci/fast_test_check.py b/tests/ci/fast_test_check.py index 89066ade2cb..fe211d79810 100644 --- a/tests/ci/fast_test_check.py +++ b/tests/ci/fast_test_check.py @@ -214,8 +214,11 @@ def main(): # Refuse other checks to run if fast test failed if state != "success": - if FORCE_TESTS_LABEL in pr_info.labels and state != "error": - print(f"'{FORCE_TESTS_LABEL}' enabled, will report success") + if state == "error": + print("The status is 'error', report failure disregard the labels") + sys.exit(1) + elif FORCE_TESTS_LABEL in pr_info.labels: + print(f"'{FORCE_TESTS_LABEL}' enabled, reporting success") else: sys.exit(1) From 3fedd683ef97e61ebcc17b2f8b38feb297fbc26c Mon Sep 17 00:00:00 2001 From: zvonand Date: Mon, 8 May 2023 22:28:31 +0200 Subject: [PATCH 0134/1997] speedup vol. 
II --- .../Serializations/SerializationDate.cpp | 2 +- src/Functions/FunctionsConversion.h | 16 ++++---- src/IO/ReadHelpers.h | 40 +++++++++++++++---- 3 files changed, 42 insertions(+), 16 deletions(-) diff --git a/src/DataTypes/Serializations/SerializationDate.cpp b/src/DataTypes/Serializations/SerializationDate.cpp index bc2057d549e..8b4956f7826 100644 --- a/src/DataTypes/Serializations/SerializationDate.cpp +++ b/src/DataTypes/Serializations/SerializationDate.cpp @@ -77,7 +77,7 @@ void SerializationDate::serializeTextCSV(const IColumn & column, size_t row_num, void SerializationDate::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { DayNum value; - readCSV(value, istr); + readCSV(value, istr, time_zone); assert_cast(column).getData().push_back(value); } SerializationDate::SerializationDate(const TimezoneMixin & time_zone_) : TimezoneMixin(time_zone_) diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index 0f2d49f2557..6af5c44eb5e 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -981,18 +981,18 @@ void parseImpl(typename DataType::FieldType & x, ReadBuffer & rb, const DateLUTI } template <> -inline void parseImpl(DataTypeDate::FieldType & x, ReadBuffer & rb, const DateLUTImpl *) +inline void parseImpl(DataTypeDate::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone) { DayNum tmp(0); - readDateText(tmp, rb); + readDateText(tmp, rb, *time_zone); x = tmp; } template <> -inline void parseImpl(DataTypeDate32::FieldType & x, ReadBuffer & rb, const DateLUTImpl *) +inline void parseImpl(DataTypeDate32::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone) { ExtendedDayNum tmp(0); - readDateText(tmp, rb); + readDateText(tmp, rb, *time_zone); x = tmp; } @@ -1040,20 +1040,20 @@ bool tryParseImpl(typename DataType::FieldType & x, ReadBuffer & rb, const DateL } template <> -inline bool tryParseImpl(DataTypeDate::FieldType & x, ReadBuffer & rb, const DateLUTImpl *) +inline bool tryParseImpl(DataTypeDate::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone) { DayNum tmp(0); - if (!tryReadDateText(tmp, rb)) + if (!tryReadDateText(tmp, rb, *time_zone)) return false; x = tmp; return true; } template <> -inline bool tryParseImpl(DataTypeDate32::FieldType & x, ReadBuffer & rb, const DateLUTImpl *) +inline bool tryParseImpl(DataTypeDate32::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone) { ExtendedDayNum tmp(0); - if (!tryReadDateText(tmp, rb)) + if (!tryReadDateText(tmp, rb, *time_zone)) return false; x = tmp; return true; diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index 16c28b89667..f9e21418a41 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -760,14 +760,14 @@ inline bool tryReadDateText(LocalDate & date, ReadBuffer & buf) return readDateTextImpl(date, buf); } -inline bool tryReadDateText(DayNum & date, ReadBuffer & buf) +inline bool tryReadDateText(DayNum & date, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance()) { - return readDateTextImpl(date, buf); + return readDateTextImpl(date, buf, time_zone); } -inline bool tryReadDateText(ExtendedDayNum & date, ReadBuffer & buf) +inline bool tryReadDateText(ExtendedDayNum & date, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance()) { - return readDateTextImpl(date, buf); + return readDateTextImpl(date, buf, time_zone); } template @@ -1160,7 +1160,7 @@ inline void readText(is_floating_point auto & x, ReadBuffer & buf) { readFloatTe 
inline void readText(String & x, ReadBuffer & buf) { readEscapedString(x, buf); } inline void readText(LocalDate & x, ReadBuffer & buf) { readDateText(x, buf); } -inline void readText(DayNum & x, ReadBuffer & buf) { readDateText(x, buf); } +inline void readText(DayNum & x, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance()) { readDateText(x, buf, time_zone); } inline void readText(LocalDateTime & x, ReadBuffer & buf) { readDateTimeText(x, buf); } inline void readText(UUID & x, ReadBuffer & buf) { readUUIDText(x, buf); } inline void readText(IPv4 & x, ReadBuffer & buf) { readIPv4Text(x, buf); } @@ -1172,6 +1172,10 @@ template requires is_arithmetic_v inline void readQuoted(T & x, ReadBuffer & buf) { readText(x, buf); } +template +requires is_arithmetic_v +inline void readQuoted(T & x, ReadBuffer & buf, const DateLUTImpl & time_zone) { readText(x, buf, time_zone); } + inline void readQuoted(String & x, ReadBuffer & buf) { readQuotedString(x, buf); } inline void readQuoted(LocalDate & x, ReadBuffer & buf) @@ -1214,6 +1218,10 @@ template requires is_arithmetic_v inline void readDoubleQuoted(T & x, ReadBuffer & buf) { readText(x, buf); } +template + requires is_arithmetic_v +inline void readDoubleQuoted(T & x, ReadBuffer & buf, const DateLUTImpl & time_zone) { readText(x, buf, time_zone); } + inline void readDoubleQuoted(String & x, ReadBuffer & buf) { readDoubleQuotedString(x, buf); } inline void readDoubleQuoted(LocalDate & x, ReadBuffer & buf) @@ -1230,7 +1238,7 @@ inline void readDoubleQuoted(LocalDateTime & x, ReadBuffer & buf) assertChar('"', buf); } -/// CSV, for numbers, dates: quotes are optional, no special escaping rules. +/// CSV for numbers: quotes are optional, no special escaping rules. template inline void readCSVSimple(T & x, ReadBuffer & buf) { @@ -1248,6 +1256,24 @@ inline void readCSVSimple(T & x, ReadBuffer & buf) assertChar(maybe_quote, buf); } +// standalone overload for dates: to avoid instantiating DateLUTs while parsing other types +template +inline void readCSVSimple(T & x, ReadBuffer & buf, const DateLUTImpl & time_zone) +{ + if (buf.eof()) [[unlikely]] + throwReadAfterEOF(); + + char maybe_quote = *buf.position(); + + if (maybe_quote == '\'' || maybe_quote == '\"') + ++buf.position(); + + readText(x, buf, time_zone); + + if (maybe_quote == '\'' || maybe_quote == '\"') + assertChar(maybe_quote, buf); +} + template requires is_arithmetic_v inline void readCSV(T & x, ReadBuffer & buf) @@ -1257,7 +1283,7 @@ inline void readCSV(T & x, ReadBuffer & buf) inline void readCSV(String & x, ReadBuffer & buf, const FormatSettings::CSV & settings) { readCSVString(x, buf, settings); } inline void readCSV(LocalDate & x, ReadBuffer & buf) { readCSVSimple(x, buf); } -inline void readCSV(DayNum & x, ReadBuffer & buf) { readCSVSimple(x, buf); } +inline void readCSV(DayNum & x, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance()) { readCSVSimple(x, buf, time_zone); } inline void readCSV(LocalDateTime & x, ReadBuffer & buf) { readCSVSimple(x, buf); } inline void readCSV(UUID & x, ReadBuffer & buf) { readCSVSimple(x, buf); } inline void readCSV(IPv4 & x, ReadBuffer & buf) { readCSVSimple(x, buf); } From 1751ccc7aca3830d21a06ec4f09bd28bf9254f79 Mon Sep 17 00:00:00 2001 From: zvonand Date: Tue, 9 May 2023 14:18:04 +0200 Subject: [PATCH 0135/1997] fix stateless --- src/Functions/FunctionsConversion.h | 15 ++++++++++++--- src/IO/ReadHelpers.h | 12 ++++++++---- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git 
a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index 6af5c44eb5e..e0e188f68c2 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -439,7 +439,7 @@ struct ToDate32Transform32Or64Signed static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone) { - static const Int32 daynum_min_offset = -static_cast(DateLUT::instance().getDayNumOffsetEpoch()); + static const Int32 daynum_min_offset = -static_cast(time_zone.getDayNumOffsetEpoch()); if (from < daynum_min_offset) return daynum_min_offset; return (from < DATE_LUT_MAX_EXTEND_DAY_NUM) @@ -830,8 +830,11 @@ struct ConvertImpl(*col_with_type_and_name.type); const DateLUTImpl * time_zone = nullptr; + + if constexpr (std::is_same_v) + time_zone = &DateLUT::instance(); /// For argument of Date or DateTime type, second argument with time zone could be specified. - if constexpr (std::is_same_v || std::is_same_v || std::is_same_v) + if constexpr (std::is_same_v || std::is_same_v) { auto non_null_args = createBlockWithNestedColumns(arguments); time_zone = &extractTimeZoneFromFunctionArguments(non_null_args, 1, 0); @@ -1193,7 +1196,7 @@ struct ConvertThroughParsing const DateLUTImpl * local_time_zone [[maybe_unused]] = nullptr; const DateLUTImpl * utc_time_zone [[maybe_unused]] = nullptr; - /// For conversion to DateTime type, second argument with time zone could be specified. + /// For conversion to Date or DateTime type, second argument with time zone could be specified. if constexpr (std::is_same_v || to_datetime64) { const auto result_type = removeNullable(res_type); @@ -1206,6 +1209,12 @@ struct ConvertThroughParsing if constexpr (parsing_mode == ConvertFromStringParsingMode::BestEffort || parsing_mode == ConvertFromStringParsingMode::BestEffortUS) utc_time_zone = &DateLUT::instance("UTC"); } + else if constexpr (std::is_same_v || std::is_same_v) + { + // Timezone is more or less dummy when parsing Date/Date32 from string. 
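+                // (Illustration: readDateText on "2023-05-09" yields the same DayNum with any common
+                // DateLUT instance in practice, since a DayNum is just a calendar-day offset from
+                // 1970-01-01; a LUT object is still needed to drive the parsing, hence the defaults
+                // assigned below.)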
+ local_time_zone = &DateLUT::instance(); + utc_time_zone = &DateLUT::instance("UTC"); + } const IColumn * col_from = arguments[0].column.get(); const ColumnString * col_from_string = checkAndGetColumn(col_from); diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index f9e21418a41..ea565d11914 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -718,7 +718,7 @@ inline ReturnType readDateTextImpl(DayNum & date, ReadBuffer & buf, const DateLU return false; ExtendedDayNum ret = date_lut.makeDayNum(local_date.year(), local_date.month(), local_date.day()); - convertToDayNum(date,ret); + convertToDayNum(date, ret); return ReturnType(true); } @@ -1159,8 +1159,11 @@ inline bool tryReadText(IPv6 & x, ReadBuffer & buf) { return tryReadIPv6Text(x, inline void readText(is_floating_point auto & x, ReadBuffer & buf) { readFloatText(x, buf); } inline void readText(String & x, ReadBuffer & buf) { readEscapedString(x, buf); } + +inline void readText(DayNum & x, ReadBuffer & buf) { readDateText(x, buf); } +inline void readText(DayNum & x, ReadBuffer & buf, const DateLUTImpl & time_zone) { readDateText(x, buf, time_zone); } + inline void readText(LocalDate & x, ReadBuffer & buf) { readDateText(x, buf); } -inline void readText(DayNum & x, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance()) { readDateText(x, buf, time_zone); } inline void readText(LocalDateTime & x, ReadBuffer & buf) { readDateTimeText(x, buf); } inline void readText(UUID & x, ReadBuffer & buf) { readUUIDText(x, buf); } inline void readText(IPv4 & x, ReadBuffer & buf) { readIPv4Text(x, buf); } @@ -1219,7 +1222,7 @@ requires is_arithmetic_v inline void readDoubleQuoted(T & x, ReadBuffer & buf) { readText(x, buf); } template - requires is_arithmetic_v +requires is_arithmetic_v inline void readDoubleQuoted(T & x, ReadBuffer & buf, const DateLUTImpl & time_zone) { readText(x, buf, time_zone); } inline void readDoubleQuoted(String & x, ReadBuffer & buf) { readDoubleQuotedString(x, buf); } @@ -1283,7 +1286,8 @@ inline void readCSV(T & x, ReadBuffer & buf) inline void readCSV(String & x, ReadBuffer & buf, const FormatSettings::CSV & settings) { readCSVString(x, buf, settings); } inline void readCSV(LocalDate & x, ReadBuffer & buf) { readCSVSimple(x, buf); } -inline void readCSV(DayNum & x, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance()) { readCSVSimple(x, buf, time_zone); } +inline void readCSV(DayNum & x, ReadBuffer & buf) { readCSVSimple(x, buf); } +inline void readCSV(DayNum & x, ReadBuffer & buf, const DateLUTImpl & time_zone) { readCSVSimple(x, buf, time_zone); } inline void readCSV(LocalDateTime & x, ReadBuffer & buf) { readCSVSimple(x, buf); } inline void readCSV(UUID & x, ReadBuffer & buf) { readCSVSimple(x, buf); } inline void readCSV(IPv4 & x, ReadBuffer & buf) { readCSVSimple(x, buf); } From 383fc06761f81bee735ec22692a2d506ca78c01e Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 9 May 2023 16:10:53 +0200 Subject: [PATCH 0136/1997] Fix --- .../MaterializedPostgreSQLConsumer.cpp | 37 +++--- .../MaterializedPostgreSQLSettings.h | 3 + .../PostgreSQLReplicationHandler.cpp | 21 +-- .../PostgreSQL/PostgreSQLReplicationHandler.h | 7 +- tests/integration/helpers/postgres_utility.py | 124 +++++++++++------- .../test.py | 75 ++++++++++- 6 files changed, 183 insertions(+), 84 deletions(-) diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp index d048c94ac75..ea7009fc082 100644 --- 
a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp @@ -562,34 +562,27 @@ void MaterializedPostgreSQLConsumer::syncTables() Block result_rows = storage_data.buffer.description.sample_block.cloneWithColumns(std::move(storage_data.buffer.columns)); storage_data.buffer.columns = storage_data.buffer.description.sample_block.cloneEmptyColumns(); - try + if (result_rows.rows()) { - if (result_rows.rows()) - { - auto storage = storage_data.storage; + auto storage = storage_data.storage; - auto insert_context = Context::createCopy(context); - insert_context->setInternalQuery(true); + auto insert_context = Context::createCopy(context); + insert_context->setInternalQuery(true); - auto insert = std::make_shared(); - insert->table_id = storage->getStorageID(); - insert->columns = storage_data.buffer.columns_ast; + auto insert = std::make_shared(); + insert->table_id = storage->getStorageID(); + insert->columns = storage_data.buffer.columns_ast; - InterpreterInsertQuery interpreter(insert, insert_context, true); - auto io = interpreter.execute(); - auto input = std::make_shared( - result_rows.cloneEmpty(), Chunk(result_rows.getColumns(), result_rows.rows())); + InterpreterInsertQuery interpreter(insert, insert_context, true); + auto io = interpreter.execute(); + auto input = std::make_shared( + result_rows.cloneEmpty(), Chunk(result_rows.getColumns(), result_rows.rows())); - assertBlocksHaveEqualStructure(input->getPort().getHeader(), io.pipeline.getHeader(), "postgresql replica table sync"); - io.pipeline.complete(Pipe(std::move(input))); + assertBlocksHaveEqualStructure(input->getPort().getHeader(), io.pipeline.getHeader(), "postgresql replica table sync"); + io.pipeline.complete(Pipe(std::move(input))); - CompletedPipelineExecutor executor(io.pipeline); - executor.execute(); - } - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); + CompletedPipelineExecutor executor(io.pipeline); + executor.execute(); } } diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLSettings.h b/src/Storages/PostgreSQL/MaterializedPostgreSQLSettings.h index e8d42ef3668..d3d2faba497 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLSettings.h +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLSettings.h @@ -21,6 +21,9 @@ namespace DB M(Bool, materialized_postgresql_tables_list_with_schema, false, \ "Consider by default that if there is a dot in tables list 'name.name', " \ "then the first name is postgres schema and second is postgres table. 
This setting is needed to allow table names with dots", 0) \ + M(UInt64, materialized_postgresql_backoff_min_ms, 200, "Poll backoff start point", 0) \ + M(UInt64, materialized_postgresql_backoff_max_ms, 10000, "Poll backoff max point", 0) \ + M(UInt64, materialized_postgresql_backoff_factor, 2, "Poll backoff factor", 0) \ DECLARE_SETTINGS_TRAITS(MaterializedPostgreSQLSettingsTraits, LIST_OF_MATERIALIZED_POSTGRESQL_SETTINGS) diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp index 998db4ea79e..f57a6a26a62 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp +++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp @@ -22,8 +22,6 @@ namespace DB { -static const auto RESCHEDULE_MS = 1000; -static const auto BACKOFF_TRESHOLD_MS = 10000; static const auto CLEANUP_RESCHEDULE_MS = 600000 * 3; /// 30 min namespace ErrorCodes @@ -80,7 +78,10 @@ PostgreSQLReplicationHandler::PostgreSQLReplicationHandler( , schema_list(replication_settings.materialized_postgresql_schema_list) , schema_as_a_part_of_table_name(!schema_list.empty() || replication_settings.materialized_postgresql_tables_list_with_schema) , user_provided_snapshot(replication_settings.materialized_postgresql_snapshot) - , milliseconds_to_wait(RESCHEDULE_MS) + , reschedule_backoff_min_ms(replication_settings.materialized_postgresql_backoff_min_ms) + , reschedule_backoff_max_ms(replication_settings.materialized_postgresql_backoff_max_ms) + , reschedule_backoff_factor(replication_settings.materialized_postgresql_backoff_factor) + , milliseconds_to_wait(reschedule_backoff_min_ms) { if (!schema_list.empty() && !tables_list.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot have schema list and tables list at the same time"); @@ -166,7 +167,7 @@ void PostgreSQLReplicationHandler::checkConnectionAndStart() throw; LOG_ERROR(log, "Unable to set up connection. Reconnection attempt will continue. Error message: {}", pqxx_error.what()); - startup_task->scheduleAfter(RESCHEDULE_MS); + startup_task->scheduleAfter(milliseconds_to_wait); } catch (...) { @@ -435,18 +436,18 @@ void PostgreSQLReplicationHandler::consumerFunc() if (schedule_now) { - milliseconds_to_wait = RESCHEDULE_MS; + milliseconds_to_wait = reschedule_backoff_min_ms; consumer_task->schedule(); LOG_DEBUG(log, "Scheduling replication thread: now"); } else { - consumer_task->scheduleAfter(milliseconds_to_wait); - if (milliseconds_to_wait < BACKOFF_TRESHOLD_MS) - milliseconds_to_wait *= 2; + if (milliseconds_to_wait < reschedule_backoff_max_ms) + milliseconds_to_wait = std::min(milliseconds_to_wait * reschedule_backoff_factor, reschedule_backoff_max_ms); LOG_DEBUG(log, "Scheduling replication thread: after {} ms", milliseconds_to_wait); + consumer_task->scheduleAfter(milliseconds_to_wait); } } @@ -892,7 +893,7 @@ void PostgreSQLReplicationHandler::addTableToReplication(StorageMaterializedPost catch (...) { consumer_task->activate(); - consumer_task->scheduleAfter(RESCHEDULE_MS); + consumer_task->scheduleAfter(milliseconds_to_wait); auto error_message = getCurrentExceptionMessage(false); throw Exception(ErrorCodes::POSTGRESQL_REPLICATION_INTERNAL_ERROR, @@ -922,7 +923,7 @@ void PostgreSQLReplicationHandler::removeTableFromReplication(const String & pos catch (...) 
{ consumer_task->activate(); - consumer_task->scheduleAfter(RESCHEDULE_MS); + consumer_task->scheduleAfter(milliseconds_to_wait); auto error_message = getCurrentExceptionMessage(false); throw Exception(ErrorCodes::POSTGRESQL_REPLICATION_INTERNAL_ERROR, diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h index 10a196cf31b..4c16ff95692 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h +++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h @@ -140,13 +140,16 @@ private: BackgroundSchedulePool::TaskHolder consumer_task; BackgroundSchedulePool::TaskHolder cleanup_task; + const UInt64 reschedule_backoff_min_ms; + const UInt64 reschedule_backoff_max_ms; + const UInt64 reschedule_backoff_factor; + UInt64 milliseconds_to_wait; + std::atomic stop_synchronization = false; /// MaterializedPostgreSQL tables. Used for managing all operations with its internal nested tables. MaterializedStorages materialized_storages; - UInt64 milliseconds_to_wait; - bool replication_handler_initialized = false; }; diff --git a/tests/integration/helpers/postgres_utility.py b/tests/integration/helpers/postgres_utility.py index dfae37af434..1a00faf0f9d 100644 --- a/tests/integration/helpers/postgres_utility.py +++ b/tests/integration/helpers/postgres_utility.py @@ -76,16 +76,24 @@ def drop_postgres_schema(cursor, schema_name): def create_postgres_table( - cursor, table_name, replica_identity_full=False, template=postgres_table_template + cursor, + table_name, + database_name="", + replica_identity_full=False, + template=postgres_table_template, ): - drop_postgres_table(cursor, table_name) - cursor.execute(template.format(table_name)) + if database_name == "": + name = table_name + else: + name = f"{database_name}.{table_name}" + drop_postgres_table(cursor, name) + cursor.execute(template.format(name)) if replica_identity_full: - cursor.execute(f"ALTER TABLE {table_name} REPLICA IDENTITY FULL;") + cursor.execute(f"ALTER TABLE {name} REPLICA IDENTITY FULL;") -def drop_postgres_table(cursor, table_name): - cursor.execute(f"""DROP TABLE IF EXISTS "{table_name}" """) +def drop_postgres_table(cursor, name): + cursor.execute(f"""DROP TABLE IF EXISTS "{name}" """) def create_postgres_table_with_schema(cursor, schema_name, table_name): @@ -103,13 +111,16 @@ class PostgresManager: self.created_materialized_postgres_db_list = set() self.created_ch_postgres_db_list = set() - def init(self, instance, ip, port): + def init(self, instance, ip, port, default_database="postgres_database"): self.instance = instance self.ip = ip self.port = port - self.conn = get_postgres_conn(ip=self.ip, port=self.port) + self.default_database = default_database self.prepare() + def get_default_database(self): + return self.default_database + def restart(self): try: self.clear() @@ -119,10 +130,17 @@ class PostgresManager: raise ex def prepare(self): - conn = get_postgres_conn(ip=self.ip, port=self.port) - cursor = conn.cursor() - self.create_postgres_db(cursor, "postgres_database") - self.create_clickhouse_postgres_db(ip=self.ip, port=self.port) + self.conn = get_postgres_conn(ip=self.ip, port=self.port) + self.cursor = self.conn.cursor() + if self.default_database != "": + self.create_postgres_db(self.default_database) + self.conn = get_postgres_conn( + ip=self.ip, + port=self.port, + database=True, + database_name=self.default_database, + ) + self.cursor = self.conn.cursor() def clear(self): if self.conn.closed == 0: @@ -132,63 +150,76 @@ class 
PostgresManager: for db in self.created_ch_postgres_db_list.copy(): self.drop_clickhouse_postgres_db(db) if len(self.created_postgres_db_list) > 0: - conn = get_postgres_conn(ip=self.ip, port=self.port) - cursor = conn.cursor() + self.conn = get_postgres_conn(ip=self.ip, port=self.port) + self.cursor = self.conn.cursor() for db in self.created_postgres_db_list.copy(): - self.drop_postgres_db(cursor, db) + self.drop_postgres_db(db) - def get_db_cursor(self): - self.conn = get_postgres_conn(ip=self.ip, port=self.port, database=True) + def get_db_cursor(self, database_name=""): + if database_name == "": + database_name = self.default_database + self.conn = get_postgres_conn( + ip=self.ip, port=self.port, database=True, database_name=database_name + ) return self.conn.cursor() - def create_postgres_db(self, cursor, name="postgres_database"): - self.drop_postgres_db(cursor, name) - self.created_postgres_db_list.add(name) - cursor.execute(f"CREATE DATABASE {name}") + def database_or_default(self, database_name): + if database_name == "" and self.default_database == "": + raise Exception("Database name is empty") + if database_name == "": + database_name = self.default_database + return database_name - def drop_postgres_db(self, cursor, name="postgres_database"): - cursor.execute(f"DROP DATABASE IF EXISTS {name}") - if name in self.created_postgres_db_list: - self.created_postgres_db_list.remove(name) + def create_postgres_db(self, database_name=""): + database_name = self.database_or_default(database_name) + self.drop_postgres_db(database_name) + self.created_postgres_db_list.add(database_name) + self.cursor.execute(f"CREATE DATABASE {database_name}") + + def drop_postgres_db(self, database_name=""): + database_name = self.database_or_default(database_name) + self.cursor.execute(f"DROP DATABASE IF EXISTS {database_name}") + if database_name in self.created_postgres_db_list: + self.created_postgres_db_list.remove(database_name) def create_clickhouse_postgres_db( self, - ip, - port, - name="postgres_database", - database_name="postgres_database", + database_name="", schema_name="", ): - self.drop_clickhouse_postgres_db(name) - self.created_ch_postgres_db_list.add(name) + database_name = self.database_or_default(database_name) + self.drop_clickhouse_postgres_db(database_name) + self.created_ch_postgres_db_list.add(database_name) if len(schema_name) == 0: self.instance.query( f""" - CREATE DATABASE {name} - ENGINE = PostgreSQL('{ip}:{port}', '{database_name}', 'postgres', 'mysecretpassword')""" + CREATE DATABASE {database_name} + ENGINE = PostgreSQL('{self.ip}:{self.port}', '{database_name}', 'postgres', 'mysecretpassword')""" ) else: self.instance.query( f""" - CREATE DATABASE {name} - ENGINE = PostgreSQL('{ip}:{port}', '{database_name}', 'postgres', 'mysecretpassword', '{schema_name}')""" + CREATE DATABASE {database_name} + ENGINE = PostgreSQL('{self.ip}:{self.port}', '{database_name}', 'postgres', 'mysecretpassword', '{schema_name}')""" ) - def drop_clickhouse_postgres_db(self, name="postgres_database"): - self.instance.query(f"DROP DATABASE IF EXISTS {name}") - if name in self.created_ch_postgres_db_list: - self.created_ch_postgres_db_list.remove(name) + def drop_clickhouse_postgres_db(self, database_name=""): + database_name = self.database_or_default(database_name) + self.instance.query(f"DROP DATABASE IF EXISTS {database_name}") + if database_name in self.created_ch_postgres_db_list: + self.created_ch_postgres_db_list.remove(database_name) def create_materialized_db( self, ip, port, 
materialized_database="test_database", - postgres_database="postgres_database", + postgres_database="", settings=[], table_overrides="", ): + postgres_database = self.database_or_default(postgres_database) self.created_materialized_postgres_db_list.add(materialized_database) self.instance.query(f"DROP DATABASE IF EXISTS {materialized_database}") @@ -207,17 +238,12 @@ class PostgresManager: self.instance.query(f"DROP DATABASE IF EXISTS {materialized_database} SYNC") if materialized_database in self.created_materialized_postgres_db_list: self.created_materialized_postgres_db_list.remove(materialized_database) - assert materialized_database not in self.instance.query("SHOW DATABASES") - def create_and_fill_postgres_table(self, table_name): - conn = get_postgres_conn(ip=self.ip, port=self.port, database=True) - cursor = conn.cursor() - self.create_and_fill_postgres_table_from_cursor(cursor, table_name) - - def create_and_fill_postgres_table_from_cursor(self, cursor, table_name): - create_postgres_table(cursor, table_name) + def create_and_fill_postgres_table(self, table_name, database_name=""): + create_postgres_table(self.cursor, table_name, database_name) + database_name = self.database_or_default(database_name) self.instance.query( - f"INSERT INTO postgres_database.{table_name} SELECT number, number from numbers(50)" + f"INSERT INTO {database_name}.{table_name} SELECT number, number from numbers(50)" ) def create_and_fill_postgres_tables(self, tables_num, numbers=50): diff --git a/tests/integration/test_postgresql_replica_database_engine_2/test.py b/tests/integration/test_postgresql_replica_database_engine_2/test.py index 90d19e9532c..3b5194e8806 100644 --- a/tests/integration/test_postgresql_replica_database_engine_2/test.py +++ b/tests/integration/test_postgresql_replica_database_engine_2/test.py @@ -46,14 +46,34 @@ instance = cluster.add_instance( stay_alive=True, ) +instance2 = cluster.add_instance( + "instance2", + main_configs=["configs/log_conf.xml", "configs/merge_tree_too_many_parts.xml"], + user_configs=["configs/users.xml"], + with_postgres=True, + stay_alive=True, +) + + pg_manager = PostgresManager() +pg_manager2 = PostgresManager() @pytest.fixture(scope="module") def started_cluster(): try: cluster.start() - pg_manager.init(instance, cluster.postgres_ip, cluster.postgres_port) + pg_manager.init( + instance, + cluster.postgres_ip, + cluster.postgres_port, + default_database="test_database", + ) + pg_manager.create_clickhouse_postgres_db() + pg_manager2.init( + instance2, cluster.postgres_ip, cluster.postgres_port, "test_database2" + ) + pg_manager2.create_clickhouse_postgres_db() yield cluster finally: @@ -649,6 +669,59 @@ def test_materialized_view(started_cluster): pg_manager.drop_materialized_db() +def test_too_many_parts(started_cluster): + table = "test_table" + pg_manager2.create_and_fill_postgres_table(table) + pg_manager2.create_materialized_db( + ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + settings=[ + f"materialized_postgresql_tables_list = 'test_table', materialized_postgresql_backoff_min_ms = 100, materialized_postgresql_backoff_max_ms = 100" + ], + ) + check_tables_are_synchronized( + instance2, "test_table", postgres_database=pg_manager2.get_default_database() + ) + assert ( + "50" == instance2.query("SELECT count() FROM test_database.test_table").strip() + ) + + instance2.query("SYSTEM STOP MERGES") + num = 50 + for i in range(10): + instance2.query( + f""" + INSERT INTO {pg_manager2.get_default_database()}.test_table SELECT {num}, 
{num}; + """ + ) + num = num + 1 + for i in range(30): + if num == int( + instance2.query("SELECT count() FROM test_database.test_table") + ) or instance2.contains_in_log("DB::Exception: Too many parts"): + break + time.sleep(1) + print(f"wait sync try {i}") + if instance2.contains_in_log("DB::Exception: Too many parts"): + num = num - 1 + break + assert num == int( + instance2.query("SELECT count() FROM test_database.test_table") + ) + + assert instance2.contains_in_log("DB::Exception: Too many parts") + print(num) + assert num == int(instance2.query("SELECT count() FROM test_database.test_table")) + + instance2.query("SYSTEM START MERGES") + check_tables_are_synchronized( + instance2, "test_table", postgres_database=pg_manager2.get_default_database() + ) + + # assert "200" == instance.query("SELECT count FROM test_database.test_table").strip() + pg_manager2.drop_materialized_db() + + if __name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") From ddbad79c5e67518acebbacaad5be0cad3967ac67 Mon Sep 17 00:00:00 2001 From: tpanetti Date: Fri, 5 May 2023 12:19:35 -0700 Subject: [PATCH 0137/1997] Change SHOW COLUMNS query to display MySQL types in MySQL Compatibility mode This updates the SHOW COLUMN SQL query to display MySQL types when this query is issued by a client connected via MySQL Compatibility port --- .../InterpreterShowColumnsQuery.cpp | 76 +++++++ .../InterpreterShowColumnsQuery.h | 1 + ...show_columns_mysql_compatibility.reference | 213 ++++++++++++++++++ .../02726_show_columns_mysql_compatibility.sh | 115 ++++++++++ 4 files changed, 405 insertions(+) create mode 100644 tests/queries/0_stateless/02726_show_columns_mysql_compatibility.reference create mode 100755 tests/queries/0_stateless/02726_show_columns_mysql_compatibility.sh diff --git a/src/Interpreters/InterpreterShowColumnsQuery.cpp b/src/Interpreters/InterpreterShowColumnsQuery.cpp index c86d3c753c4..c545c621abb 100644 --- a/src/Interpreters/InterpreterShowColumnsQuery.cpp +++ b/src/Interpreters/InterpreterShowColumnsQuery.cpp @@ -45,6 +45,14 @@ SELECT // TODO Interpret query.extended. It is supposed to show internal/virtual columns. Need to fetch virtual column names, see // IStorage::getVirtuals(). We can't easily do that via SQL. 
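+    // (Example of the intended effect, for a hypothetical table `t (id UInt64, s Nullable(String)) ORDER BY id`:
+    // a client on the MySQL port gets `id bigint 0 PRI SOR NULL` and `s text 1 NULL` from SHOW COLUMNS,
+    // per the mapping below, while native clients keep seeing the ClickHouse types.)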
+ // If connected via MySQL Compatibility mode, convert ClickHouse types to MySQL + if (getContext()->getClientInfo().interface == DB::ClientInfo::Interface::MYSQL) + { + rewritten_query += getMySQLQuery(); + } + else { + rewritten_query += "SELECT name AS field, type AS type, startsWith(type, 'Nullable') AS null, trim(concatWithSeparator(' ', if(is_in_primary_key, 'PRI', ''), if (is_in_sorting_key, 'SOR', ''))) AS key, if(default_kind IN ('ALIAS', 'DEFAULT', 'MATERIALIZED'), default_expression, NULL) AS default, '' AS extra "; + } if (query.full) { /// "Full" mode is mostly for MySQL compat @@ -88,6 +96,74 @@ WHERE return rewritten_query; } +String InterpreterShowColumnsQuery::getMySQLQuery() +{ + String mysql_specific_query; + + mysql_specific_query = R"(SELECT name AS field, + CASE + WHEN startsWith(type, 'Nullable') THEN + CASE + WHEN substring(type, 10, length(type) - 10) IN ('UInt8', 'Int8') THEN 'tinyint' + WHEN substring(type, 10, length(type) - 10) IN ('UInt16', 'Int16') THEN 'smallint' + WHEN substring(type, 10, length(type) - 10) IN ('UInt32', 'Int32') THEN 'int' + WHEN substring(type, 10, length(type) - 10) IN ('UInt64', 'Int64', 'UInt128', 'Int128', 'UInt256', 'Int256') THEN 'bigint' + WHEN substring(type, 10, length(type) - 10) = 'Float32' THEN 'float' + WHEN substring(type, 10, length(type) - 10) = 'Float64' THEN 'double' + WHEN substring(type, 10, length(type) - 10) LIKE 'Decimal%' THEN 'decimal' + WHEN substring(type, 10, length(type) - 10) = 'Boolean' THEN 'tinyint' + WHEN substring(type, 10, length(type) - 10) = 'String' THEN 'text' + WHEN substring(type, 10, length(type) - 10) LIKE 'FixedString%' THEN 'text' + WHEN substring(type, 10, length(type) - 10) LIKE 'Date%' THEN 'date' + WHEN substring(type, 10, length(type) - 10) LIKE 'DateTime%' THEN 'datetime' + WHEN substring(type, 10, length(type) - 10) = 'JSON' THEN 'json' + WHEN substring(type, 10, length(type) - 10) = 'UUID' THEN 'binary' + WHEN substring(type, 10, length(type) - 10) LIKE 'Enum%' THEN 'enum' + WHEN substring(type, 10, length(type) - 10) LIKE 'LowCardinality%' THEN 'text' + WHEN substring(type, 10, length(type) - 10) LIKE 'Array%' THEN 'json' + WHEN substring(type, 10, length(type) - 10) LIKE 'Map%' THEN 'json' + WHEN substring(type, 10, length(type) - 10) IN ('SimpleAggregateFunction', 'AggregateFunction') THEN 'text' + WHEN substring(type, 10, length(type) - 10) = 'Nested' THEN 'json' + WHEN substring(type, 10, length(type) - 10) LIKE 'Tuple%' THEN 'json' + WHEN substring(type, 10, length(type) - 10) LIKE 'IPv%' THEN 'text' + WHEN substring(type, 10, length(type) - 10) IN ('Expression', 'Set', 'Nothing', 'Interval') THEN 'text' + ELSE substring(type, 10, length(type) - 10) + END + ELSE + CASE + WHEN type IN ('UInt8', 'Int8') THEN 'tinyint' + WHEN type IN ('UInt16', 'Int16') THEN 'smallint' + WHEN type IN ('UInt32', 'Int32') THEN 'int' + WHEN type IN ('UInt64', 'Int64', 'UInt128', 'Int128', 'UInt256', 'Int256') THEN 'bigint' + WHEN type = 'Float32' THEN 'float' + WHEN type = 'Float64' THEN 'double' + WHEN type LIKE 'Decimal%' THEN 'decimal' + WHEN type = 'Boolean' THEN 'tinyint' + WHEN type = 'String' THEN 'text' + WHEN type LIKE 'FixedString%' THEN 'text' + WHEN type LIKE 'Date%' THEN 'date' + WHEN type LIKE 'DateTime%' THEN 'datetime' + WHEN type = 'JSON' THEN 'json' + WHEN type = 'UUID' THEN 'binary' + WHEN type LIKE 'Enum%' THEN 'enum' + WHEN type LIKE 'LowCardinality%' THEN 'text' + WHEN type LIKE 'Array%' THEN 'json' + WHEN type LIKE 'Map%' THEN 'json' + WHEN type IN ('SimpleAggregateFunction', 
'AggregateFunction') THEN 'text' + WHEN type = 'Nested' THEN 'json' + WHEN type LIKE 'Tuple%' THEN 'json' + WHEN type LIKE 'IPv%' THEN 'text' + WHEN type IN ('Expression', 'Set', 'Nothing', 'Interval') THEN 'text' + ELSE type + END + END AS type, + startsWith(type, 'Nullable') AS null, + trim(concatWithSeparator(' ', if(is_in_primary_key, 'PRI', ''), if (is_in_sorting_key, 'SOR', ''))) AS key, + if(default_kind IN ('ALIAS', 'DEFAULT', 'MATERIALIZED'), default_expression, NULL) AS default, + '' AS extra )"; + + return mysql_specific_query; +} BlockIO InterpreterShowColumnsQuery::execute() { diff --git a/src/Interpreters/InterpreterShowColumnsQuery.h b/src/Interpreters/InterpreterShowColumnsQuery.h index ee6dcabd97b..b843a163978 100644 --- a/src/Interpreters/InterpreterShowColumnsQuery.h +++ b/src/Interpreters/InterpreterShowColumnsQuery.h @@ -26,6 +26,7 @@ private: ASTPtr query_ptr; String getRewrittenQuery(); + String getMySQLQuery(); }; diff --git a/tests/queries/0_stateless/02726_show_columns_mysql_compatibility.reference b/tests/queries/0_stateless/02726_show_columns_mysql_compatibility.reference new file mode 100644 index 00000000000..c9ad94a34c4 --- /dev/null +++ b/tests/queries/0_stateless/02726_show_columns_mysql_compatibility.reference @@ -0,0 +1,213 @@ +Drop tables if they exist +Create tab table +Create pseudo-random database name +Create tab duplicate table +Run MySQL test +field type null key default extra +array_value json 0 NULL +boolean_value tinyint 0 NULL +date32_value date 0 NULL +date_value date 0 NULL +datetime64_value date 0 NULL +datetime_value date 0 NULL +decimal_value decimal 0 NULL +enum_value enum 0 NULL +fixed_string_value text 0 NULL +float32 float 0 NULL +float64 double 0 NULL +int32 int 0 NULL +ipv4_value text 0 NULL +ipv6_value text 0 NULL +json_value text 0 NULL +low_cardinality text 0 NULL +map_value json 0 NULL +nested.nested_int json 0 NULL +nested.nested_string json 0 NULL +nullable_value int 0 NULL +string_value text 0 NULL +tuple_value json 0 NULL +uint64 bigint 0 PRI SOR NULL +uuid_value binary 0 NULL +field type null key default extra +array_value json 0 NULL +boolean_value tinyint 0 NULL +date32_value date 0 NULL +date_value date 0 NULL +datetime64_value date 0 NULL +datetime_value date 0 NULL +decimal_value decimal 0 NULL +enum_value enum 0 NULL +fixed_string_value text 0 NULL +float32 float 0 NULL +float64 double 0 NULL +int32 int 0 NULL +ipv4_value text 0 NULL +ipv6_value text 0 NULL +json_value text 0 NULL +low_cardinality text 0 NULL +map_value json 0 NULL +nested.nested_int json 0 NULL +nested.nested_string json 0 NULL +nullable_value int 0 NULL +string_value text 0 NULL +tuple_value json 0 NULL +uint64 bigint 0 PRI SOR NULL +uuid_value binary 0 NULL +field type null key default extra collation comment privileges +array_value json 0 NULL NULL +boolean_value tinyint 0 NULL NULL +date32_value date 0 NULL NULL +date_value date 0 NULL NULL +datetime64_value date 0 NULL NULL +datetime_value date 0 NULL NULL +decimal_value decimal 0 NULL NULL +enum_value enum 0 NULL NULL +fixed_string_value text 0 NULL NULL +float32 float 0 NULL NULL +float64 double 0 NULL NULL +int32 int 0 NULL NULL +ipv4_value text 0 NULL NULL +ipv6_value text 0 NULL NULL +json_value text 0 NULL NULL +low_cardinality text 0 NULL NULL +map_value json 0 NULL NULL +nested.nested_int json 0 NULL NULL +nested.nested_string json 0 NULL NULL +nullable_value int 0 NULL NULL +string_value text 0 NULL NULL +tuple_value json 0 NULL NULL +uint64 bigint 0 PRI SOR NULL NULL +uuid_value
binary 0 NULL NULL +field type null key default extra +int32 int 0 NULL +nested.nested_int json 0 NULL +uint64 bigint 0 PRI SOR NULL +field type null key default extra +array_value json 0 NULL +boolean_value tinyint 0 NULL +date32_value date 0 NULL +date_value date 0 NULL +datetime64_value date 0 NULL +datetime_value date 0 NULL +decimal_value decimal 0 NULL +enum_value enum 0 NULL +fixed_string_value text 0 NULL +float32 float 0 NULL +float64 double 0 NULL +ipv4_value text 0 NULL +ipv6_value text 0 NULL +json_value text 0 NULL +low_cardinality text 0 NULL +map_value json 0 NULL +nested.nested_string json 0 NULL +nullable_value int 0 NULL +string_value text 0 NULL +tuple_value json 0 NULL +uuid_value binary 0 NULL +field type null key default extra +int32 int 0 NULL +nested.nested_int json 0 NULL +uint64 bigint 0 PRI SOR NULL +field type null key default extra +array_value json 0 NULL +boolean_value tinyint 0 NULL +date32_value date 0 NULL +date_value date 0 NULL +datetime64_value date 0 NULL +datetime_value date 0 NULL +decimal_value decimal 0 NULL +enum_value enum 0 NULL +fixed_string_value text 0 NULL +float32 float 0 NULL +float64 double 0 NULL +ipv4_value text 0 NULL +ipv6_value text 0 NULL +json_value text 0 NULL +low_cardinality text 0 NULL +map_value json 0 NULL +nested.nested_string json 0 NULL +nullable_value int 0 NULL +string_value text 0 NULL +tuple_value json 0 NULL +uuid_value binary 0 NULL +field type null key default extra +int32 int 0 NULL +nested.nested_int json 0 NULL +uint64 bigint 0 PRI SOR NULL +field type null key default extra +array_value json 0 NULL +field type null key default extra +array_value json 0 NULL +boolean_value tinyint 0 NULL +date32_value date 0 NULL +date_value date 0 NULL +datetime64_value date 0 NULL +datetime_value date 0 NULL +decimal_value decimal 0 NULL +enum_value enum 0 NULL +fixed_string_value text 0 NULL +float32 float 0 NULL +float64 double 0 NULL +int32 int 0 NULL +ipv4_value text 0 NULL +ipv6_value text 0 NULL +json_value text 0 NULL +low_cardinality text 0 NULL +map_value json 0 NULL +nested.nested_int json 0 NULL +nested.nested_string json 0 NULL +nullable_value int 0 NULL +string_value text 0 NULL +tuple_value json 0 NULL +uint64 bigint 0 PRI SOR NULL +uuid_value binary 0 NULL +field type null key default extra +array_value json 0 NULL +boolean_value tinyint 0 NULL +date32_value date 0 NULL +date_value date 0 NULL +datetime64_value date 0 NULL +datetime_value date 0 NULL +decimal_value decimal 0 NULL +enum_value enum 0 NULL +fixed_string_value text 0 NULL +float32 float 0 NULL +float64 double 0 NULL +int32 int 0 NULL +ipv4_value text 0 NULL +ipv6_value text 0 NULL +json_value text 0 NULL +low_cardinality text 0 NULL +map_value json 0 NULL +nested.nested_int json 0 NULL +nested.nested_string json 0 NULL +nullable_value int 0 NULL +string_value text 0 NULL +tuple_value json 0 NULL +uint64 bigint 0 PRI SOR NULL +uuid_value binary 0 NULL +field type null key default extra +array_value json 0 NULL +boolean_value tinyint 0 NULL +date32_value date 0 NULL +date_value date 0 NULL +datetime64_value date 0 NULL +datetime_value date 0 NULL +decimal_value decimal 0 NULL +enum_value enum 0 NULL +fixed_string_value text 0 NULL +float32 float 0 NULL +float64 double 0 NULL +int32 int 0 NULL +ipv4_value text 0 NULL +ipv6_value text 0 NULL +json_value text 0 NULL +low_cardinality text 0 NULL +map_value json 0 NULL +nested.nested_int json 0 NULL +nested.nested_string json 0 NULL +nullable_value int 0 NULL +string_value text 0 NULL +tuple_value json 0 
NULL
+uint64 bigint 0 PRI SOR NULL
+uuid_value binary 0 NULL
diff --git a/tests/queries/0_stateless/02726_show_columns_mysql_compatibility.sh b/tests/queries/0_stateless/02726_show_columns_mysql_compatibility.sh
new file mode 100755
index 00000000000..5324496edd3
--- /dev/null
+++ b/tests/queries/0_stateless/02726_show_columns_mysql_compatibility.sh
@@ -0,0 +1,115 @@
+#!/bin/bash
+
+# This script tests the MySQL compatibility of the SHOW COLUMNS command in ClickHouse
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+USER="default"
+PASSWORD=""
+HOST="127.0.0.1"
+PORT=9004
+
+# First run the clickhouse test to create the ClickHouse Tables
+
+echo "Drop tables if they exist"
+${CLICKHOUSE_LOCAL} --query "DROP TABLE IF EXISTS tab"
+${CLICKHOUSE_LOCAL} --query "DROP DATABASE IF EXISTS database_123456789abcde"
+${CLICKHOUSE_LOCAL} --query "DROP TABLE IF EXISTS database_123456789abcde.tab"
+
+echo "Create tab table "
+${CLICKHOUSE_LOCAL} --query "
+    CREATE TABLE tab
+    (
+        uint64 UInt64,
+        int32 Nullable(Int32),
+        float32 Float32,
+        float64 Float64,
+        decimal_value Decimal(10, 2),
+        boolean_value UInt8, -- Use 0 for false, 1 for true
+        string_value String,
+        fixed_string_value FixedString(10),
+        date_value Date,
+        date32_value Date32,
+        datetime_value DateTime,
+        datetime64_value DateTime64(3),
+        json_value String, -- Store JSON as a string
+        uuid_value UUID,
+        enum_value Enum8('apple' = 1, 'banana' = 2, 'orange' = 3),
+        low_cardinality LowCardinality(String),
+        array_value Array(Int32),
+        map_value Map(String, Int32),
+        tuple_value Tuple(Int32, String),
+        nullable_value Nullable(Int32),
+        ipv4_value IPv4,
+        ipv6_value IPv6,
+        nested Nested
+        (
+            nested_int Int32,
+            nested_string String
+        )
+    ) ENGINE = MergeTree
+    ORDER BY uint64;
+    "
+
+
+echo "Create pseudo-random database name"
+${CLICKHOUSE_LOCAL} --query "CREATE DATABASE database_123456789abcde;"
+
+echo "Create tab duplicate table"
+${CLICKHOUSE_LOCAL} --query "
+    CREATE TABLE database_123456789abcde.tab
+    (
+        uint64 UInt64,
+        int32 Nullable(Int32),
+        float32 Float32,
+        float64 Float64,
+        decimal_value Decimal(10, 2),
+        boolean_value UInt8, -- Use 0 for false, 1 for true
+        string_value String,
+        fixed_string_value FixedString(10),
+        date_value Date,
+        date32_value Date32,
+        datetime_value DateTime,
+        datetime64_value DateTime64(3),
+        json_value String, -- Store JSON as a string
+        uuid_value UUID,
+        enum_value Enum8('apple' = 1, 'banana' = 2, 'orange' = 3),
+        low_cardinality LowCardinality(String),
+        array_value Array(Int32),
+        map_value Map(String, Int32),
+        tuple_value Tuple(Int32, String),
+        nullable_value Nullable(Int32),
+        ipv4_value IPv4,
+        ipv6_value IPv6,
+        nested Nested
+        (
+            nested_int Int32,
+            nested_string String
+        )
+    ) ENGINE = MergeTree
+    ORDER BY uint64;
+    "
+
+# Write sql to temp file
+TEMP_FILE=$(mktemp)
+
+cat <<EOT > $TEMP_FILE
+SHOW COLUMNS FROM tab;
+SHOW EXTENDED COLUMNS FROM tab;
+SHOW FULL COLUMNS FROM tab;
+SHOW COLUMNS FROM tab LIKE '%int%';
+SHOW COLUMNS FROM tab NOT LIKE '%int%';
+SHOW COLUMNS FROM tab ILIKE '%INT%';
+SHOW COLUMNS FROM tab NOT ILIKE '%INT%';
+SHOW COLUMNS FROM tab WHERE field LIKE '%int%';
+SHOW COLUMNS FROM tab LIMIT 1;
+SHOW COLUMNS FROM tab;
+SHOW COLUMNS FROM tab FROM database_123456789abcde;
+SHOW COLUMNS FROM database_123456789abcde.tab;
+DROP DATABASE database_123456789abcde;
+DROP TABLE tab;
+EOT
+
+# Now run the MySQL test script on the ClickHouse DB
+echo "Run MySQL test"
+mysql --user="$USER" --password="$PASSWORD" --host="$HOST" --port="$PORT" < $TEMP_FILE
+
+# Clean up the temp file
+rm $TEMP_FILE
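The rewritten SHOW COLUMNS query earlier in this patch maps native ClickHouse type names onto their closest MySQL equivalents, which is what the reference file and the test script above exercise. As a rough, self-contained illustration of that mapping idea (a hypothetical helper written for this note, not code from the patch; only a few branches of the full CASE expression are reproduced):

#include <string>
#include <string_view>

// Hypothetical sketch of the ClickHouse -> MySQL type-name mapping performed by
// the SHOW COLUMNS rewrite above; the real CASE expression covers many more types.
std::string mapToMySQLTypeName(std::string_view ch_type)
{
    if (ch_type == "UInt64" || ch_type == "Int64")
        return "bigint";
    if (ch_type == "Int32")
        return "int";
    if (ch_type == "String" || ch_type.starts_with("FixedString") || ch_type.starts_with("IPv"))
        return "text";
    if (ch_type == "Nested" || ch_type.starts_with("Tuple"))
        return "json";
    return std::string(ch_type); // like the ELSE branch: pass the name through unchanged
}

For example, `mapToMySQLTypeName("Tuple(Int32, String)")` yields `json`, matching the `tuple_value json` rows in the reference file above.

From 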
297188ce583a94f9942f7fd141a85dbdcfdcd587 Mon Sep 17 00:00:00 2001 From: zvonand Date: Tue, 9 May 2023 22:37:25 +0200 Subject: [PATCH 0138/1997] fix Date32 --- src/DataTypes/Serializations/SerializationDate32.cpp | 11 +++++++---- src/DataTypes/Serializations/SerializationDate32.h | 5 ++++- src/Functions/FunctionsConversion.h | 7 +++++-- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/src/DataTypes/Serializations/SerializationDate32.cpp b/src/DataTypes/Serializations/SerializationDate32.cpp index ef92202f89d..8dcaee8d266 100644 --- a/src/DataTypes/Serializations/SerializationDate32.cpp +++ b/src/DataTypes/Serializations/SerializationDate32.cpp @@ -11,7 +11,7 @@ namespace DB void SerializationDate32::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const { - writeDateText(ExtendedDayNum(assert_cast(column).getData()[row_num]), ostr); + writeDateText(ExtendedDayNum(assert_cast(column).getData()[row_num]), ostr, time_zone); } void SerializationDate32::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const @@ -24,7 +24,7 @@ void SerializationDate32::deserializeWholeText(IColumn & column, ReadBuffer & is void SerializationDate32::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { ExtendedDayNum x; - readDateText(x, istr); + readDateText(x, istr, time_zone); assert_cast(column).getData().push_back(x); } @@ -44,7 +44,7 @@ void SerializationDate32::deserializeTextQuoted(IColumn & column, ReadBuffer & i { ExtendedDayNum x; assertChar('\'', istr); - readDateText(x, istr); + readDateText(x, istr, time_zone); assertChar('\'', istr); assert_cast(column).getData().push_back(x); /// It's important to do this at the end - for exception safety. 
} @@ -60,7 +60,7 @@ void SerializationDate32::deserializeTextJSON(IColumn & column, ReadBuffer & ist { ExtendedDayNum x; assertChar('"', istr); - readDateText(x, istr); + readDateText(x, istr, time_zone); assertChar('"', istr); assert_cast(column).getData().push_back(x); } @@ -78,4 +78,7 @@ void SerializationDate32::deserializeTextCSV(IColumn & column, ReadBuffer & istr readCSV(value, istr); assert_cast(column).getData().push_back(value.getExtenedDayNum()); } +SerializationDate32::SerializationDate32(const TimezoneMixin & time_zone_) : TimezoneMixin(time_zone_) +{ +} } diff --git a/src/DataTypes/Serializations/SerializationDate32.h b/src/DataTypes/Serializations/SerializationDate32.h index 484b4f4a958..e8e8f1a74d6 100644 --- a/src/DataTypes/Serializations/SerializationDate32.h +++ b/src/DataTypes/Serializations/SerializationDate32.h @@ -1,12 +1,15 @@ #pragma once #include +#include namespace DB { -class SerializationDate32 final : public SerializationNumber +class SerializationDate32 final : public SerializationNumber, public TimezoneMixin { public: + explicit SerializationDate32(const TimezoneMixin & time_zone_ = TimezoneMixin()); + void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index e0e188f68c2..2f751e72222 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -731,7 +731,10 @@ struct FormatImpl template static ReturnType execute(const DataTypeDate32::FieldType x, WriteBuffer & wb, const DataTypeDate32 *, const DateLUTImpl * time_zone) { + std::cerr << "BEFORE: " << std::endl; + std::cerr << time_zone->getTimeZone() << std::endl; writeDateText(ExtendedDayNum(x), wb, *time_zone); + std::cerr << "AFTER" << std::endl; return ReturnType(true); } }; @@ -831,7 +834,7 @@ struct ConvertImpl) + if constexpr (std::is_same_v || std::is_same_v) time_zone = &DateLUT::instance(); /// For argument of Date or DateTime type, second argument with time zone could be specified. if constexpr (std::is_same_v || std::is_same_v) @@ -1765,7 +1768,7 @@ public: || std::is_same_v // toDate(value[, timezone : String]) || std::is_same_v // TODO: shall we allow timestamp argument for toDate? DateTime knows nothing about timezones and this argument is ignored below. 
- // toDate(value[, timezone : String]) + // toDate32(value[, timezone : String]) || std::is_same_v // toDateTime(value[, timezone: String]) || std::is_same_v From 8d0644e79301a9a0ccf67b66d44c43e4766d8aa7 Mon Sep 17 00:00:00 2001 From: zvonand Date: Tue, 9 May 2023 23:02:03 +0200 Subject: [PATCH 0139/1997] cleanup --- src/Functions/FunctionsConversion.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index 2f751e72222..b10d9f4a31a 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -731,10 +731,7 @@ struct FormatImpl template static ReturnType execute(const DataTypeDate32::FieldType x, WriteBuffer & wb, const DataTypeDate32 *, const DateLUTImpl * time_zone) { - std::cerr << "BEFORE: " << std::endl; - std::cerr << time_zone->getTimeZone() << std::endl; writeDateText(ExtendedDayNum(x), wb, *time_zone); - std::cerr << "AFTER" << std::endl; return ReturnType(true); } }; From 07630ef43fd40f46dfba9adca487c3b69ca2ad3c Mon Sep 17 00:00:00 2001 From: zvonand Date: Thu, 11 May 2023 01:10:34 +0200 Subject: [PATCH 0140/1997] upd --- src/Client/ClientBase.cpp | 4 +--- src/Client/Connection.cpp | 8 ++++---- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 8b5db85fc02..fad9494ba4b 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1082,9 +1082,7 @@ void ClientBase::onProgress(const Progress & value) void ClientBase::onTimezoneUpdate(const String & tz) { - Settings settings; - settings.session_timezone = tz; - global_context->applySettingsChanges(settings.changes()); + global_context->setSetting("session_timezone", tz); } diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 062f05105aa..86585d805d9 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -355,10 +355,10 @@ void Connection::receiveHello() nonce.emplace(read_nonce); } } - else if (packet_type == Protocol::Server::TimezoneUpdate) - { - // skip this packet at hello, will receive and process it later - } +// else if (packet_type == Protocol::Server::TimezoneUpdate) +// { +// // skip this packet at hello, will receive and process it later +// } else if (packet_type == Protocol::Server::Exception) receiveException()->rethrow(); else From 58bdcc29315a712e1255c13d31669f4545de9edb Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 10 May 2023 23:55:13 +0000 Subject: [PATCH 0141/1997] allow to cast IPv6 to IPv4 for address in proper mapping block --- src/Functions/FunctionsConversion.h | 30 ++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index 9c4085f9745..9cdd09780e3 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -57,6 +57,7 @@ #include #include #include +#include #include @@ -217,13 +218,13 @@ struct ConvertImpl } else if constexpr ( (std::is_same_v != std::is_same_v) - && !(is_any_of || is_any_of) + && !(is_any_of || is_any_of) ) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Conversion from {} to {} is not supported", TypeName, TypeName); } - else if constexpr (std::is_same_v != std::is_same_v) + else if constexpr (std::is_same_v != std::is_same_v && !std::is_same_v) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Conversion between numeric types and IPv6 is not supported. 
" @@ -304,7 +305,30 @@ struct ConvertImpl } else { - if constexpr (std::is_same_v && std::is_same_v) + if constexpr (std::is_same_v && std::is_same_v) + { + const uint8_t ip4_cidr[] {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00}; + const uint8_t * src = reinterpret_cast(&vec_from[i].toUnderType()); + if (!matchIPv6Subnet(src, ip4_cidr, 96)) + throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "IPv6 in column {} is not in IPv4 mapping block", named_from.column->getName()); + + uint8_t * dst = reinterpret_cast(&vec_to[i].toUnderType()); + if constexpr (std::endian::native == std::endian::little) + { + dst[0] = src[15]; + dst[1] = src[14]; + dst[2] = src[13]; + dst[3] = src[12]; + } + else + { + dst[3] = src[15]; + dst[2] = src[14]; + dst[1] = src[13]; + dst[0] = src[12]; + } + } + else if constexpr (std::is_same_v && std::is_same_v) vec_to[i] = static_cast(static_cast(vec_from[i])); else vec_to[i] = static_cast(vec_from[i]); From 6b0bd698d36014a5eac052857bac2185a1f45f41 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 11 May 2023 04:17:53 +0200 Subject: [PATCH 0142/1997] Fix mistake --- .../02530_dictionaries_update_field.reference | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/queries/0_stateless/02530_dictionaries_update_field.reference b/tests/queries/0_stateless/02530_dictionaries_update_field.reference index 88c910e0313..40f2c0ee400 100644 --- a/tests/queries/0_stateless/02530_dictionaries_update_field.reference +++ b/tests/queries/0_stateless/02530_dictionaries_update_field.reference @@ -4,13 +4,13 @@ flat SELECT key, value FROM dict_flat ORDER BY key ASC; 1 First INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); -SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; SELECT key, value FROM dict_flat ORDER BY key ASC; 1 First 2 Second INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; SELECT key, value FROM dict_flat ORDER BY key ASC; 1 First 2 SecondUpdated @@ -21,13 +21,13 @@ flat/custom SELECT key, value FROM dict_flat_custom ORDER BY key ASC; 1 First INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); -SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; SELECT key, value FROM dict_flat_custom ORDER BY key ASC; 1 First 2 Second INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; SELECT key, value FROM dict_flat_custom ORDER BY key ASC; 1 First 2 SecondUpdated @@ -38,13 +38,13 @@ hashed SELECT key, value FROM dict_hashed ORDER BY key ASC; 1 First INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); -SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; +SELECT sleepEachRow(1) FROM 
numbers(10) FORMAT Null; SELECT key, value FROM dict_hashed ORDER BY key ASC; 1 First 2 Second INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; SELECT key, value FROM dict_hashed ORDER BY key ASC; 1 First 2 SecondUpdated @@ -55,13 +55,13 @@ hashed/custom SELECT key, value FROM dict_hashed_custom ORDER BY key ASC; 1 First INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); -SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; SELECT key, value FROM dict_hashed_custom ORDER BY key ASC; 1 First 2 Second INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; SELECT key, value FROM dict_hashed_custom ORDER BY key ASC; 1 First 2 SecondUpdated @@ -72,13 +72,13 @@ complex_key_hashed SELECT key, value FROM dict_complex_key_hashed ORDER BY key ASC; 1 First INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); -SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; SELECT key, value FROM dict_complex_key_hashed ORDER BY key ASC; 1 First 2 Second INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; SELECT key, value FROM dict_complex_key_hashed ORDER BY key ASC; 1 First 2 SecondUpdated @@ -89,13 +89,13 @@ complex_key_hashed/custom SELECT key, value FROM dict_complex_key_hashed_custom ORDER BY key ASC; 1 First INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); -SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; SELECT key, value FROM dict_complex_key_hashed_custom ORDER BY key ASC; 1 First 2 Second INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; SELECT key, value FROM dict_complex_key_hashed_custom ORDER BY key ASC; 1 First 2 SecondUpdated From 65d28a959ff5b21199c2b20d8dcb7c7b399f314d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 11 May 2023 04:26:29 +0200 Subject: [PATCH 0143/1997] Update integration tests (1/2) --- .../configs/config.d/storage_conf.xml | 1 + .../test_merge_tree_hdfs/configs/config.d/storage_conf.xml | 1 + .../test_merge_tree_s3_failover/configs/config.xml | 4 ++++ .../test_s3_zero_copy_replication/configs/config.d/s3.xml | 1 + 4 files changed, 7 insertions(+) diff --git 
a/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml b/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml index cb87abcc693..d69fe96a3e2 100644 --- a/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml +++ b/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml @@ -45,5 +45,6 @@ true + 1.0 diff --git a/tests/integration/test_merge_tree_hdfs/configs/config.d/storage_conf.xml b/tests/integration/test_merge_tree_hdfs/configs/config.d/storage_conf.xml index 890c396ed95..7d59081486b 100644 --- a/tests/integration/test_merge_tree_hdfs/configs/config.d/storage_conf.xml +++ b/tests/integration/test_merge_tree_hdfs/configs/config.d/storage_conf.xml @@ -28,5 +28,6 @@ 0 + 1.0 diff --git a/tests/integration/test_merge_tree_s3_failover/configs/config.xml b/tests/integration/test_merge_tree_s3_failover/configs/config.xml index feb537ebbce..743d75d9a21 100644 --- a/tests/integration/test_merge_tree_s3_failover/configs/config.xml +++ b/tests/integration/test_merge_tree_s3_failover/configs/config.xml @@ -15,4 +15,8 @@ 500 ./clickhouse/ users.xml + + + 1.0 + diff --git a/tests/integration/test_s3_zero_copy_replication/configs/config.d/s3.xml b/tests/integration/test_s3_zero_copy_replication/configs/config.d/s3.xml index f7d9efc2cae..55c35999703 100644 --- a/tests/integration/test_s3_zero_copy_replication/configs/config.d/s3.xml +++ b/tests/integration/test_s3_zero_copy_replication/configs/config.d/s3.xml @@ -70,6 +70,7 @@ 1024 1 true + 1.0 From 108e256578574b26f8adeb3916b15238f0557ee9 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Thu, 11 May 2023 16:17:52 +0000 Subject: [PATCH 0144/1997] allow to cast IPv4 to IPv6 --- src/Functions/FunctionsConversion.h | 32 ++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index 9cdd09780e3..5bf59f33cb5 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -218,13 +218,13 @@ struct ConvertImpl } else if constexpr ( (std::is_same_v != std::is_same_v) - && !(is_any_of || is_any_of) + && !(is_any_of || is_any_of) ) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Conversion from {} to {} is not supported", TypeName, TypeName); } - else if constexpr (std::is_same_v != std::is_same_v && !std::is_same_v) + else if constexpr (std::is_same_v != std::is_same_v && !(std::is_same_v || std::is_same_v)) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Conversion between numeric types and IPv6 is not supported. 
" @@ -322,10 +322,32 @@ struct ConvertImpl } else { - dst[3] = src[15]; - dst[2] = src[14]; - dst[1] = src[13]; dst[0] = src[12]; + dst[1] = src[13]; + dst[2] = src[14]; + dst[3] = src[15]; + } + } + else if constexpr (std::is_same_v && std::is_same_v) + { + const uint8_t * src = reinterpret_cast(&vec_from[i].toUnderType()); + uint8_t * dst = reinterpret_cast(&vec_to[i].toUnderType()); + std::memset(dst, '\0', IPV6_BINARY_LENGTH); + dst[10] = dst[11] = 0xff; + + if constexpr (std::endian::native == std::endian::little) + { + dst[12] = src[3]; + dst[13] = src[2]; + dst[14] = src[1]; + dst[15] = src[0]; + } + else + { + dst[12] = src[0]; + dst[13] = src[1]; + dst[14] = src[2]; + dst[15] = src[3]; } } else if constexpr (std::is_same_v && std::is_same_v) From c9e752fdc5c4cc401df240f7cd5f77586d9b542d Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 11 May 2023 18:09:46 +0200 Subject: [PATCH 0145/1997] Fix --- tests/integration/helpers/postgres_utility.py | 51 ++++---- .../test.py | 113 +++++++----------- .../test.py | 64 +++++----- 3 files changed, 99 insertions(+), 129 deletions(-) diff --git a/tests/integration/helpers/postgres_utility.py b/tests/integration/helpers/postgres_utility.py index 1a00faf0f9d..3c8a23b15a2 100644 --- a/tests/integration/helpers/postgres_utility.py +++ b/tests/integration/helpers/postgres_utility.py @@ -87,7 +87,9 @@ def create_postgres_table( else: name = f"{database_name}.{table_name}" drop_postgres_table(cursor, name) - cursor.execute(template.format(name)) + query = template.format(name) + cursor.execute(query) + print(f"Query: {query}") if replica_identity_full: cursor.execute(f"ALTER TABLE {name} REPLICA IDENTITY FULL;") @@ -129,6 +131,9 @@ class PostgresManager: self.prepare() raise ex + def execute(self, query): + self.cursor.execute(query) + def prepare(self): self.conn = get_postgres_conn(ip=self.ip, port=self.port) self.cursor = self.conn.cursor() @@ -141,6 +146,7 @@ class PostgresManager: database_name=self.default_database, ) self.cursor = self.conn.cursor() + self.create_clickhouse_postgres_db() def clear(self): if self.conn.closed == 0: @@ -164,11 +170,11 @@ class PostgresManager: return self.conn.cursor() def database_or_default(self, database_name): - if database_name == "" and self.default_database == "": - raise Exception("Database name is empty") - if database_name == "": - database_name = self.default_database - return database_name + if database_name != "": + return database_name + if self.default_database != "": + return self.default_database + raise Exception("Database name is empty") def create_postgres_db(self, database_name=""): database_name = self.database_or_default(database_name) @@ -186,8 +192,11 @@ class PostgresManager: self, database_name="", schema_name="", + postgres_database="", ): database_name = self.database_or_default(database_name) + if postgres_database == "": + postgres_database = database_name self.drop_clickhouse_postgres_db(database_name) self.created_ch_postgres_db_list.add(database_name) @@ -195,13 +204,13 @@ class PostgresManager: self.instance.query( f""" CREATE DATABASE {database_name} - ENGINE = PostgreSQL('{self.ip}:{self.port}', '{database_name}', 'postgres', 'mysecretpassword')""" + ENGINE = PostgreSQL('{self.ip}:{self.port}', '{postgres_database}', 'postgres', 'mysecretpassword')""" ) else: self.instance.query( f""" CREATE DATABASE {database_name} - ENGINE = PostgreSQL('{self.ip}:{self.port}', '{database_name}', 'postgres', 'mysecretpassword', '{schema_name}')""" + ENGINE = PostgreSQL('{self.ip}:{self.port}', 
'{postgres_database}', 'postgres', 'mysecretpassword', '{schema_name}')""" ) def drop_clickhouse_postgres_db(self, database_name=""): @@ -239,6 +248,16 @@ class PostgresManager: if materialized_database in self.created_materialized_postgres_db_list: self.created_materialized_postgres_db_list.remove(materialized_database) + def create_postgres_schema(self, name): + create_postgres_schema(self.cursor, name) + + def create_postgres_table( + self, table_name, database_name="", template=postgres_table_template + ): + create_postgres_table( + self.cursor, table_name, database_name=database_name, template=template + ) + def create_and_fill_postgres_table(self, table_name, database_name=""): create_postgres_table(self.cursor, table_name, database_name) database_name = self.database_or_default(database_name) @@ -246,22 +265,14 @@ class PostgresManager: f"INSERT INTO {database_name}.{table_name} SELECT number, number from numbers(50)" ) - def create_and_fill_postgres_tables(self, tables_num, numbers=50): - conn = get_postgres_conn(ip=self.ip, port=self.port, database=True) - cursor = conn.cursor() - self.create_and_fill_postgres_tables_from_cursor( - cursor, tables_num, numbers=numbers - ) - - def create_and_fill_postgres_tables_from_cursor( - self, cursor, tables_num, numbers=50 - ): + def create_and_fill_postgres_tables(self, tables_num, numbers=50, database_name=""): for i in range(tables_num): table_name = f"postgresql_replica_{i}" - create_postgres_table(cursor, table_name) + create_postgres_table(self.cursor, table_name, database_name) if numbers > 0: + db = self.database_or_default(database_name) self.instance.query( - f"INSERT INTO postgres_database.{table_name} SELECT number, number from numbers({numbers})" + f"INSERT INTO {db}.{table_name} SELECT number, number from numbers({numbers})" ) diff --git a/tests/integration/test_postgresql_replica_database_engine_1/test.py b/tests/integration/test_postgresql_replica_database_engine_1/test.py index 377b1c89efc..1eb2efc73a5 100644 --- a/tests/integration/test_postgresql_replica_database_engine_1/test.py +++ b/tests/integration/test_postgresql_replica_database_engine_1/test.py @@ -46,7 +46,12 @@ pg_manager = PostgresManager() def started_cluster(): try: cluster.start() - pg_manager.init(instance, cluster.postgres_ip, cluster.postgres_port) + pg_manager.init( + instance, + cluster.postgres_ip, + cluster.postgres_port, + default_database="postgres_database", + ) yield cluster finally: @@ -74,16 +79,10 @@ def test_load_and_sync_all_database_tables(started_cluster): def test_replicating_dml(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() NUM_TABLES = 5 for i in range(NUM_TABLES): - create_postgres_table(cursor, "postgresql_replica_{}".format(i)) + pg_manager.create_postgres_table(f"postgresql_replica_{i}") instance.query( "INSERT INTO postgres_database.postgresql_replica_{} SELECT number, {} from numbers(50)".format( i, i @@ -96,39 +95,29 @@ def test_replicating_dml(started_cluster): for i in range(NUM_TABLES): instance.query( - "INSERT INTO postgres_database.postgresql_replica_{} SELECT 50 + number, {} from numbers(1000)".format( - i, i - ) + f"INSERT INTO postgres_database.postgresql_replica_{i} SELECT 50 + number, {i} from numbers(1000)" ) check_several_tables_are_synchronized(instance, NUM_TABLES) for i in range(NUM_TABLES): - cursor.execute( - "UPDATE postgresql_replica_{} SET value = {} * {} WHERE key < 50;".format( - i, i, i - ) + 
pg_manager.execute( + f"UPDATE postgresql_replica_{i} SET value = {i} * {i} WHERE key < 50;" ) - cursor.execute( - "UPDATE postgresql_replica_{} SET value = {} * {} * {} WHERE key >= 50;".format( - i, i, i, i - ) + pg_manager.execute( + f"UPDATE postgresql_replica_{i} SET value = {i} * {i} * {i} WHERE key >= 50;" ) + check_several_tables_are_synchronized(instance, NUM_TABLES) for i in range(NUM_TABLES): - cursor.execute( - "DELETE FROM postgresql_replica_{} WHERE (value*value + {}) % 2 = 0;".format( - i, i - ) + pg_manager.execute( + f"DELETE FROM postgresql_replica_{i} WHERE (value*value + {i}) % 2 = 0;" ) - cursor.execute( - "UPDATE postgresql_replica_{} SET value = value - (value % 7) WHERE key > 128 AND key < 512;".format( - i - ) - ) - cursor.execute( - "DELETE FROM postgresql_replica_{} WHERE key % 7 = 1;".format(i, i) + pg_manager.execute( + f"UPDATE postgresql_replica_{i} SET value = value - (value % 7) WHERE key > 128 AND key < 512;" ) + pg_manager.execute(f"DELETE FROM postgresql_replica_{i} WHERE key % 7 = 1;") + check_several_tables_are_synchronized(instance, NUM_TABLES) @@ -288,13 +277,7 @@ def test_load_and_sync_subset_of_database_tables(started_cluster): def test_changing_replica_identity_value(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - create_postgres_table(cursor, "postgresql_replica") + pg_manager.create_postgres_table("postgresql_replica") instance.query( "INSERT INTO postgres_database.postgresql_replica SELECT 50 + number, number from numbers(50)" ) @@ -307,7 +290,7 @@ def test_changing_replica_identity_value(started_cluster): "INSERT INTO postgres_database.postgresql_replica SELECT 100 + number, number from numbers(50)" ) check_tables_are_synchronized(instance, "postgresql_replica") - cursor.execute("UPDATE postgresql_replica SET key=key-25 WHERE key<100 ") + pg_manager.execute("UPDATE postgresql_replica SET key=key-25 WHERE key<100 ") check_tables_are_synchronized(instance, "postgresql_replica") @@ -331,18 +314,13 @@ def test_clickhouse_restart(started_cluster): def test_replica_identity_index(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, + pg_manager.create_postgres_table( + "postgresql_replica", template=postgres_table_template_3 ) - cursor = conn.cursor() - - create_postgres_table( - cursor, "postgresql_replica", template=postgres_table_template_3 + pg_manager.execute("CREATE unique INDEX idx on postgresql_replica(key1, key2);") + pg_manager.execute( + "ALTER TABLE postgresql_replica REPLICA IDENTITY USING INDEX idx" ) - cursor.execute("CREATE unique INDEX idx on postgresql_replica(key1, key2);") - cursor.execute("ALTER TABLE postgresql_replica REPLICA IDENTITY USING INDEX idx") instance.query( "INSERT INTO postgres_database.postgresql_replica SELECT number, number, number, number from numbers(50, 10)" ) @@ -355,35 +333,29 @@ def test_replica_identity_index(started_cluster): ) check_tables_are_synchronized(instance, "postgresql_replica", order_by="key1") - cursor.execute("UPDATE postgresql_replica SET key1=key1-25 WHERE key1<100 ") - cursor.execute("UPDATE postgresql_replica SET key2=key2-25 WHERE key2>100 ") - cursor.execute("UPDATE postgresql_replica SET value1=value1+100 WHERE key1<100 ") - cursor.execute("UPDATE postgresql_replica SET value2=value2+200 WHERE key2>100 ") + pg_manager.execute("UPDATE postgresql_replica SET key1=key1-25 WHERE key1<100 ") + 
pg_manager.execute("UPDATE postgresql_replica SET key2=key2-25 WHERE key2>100 ") + pg_manager.execute( + "UPDATE postgresql_replica SET value1=value1+100 WHERE key1<100 " + ) + pg_manager.execute( + "UPDATE postgresql_replica SET value2=value2+200 WHERE key2>100 " + ) check_tables_are_synchronized(instance, "postgresql_replica", order_by="key1") - cursor.execute("DELETE FROM postgresql_replica WHERE key2<75;") + pg_manager.execute("DELETE FROM postgresql_replica WHERE key2<75;") check_tables_are_synchronized(instance, "postgresql_replica", order_by="key1") def test_table_schema_changes(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() NUM_TABLES = 5 for i in range(NUM_TABLES): - create_postgres_table( - cursor, - "postgresql_replica_{}".format(i), - template=postgres_table_template_2, + pg_manager.create_postgres_table( + f"postgresql_replica_{i}", template=postgres_table_template_2 ) instance.query( - "INSERT INTO postgres_database.postgresql_replica_{} SELECT number, {}, {}, {} from numbers(25)".format( - i, i, i, i - ) + f"INSERT INTO postgres_database.postgresql_replica_{i} SELECT number, {i}, {i}, {i} from numbers(25)" ) pg_manager.create_materialized_db( @@ -393,9 +365,7 @@ def test_table_schema_changes(started_cluster): for i in range(NUM_TABLES): instance.query( - "INSERT INTO postgres_database.postgresql_replica_{} SELECT 25 + number, {}, {}, {} from numbers(25)".format( - i, i, i, i - ) + f"INSERT INTO postgres_database.postgresql_replica_{i} SELECT 25 + number, {i}, {i}, {i} from numbers(25)" ) check_several_tables_are_synchronized(instance, NUM_TABLES) @@ -444,10 +414,7 @@ def test_many_concurrent_queries(started_cluster): port=started_cluster.postgres_port, database=True, ) - cursor = conn.cursor() - pg_manager.create_and_fill_postgres_tables_from_cursor( - cursor, NUM_TABLES, numbers=10000 - ) + pg_manager.create_and_fill_postgres_tables(NUM_TABLES, numbers=10000) def attack(thread_id): print("thread {}".format(thread_id)) diff --git a/tests/integration/test_postgresql_replica_database_engine_2/test.py b/tests/integration/test_postgresql_replica_database_engine_2/test.py index 3b5194e8806..2b17024f417 100644 --- a/tests/integration/test_postgresql_replica_database_engine_2/test.py +++ b/tests/integration/test_postgresql_replica_database_engine_2/test.py @@ -67,13 +67,11 @@ def started_cluster(): instance, cluster.postgres_ip, cluster.postgres_port, - default_database="test_database", + default_database="postgres_database", ) - pg_manager.create_clickhouse_postgres_db() pg_manager2.init( - instance2, cluster.postgres_ip, cluster.postgres_port, "test_database2" + instance2, cluster.postgres_ip, cluster.postgres_port, "postgres_database2" ) - pg_manager2.create_clickhouse_postgres_db() yield cluster finally: @@ -88,11 +86,10 @@ def setup_teardown(): def test_add_new_table_to_replication(started_cluster): - cursor = pg_manager.get_db_cursor() - cursor.execute("DROP TABLE IF EXISTS test_table") + pg_manager.execute("DROP TABLE IF EXISTS test_table") NUM_TABLES = 5 - pg_manager.create_and_fill_postgres_tables_from_cursor(cursor, NUM_TABLES, 10000) + pg_manager.create_and_fill_postgres_tables(NUM_TABLES, 10000) pg_manager.create_materialized_db( ip=started_cluster.postgres_ip, port=started_cluster.postgres_port ) @@ -105,7 +102,7 @@ def test_add_new_table_to_replication(started_cluster): ) table_name = "postgresql_replica_5" - 
pg_manager.create_and_fill_postgres_table_from_cursor(cursor, table_name) + pg_manager.create_and_fill_postgres_table(table_name) result = instance.query("SHOW CREATE DATABASE test_database") assert ( @@ -158,7 +155,7 @@ def test_add_new_table_to_replication(started_cluster): ) table_name = "postgresql_replica_6" - create_postgres_table(cursor, table_name) + pg_manager.create_postgres_table(table_name) instance.query( "INSERT INTO postgres_database.{} SELECT number, number from numbers(10000)".format( table_name @@ -169,7 +166,7 @@ def test_add_new_table_to_replication(started_cluster): instance.restart_clickhouse() table_name = "postgresql_replica_7" - create_postgres_table(cursor, table_name) + pg_manager.create_postgres_table(table_name) instance.query( "INSERT INTO postgres_database.{} SELECT number, number from numbers(10000)".format( table_name @@ -271,8 +268,7 @@ def test_remove_table_from_replication(started_cluster): == ")\\nSETTINGS materialized_postgresql_tables_list = \\'postgresql_replica_0,postgresql_replica_2,postgresql_replica_3,postgresql_replica_4\\'\n" ) - cursor = pg_manager.get_db_cursor() - cursor.execute(f"drop table if exists postgresql_replica_0;") + pg_manager.execute(f"drop table if exists postgresql_replica_0;") # Removing from replication table which does not exist in PostgreSQL must be ok. instance.query("DETACH TABLE test_database.postgresql_replica_0 PERMANENTLY") @@ -282,10 +278,11 @@ def test_remove_table_from_replication(started_cluster): def test_predefined_connection_configuration(started_cluster): - cursor = pg_manager.get_db_cursor() - cursor.execute(f"DROP TABLE IF EXISTS test_table") - cursor.execute(f"CREATE TABLE test_table (key integer PRIMARY KEY, value integer)") - cursor.execute(f"INSERT INTO test_table SELECT 1, 2") + pg_manager.execute(f"DROP TABLE IF EXISTS test_table") + pg_manager.execute( + f"CREATE TABLE test_table (key integer PRIMARY KEY, value integer)" + ) + pg_manager.execute(f"INSERT INTO test_table SELECT 1, 2") instance.query( "CREATE DATABASE test_database ENGINE = MaterializedPostgreSQL(postgres1) SETTINGS materialized_postgresql_tables_list='test_table'" ) @@ -332,10 +329,9 @@ def test_database_with_single_non_default_schema(started_cluster): create_postgres_schema(cursor, schema_name) pg_manager.create_clickhouse_postgres_db( - ip=cluster.postgres_ip, - port=cluster.postgres_port, - name=clickhouse_postgres_db, + database_name=clickhouse_postgres_db, schema_name=schema_name, + postgres_database="postgres_database", ) for i in range(NUM_TABLES): @@ -367,7 +363,7 @@ def test_database_with_single_non_default_schema(started_cluster): check_all_tables_are_synchronized() altered_table = random.randint(0, NUM_TABLES - 1) - cursor.execute( + pg_manager.execute( "ALTER TABLE test_schema.postgresql_replica_{} ADD COLUMN value2 integer".format( altered_table ) @@ -434,10 +430,9 @@ def test_database_with_multiple_non_default_schemas_1(started_cluster): create_postgres_schema(cursor, schema_name) pg_manager.create_clickhouse_postgres_db( - ip=cluster.postgres_ip, - port=cluster.postgres_port, - name=clickhouse_postgres_db, + database_name=clickhouse_postgres_db, schema_name=schema_name, + postgres_database="postgres_database", ) for i in range(NUM_TABLES): @@ -472,7 +467,7 @@ def test_database_with_multiple_non_default_schemas_1(started_cluster): check_all_tables_are_synchronized() altered_table = random.randint(0, NUM_TABLES - 1) - cursor.execute( + pg_manager.execute( "ALTER TABLE test_schema.postgresql_replica_{} ADD COLUMN value2 
integer".format( altered_table ) @@ -550,10 +545,7 @@ def test_database_with_multiple_non_default_schemas_2(started_cluster): clickhouse_postgres_db = f"clickhouse_postgres_db{i}" create_postgres_schema(cursor, schema_name) pg_manager.create_clickhouse_postgres_db( - ip=cluster.postgres_ip, - port=cluster.postgres_port, - name=clickhouse_postgres_db, - schema_name=schema_name, + database_name=clickhouse_postgres_db, schema_name=schema_name, postgres_database="postgres_database", ) for ti in range(NUM_TABLES): table_name = f"postgresql_replica_{ti}" @@ -586,7 +578,7 @@ def test_database_with_multiple_non_default_schemas_2(started_cluster): altered_schema = random.randint(0, schemas_num - 1) altered_table = random.randint(0, NUM_TABLES - 1) clickhouse_postgres_db = f"clickhouse_postgres_db{altered_schema}" - cursor.execute( + pg_manager.execute( f"ALTER TABLE schema{altered_schema}.postgresql_replica_{altered_table} ADD COLUMN value2 integer" ) @@ -619,10 +611,9 @@ def test_database_with_multiple_non_default_schemas_2(started_cluster): def test_table_override(started_cluster): - cursor = pg_manager.get_db_cursor() table_name = "table_override" materialized_database = "test_database" - create_postgres_table(cursor, table_name, template=postgres_table_template_5) + pg_manager.create_postgres_table(table_name, template=postgres_table_template_5) instance.query( f"create table {table_name}(key Int32, value UUID) engine = PostgreSQL (postgres1, table={table_name})" ) @@ -649,10 +640,11 @@ def test_table_override(started_cluster): def test_materialized_view(started_cluster): - cursor = pg_manager.get_db_cursor() - cursor.execute(f"DROP TABLE IF EXISTS test_table") - cursor.execute(f"CREATE TABLE test_table (key integer PRIMARY KEY, value integer)") - cursor.execute(f"INSERT INTO test_table SELECT 1, 2") + pg_manager.execute(f"DROP TABLE IF EXISTS test_table") + pg_manager.execute( + f"CREATE TABLE test_table (key integer PRIMARY KEY, value integer)" + ) + pg_manager.execute(f"INSERT INTO test_table SELECT 1, 2") instance.query("DROP DATABASE IF EXISTS test_database") instance.query( "CREATE DATABASE test_database ENGINE = MaterializedPostgreSQL(postgres1) SETTINGS materialized_postgresql_tables_list='test_table'" @@ -663,7 +655,7 @@ def test_materialized_view(started_cluster): "CREATE MATERIALIZED VIEW mv ENGINE=MergeTree ORDER BY tuple() POPULATE AS SELECT * FROM test_database.test_table" ) assert "1\t2" == instance.query("SELECT * FROM mv").strip() - cursor.execute(f"INSERT INTO test_table SELECT 3, 4") + pg_manager.execute(f"INSERT INTO test_table SELECT 3, 4") check_tables_are_synchronized(instance, "test_table") assert "1\t2\n3\t4" == instance.query("SELECT * FROM mv ORDER BY 1, 2").strip() pg_manager.drop_materialized_db() From 60b69601e9b1e3563eb43bf4ea1deee582e088fa Mon Sep 17 00:00:00 2001 From: zvonand Date: Fri, 12 May 2023 00:27:11 +0200 Subject: [PATCH 0146/1997] update docs --- .../server-configuration-parameters/settings.md | 4 ++++ docs/en/operations/settings/settings.md | 16 +++++++++++----- .../functions/date-time-functions.md | 14 +++++++++++--- .../server-configuration-parameters/settings.md | 4 ++++ docs/ru/operations/settings/settings.md | 12 ++++++++---- .../functions/date-time-functions.md | 13 +++++++++++-- 6 files changed, 49 insertions(+), 14 deletions(-) diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index e3ca04f5b9b..36ddf6faad0 100644 --- 
a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -1612,6 +1612,10 @@ The time zone is necessary for conversions between String and DateTime formats w Asia/Istanbul ``` +**See also** + +- [session_timezone](../settings/settings.md#session_timezone) + ## tcp_port {#server_configuration_parameters-tcp_port} Port for communicating with clients over the TCP protocol. diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index cc5f292f677..2a929acd5f2 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -4036,23 +4036,25 @@ Use this setting only for backward compatibility if your use cases depend on old ## session_timezone {#session_timezone} -If specified, sets an implicit timezone (instead of server-default). All DateTime/DateTime64 values (and/or functions results) that have no explicit timezone specified are treated as having this timezone instead of default. -Setting this to `''` (empty string) effectively resets implicit timezone to server timezone. +If specified, sets an implicit timezone (instead of [server default](../server-configuration-parameters/settimgs.md#server_configuration_parameters-timezone). +All DateTime/DateTime64 values (and/or functions results) that have no explicit timezone specified are treated as having this timezone instead of default. +A value of `''` (empty string) configures the session timezone to the server default timezone. + Examples: -```clickhouse +```sql SELECT timeZone(), serverTimezone() FORMAT TSV Europe/Berlin Europe/Berlin ``` -```clickhouse +```sql SELECT timeZone(), serverTimezone() SETTINGS session_timezone = 'Asia/Novosibirsk' FORMAT TSV Asia/Novosibirsk Europe/Berlin ``` -```clickhouse +```sql SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Europe/Zurich') SETTINGS session_timezone = 'America/Denver' FORMAT TSV 1999-12-13 07:23:23.123 @@ -4064,6 +4066,10 @@ Possible values: Default value: `''`. +**See also** + +- [timezone](../server-configuration-parameters/settings.md#server_configuration_parameters-timezone) + ## final {#final} Automatically applies [FINAL](../../sql-reference/statements/select/from.md#final-modifier) modifier to all tables in a query, to tables where [FINAL](../../sql-reference/statements/select/from.md#final-modifier) is applicable, including joined tables and tables in sub-queries, and diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 596a2c509cd..9207a135c67 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -139,7 +139,7 @@ makeDateTime32(year, month, day, hour, minute, second[, fraction[, precision[, t ## timeZone -Returns the default timezone of the server for current session. This can be modified using `SET session_timezone = 'New/Value'` +Returns the default timezone of the current session, i.e. the value of setting [session_timezone](../../operations/settings/settings.md#session_timezone). If the function is executed in the context of a distributed table, then it generates a normal column with values relevant to each shard, otherwise it produces a constant value. **Syntax** @@ -156,9 +156,13 @@ Alias: `timezone`. Type: [String](../../sql-reference/data-types/string.md). 
+**See also**
+
+- [serverTimeZone](#servertimezone)
+
 ## serverTimeZone
 
-Returns the actual timezone in which the server runs in.
+Returns the default timezone of the server, i.e. the value of setting [timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone).
 If it is executed in the context of a distributed table, then it generates a normal column with values relevant to each shard. Otherwise it produces a constant value.
 
 **Syntax**
@@ -175,6 +179,10 @@ Alias: `ServerTimezone`, `servertimezone`.
 
 Type: [String](../../sql-reference/data-types/string.md).
 
+**See also**
+
+- [timeZone](#timezone)
+
 ## toTimeZone
 
 Converts a date or date with time to the specified time zone. Does not change the internal value (number of unix seconds) of the data, only the value's time zone attribute and the value's string representation changes.
@@ -408,7 +416,7 @@ Result:
 ```
 
 :::note
-The return type of `toStartOf*`, `toLastDayOfMonth`, `toMonday`, `timeSlot` functions described below is determined by the configuration parameter [enable_extended_results_for_datetime_functions](../../operations/settings/settings.md#enable-extended-results-for-datetime-functions) which is `0` by default.
+The return type of `toStartOf*`, `toLastDayOfMonth`, `toMonday`, `timeSlot` functions described below is determined by the configuration parameter [enable_extended_results_for_datetime_functions](../../operations/settings/settings.md#enable-extended-results-for-datetime-functions) which is `0` by default.
 
 Behavior for
 * `enable_extended_results_for_datetime_functions = 0`: Functions `toStartOfYear`, `toStartOfISOYear`, `toStartOfQuarter`, `toStartOfMonth`, `toStartOfWeek`, `toLastDayOfMonth`, `toMonday` return `Date` or `DateTime`. Functions `toStartOfDay`, `toStartOfHour`, `toStartOfFifteenMinutes`, `toStartOfTenMinutes`, `toStartOfFiveMinutes`, `toStartOfMinute`, `timeSlot` return `DateTime`. Though these functions can take values of the extended types `Date32` and `DateTime64` as an argument, passing them a time outside the normal range (year 1970 to 2149 for `Date` / 2106 for `DateTime`) will produce wrong results.
diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md
index 787153d4d19..33db6df0fdd 100644
--- a/docs/ru/operations/server-configuration-parameters/settings.md
+++ b/docs/ru/operations/server-configuration-parameters/settings.md
@@ -1355,6 +1355,10 @@ Parameters:
 Europe/Moscow
 ```
 
+**См. также**
+
+- [session_timezone](../settings/settings.md#session_timezone)
+
 ## tcp_port {#server_configuration_parameters-tcp_port}
 
 Порт для взаимодействия с клиентами по протоколу TCP.
diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index 585a3995afe..56bfbf8a57f 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -4077,23 +4077,23 @@ SELECT sum(number) FROM numbers(10000000000) SETTINGS partial_result_on_first_ca
 ## session_timezone {#session_timezone}
 
-Задаёт значение часового пояса (session_timezone) по умолчанию для текущей сессии вместо часового пояса сервера. То есть, все значения DateTime/DateTime64, для которых явно не задан параметр timezone, будут интерпретированы как относящиеся к указанной зоне.
+Задаёт значение часового пояса (session_timezone) по умолчанию для текущей сессии вместо [часового пояса сервера](../server-configuration-parameters/settimgs.md#server_configuration_parameters-timezone). То есть, все значения DateTime/DateTime64, для которых явно не задан параметр timezone, будут интерпретированы как относящиеся к указанной зоне. При значении настройки `''` (пустая строка), будет совпадать с часовым поясом сервера. Примеры: -```clickhouse +```sql SELECT timeZone(), serverTimezone() FORMAT TSV Europe/Berlin Europe/Berlin ``` -```clickhouse +```sql SELECT timeZone(), serverTimezone() SETTINGS session_timezone = 'Asia/Novosibirsk' FORMAT TSV Asia/Novosibirsk Europe/Berlin ``` -```clickhouse +```sql SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Europe/Zurich') SETTINGS session_timezone = 'America/Denver' FORMAT TSV 1999-12-13 07:23:23.123 @@ -4104,3 +4104,7 @@ SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Europe/Zuric - Любая зона из `system.time_zones`, например `Europe/Berlin`, `UTC` или `Zulu` Значение по умолчанию: `''`. + +**Смотрите также** + +- [timezone](../server-configuration-parameters/settings.md#server_configuration_parameters-timezone) \ No newline at end of file diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md index 3e378c08308..2d9f96c3199 100644 --- a/docs/ru/sql-reference/functions/date-time-functions.md +++ b/docs/ru/sql-reference/functions/date-time-functions.md @@ -26,7 +26,8 @@ SELECT ## timeZone {#timezone} -Возвращает часовой пояс сервера, считающийся умолчанием для текущей сессии. Можно изменить значение с помощью `SET session_timezone = 'New/Timezone''` +Возвращает часовой пояс сервера, считающийся умолчанием для текущей сессии: значение параметра [session_timezone](../../operations/settings/settings.md#session_timezone), если установлено. + Если функция вызывается в контексте распределенной таблицы, то она генерирует обычный столбец со значениями, актуальными для каждого шарда. Иначе возвращается константа. **Синтаксис** @@ -43,9 +44,13 @@ timeZone() Тип: [String](../../sql-reference/data-types/string.md). +**Смотрите также** + +- [serverTimeZone](#servertimezone) + ## serverTimeZone {#servertimezone} -Возвращает (истинный) часовой пояс сервера, в котором тот работает. +Возвращает часовой пояс сервера по умолчанию, в т.ч. установленный [timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) Если функция вызывается в контексте распределенной таблицы, то она генерирует обычный столбец со значениями, актуальными для каждого шарда. Иначе возвращается константа. **Синтаксис** @@ -62,6 +67,10 @@ serverTimeZone() Тип: [String](../../sql-reference/data-types/string.md). +**Смотрите также** + +- [timeZone](#timezone) + ## toTimeZone {#totimezone} Переводит дату или дату с временем в указанный часовой пояс. Часовой пояс - это атрибут типов `Date` и `DateTime`. Внутреннее значение (количество секунд) поля таблицы или результирующего столбца не изменяется, изменяется тип поля и, соответственно, его текстовое отображение. 
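The documentation added in this patch pins down the fallback rule for `session_timezone`: an empty value means the server default timezone applies. A minimal C++ sketch of that rule, assuming ClickHouse's `DateLUT` interface (the helper name is invented for illustration):

#include <string>
#include <Common/DateLUT.h>

// Sketch only: resolve the time zone a session should use, following the
// documented rule that an empty `session_timezone` falls back to the server default.
const DateLUTImpl & resolveEffectiveTimeZone(const std::string & session_timezone)
{
    if (session_timezone.empty())
        return DateLUT::instance();              // server default, what serverTimeZone() reports
    return DateLUT::instance(session_timezone);  // named zone, what timeZone() then reports
}

Under this rule, `SELECT timeZone()` with `SETTINGS session_timezone = 'Asia/Novosibirsk'` returns the session value, while `serverTimezone()` still returns the server-wide default, matching the examples in the patched pages.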
From c3af36915f049794a9c44c55ebb6a6bc950eadc8 Mon Sep 17 00:00:00 2001 From: zvonand Date: Fri, 12 May 2023 01:29:34 +0200 Subject: [PATCH 0147/1997] fixed docs 2 --- docs/en/operations/settings/settings.md | 2 +- docs/ru/operations/settings/settings.md | 2 +- src/DataTypes/Serializations/SerializationDate.cpp | 3 ++- src/DataTypes/Serializations/SerializationDate.h | 7 +++++-- src/DataTypes/Serializations/SerializationDate32.cpp | 3 ++- src/DataTypes/Serializations/SerializationDate32.h | 7 +++++-- src/IO/ReadHelpers.h | 3 +-- 7 files changed, 17 insertions(+), 10 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 2a929acd5f2..e796ea83a6f 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -4036,7 +4036,7 @@ Use this setting only for backward compatibility if your use cases depend on old ## session_timezone {#session_timezone} -If specified, sets an implicit timezone (instead of [server default](../server-configuration-parameters/settimgs.md#server_configuration_parameters-timezone). +If specified, sets an implicit timezone (instead of [server default](../server-configuration-parameters/settings.md#server_configuration_parameters-timezone). All DateTime/DateTime64 values (and/or functions results) that have no explicit timezone specified are treated as having this timezone instead of default. A value of `''` (empty string) configures the session timezone to the server default timezone. diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 56bfbf8a57f..98486847fd9 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -4077,7 +4077,7 @@ SELECT sum(number) FROM numbers(10000000000) SETTINGS partial_result_on_first_ca ## session_timezone {#session_timezone} -Задаёт значение часового пояса (session_timezone) по умолчанию для текущей сессии вместо [часового пояса сервера](../server-configuration-parameters/settimgs.md#server_configuration_parameters-timezone). То есть, все значения DateTime/DateTime64, для которых явно не задан параметр timezone, будут интерпретированы как относящиеся к указанной зоне. +Задаёт значение часового пояса (session_timezone) по умолчанию для текущей сессии вместо [часового пояса сервера](../server-configuration-parameters/settings.md#server_configuration_parameters-timezone). То есть, все значения DateTime/DateTime64, для которых явно не задан параметр timezone, будут интерпретированы как относящиеся к указанной зоне. При значении настройки `''` (пустая строка), будет совпадать с часовым поясом сервера. 
Примеры: diff --git a/src/DataTypes/Serializations/SerializationDate.cpp b/src/DataTypes/Serializations/SerializationDate.cpp index 8b4956f7826..1ed48fdd31d 100644 --- a/src/DataTypes/Serializations/SerializationDate.cpp +++ b/src/DataTypes/Serializations/SerializationDate.cpp @@ -80,7 +80,8 @@ void SerializationDate::deserializeTextCSV(IColumn & column, ReadBuffer & istr, readCSV(value, istr, time_zone); assert_cast(column).getData().push_back(value); } -SerializationDate::SerializationDate(const TimezoneMixin & time_zone_) : TimezoneMixin(time_zone_) + +SerializationDate::SerializationDate(const DateLUTImpl & time_zone_) : time_zone(time_zone_) { } diff --git a/src/DataTypes/Serializations/SerializationDate.h b/src/DataTypes/Serializations/SerializationDate.h index c4e57470673..4d6a6fa36ec 100644 --- a/src/DataTypes/Serializations/SerializationDate.h +++ b/src/DataTypes/Serializations/SerializationDate.h @@ -6,10 +6,10 @@ namespace DB { -class SerializationDate final : public SerializationNumber, public TimezoneMixin +class SerializationDate final : public SerializationNumber { public: - explicit SerializationDate(const TimezoneMixin & time_zone_ = TimezoneMixin()); + explicit SerializationDate(const DateLUTImpl & time_zone_ = DateLUT::instance()); void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; @@ -21,6 +21,9 @@ public: void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + +protected: + const DateLUTImpl & time_zone; }; } diff --git a/src/DataTypes/Serializations/SerializationDate32.cpp b/src/DataTypes/Serializations/SerializationDate32.cpp index 8dcaee8d266..851710de839 100644 --- a/src/DataTypes/Serializations/SerializationDate32.cpp +++ b/src/DataTypes/Serializations/SerializationDate32.cpp @@ -78,7 +78,8 @@ void SerializationDate32::deserializeTextCSV(IColumn & column, ReadBuffer & istr readCSV(value, istr); assert_cast(column).getData().push_back(value.getExtenedDayNum()); } -SerializationDate32::SerializationDate32(const TimezoneMixin & time_zone_) : TimezoneMixin(time_zone_) + +SerializationDate32::SerializationDate32(const DateLUTImpl & time_zone_) : time_zone(time_zone_) { } } diff --git a/src/DataTypes/Serializations/SerializationDate32.h b/src/DataTypes/Serializations/SerializationDate32.h index e8e8f1a74d6..6b6e5442240 100644 --- a/src/DataTypes/Serializations/SerializationDate32.h +++ b/src/DataTypes/Serializations/SerializationDate32.h @@ -5,10 +5,10 @@ namespace DB { -class SerializationDate32 final : public SerializationNumber, public TimezoneMixin +class SerializationDate32 final : public SerializationNumber { public: - explicit SerializationDate32(const TimezoneMixin & time_zone_ = TimezoneMixin()); + explicit SerializationDate32(const DateLUTImpl & time_zone_ = DateLUT::instance()); void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; @@ -20,5 +20,8 @@ public: void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const 
override;
     void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
     void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
+
+protected:
+    const DateLUTImpl & time_zone;
 };
 
 }
diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h
index ea565d11914..3bd9275322e 100644
--- a/src/IO/ReadHelpers.h
+++ b/src/IO/ReadHelpers.h
@@ -1160,8 +1160,7 @@ inline void readText(is_floating_point auto & x, ReadBuffer & buf) { readFloatTe
 
 inline void readText(String & x, ReadBuffer & buf) { readEscapedString(x, buf); }
 
-inline void readText(DayNum & x, ReadBuffer & buf) { readDateText(x, buf); }
-inline void readText(DayNum & x, ReadBuffer & buf, const DateLUTImpl & time_zone) { readDateText(x, buf, time_zone); }
+inline void readText(DayNum & x, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance()) { readDateText(x, buf, time_zone); }
 
 inline void readText(LocalDate & x, ReadBuffer & buf) { readDateText(x, buf); }
 inline void readText(LocalDateTime & x, ReadBuffer & buf) { readDateTimeText(x, buf); }

From 24067ea977b6e4484f68efba8858ba8d0ad1cd6b Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy
Date: Fri, 12 May 2023 15:54:50 +0000
Subject: [PATCH 0148/1997] allow conversion for toIPv4OrDefault

---
 src/Functions/FunctionsCodingIP.h   | 81 +++++++++++++++++++++++++++++
 src/Functions/FunctionsConversion.h | 20 +++++++
 2 files changed, 101 insertions(+)

diff --git a/src/Functions/FunctionsCodingIP.h b/src/Functions/FunctionsCodingIP.h
index d02cc81f608..bd53fa7e043 100644
--- a/src/Functions/FunctionsCodingIP.h
+++ b/src/Functions/FunctionsCodingIP.h
@@ -2,6 +2,7 @@
 #include
 #include
+#include
 #include
 #include
@@ -16,6 +17,7 @@ namespace ErrorCodes
 {
     extern const int CANNOT_PARSE_IPV4;
     extern const int CANNOT_PARSE_IPV6;
     extern const int ILLEGAL_COLUMN;
+    extern const int CANNOT_CONVERT_TYPE;
 }
 
 enum class IPStringToNumExceptionMode : uint8_t
@@ -296,4 +298,83 @@ ColumnPtr convertToIPv4(ColumnPtr column, const PaddedPODArray<UInt8> * null_map
     return col_res;
 }
 
+template <IPStringToNumExceptionMode exception_mode, typename ToColumn = ColumnIPv4>
+ColumnPtr convertIPv6ToIPv4(ColumnPtr column, const PaddedPODArray<UInt8> * null_map = nullptr)
+{
+    const ColumnIPv6 * column_ipv6 = checkAndGetColumn<ColumnIPv6>(column.get());
+
+    if (!column_ipv6)
+        throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column type {}. Expected IPv6.", column->getName());
+
+    size_t column_size = column_ipv6->size();
+
+    ColumnUInt8::MutablePtr col_null_map_to;
+    ColumnUInt8::Container * vec_null_map_to = nullptr;
+
+    if constexpr (exception_mode == IPStringToNumExceptionMode::Null)
+    {
+        col_null_map_to = ColumnUInt8::create(column_size, false);
+        vec_null_map_to = &col_null_map_to->getData();
+    }
+
+    const uint8_t ip4_cidr[] {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00};
+
+    auto col_res = ToColumn::create();
+    auto & vec_res = col_res->getData();
+    vec_res.resize(column_size);
+    const auto & vec_src = column_ipv6->getData();
+
+    for (size_t i = 0; i < vec_res.size(); ++i)
+    {
+        const uint8_t * src = reinterpret_cast<const uint8_t *>(&vec_src[i]);
+        uint8_t * dst = reinterpret_cast<uint8_t *>(&vec_res[i]);
+
+        if (null_map && (*null_map)[i])
+        {
+            std::memset(dst, '\0', IPV4_BINARY_LENGTH);
+            if constexpr (exception_mode == IPStringToNumExceptionMode::Null)
+                (*vec_null_map_to)[i] = true;
+            continue;
+        }
+
+        if (!matchIPv6Subnet(src, ip4_cidr, 96))
+        {
+            if constexpr (exception_mode == IPStringToNumExceptionMode::Throw)
+            {
+                throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "IPv6 in column {} is not in IPv4 mapping block", column->getName());
+            }
+            else if constexpr (exception_mode == IPStringToNumExceptionMode::Default)
+            {
+                std::memset(dst, '\0', IPV4_BINARY_LENGTH);
+            }
+            else if constexpr (exception_mode == IPStringToNumExceptionMode::Null)
+            {
+                (*vec_null_map_to)[i] = true;
+                std::memset(dst, '\0', IPV4_BINARY_LENGTH);
+            }
+            continue;
+        }
+
+        if constexpr (std::endian::native == std::endian::little)
+        {
+            dst[0] = src[15];
+            dst[1] = src[14];
+            dst[2] = src[13];
+            dst[3] = src[12];
+        }
+        else
+        {
+            dst[0] = src[12];
+            dst[1] = src[13];
+            dst[2] = src[14];
+            dst[3] = src[15];
+        }
+    }
+
+    if constexpr (exception_mode == IPStringToNumExceptionMode::Null)
+        return ColumnNullable::create(std::move(col_res), std::move(col_null_map_to));
+
+    return col_res;
+}
+
 }
diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h
index 5bf59f33cb5..4d4efc84df1 100644
--- a/src/Functions/FunctionsConversion.h
+++ b/src/Functions/FunctionsConversion.h
@@ -4035,6 +4035,26 @@ private:
                 return true;
             }
         }
+        else if constexpr (WhichDataType(FromDataType::type_id).isIPv6() && WhichDataType(ToDataType::type_id).isIPv4())
+        {
+            ret = [cast_ipv4_ipv6_default_on_conversion_error_value, requested_result_is_nullable](
+                      ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t)
+                -> ColumnPtr
+            {
+                if (!WhichDataType(result_type).isIPv4())
+                    throw Exception(
+                        ErrorCodes::TYPE_MISMATCH, "Wrong result type {}. Expected IPv4", result_type->getName());
+
+                const auto * null_map = column_nullable ? &column_nullable->getNullMapData() : nullptr;
+                if (cast_ipv4_ipv6_default_on_conversion_error_value || requested_result_is_nullable)
+                    return convertIPv6ToIPv4<IPStringToNumExceptionMode::Default>(arguments[0].column, null_map);
+                else
+                    return convertIPv6ToIPv4<IPStringToNumExceptionMode::Throw>(arguments[0].column, null_map);
+            };
+
+            return true;
+        }
+
         if constexpr (WhichDataType(ToDataType::type_id).isStringOrFixedString())
         {
             if (from_type->getCustomSerialization())

From fc857aa2dbf297d2681af16ddfbafb47739db854 Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy
Date: Mon, 15 May 2023 03:06:03 +0000
Subject: [PATCH 0149/1997] tests added

---
 .../queries/0_stateless/02234_cast_to_ip_address.reference | 4 ++++
 tests/queries/0_stateless/02234_cast_to_ip_address.sql     | 7 +++++++
 2 files changed, 11 insertions(+)

diff --git a/tests/queries/0_stateless/02234_cast_to_ip_address.reference b/tests/queries/0_stateless/02234_cast_to_ip_address.reference
index 9023b36a9bf..fa9c6bd0f94 100644
--- a/tests/queries/0_stateless/02234_cast_to_ip_address.reference
+++ b/tests/queries/0_stateless/02234_cast_to_ip_address.reference
@@ -11,6 +11,10 @@ IPv4 functions
 127.0.0.1
 127.0.0.1
 --
+1.2.3.4
+1.2.3.4
+0.0.0.0
+--
 127.0.0.1
 --
 0
diff --git a/tests/queries/0_stateless/02234_cast_to_ip_address.sql b/tests/queries/0_stateless/02234_cast_to_ip_address.sql
index 6c65fe86cc9..28f1afff57f 100644
--- a/tests/queries/0_stateless/02234_cast_to_ip_address.sql
+++ b/tests/queries/0_stateless/02234_cast_to_ip_address.sql
@@ -20,6 +20,13 @@ SELECT toIPv4OrNull('127.0.0.1');
 
 SELECT '--';
 
+SELECT toIPv4(toIPv6('::ffff:1.2.3.4'));
+SELECT toIPv4(toIPv6('::afff:1.2.3.4')); --{serverError CANNOT_CONVERT_TYPE}
+SELECT toIPv4OrDefault(toIPv6('::ffff:1.2.3.4'));
+SELECT toIPv4OrDefault(toIPv6('::afff:1.2.3.4'));
+
+SELECT '--';
+
 SELECT cast('test' , 'IPv4'); --{serverError CANNOT_PARSE_IPV4}
 SELECT cast('127.0.0.1' , 'IPv4');

From e2112576f04d3aeda1bc5384b5c49da4dc2a8e0c Mon Sep 17 00:00:00 2001
From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com>
Date: Tue, 16 May 2023 12:21:32 +0200
Subject: [PATCH 0150/1997] Fix black check

---
 .../test_postgresql_replica_database_engine_2/test.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/integration/test_postgresql_replica_database_engine_2/test.py b/tests/integration/test_postgresql_replica_database_engine_2/test.py
index 2b17024f417..acec01e732b 100644
--- a/tests/integration/test_postgresql_replica_database_engine_2/test.py
+++ b/tests/integration/test_postgresql_replica_database_engine_2/test.py
@@ -545,7 +545,9 @@ def test_database_with_multiple_non_default_schemas_2(started_cluster):
         clickhouse_postgres_db = f"clickhouse_postgres_db{i}"
         create_postgres_schema(cursor, schema_name)
         pg_manager.create_clickhouse_postgres_db(
-            database_name=clickhouse_postgres_db, schema_name=schema_name, postgres_database="postgres_database",
+            database_name=clickhouse_postgres_db,
+            schema_name=schema_name,
+            postgres_database="postgres_database",
         )
     for ti in range(NUM_TABLES):
         table_name = f"postgresql_replica_{ti}"

From 1eb939766bc78a59dd11b3534f4fd7b693d75e21 Mon Sep 17 00:00:00 2001
From: Anton Popov
Date: Tue, 16 May 2023 17:48:49 +0000
Subject: [PATCH 0151/1997] add test

---
 src/Interpreters/AsynchronousInsertQueue.cpp  |  4 +
 src/Interpreters/AsynchronousInsertQueue.h    |  1 +
 .../02726_async_insert_flush_stress.reference |  1 +
 .../02726_async_insert_flush_stress.sh        | 86 +++++++++++++++++++
 4 files changed, 92 insertions(+)
 create mode 100644 tests/queries/0_stateless/02726_async_insert_flush_stress.reference
 create mode 100755 tests/queries/0_stateless/02726_async_insert_flush_stress.sh

diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp
index 4592e92151e..e176c7afd76 100644
--- a/src/Interpreters/AsynchronousInsertQueue.cpp
+++ b/src/Interpreters/AsynchronousInsertQueue.cpp
@@ -318,6 +318,7 @@ void AsynchronousInsertQueue::flushAll()
 
     LOG_DEBUG(log, "Requested to flush asynchronous insert queue");
 
+    /// Disable background flushes to avoid adding new elements to the queue.
     flush_stopped = true;
 
    std::vector<Queue> queues_to_flush(pool_size);
@@ -343,10 +344,13 @@ void AsynchronousInsertQueue::flushAll()
         }
     }
 
+    /// Note that jobs scheduled before the call of 'flushAll' are not counted here.
     LOG_DEBUG(log,
         "Will wait for finishing of {} flushing jobs (about {} inserts, {} bytes, {} distinct queries)",
         pool.active(), total_entries, total_bytes, total_queries);
 
+    /// Wait until all jobs are finished. That includes also jobs
+    /// that were scheduled before the call of 'flushAll'.
     pool.wait();
 
     LOG_DEBUG(log, "Finished flushing of asynchronous insert queue");
diff --git a/src/Interpreters/AsynchronousInsertQueue.h b/src/Interpreters/AsynchronousInsertQueue.h
index 455e486c798..b22b0c73907 100644
--- a/src/Interpreters/AsynchronousInsertQueue.h
+++ b/src/Interpreters/AsynchronousInsertQueue.h
@@ -38,6 +38,7 @@ public:
         std::unique_ptr<ReadBuffer> insert_data_buffer;
     };
 
+    /// Force flush the whole queue.
     void flushAll();
     PushResult push(ASTPtr query, ContextPtr query_context);
     size_t getPoolSize() const { return pool_size; }
diff --git a/tests/queries/0_stateless/02726_async_insert_flush_stress.reference b/tests/queries/0_stateless/02726_async_insert_flush_stress.reference
new file mode 100644
index 00000000000..573541ac970
--- /dev/null
+++ b/tests/queries/0_stateless/02726_async_insert_flush_stress.reference
@@ -0,0 +1 @@
+0
diff --git a/tests/queries/0_stateless/02726_async_insert_flush_stress.sh b/tests/queries/0_stateless/02726_async_insert_flush_stress.sh
new file mode 100755
index 00000000000..4685e49b96d
--- /dev/null
+++ b/tests/queries/0_stateless/02726_async_insert_flush_stress.sh
@@ -0,0 +1,86 @@
+#!/usr/bin/env bash
+# Tags: long
+
+set -e
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+
+function insert1()
+{
+    url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=0"
+    while true; do
+        ${CLICKHOUSE_CURL} -sS "$url" -d 'INSERT INTO async_inserts FORMAT CSV
+1,"a"
+2,"b"
+'
+    done
+}
+
+function insert2()
+{
+    url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=0"
+    while true; do
+        ${CLICKHOUSE_CURL} -sS "$url" -d 'INSERT INTO async_inserts FORMAT JSONEachRow {"id": 5, "s": "e"} {"id": 6, "s": "f"}'
+    done
+}
+
+function insert3()
+{
+    url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=0"
+    while true; do
+        ${CLICKHOUSE_CURL} -sS "$url" -d "INSERT INTO FUNCTION remote('127.0.0.1', $CLICKHOUSE_DATABASE, async_inserts) VALUES (7, 'g') (8, 'h')"
+    done
+}
+
+function select1()
+{
+    while true; do
+        ${CLICKHOUSE_CLIENT} -q "SELECT * FROM async_inserts FORMAT Null"
+    done
+}
+
+function select2()
+{
+    while true; do
+        ${CLICKHOUSE_CLIENT} -q "SELECT * FROM system.asynchronous_inserts FORMAT Null"
+    done
+}
+
+function flush1()
+{
+    while true; do
+        sleep 0.2
+        ${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH ASYNC INSERT QUEUE"
+    done
+}
+
+${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS async_inserts"
+${CLICKHOUSE_CLIENT} -q "CREATE TABLE async_inserts (id UInt32, s String) ENGINE = MergeTree ORDER BY id"
+
+TIMEOUT=10
+
+export -f insert1
+export -f insert2
+export -f insert3
+export -f select1
+export -f select2
+export -f flush1
+
+for _ in {1..5}; do
+    timeout $TIMEOUT bash -c insert1 &
+    timeout $TIMEOUT bash -c insert2 &
+    timeout $TIMEOUT bash -c insert3 &
+done
+
+timeout $TIMEOUT bash -c select1 &
+timeout $TIMEOUT bash -c select2 &
+timeout $TIMEOUT bash -c flush1 &
+
+wait
+
+${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH ASYNC INSERT QUEUE"
+${CLICKHOUSE_CLIENT} -q "SELECT count() FROM system.asynchronous_inserts"
+${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS async_inserts";

From 66cb7f818b1b68f10baf7d27f68c143c9131fac4 Mon Sep 17 00:00:00 2001
From: Michael Kolupaev
Date: Thu, 11 May 2023 02:26:31 +0000
Subject: [PATCH 0152/1997] Fix flaky tests caused by OPTIMIZE FINAL failing memory budget check

---
 src/Storages/StorageMergeTree.cpp             | 74 +++++++++++++------
 ..._between_multiple_mutations_tasks_long.sql |  4 +-
 ...e_big_sets_between_mutation_tasks_long.sql |  4 +-
 3 files changed, 56 insertions(+), 26 deletions(-)

diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp
index b4dc2830bd6..0932253e991 100644
--- a/src/Storages/StorageMergeTree.cpp
+++ b/src/Storages/StorageMergeTree.cpp
@@ -925,44 +925,70 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge(
 
     SelectPartsDecision select_decision = SelectPartsDecision::CANNOT_SELECT;
 
-    if (!canEnqueueBackgroundTask())
+    auto is_background_memory_usage_ok = [](String * disable_reason) -> bool
     {
-        if (out_disable_reason)
-            *out_disable_reason = fmt::format("Current background tasks memory usage ({}) is more than the limit ({})",
+        if (canEnqueueBackgroundTask())
+            return true;
+        if (disable_reason)
+            *disable_reason = fmt::format("Current background tasks memory usage ({}) is more than the limit ({})",
                 formatReadableSizeWithBinarySuffix(background_memory_tracker.get()),
                 formatReadableSizeWithBinarySuffix(background_memory_tracker.getSoftLimit()));
-    }
-    else if (partition_id.empty())
-    {
-        UInt64 max_source_parts_size = merger_mutator.getMaxSourcePartsSizeForMerge();
-        bool merge_with_ttl_allowed = getTotalMergesWithTTLInMergeList() < data_settings->max_number_of_merges_with_ttl_in_pool;
+        return false;
+    };
 
-        /// TTL requirements is much more strict than for regular merge, so
-        /// if regular not possible, than merge with ttl is not also not
-        /// possible.
-        if (max_source_parts_size > 0)
+    if (partition_id.empty())
+    {
+        if (is_background_memory_usage_ok(out_disable_reason))
         {
-            select_decision = merger_mutator.selectPartsToMerge(
-                future_part,
-                aggressive,
-                max_source_parts_size,
-                can_merge,
-                merge_with_ttl_allowed,
-                txn,
-                out_disable_reason);
+            UInt64 max_source_parts_size = merger_mutator.getMaxSourcePartsSizeForMerge();
+            bool merge_with_ttl_allowed = getTotalMergesWithTTLInMergeList() < data_settings->max_number_of_merges_with_ttl_in_pool;
+
+            /// TTL requirements is much more strict than for regular merge, so
+            /// if regular not possible, than merge with ttl is not also not
+            /// possible.
+            if (max_source_parts_size > 0)
+            {
+                select_decision = merger_mutator.selectPartsToMerge(
+                    future_part,
+                    aggressive,
+                    max_source_parts_size,
+                    can_merge,
+                    merge_with_ttl_allowed,
+                    txn,
+                    out_disable_reason);
+            }
+            else if (out_disable_reason)
+                *out_disable_reason = "Current value of max_source_parts_size is zero";
         }
-        else if (out_disable_reason)
-            *out_disable_reason = "Current value of max_source_parts_size is zero";
     }
     else
     {
        while (true)
        {
-            select_decision = merger_mutator.selectAllPartsToMergeWithinPartition(
-                future_part, can_merge, partition_id, final, metadata_snapshot, txn, out_disable_reason, optimize_skip_merged_partitions);
            auto timeout_ms = getSettings()->lock_acquire_timeout_for_background_operations.totalMilliseconds();
            auto timeout = std::chrono::milliseconds(timeout_ms);
 
+            if (!is_background_memory_usage_ok(out_disable_reason))
+            {
+                constexpr auto poll_interval = std::chrono::seconds(1);
+                Int64 attempts = timeout / poll_interval;
+                bool ok = false;
+                for (Int64 i = 0; i < attempts; ++i)
+                {
+                    std::this_thread::sleep_for(poll_interval);
+                    if (is_background_memory_usage_ok(out_disable_reason))
+                    {
+                        ok = true;
+                        break;
+                    }
+                }
+                if (!ok)
+                    break;
+            }
+
+            select_decision = merger_mutator.selectAllPartsToMergeWithinPartition(
+                future_part, can_merge, partition_id, final, metadata_snapshot, txn, out_disable_reason, optimize_skip_merged_partitions);
+
            /// If final - we will wait for currently processing merges to finish and continue.
            if (final && select_decision != SelectPartsDecision::SELECTED
diff --git a/tests/queries/0_stateless/02581_share_big_sets_between_multiple_mutations_tasks_long.sql b/tests/queries/0_stateless/02581_share_big_sets_between_multiple_mutations_tasks_long.sql
index 92e372d0cdb..ff8b9c71e92 100644
--- a/tests/queries/0_stateless/02581_share_big_sets_between_multiple_mutations_tasks_long.sql
+++ b/tests/queries/0_stateless/02581_share_big_sets_between_multiple_mutations_tasks_long.sql
@@ -1,4 +1,6 @@
--- Tags: long, no-debug, no-tsan, no-asan, no-ubsan, no-msan
+-- Tags: long, no-debug, no-tsan, no-asan, no-ubsan, no-msan, no-parallel
+
+-- no-parallel because the sets use a lot of memory, which may interfere with other tests
 
 DROP TABLE IF EXISTS 02581_trips;
 
diff --git a/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks_long.sql b/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks_long.sql
index 21ff453cd8e..b7314c8fa47 100644
--- a/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks_long.sql
+++ b/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks_long.sql
@@ -1,4 +1,6 @@
--- Tags: long, no-debug, no-tsan, no-asan, no-ubsan, no-msan
+-- Tags: long, no-debug, no-tsan, no-asan, no-ubsan, no-msan, no-parallel
+
+-- no-parallel because the sets use a lot of memory, which may interfere with other tests
 
 DROP TABLE IF EXISTS 02581_trips;
 

From 849cddd8b281be1ecebf104f5b8f6670c1c6e916 Mon Sep 17 00:00:00 2001
From: kssenii
Date: Wed, 17 May 2023 12:33:26 +0200
Subject: [PATCH 0153/1997] Add forgotten file

---
 .../configs/merge_tree_too_many_parts.xml             | 5 +++++
 .../test_postgresql_replica_database_engine_2/test.py | 7 ++-----
 2 files changed, 7 insertions(+), 5 deletions(-)
 create mode 100644 tests/integration/test_postgresql_replica_database_engine_2/configs/merge_tree_too_many_parts.xml

diff --git a/tests/integration/test_postgresql_replica_database_engine_2/configs/merge_tree_too_many_parts.xml b/tests/integration/test_postgresql_replica_database_engine_2/configs/merge_tree_too_many_parts.xml
new file mode 100644
index 00000000000..4bc63453f55
--- /dev/null
+++ b/tests/integration/test_postgresql_replica_database_engine_2/configs/merge_tree_too_many_parts.xml
@@ -0,0 +1,5 @@
+
+
+ 5
+
+
diff --git a/tests/integration/test_postgresql_replica_database_engine_2/test.py b/tests/integration/test_postgresql_replica_database_engine_2/test.py
index 3b5194e8806..07f356250ea 100644
--- a/tests/integration/test_postgresql_replica_database_engine_2/test.py
+++ b/tests/integration/test_postgresql_replica_database_engine_2/test.py
@@ -88,11 +88,8 @@ def setup_teardown():
 
 
 def test_add_new_table_to_replication(started_cluster):
-    cursor = pg_manager.get_db_cursor()
-    cursor.execute("DROP TABLE IF EXISTS test_table")
     NUM_TABLES = 5
-
-    pg_manager.create_and_fill_postgres_tables_from_cursor(cursor, NUM_TABLES, 10000)
+    pg_manager.create_and_fill_postgres_tables(NUM_TABLES, 10000)
     pg_manager.create_materialized_db(
         ip=started_cluster.postgres_ip, port=started_cluster.postgres_port
     )
@@ -105,7 +102,7 @@ def test_add_new_table_to_replication(started_cluster):
     )
 
     table_name = "postgresql_replica_5"
-    pg_manager.create_and_fill_postgres_table_from_cursor(cursor, table_name)
+    pg_manager.create_and_fill_postgres_table(table_name)
 
     result = instance.query("SHOW CREATE DATABASE test_database")
     assert (

From 05a90a2e971ae7538ed72e1a3db02523c91b67d8 Mon Sep 17 00:00:00 2001
From: Anton Popov
Date: Wed, 17 May 2023 12:19:00 +0000
Subject: [PATCH 0154/1997]
fix tests --- tests/queries/0_stateless/01271_show_privileges.reference | 1 + .../0_stateless/02117_show_create_table_system.reference | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index ec245d8b9e0..eb8b912f03b 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -133,6 +133,7 @@ SYSTEM SYNC TRANSACTION LOG ['SYNC TRANSACTION LOG'] GLOBAL SYSTEM SYSTEM SYNC FILE CACHE ['SYNC FILE CACHE'] GLOBAL SYSTEM SYSTEM FLUSH DISTRIBUTED ['FLUSH DISTRIBUTED'] TABLE SYSTEM FLUSH SYSTEM FLUSH LOGS ['FLUSH LOGS'] GLOBAL SYSTEM FLUSH +SYSTEM FLUSH ASYNC INSERT QUEUE ['FLUSH ASYNC INSERT QUEUE'] GLOBAL SYSTEM FLUSH SYSTEM FLUSH [] \N SYSTEM SYSTEM THREAD FUZZER ['SYSTEM START THREAD FUZZER','SYSTEM STOP THREAD FUZZER','START THREAD FUZZER','STOP THREAD FUZZER'] GLOBAL SYSTEM SYSTEM UNFREEZE ['SYSTEM UNFREEZE'] GLOBAL SYSTEM diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 09cc62dac00..85cdc278892 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -297,7 +297,7 @@ CREATE TABLE system.grants ( `user_name` Nullable(String), `role_name` Nullable(String), - `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' 
= 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD SYMBOLS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH' = 135, 'SYSTEM THREAD FUZZER' = 136, 'SYSTEM UNFREEZE' = 137, 'SYSTEM FAILPOINT' = 138, 'SYSTEM' = 139, 'dictGet' = 140, 'displaySecretsInShowAndSelect' = 141, 'addressToLine' = 142, 'addressToLineWithInlines' = 143, 'addressToSymbol' = 144, 'demangle' = 145, 'INTROSPECTION' = 146, 'FILE' = 147, 'URL' = 148, 'REMOTE' = 149, 'MONGO' = 150, 'MEILISEARCH' = 151, 'MYSQL' = 152, 'POSTGRES' = 153, 'SQLITE' = 154, 'ODBC' = 155, 'JDBC' = 156, 'HDFS' = 157, 'S3' = 158, 'HIVE' = 159, 'SOURCES' = 160, 'CLUSTER' = 161, 'ALL' = 162, 'NONE' = 163), + `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 
'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD SYMBOLS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH ASYNC INSERT QUEUE' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'MEILISEARCH' = 152, 'MYSQL' = 153, 'POSTGRES' = 154, 'SQLITE' = 155, 'ODBC' = 156, 'JDBC' = 157, 'HDFS' = 158, 'S3' = 159, 'HIVE' = 160, 'SOURCES' = 161, 'CLUSTER' = 162, 'ALL' = 163, 'NONE' = 164), `database` Nullable(String), `table` Nullable(String), `column` Nullable(String), @@ -581,10 +581,10 @@ ENGINE = SystemPartsColumns COMMENT 'SYSTEM TABLE is built on the fly.' 
CREATE TABLE system.privileges ( - `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD SYMBOLS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE 
CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH' = 135, 'SYSTEM THREAD FUZZER' = 136, 'SYSTEM UNFREEZE' = 137, 'SYSTEM FAILPOINT' = 138, 'SYSTEM' = 139, 'dictGet' = 140, 'displaySecretsInShowAndSelect' = 141, 'addressToLine' = 142, 'addressToLineWithInlines' = 143, 'addressToSymbol' = 144, 'demangle' = 145, 'INTROSPECTION' = 146, 'FILE' = 147, 'URL' = 148, 'REMOTE' = 149, 'MONGO' = 150, 'MEILISEARCH' = 151, 'MYSQL' = 152, 'POSTGRES' = 153, 'SQLITE' = 154, 'ODBC' = 155, 'JDBC' = 156, 'HDFS' = 157, 'S3' = 158, 'HIVE' = 159, 'SOURCES' = 160, 'CLUSTER' = 161, 'ALL' = 162, 'NONE' = 163), + `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD SYMBOLS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 
'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH ASYNC INSERT QUEUE' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'MEILISEARCH' = 152, 'MYSQL' = 153, 'POSTGRES' = 154, 'SQLITE' = 155, 'ODBC' = 156, 'JDBC' = 157, 'HDFS' = 158, 'S3' = 159, 'HIVE' = 160, 'SOURCES' = 161, 'CLUSTER' = 162, 'ALL' = 163, 'NONE' = 164), `aliases` Array(String), `level` Nullable(Enum8('GLOBAL' = 0, 'DATABASE' = 1, 'TABLE' = 2, 'DICTIONARY' = 3, 'VIEW' = 4, 'COLUMN' = 5, 'NAMED_COLLECTION' = 6)), - `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP 
SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD SYMBOLS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH' = 135, 'SYSTEM THREAD FUZZER' = 136, 'SYSTEM UNFREEZE' = 137, 'SYSTEM FAILPOINT' = 138, 'SYSTEM' = 139, 'dictGet' = 140, 'displaySecretsInShowAndSelect' = 141, 'addressToLine' = 142, 'addressToLineWithInlines' = 143, 'addressToSymbol' = 144, 'demangle' = 145, 'INTROSPECTION' = 146, 'FILE' = 147, 'URL' = 148, 'REMOTE' = 149, 'MONGO' = 150, 'MEILISEARCH' = 151, 'MYSQL' = 152, 'POSTGRES' = 153, 'SQLITE' = 154, 'ODBC' = 155, 'JDBC' = 156, 'HDFS' = 157, 'S3' = 158, 'HIVE' = 159, 'SOURCES' = 160, 'CLUSTER' = 161, 'ALL' = 162, 'NONE' = 163)) + `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP 
TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD SYMBOLS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH ASYNC INSERT QUEUE' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'MEILISEARCH' = 152, 'MYSQL' = 153, 'POSTGRES' = 154, 'SQLITE' = 155, 'ODBC' = 156, 'JDBC' = 157, 'HDFS' = 158, 'S3' = 159, 'HIVE' = 160, 'SOURCES' = 161, 'CLUSTER' = 162, 'ALL' = 163, 'NONE' = 164)) ) ENGINE = SystemPrivileges COMMENT 'SYSTEM TABLE is built on the fly.' 
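Taken together, patch 0151 and this patch (0154) introduce the `SYSTEM FLUSH ASYNC INSERT QUEUE` statement and register the matching `SYSTEM FLUSH ASYNC INSERT QUEUE` privilege in the access enums above. A minimal usage sketch in SQL, assuming a scratch table `t` (the statement, the settings, and `system.asynchronous_inserts` all come from these patches; the table itself is hypothetical):

    -- queue rows in memory instead of writing a data part per INSERT
    CREATE TABLE t (id UInt32, s String) ENGINE = MergeTree ORDER BY id;
    INSERT INTO t SETTINGS async_insert = 1, wait_for_async_insert = 0 VALUES (1, 'a');
    -- force every pending entry out of the queue, which is what the new statement does
    SYSTEM FLUSH ASYNC INSERT QUEUE;
    -- the queue should now be empty, matching the '0' in the stress test's reference file
    SELECT count() FROM system.asynchronous_inserts;
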
From bf6afd27a655bb159583af2020113e101ca82e00 Mon Sep 17 00:00:00 2001
From: Antonio Andelic
Date: Wed, 17 May 2023 13:32:51 +0000
Subject: [PATCH 0155/1997] build with disks

---
 programs/keeper-converter/KeeperConverter.cpp  |   4 +-
 programs/keeper/CMakeLists.txt                 |  58 +++-
 programs/keeper/Keeper.cpp                     |  13 +
 programs/server/config.d/users.xml             |  26 ++
 src/Coordination/Context/Context.cpp           | 259 ++++++++++++++++++
 src/Coordination/Context/Context.h             | 111 ++++++++
 src/Coordination/Context/Settings.cpp          |  24 ++
 src/Coordination/Context/ThreadStatusExt.cpp   |  14 +
 src/Coordination/CoordinationSettings.cpp      |  60 ----
 src/Coordination/CoordinationSettings.h        |   9 -
 src/Coordination/KeeperContext.cpp             | 104 +++++++
 src/Coordination/KeeperContext.h               |  37 ++-
 src/Coordination/KeeperDispatcher.cpp          |   6 +-
 src/Coordination/KeeperServer.cpp              |  33 ++-
 src/Coordination/KeeperSnapshotManager.cpp     |  14 +-
 src/Coordination/KeeperStateMachine.cpp        |   6 +-
 src/Coordination/KeeperStorage.cpp             |  46 ++--
 src/Coordination/ZooKeeperDataReader.cpp       |   4 +-
 src/Coordination/pathUtils.cpp                 |   4 +-
 src/Coordination/pathUtils.h                   |   4 +-
 src/Coordination/tests/gtest_coordination.cpp  |   2 +-
 src/Core/SettingsFields.cpp                    |  29 ++
 src/Core/SettingsFields.h                      |  16 +-
 src/Disks/IO/ReadBufferFromRemoteFSGather.cpp  |   2 +
 src/Disks/IO/getThreadPoolReader.cpp           |  24 --
 .../ObjectStorages/DiskObjectStorage.cpp       |  19 -
 src/Disks/ObjectStorages/DiskObjectStorage.h   |  14 +-
 .../ObjectStorages/DiskObjectStorageCache.cpp  |  28 ++
 src/Disks/registerDisks.cpp                    |  17 ++
 src/Interpreters/Context.h                     |   8 +
 30 files changed, 807 insertions(+), 188 deletions(-)
 create mode 100644 programs/server/config.d/users.xml
 create mode 100644 src/Coordination/Context/Context.cpp
 create mode 100644 src/Coordination/Context/Context.h
 create mode 100644 src/Coordination/Context/Settings.cpp
 create mode 100644 src/Coordination/Context/ThreadStatusExt.cpp
 create mode 100644 src/Coordination/KeeperContext.cpp
 create mode 100644 src/Disks/ObjectStorages/DiskObjectStorageCache.cpp

diff --git a/programs/keeper-converter/KeeperConverter.cpp b/programs/keeper-converter/KeeperConverter.cpp
index c81e61685fd..58d090ca8b9 100644
--- a/programs/keeper-converter/KeeperConverter.cpp
+++ b/programs/keeper-converter/KeeperConverter.cpp
@@ -40,8 +40,8 @@ int mainEntryClickHouseKeeperConverter(int argc, char ** argv)
 
     try
     {
-        auto keeper_context = std::make_shared<KeeperContext>();
-        keeper_context->digest_enabled = true;
+        auto keeper_context = std::make_shared<KeeperContext>(true);
+        keeper_context->setDigestEnabled(true);
 
         DB::KeeperStorage storage(/* tick_time_ms */ 500, /* superdigest */ "", keeper_context, /* initialize_system_nodes */ false);
 
diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt
index e5d56023f7b..c0c0a6dd1b0 100644
--- a/programs/keeper/CMakeLists.txt
+++ b/programs/keeper/CMakeLists.txt
@@ -46,6 +46,7 @@ if (BUILD_STANDALONE_KEEPER)
     ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperSnapshotManager.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperSnapshotManagerS3.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperStateMachine.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperContext.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperStateManager.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperStorage.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperAsynchronousMetrics.cpp
@@ -58,10 +59,14 @@ if (BUILD_STANDALONE_KEEPER)
     ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/SettingsFields.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/BaseSettings.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/ServerSettings.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/Field.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/SettingsEnums.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/ServerUUID.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/UUID.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/BackgroundSchedulePool.cpp + + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/IO/ReadBuffer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/KeeperTCPHandler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/TCPServer.cpp @@ -92,6 +97,10 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/ICompressionCodec.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/LZ4_decompress_faster.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/CurrentThread.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/NamedCollections/NamedCollections.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/NamedCollections/NamedCollectionConfiguration.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/IKeeper.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/TestKeeper.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeperCommon.cpp @@ -102,11 +111,56 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeperLock.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeperNodeCache.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/registerDisks.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IDisk.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/DiskFactory.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/DiskSelector.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/DiskLocal.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/DiskLocalCheckThread.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/LocalDirectorySyncGuard.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/TemporaryFileOnDisk.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/loadLocalDiskConfig.cpp + + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/IObjectStorage.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataFromDiskTransactionState.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorage.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorageCommon.cpp + + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/registerDiskS3.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/S3Capabilities.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/diskSettings.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/ProxyListConfiguration.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/ProxyResolverConfiguration.cpp + + 
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/createReadBufferFromFileBase.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ReadBufferFromRemoteFSGather.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/IOUringReader.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/WriteBufferFromTemporaryFile.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/WriteIndirectBufferFromRemoteFS.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/getThreadPoolReader.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ThreadPoolRemoteFSReader.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ThreadPoolReader.cpp + + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Storages/StorageS3Settings.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/BaseDaemon.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/SentryWriter.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/GraphiteWriter.cpp ${CMAKE_CURRENT_BINARY_DIR}/../../src/Daemon/GitHash.generated.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/Context/Context.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/Context/Settings.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/Context/ThreadStatusExt.cpp + Keeper.cpp clickhouse-keeper.cpp ) @@ -117,10 +171,6 @@ if (BUILD_STANDALONE_KEEPER) target_compile_definitions (clickhouse-keeper PRIVATE -DCLICKHOUSE_PROGRAM_STANDALONE_BUILD) target_compile_definitions (clickhouse-keeper PUBLIC -DWITHOUT_TEXT_LOG) - target_include_directories(clickhouse-keeper PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../../src") # uses includes from src directory - target_include_directories(clickhouse-keeper PUBLIC "${CMAKE_CURRENT_BINARY_DIR}/../../src/Core/include") # uses some includes from core - target_include_directories(clickhouse-keeper PUBLIC "${CMAKE_CURRENT_BINARY_DIR}/../../src") # uses some includes from common - target_link_libraries(clickhouse-keeper PRIVATE ch_contrib::abseil_swiss_tables diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 3d1773260f5..996c4678450 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -21,6 +21,8 @@ #include #include +#include + #include #include @@ -40,6 +42,8 @@ #include #include +#include + int mainEntryClickHouseKeeper(int argc, char ** argv) { @@ -408,6 +412,15 @@ try std::mutex servers_lock; auto servers = std::make_shared>(); + auto shared_context = Context::createShared(); + auto global_context = Context::createGlobal(shared_context.get()); + + global_context->makeGlobalContext(); + global_context->setPath(path); + global_context->setRemoteHostFilter(config()); + + registerDisks(/*global_skip_access_check=*/false); + tiny_context = std::make_shared(); /// This object will periodically calculate some metrics. 
KeeperAsynchronousMetrics async_metrics( diff --git a/programs/server/config.d/users.xml b/programs/server/config.d/users.xml new file mode 100644 index 00000000000..d8a62b45baa --- /dev/null +++ b/programs/server/config.d/users.xml @@ -0,0 +1,26 @@ + + + + 10000000000 + 0 + 2 + + + 5000000000 + 20000000000 + + + + + + + + + + + + + default + + + diff --git a/src/Coordination/Context/Context.cpp b/src/Coordination/Context/Context.cpp new file mode 100644 index 00000000000..aeb4e405938 --- /dev/null +++ b/src/Coordination/Context/Context.cpp @@ -0,0 +1,259 @@ +#include + +#include +#include +#include + +#include + +#include + +#include +#include + +namespace ProfileEvents +{ + extern const Event ContextLock; +} + +namespace CurrentMetrics +{ + extern const Metric ContextLockWait; + extern const Metric BackgroundSchedulePoolTask; + extern const Metric BackgroundSchedulePoolSize; + extern const Metric IOWriterThreads; + extern const Metric IOWriterThreadsActive; +} + +namespace DB +{ + + +struct ContextSharedPart : boost::noncopyable +{ + ContextSharedPart() + : macros(std::make_unique()) + {} + + /// For access of most of shared objects. Recursive mutex. + mutable std::recursive_mutex mutex; + + ServerSettings server_settings; + + String path; /// Path to the data directory, with a slash at the end. + ConfigurationPtr config; /// Global configuration settings. + MultiVersion macros; /// Substitutions extracted from config. + mutable std::unique_ptr schedule_pool; /// A thread pool that can run different jobs in background + RemoteHostFilter remote_host_filter; /// Allowed URL from config.xml + /// + mutable std::unique_ptr asynchronous_remote_fs_reader; + mutable std::unique_ptr asynchronous_local_fs_reader; + mutable std::unique_ptr synchronous_local_fs_reader; + + mutable std::unique_ptr threadpool_writer; + + mutable ThrottlerPtr remote_read_throttler; /// A server-wide throttler for remote IO reads + mutable ThrottlerPtr remote_write_throttler; /// A server-wide throttler for remote IO writes + + mutable ThrottlerPtr local_read_throttler; /// A server-wide throttler for local IO reads + mutable ThrottlerPtr local_write_throttler; /// A server-wide throttler for local IO writes + +}; + +Context::Context() = default; +Context::~Context() = default; +Context::Context(const Context &) = default; +Context & Context::operator=(const Context &) = default; + +SharedContextHolder::SharedContextHolder(SharedContextHolder &&) noexcept = default; +SharedContextHolder & SharedContextHolder::operator=(SharedContextHolder &&) noexcept = default; +SharedContextHolder::SharedContextHolder() = default; +SharedContextHolder::~SharedContextHolder() = default; +SharedContextHolder::SharedContextHolder(std::unique_ptr shared_context) + : shared(std::move(shared_context)) {} + +void SharedContextHolder::reset() { shared.reset(); } + +void Context::makeGlobalContext() +{ + initGlobal(); + global_context = shared_from_this(); +} + +ContextMutablePtr Context::createGlobal(ContextSharedPart * shared) +{ + auto res = std::shared_ptr(new Context); + res->shared = shared; + return res; +} + +void Context::initGlobal() +{ + assert(!global_context_instance); + global_context_instance = shared_from_this(); +} + +SharedContextHolder Context::createShared() +{ + return SharedContextHolder(std::make_unique()); +} + +ContextMutablePtr Context::getGlobalContext() const +{ + auto ptr = global_context.lock(); + if (!ptr) throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no global context or global context has expired"); + 
return ptr; +} + +std::unique_lock Context::getLock() const +{ + ProfileEvents::increment(ProfileEvents::ContextLock); + CurrentMetrics::Increment increment{CurrentMetrics::ContextLockWait}; + return std::unique_lock(shared->mutex); +} + +String Context::getPath() const +{ + auto lock = getLock(); + return shared->path; +} + +void Context::setPath(const String & path) +{ + auto lock = getLock(); + shared->path = path; +} + +MultiVersion::Version Context::getMacros() const +{ + return shared->macros.get(); +} + +BackgroundSchedulePool & Context::getSchedulePool() const +{ + auto lock = getLock(); + if (!shared->schedule_pool) + { + shared->schedule_pool = std::make_unique( + shared->server_settings.background_schedule_pool_size, + CurrentMetrics::BackgroundSchedulePoolTask, + CurrentMetrics::BackgroundSchedulePoolSize, + "BgSchPool"); + } + + return *shared->schedule_pool; +} + +void Context::setRemoteHostFilter(const Poco::Util::AbstractConfiguration & config) +{ + shared->remote_host_filter.setValuesFromConfig(config); +} + +const RemoteHostFilter & Context::getRemoteHostFilter() const +{ + return shared->remote_host_filter; +} + +IAsynchronousReader & Context::getThreadPoolReader(FilesystemReaderType type) const +{ + auto lock = getLock(); + + switch (type) + { + case FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER: + { + if (!shared->asynchronous_remote_fs_reader) + shared->asynchronous_remote_fs_reader = createThreadPoolReader(type, getConfigRef()); + return *shared->asynchronous_remote_fs_reader; + } + case FilesystemReaderType::ASYNCHRONOUS_LOCAL_FS_READER: + { + if (!shared->asynchronous_local_fs_reader) + shared->asynchronous_local_fs_reader = createThreadPoolReader(type, getConfigRef()); + + return *shared->asynchronous_local_fs_reader; + } + case FilesystemReaderType::SYNCHRONOUS_LOCAL_FS_READER: + { + if (!shared->synchronous_local_fs_reader) + shared->synchronous_local_fs_reader = createThreadPoolReader(type, getConfigRef()); + + return *shared->synchronous_local_fs_reader; + } + } +} + +std::shared_ptr Context::getFilesystemCacheLog() const +{ + return nullptr; +} + +std::shared_ptr Context::getFilesystemReadPrefetchesLog() const +{ + return nullptr; +} + +void Context::setConfig(const ConfigurationPtr & config) +{ + auto lock = getLock(); + shared->config = config; +} + +const Poco::Util::AbstractConfiguration & Context::getConfigRef() const +{ + auto lock = getLock(); + return shared->config ? 
*shared->config : Poco::Util::Application::instance().config(); +} + +std::shared_ptr Context::getAsyncReadCounters() const +{ + auto lock = getLock(); + if (!async_read_counters) + async_read_counters = std::make_shared(); + return async_read_counters; +} + +ThreadPool & Context::getThreadPoolWriter() const +{ + const auto & config = getConfigRef(); + + auto lock = getLock(); + + if (!shared->threadpool_writer) + { + auto pool_size = config.getUInt(".threadpool_writer_pool_size", 100); + auto queue_size = config.getUInt(".threadpool_writer_queue_size", 1000000); + + shared->threadpool_writer = std::make_unique( + CurrentMetrics::IOWriterThreads, CurrentMetrics::IOWriterThreadsActive, pool_size, pool_size, queue_size); + } + + return *shared->threadpool_writer; +} + +ThrottlerPtr Context::getRemoteReadThrottler() const +{ + return nullptr; +} + +ThrottlerPtr Context::getRemoteWriteThrottler() const +{ + return nullptr; +} + +ThrottlerPtr Context::getLocalReadThrottler() const +{ + return nullptr; +} + +ThrottlerPtr Context::getLocalWriteThrottler() const +{ + return nullptr; +} + +ReadSettings Context::getReadSettings() const +{ + return ReadSettings{}; +} + +} diff --git a/src/Coordination/Context/Context.h b/src/Coordination/Context/Context.h new file mode 100644 index 00000000000..683209d942e --- /dev/null +++ b/src/Coordination/Context/Context.h @@ -0,0 +1,111 @@ +#pragma once + +#include + +#include +#include + +#include + +#include +#include + +#include + +#include + +#include + +namespace DB +{ + +struct ContextSharedPart; +class Macros; +class FilesystemCacheLog; +class FilesystemReadPrefetchesLog; + +/// A small class which owns ContextShared. +/// We don't use something like unique_ptr directly to allow ContextShared type to be incomplete. +struct SharedContextHolder +{ + ~SharedContextHolder(); + SharedContextHolder(); + explicit SharedContextHolder(std::unique_ptr shared_context); + SharedContextHolder(SharedContextHolder &&) noexcept; + + SharedContextHolder & operator=(SharedContextHolder &&) noexcept; + + ContextSharedPart * get() const { return shared.get(); } + void reset(); +private: + std::unique_ptr shared; +}; + + +class Context : public std::enable_shared_from_this +{ +private: + /// Use copy constructor or createGlobal() instead + Context(); + Context(const Context &); + Context & operator=(const Context &); + + std::unique_lock getLock() const; + + ContextWeakMutablePtr global_context; + inline static ContextPtr global_context_instance; + ContextSharedPart * shared; + + /// Query metrics for reading data asynchronously with IAsynchronousReader. + mutable std::shared_ptr async_read_counters; + + Settings settings; /// Setting for query execution. +public: + /// Create initial Context with ContextShared and etc. + static ContextMutablePtr createGlobal(ContextSharedPart * shared); + static SharedContextHolder createShared(); + + ContextMutablePtr getGlobalContext() const; + static ContextPtr getGlobalContextInstance() { return global_context_instance; } + + void makeGlobalContext(); + void initGlobal(); + + ~Context(); + + using ConfigurationPtr = Poco::AutoPtr; + + /// Global application configuration settings. 
+ void setConfig(const ConfigurationPtr & config); + const Poco::Util::AbstractConfiguration & getConfigRef() const; + + const Settings & getSettingsRef() const { return settings; } + + String getPath() const; + void setPath(const String & path); + + MultiVersion::Version getMacros() const; + + BackgroundSchedulePool & getSchedulePool() const; + + /// Storage of allowed hosts from config.xml + void setRemoteHostFilter(const Poco::Util::AbstractConfiguration & config); + const RemoteHostFilter & getRemoteHostFilter() const; + + std::shared_ptr getFilesystemCacheLog() const; + std::shared_ptr getFilesystemReadPrefetchesLog() const; + + IAsynchronousReader & getThreadPoolReader(FilesystemReaderType type) const; + std::shared_ptr getAsyncReadCounters() const; + ThreadPool & getThreadPoolWriter() const; + + ThrottlerPtr getRemoteReadThrottler() const; + ThrottlerPtr getRemoteWriteThrottler() const; + + ThrottlerPtr getLocalReadThrottler() const; + ThrottlerPtr getLocalWriteThrottler() const; + + ReadSettings getReadSettings() const; +}; + +} diff --git a/src/Coordination/Context/Settings.cpp b/src/Coordination/Context/Settings.cpp new file mode 100644 index 00000000000..12a7a42ffac --- /dev/null +++ b/src/Coordination/Context/Settings.cpp @@ -0,0 +1,24 @@ +#include + +namespace DB +{ + +IMPLEMENT_SETTINGS_TRAITS(SettingsTraits, LIST_OF_SETTINGS) + +std::vector Settings::getAllRegisteredNames() const +{ + std::vector all_settings; + for (const auto & setting_field : all()) + { + all_settings.push_back(setting_field.getName()); + } + return all_settings; +} + +void Settings::set(std::string_view name, const Field & value) +{ + BaseSettings::set(name, value); +} + + +} diff --git a/src/Coordination/Context/ThreadStatusExt.cpp b/src/Coordination/Context/ThreadStatusExt.cpp new file mode 100644 index 00000000000..97f7287be8c --- /dev/null +++ b/src/Coordination/Context/ThreadStatusExt.cpp @@ -0,0 +1,14 @@ +#include + +namespace DB +{ + +void CurrentThread::detachFromGroupIfNotDetached() +{ +} + +void CurrentThread::attachToGroup(const ThreadGroupPtr &) +{ +} + +} diff --git a/src/Coordination/CoordinationSettings.cpp b/src/Coordination/CoordinationSettings.cpp index 7a66134f43f..833ff7be7ae 100644 --- a/src/Coordination/CoordinationSettings.cpp +++ b/src/Coordination/CoordinationSettings.cpp @@ -85,14 +85,6 @@ void KeeperConfigurationAndSettings::dump(WriteBufferFromOwnString & buf) const writeText(four_letter_word_allow_list, buf); buf.write('\n'); - writeText("log_storage_path=", buf); - writeText(log_storage_path, buf); - buf.write('\n'); - - writeText("snapshot_storage_path=", buf); - writeText(snapshot_storage_path, buf); - buf.write('\n'); - /// coordination_settings writeText("max_requests_batch_size=", buf); @@ -188,61 +180,9 @@ KeeperConfigurationAndSettings::loadFromConfig(const Poco::Util::AbstractConfigu DEFAULT_FOUR_LETTER_WORD_CMD)); - ret->log_storage_path = getLogsPathFromConfig(config, standalone_keeper_); - ret->snapshot_storage_path = getSnapshotsPathFromConfig(config, standalone_keeper_); - - ret->state_file_path = getStateFilePathFromConfig(config, standalone_keeper_); - ret->coordination_settings->loadFromConfig("keeper_server.coordination_settings", config); return ret; } -String KeeperConfigurationAndSettings::getLogsPathFromConfig(const Poco::Util::AbstractConfiguration & config, bool standalone_keeper_) -{ - /// the most specialized path - if (config.has("keeper_server.log_storage_path")) - return config.getString("keeper_server.log_storage_path"); - - if 
(config.has("keeper_server.storage_path")) - return std::filesystem::path{config.getString("keeper_server.storage_path")} / "logs"; - - if (standalone_keeper_) - return std::filesystem::path{config.getString("path", KEEPER_DEFAULT_PATH)} / "logs"; - else - return std::filesystem::path{config.getString("path", DBMS_DEFAULT_PATH)} / "coordination/logs"; -} - -String KeeperConfigurationAndSettings::getSnapshotsPathFromConfig(const Poco::Util::AbstractConfiguration & config, bool standalone_keeper_) -{ - /// the most specialized path - if (config.has("keeper_server.snapshot_storage_path")) - return config.getString("keeper_server.snapshot_storage_path"); - - if (config.has("keeper_server.storage_path")) - return std::filesystem::path{config.getString("keeper_server.storage_path")} / "snapshots"; - - if (standalone_keeper_) - return std::filesystem::path{config.getString("path", KEEPER_DEFAULT_PATH)} / "snapshots"; - else - return std::filesystem::path{config.getString("path", DBMS_DEFAULT_PATH)} / "coordination/snapshots"; -} - -String KeeperConfigurationAndSettings::getStateFilePathFromConfig(const Poco::Util::AbstractConfiguration & config, bool standalone_keeper_) -{ - if (config.has("keeper_server.storage_path")) - return std::filesystem::path{config.getString("keeper_server.storage_path")} / "state"; - - if (config.has("keeper_server.snapshot_storage_path")) - return std::filesystem::path(config.getString("keeper_server.snapshot_storage_path")).parent_path() / "state"; - - if (config.has("keeper_server.log_storage_path")) - return std::filesystem::path(config.getString("keeper_server.log_storage_path")).parent_path() / "state"; - - if (standalone_keeper_) - return std::filesystem::path{config.getString("path", KEEPER_DEFAULT_PATH)} / "state"; - else - return std::filesystem::path{config.getString("path", DBMS_DEFAULT_PATH)} / "coordination/state"; -} - } diff --git a/src/Coordination/CoordinationSettings.h b/src/Coordination/CoordinationSettings.h index 90f66ccc09b..16da8404b44 100644 --- a/src/Coordination/CoordinationSettings.h +++ b/src/Coordination/CoordinationSettings.h @@ -81,17 +81,8 @@ struct KeeperConfigurationAndSettings bool standalone_keeper; CoordinationSettingsPtr coordination_settings; - String log_storage_path; - String snapshot_storage_path; - String state_file_path; - void dump(WriteBufferFromOwnString & buf) const; static std::shared_ptr loadFromConfig(const Poco::Util::AbstractConfiguration & config, bool standalone_keeper_); - -private: - static String getLogsPathFromConfig(const Poco::Util::AbstractConfiguration & config, bool standalone_keeper_); - static String getSnapshotsPathFromConfig(const Poco::Util::AbstractConfiguration & config, bool standalone_keeper_); - static String getStateFilePathFromConfig(const Poco::Util::AbstractConfiguration & config, bool standalone_keeper_); }; using KeeperConfigurationAndSettingsPtr = std::shared_ptr; diff --git a/src/Coordination/KeeperContext.cpp b/src/Coordination/KeeperContext.cpp new file mode 100644 index 00000000000..9e504f5aa07 --- /dev/null +++ b/src/Coordination/KeeperContext.cpp @@ -0,0 +1,104 @@ +#include + +#include +#include +#include + +namespace DB +{ + + +KeeperContext::KeeperContext(bool standalone_keeper_) + : disk_selector(std::make_shared()) + , standalone_keeper(standalone_keeper_) +{} + +void KeeperContext::initialize(const Poco::Util::AbstractConfiguration & config) +{ + digest_enabled = config.getBool("keeper_server.digest_enabled", false); + ignore_system_path_on_startup = 
config.getBool("keeper_server.ignore_system_path_on_startup", false); + + disk_selector->initialize(config, "storage_configuration.disks", Context::getGlobalContextInstance()); + + log_storage_path = getLogsPathFromConfig(config); + snapshot_storage_path = getSnapshotsPathFromConfig(config); + + state_file_path = getStateFilePathFromConfig(config); +} + +KeeperContext::Phase KeeperContext::getServerState() const +{ + return server_state; +} + +void KeeperContext::setServerState(KeeperContext::Phase server_state_) +{ + server_state = server_state_; +} + +bool KeeperContext::ignoreSystemPathOnStartup() const +{ + return ignore_system_path_on_startup; +} + +bool KeeperContext::digestEnabled() const +{ + return digest_enabled; +} + +void KeeperContext::setDigestEnabled(bool digest_enabled_) +{ + digest_enabled = digest_enabled_; +} + +KeeperContext::Storage KeeperContext::getLogsPathFromConfig(const Poco::Util::AbstractConfiguration & config) const +{ + /// the most specialized path + if (config.has("keeper_server.log_storage_path")) + return std::make_shared("LogDisk", config.getString("keeper_server.log_storage_path"), 0); + + if (config.has("keeper_server.log_storage_disk")) + return config.getString("keeper_server.log_storage_disk"); + + if (config.has("keeper_server.storage_path")) + return std::make_shared("LogDisk", std::filesystem::path{config.getString("keeper_server.storage_path")} / "logs", 0); + + if (standalone_keeper) + return std::make_shared("LogDisk", std::filesystem::path{config.getString("path", KEEPER_DEFAULT_PATH)} / "logs", 0); + else + return std::make_shared("LogDisk", std::filesystem::path{config.getString("path", DBMS_DEFAULT_PATH)} / "coordination/logs", 0); +} + +std::string KeeperContext::getSnapshotsPathFromConfig(const Poco::Util::AbstractConfiguration & config) +{ + /// the most specialized path + if (config.has("keeper_server.snapshot_storage_path")) + return config.getString("keeper_server.snapshot_storage_path"); + + if (config.has("keeper_server.storage_path")) + return std::filesystem::path{config.getString("keeper_server.storage_path")} / "snapshots"; + + if (standalone_keeper) + return std::filesystem::path{config.getString("path", KEEPER_DEFAULT_PATH)} / "snapshots"; + else + return std::filesystem::path{config.getString("path", DBMS_DEFAULT_PATH)} / "coordination/snapshots"; +} + +std::string KeeperContext::getStateFilePathFromConfig(const Poco::Util::AbstractConfiguration & config) +{ + if (config.has("keeper_server.storage_path")) + return std::filesystem::path{config.getString("keeper_server.storage_path")} / "state"; + + if (config.has("keeper_server.snapshot_storage_path")) + return std::filesystem::path(config.getString("keeper_server.snapshot_storage_path")).parent_path() / "state"; + + if (config.has("keeper_server.log_storage_path")) + return std::filesystem::path(config.getString("keeper_server.log_storage_path")).parent_path() / "state"; + + if (standalone_keeper) + return std::filesystem::path{config.getString("path", KEEPER_DEFAULT_PATH)} / "state"; + else + return std::filesystem::path{config.getString("path", DBMS_DEFAULT_PATH)} / "coordination/state"; +} + +} diff --git a/src/Coordination/KeeperContext.h b/src/Coordination/KeeperContext.h index 64fa8cea6ec..2a215f9d58f 100644 --- a/src/Coordination/KeeperContext.h +++ b/src/Coordination/KeeperContext.h @@ -1,10 +1,20 @@ #pragma once +#include + +#include + +#include +#include + namespace DB { -struct KeeperContext +class KeeperContext { +public: + explicit KeeperContext(bool 
standalone_keeper_); + enum class Phase : uint8_t { INIT, @@ -12,10 +22,35 @@ struct KeeperContext SHUTDOWN }; + void initialize(const Poco::Util::AbstractConfiguration & config); + + Phase getServerState() const; + void setServerState(Phase server_state_); + + bool ignoreSystemPathOnStartup() const; + + bool digestEnabled() const; + void setDigestEnabled(bool digest_enabled_); +private: + /// local disk defined using path or disk name + using Storage = std::variant; + + Storage getLogsPathFromConfig(const Poco::Util::AbstractConfiguration & config) const; + std::string getSnapshotsPathFromConfig(const Poco::Util::AbstractConfiguration & config); + std::string getStateFilePathFromConfig(const Poco::Util::AbstractConfiguration & config); + Phase server_state{Phase::INIT}; bool ignore_system_path_on_startup{false}; bool digest_enabled{true}; + + std::shared_ptr disk_selector; + + Storage log_storage_path; + Storage snapshot_storage_path; + Storage state_file_path; + + bool standalone_keeper; }; using KeeperContextPtr = std::shared_ptr; diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index d64134f3024..6632e58782f 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -789,12 +789,14 @@ static uint64_t getDirSize(const fs::path & dir) uint64_t KeeperDispatcher::getLogDirSize() const { - return getDirSize(configuration_and_settings->log_storage_path); + //return getDirSize(configuration_and_settings->log_storage_path); + return 0; } uint64_t KeeperDispatcher::getSnapDirSize() const { - return getDirSize(configuration_and_settings->snapshot_storage_path); + //return getDirSize(configuration_and_settings->snapshot_storage_path); + return 0; } Keeper4LWInfo KeeperDispatcher::getKeeper4LWInfo() const diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 9ca792eecdd..d63593436f4 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -114,18 +114,17 @@ KeeperServer::KeeperServer( , coordination_settings(configuration_and_settings_->coordination_settings) , log(&Poco::Logger::get("KeeperServer")) , is_recovering(config.getBool("keeper_server.force_recovery", false)) - , keeper_context{std::make_shared()} + , keeper_context{std::make_shared(true)} , create_snapshot_on_exit(config.getBool("keeper_server.create_snapshot_on_exit", true)) { if (coordination_settings->quorum_reads) LOG_WARNING(log, "Quorum reads enabled, Keeper will work slower."); - keeper_context->digest_enabled = config.getBool("keeper_server.digest_enabled", false); - keeper_context->ignore_system_path_on_startup = config.getBool("keeper_server.ignore_system_path_on_startup", false); + keeper_context->initialize(config); - if (!fs::exists(configuration_and_settings_->snapshot_storage_path)) - fs::create_directories(configuration_and_settings_->snapshot_storage_path); - auto snapshots_disk = std::make_shared("Keeper-snapshots", configuration_and_settings_->snapshot_storage_path, 0); + //if (!fs::exists(keeper_context->snapshot_storage_path)) + // fs::create_directories(keeper_context->snapshot_storage_path); + auto snapshots_disk = std::make_shared("Keeper-snapshots", "", 0); state_machine = nuraft::cs_new( responses_queue_, @@ -137,23 +136,23 @@ KeeperServer::KeeperServer( commit_callback, checkAndGetSuperdigest(configuration_and_settings_->super_digest)); - auto state_path = fs::path(configuration_and_settings_->state_file_path).parent_path().generic_string(); - auto 
state_file_name = fs::path(configuration_and_settings_->state_file_path).filename().generic_string(); + //auto state_path = fs::path(keeper_context->state_file_path).parent_path().generic_string(); + //auto state_file_name = fs::path(configuration_and_settings_->state_file_path).filename().generic_string(); - if (!fs::exists(state_path)) - fs::create_directories(state_path); - auto state_disk = std::make_shared("Keeper-state", state_path, 0); + //if (!fs::exists(state_path)) + // fs::create_directories(state_path); + auto state_disk = std::make_shared("Keeper-state", "", 0); - if (!fs::exists(configuration_and_settings_->log_storage_path)) - fs::create_directories(configuration_and_settings_->log_storage_path); - auto logs_disk = std::make_shared("Keeper-logs", configuration_and_settings_->log_storage_path, 0); + //if (!fs::exists(configuration_and_settings_->log_storage_path)) + // fs::create_directories(configuration_and_settings_->log_storage_path); + auto logs_disk = std::make_shared("Keeper-logs", "", 0); state_manager = nuraft::cs_new( server_id, "keeper_server", logs_disk, state_disk, - state_file_name, + "state", config, coordination_settings); } @@ -431,7 +430,7 @@ void KeeperServer::startup(const Poco::Util::AbstractConfiguration & config, boo launchRaftServer(config, enable_ipv6); - keeper_context->server_state = KeeperContext::Phase::RUNNING; + keeper_context->setServerState(KeeperContext::Phase::RUNNING); } void KeeperServer::shutdownRaftServer() @@ -446,7 +445,7 @@ void KeeperServer::shutdownRaftServer() raft_instance->shutdown(); - keeper_context->server_state = KeeperContext::Phase::SHUTDOWN; + keeper_context->setServerState(KeeperContext::Phase::SHUTDOWN); if (create_snapshot_on_exit) raft_instance->create_snapshot(); diff --git a/src/Coordination/KeeperSnapshotManager.cpp b/src/Coordination/KeeperSnapshotManager.cpp index b7fca5c1eab..e1c0c034cff 100644 --- a/src/Coordination/KeeperSnapshotManager.cpp +++ b/src/Coordination/KeeperSnapshotManager.cpp @@ -9,11 +9,11 @@ #include #include #include -#include #include #include #include #include +#include #include #include #include @@ -157,7 +157,7 @@ void KeeperStorageSnapshot::serialize(const KeeperStorageSnapshot & snapshot, Wr if (snapshot.version >= SnapshotVersion::V5) { writeBinary(snapshot.zxid, out); - if (keeper_context->digest_enabled) + if (keeper_context->digestEnabled()) { writeBinary(static_cast(KeeperStorage::CURRENT_DIGEST_VERSION), out); writeBinary(snapshot.nodes_digest, out); @@ -268,7 +268,7 @@ void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserial deserialization_result.snapshot_meta = deserializeSnapshotMetadata(in); KeeperStorage & storage = *deserialization_result.storage; - bool recalculate_digest = keeper_context->digest_enabled; + bool recalculate_digest = keeper_context->digestEnabled(); if (version >= SnapshotVersion::V5) { readBinary(storage.zxid, in); @@ -350,7 +350,7 @@ void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserial const std::string error_msg = fmt::format("Cannot read node on path {} from a snapshot because it is used as a system node", path); if (match_result == IS_CHILD) { - if (keeper_context->ignore_system_path_on_startup || keeper_context->server_state != KeeperContext::Phase::INIT) + if (keeper_context->ignoreSystemPathOnStartup() || keeper_context->getServerState() != KeeperContext::Phase::INIT) { LOG_ERROR(&Poco::Logger::get("KeeperSnapshotManager"), "{}. 
Ignoring it", error_msg); continue; @@ -366,7 +366,7 @@ void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserial { if (!is_node_empty(node)) { - if (keeper_context->ignore_system_path_on_startup || keeper_context->server_state != KeeperContext::Phase::INIT) + if (keeper_context->ignoreSystemPathOnStartup() || keeper_context->getServerState() != KeeperContext::Phase::INIT) { LOG_ERROR(&Poco::Logger::get("KeeperSnapshotManager"), "{}. Ignoring it", error_msg); node = KeeperStorage::Node{}; @@ -395,9 +395,9 @@ void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserial { if (itr.key != "/") { - auto parent_path = parentPath(itr.key); + auto parent_path = parentNodePath(itr.key); storage.container.updateValue( - parent_path, [version, path = itr.key](KeeperStorage::Node & value) { value.addChild(getBaseName(path), /*update_size*/ version < SnapshotVersion::V4); }); + parent_path, [version, path = itr.key](KeeperStorage::Node & value) { value.addChild(getBaseNodeName(path), /*update_size*/ version < SnapshotVersion::V4); }); } } diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index 53657d9b0b9..a7c845e5017 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -223,7 +223,7 @@ bool KeeperStateMachine::preprocess(const KeeperStorage::RequestForSession & req throw; } - if (keeper_context->digest_enabled && request_for_session.digest) + if (keeper_context->digestEnabled() && request_for_session.digest) assertDigest(*request_for_session.digest, storage->getNodesDigest(false), *request_for_session.request, false); return true; @@ -271,7 +271,7 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, n LOG_WARNING(log, "Failed to push response with session id {} to the queue, probably because of shutdown", response_for_session.session_id); } - if (keeper_context->digest_enabled && request_for_session.digest) + if (keeper_context->digestEnabled() && request_for_session.digest) assertDigest(*request_for_session.digest, storage->getNodesDigest(true), *request_for_session.request, true); } @@ -429,7 +429,7 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res }; - if (keeper_context->server_state == KeeperContext::Phase::SHUTDOWN) + if (keeper_context->getServerState() == KeeperContext::Phase::SHUTDOWN) { LOG_INFO(log, "Creating a snapshot during shutdown because 'create_snapshot_on_exit' is enabled."); auto snapshot_path = snapshot_task.create_snapshot(std::move(snapshot_task.snapshot)); diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 7a1a5e42632..64e785693e4 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -128,7 +128,7 @@ KeeperStorage::ResponsesForSessions processWatchesImpl( watches.erase(watch_it); } - auto parent_path = parentPath(path); + auto parent_path = parentNodePath(path); Strings paths_to_check_for_list_watches; if (event_type == Coordination::Event::CREATED) @@ -276,7 +276,7 @@ void KeeperStorage::initializeSystemNodes() [](auto & node) { ++node.stat.numChildren; - node.addChild(getBaseName(keeper_system_path)); + node.addChild(getBaseNodeName(keeper_system_path)); } ); addDigest(updated_root_it->value, "/"); @@ -290,9 +290,9 @@ void KeeperStorage::initializeSystemNodes() child_system_node.setData(data); auto [map_key, _] = container.insert(std::string{path}, child_system_node); /// Take child path from key owned by map. 
- auto child_path = getBaseName(map_key->getKey()); + auto child_path = getBaseNodeName(map_key->getKey()); container.updateValue( - parentPath(StringRef(path)), + parentNodePath(StringRef(path)), [child_path](auto & parent) { // don't update stats so digest is okay @@ -705,7 +705,7 @@ bool KeeperStorage::createNode( bool is_sequental, Coordination::ACLs node_acls) { - auto parent_path = parentPath(path); + auto parent_path = parentNodePath(path); auto node_it = container.find(parent_path); if (node_it == container.end()) @@ -728,7 +728,7 @@ bool KeeperStorage::createNode( created_node.is_sequental = is_sequental; auto [map_key, _] = container.insert(path, created_node); /// Take child path from key owned by map. - auto child_path = getBaseName(map_key->getKey()); + auto child_path = getBaseNodeName(map_key->getKey()); container.updateValue( parent_path, [child_path](KeeperStorage::Node & parent) @@ -758,8 +758,8 @@ bool KeeperStorage::removeNode(const std::string & path, int32_t version) acl_map.removeUsage(prev_node.acl_id); container.updateValue( - parentPath(path), - [child_basename = getBaseName(node_it->key)](KeeperStorage::Node & parent) + parentNodePath(path), + [child_basename = getBaseNodeName(node_it->key)](KeeperStorage::Node & parent) { parent.removeChild(child_basename); chassert(parent.stat.numChildren == static_cast(parent.getChildren().size())); @@ -843,7 +843,7 @@ Coordination::ACLs getNodeACLs(KeeperStorage & storage, StringRef path, bool is_ void handleSystemNodeModification(const KeeperContext & keeper_context, std::string_view error_msg) { - if (keeper_context.server_state == KeeperContext::Phase::INIT && !keeper_context.ignore_system_path_on_startup) + if (keeper_context.getServerState() == KeeperContext::Phase::INIT && !keeper_context.ignoreSystemPathOnStartup()) throw Exception( ErrorCodes::LOGICAL_ERROR, "{}. Ignoring it can lead to data loss. 
" @@ -906,7 +906,7 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override { auto path = zk_request->getPath(); - return storage.checkACL(parentPath(path), Coordination::ACL::Create, session_id, is_local); + return storage.checkACL(parentNodePath(path), Coordination::ACL::Create, session_id, is_local); } std::vector @@ -917,7 +917,7 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr std::vector new_deltas; - auto parent_path = parentPath(request.path); + auto parent_path = parentNodePath(request.path); auto parent_node = storage.uncommitted_state.getNode(parent_path); if (parent_node == nullptr) return {KeeperStorage::Delta{zxid, Coordination::Error::ZNONODE}}; @@ -948,7 +948,7 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr if (storage.uncommitted_state.getNode(path_created)) return {KeeperStorage::Delta{zxid, Coordination::Error::ZNODEEXISTS}}; - if (getBaseName(path_created).size == 0) + if (getBaseNodeName(path_created).size == 0) return {KeeperStorage::Delta{zxid, Coordination::Error::ZBADARGUMENTS}}; Coordination::ACLs node_acls; @@ -1098,7 +1098,7 @@ struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestPr { bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override { - return storage.checkACL(parentPath(zk_request->getPath()), Coordination::ACL::Delete, session_id, is_local); + return storage.checkACL(parentNodePath(zk_request->getPath()), Coordination::ACL::Delete, session_id, is_local); } using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; @@ -1120,7 +1120,7 @@ struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestPr const auto update_parent_pzxid = [&]() { - auto parent_path = parentPath(request.path); + auto parent_path = parentNodePath(request.path); if (!storage.uncommitted_state.getNode(parent_path)) return; @@ -1155,7 +1155,7 @@ struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestPr update_parent_pzxid(); new_deltas.emplace_back( - std::string{parentPath(request.path)}, + std::string{parentNodePath(request.path)}, zxid, KeeperStorage::UpdateNodeDelta{[](KeeperStorage::Node & parent) { @@ -1298,7 +1298,7 @@ struct KeeperStorageSetRequestProcessor final : public KeeperStorageRequestProce request.version}); new_deltas.emplace_back( - parentPath(request.path).toString(), + parentNodePath(request.path).toString(), zxid, KeeperStorage::UpdateNodeDelta { @@ -1458,7 +1458,7 @@ struct KeeperStorageCheckRequestProcessor final : public KeeperStorageRequestPro bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override { auto path = zk_request->getPath(); - return storage.checkACL(check_not_exists ? parentPath(path) : path, Coordination::ACL::Read, session_id, is_local); + return storage.checkACL(check_not_exists ? 
parentNodePath(path) : path, Coordination::ACL::Read, session_id, is_local); } std::vector @@ -2001,7 +2001,7 @@ KeeperStorageRequestProcessorsFactory::KeeperStorageRequestProcessorsFactory() UInt64 KeeperStorage::calculateNodesDigest(UInt64 current_digest, const std::vector & new_deltas) const { - if (!keeper_context->digest_enabled) + if (!keeper_context->digestEnabled()) return current_digest; std::unordered_map> updated_nodes; @@ -2099,7 +2099,7 @@ void KeeperStorage::preprocessRequest( TransactionInfo transaction{.zxid = new_last_zxid}; uint64_t new_digest = getNodesDigest(false).value; SCOPE_EXIT({ - if (keeper_context->digest_enabled) + if (keeper_context->digestEnabled()) // if the version of digest we got from the leader is the same as the one this instances has, we can simply copy the value // and just check the digest on the commit // a mistake can happen while applying the changes to the uncommitted_state so for now let's just recalculate the digest here also @@ -2122,7 +2122,7 @@ void KeeperStorage::preprocessRequest( { new_deltas.emplace_back ( - parentPath(ephemeral_path).toString(), + parentNodePath(ephemeral_path).toString(), new_last_zxid, UpdateNodeDelta { @@ -2315,7 +2315,7 @@ void KeeperStorage::rollbackRequest(int64_t rollback_zxid, bool allow_missing) KeeperStorage::Digest KeeperStorage::getNodesDigest(bool committed) const { - if (!keeper_context->digest_enabled) + if (!keeper_context->digestEnabled()) return {.version = DigestVersion::NO_DIGEST}; if (committed || uncommitted_transactions.empty()) @@ -2326,13 +2326,13 @@ KeeperStorage::Digest KeeperStorage::getNodesDigest(bool committed) const void KeeperStorage::removeDigest(const Node & node, const std::string_view path) { - if (keeper_context->digest_enabled) + if (keeper_context->digestEnabled()) nodes_digest -= node.getDigest(path); } void KeeperStorage::addDigest(const Node & node, const std::string_view path) { - if (keeper_context->digest_enabled) + if (keeper_context->digestEnabled()) { node.invalidateDigestCache(); nodes_digest += node.getDigest(path); diff --git a/src/Coordination/ZooKeeperDataReader.cpp b/src/Coordination/ZooKeeperDataReader.cpp index bdc462f3ea0..94fc07bcc4a 100644 --- a/src/Coordination/ZooKeeperDataReader.cpp +++ b/src/Coordination/ZooKeeperDataReader.cpp @@ -139,8 +139,8 @@ int64_t deserializeStorageData(KeeperStorage & storage, ReadBuffer & in, Poco::L { if (itr.key != "/") { - auto parent_path = parentPath(itr.key); - storage.container.updateValue(parent_path, [my_path = itr.key] (KeeperStorage::Node & value) { value.addChild(getBaseName(my_path)); ++value.stat.numChildren; }); + auto parent_path = parentNodePath(itr.key); + storage.container.updateValue(parent_path, [my_path = itr.key] (KeeperStorage::Node & value) { value.addChild(getBaseNodeName(my_path)); ++value.stat.numChildren; }); } } diff --git a/src/Coordination/pathUtils.cpp b/src/Coordination/pathUtils.cpp index 1e1da339d2e..afa42b4a639 100644 --- a/src/Coordination/pathUtils.cpp +++ b/src/Coordination/pathUtils.cpp @@ -21,7 +21,7 @@ static size_t findLastSlash(StringRef path) return std::string::npos; } -StringRef parentPath(StringRef path) +StringRef parentNodePath(StringRef path) { auto rslash_pos = findLastSlash(path); if (rslash_pos > 0) @@ -29,7 +29,7 @@ StringRef parentPath(StringRef path) return "/"; } -StringRef getBaseName(StringRef path) +StringRef getBaseNodeName(StringRef path) { size_t basename_start = findLastSlash(path); return StringRef{path.data + basename_start + 1, path.size - basename_start - 1}; 
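For reference, a short sketch of the semantics the renamed helpers keep (the rename above does not change behavior; the example paths are illustrative and not taken from the patch):

// parentNodePath (formerly parentPath) strips the last path component and
// falls back to the root when the only slash is the leading one:
//   parentNodePath("/a/b/c")  -> "/a/b"
//   parentNodePath("/a")      -> "/"
// getBaseNodeName (formerly getBaseName) returns everything after the last slash:
//   getBaseNodeName("/a/b/c") -> "c"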
diff --git a/src/Coordination/pathUtils.h b/src/Coordination/pathUtils.h index 69ed2d8b177..b2b79b14110 100644 --- a/src/Coordination/pathUtils.h +++ b/src/Coordination/pathUtils.h @@ -6,8 +6,8 @@ namespace DB { -StringRef parentPath(StringRef path); +StringRef parentNodePath(StringRef path); -StringRef getBaseName(StringRef path); +StringRef getBaseNodeName(StringRef path); } diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index 62217fb2dd3..de5f2da262b 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -2366,7 +2366,7 @@ TEST_P(CoordinationTest, TestSystemNodeModify) int64_t zxid{0}; // On INIT we abort when a system path is modified - keeper_context->server_state = KeeperContext::Phase::RUNNING; + keeper_context->setServerState(KeeperContext::Phase::RUNNING); KeeperStorage storage{500, "", keeper_context}; const auto assert_create = [&](const std::string_view path, const auto expected_code) { diff --git a/src/Core/SettingsFields.cpp b/src/Core/SettingsFields.cpp index 8cd4efb68c6..c1f9fa00f2a 100644 --- a/src/Core/SettingsFields.cpp +++ b/src/Core/SettingsFields.cpp @@ -379,6 +379,35 @@ void SettingFieldMap::readBinary(ReadBuffer & in) *this = map; } +#else + +SettingFieldMap::SettingFieldMap(const Field &) : value(Map()) {} +String SettingFieldMap::toString() const +{ + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Setting of type Map not supported"); +} + + +SettingFieldMap & SettingFieldMap::operator =(const Field &) +{ + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Setting of type Map not supported"); +} + +void SettingFieldMap::parseFromString(const String &) +{ + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Setting of type Map not supported"); +} + +void SettingFieldMap::writeBinary(WriteBuffer &) const +{ + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Setting of type Map not supported"); +} + +void SettingFieldMap::readBinary(ReadBuffer &) +{ + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Setting of type Map not supported"); +} + #endif namespace diff --git a/src/Core/SettingsFields.h b/src/Core/SettingsFields.h index 2cd55e6b4c5..8cbce71b094 100644 --- a/src/Core/SettingsFields.h +++ b/src/Core/SettingsFields.h @@ -245,6 +245,12 @@ struct SettingFieldString void readBinary(ReadBuffer & in); }; +#ifdef CLICKHOUSE_PROGRAM_STANDALONE_BUILD +#define NORETURN [[noreturn]] +#else +#define NORETURN +#endif + struct SettingFieldMap { public: @@ -261,13 +267,15 @@ public: operator const Map &() const { return value; } /// NOLINT explicit operator Field() const { return value; } - String toString() const; - void parseFromString(const String & str); + NORETURN String toString() const; + NORETURN void parseFromString(const String & str); - void writeBinary(WriteBuffer & out) const; - void readBinary(ReadBuffer & in); + NORETURN void writeBinary(WriteBuffer & out) const; + NORETURN void readBinary(ReadBuffer & in); }; +#undef NORETURN + struct SettingFieldChar { public: diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp index 68b5a9c9d96..6c3556580a2 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp @@ -50,6 +50,7 @@ SeekableReadBufferPtr ReadBufferFromRemoteFSGather::createImplementationBuffer(c size_t current_read_until_position = read_until_position ? 
read_until_position : object.bytes_size; auto current_read_buffer_creator = [=, this]() { return read_buffer_creator(object_path, current_read_until_position); }; +#ifndef CLICKHOUSE_PROGRAM_STANDALONE_BUILD if (with_cache) { auto cache_key = settings.remote_fs_cache->createKeyForPath(object_path); @@ -66,6 +67,7 @@ SeekableReadBufferPtr ReadBufferFromRemoteFSGather::createImplementationBuffer(c read_until_position ? std::optional(read_until_position) : std::nullopt, cache_log); } +#endif return current_read_buffer_creator(); } diff --git a/src/Disks/IO/getThreadPoolReader.cpp b/src/Disks/IO/getThreadPoolReader.cpp index deb8f66106c..7dbff9ffe76 100644 --- a/src/Disks/IO/getThreadPoolReader.cpp +++ b/src/Disks/IO/getThreadPoolReader.cpp @@ -7,9 +7,7 @@ #include #include -#ifndef CLICKHOUSE_PROGRAM_STANDALONE_BUILD #include -#endif namespace DB { @@ -21,32 +19,10 @@ namespace ErrorCodes IAsynchronousReader & getThreadPoolReader(FilesystemReaderType type) { -#ifdef CLICKHOUSE_PROGRAM_STANDALONE_BUILD - const auto & config = Poco::Util::Application::instance().config(); - switch (type) - { - case FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER: - { - static auto asynchronous_remote_fs_reader = createThreadPoolReader(type, config); - return *asynchronous_remote_fs_reader; - } - case FilesystemReaderType::ASYNCHRONOUS_LOCAL_FS_READER: - { - static auto asynchronous_local_fs_reader = createThreadPoolReader(type, config); - return *asynchronous_local_fs_reader; - } - case FilesystemReaderType::SYNCHRONOUS_LOCAL_FS_READER: - { - static auto synchronous_local_fs_reader = createThreadPoolReader(type, config); - return *synchronous_local_fs_reader; - } - } -#else auto context = Context::getGlobalContextInstance(); if (!context) throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context not initialized"); return context->getThreadPoolReader(type); -#endif } std::unique_ptr createThreadPoolReader( diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index f832ba5b7b6..2a9cb86f91c 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -530,24 +529,6 @@ DiskObjectStoragePtr DiskObjectStorage::createDiskObjectStorage() threadpool_size); } -void DiskObjectStorage::wrapWithCache(FileCachePtr cache, const FileCacheSettings & cache_settings, const String & layer_name) -{ - object_storage = std::make_shared(object_storage, cache, cache_settings, layer_name); -} - -NameSet DiskObjectStorage::getCacheLayersNames() const -{ - NameSet cache_layers; - auto current_object_storage = object_storage; - while (current_object_storage->supportsCache()) - { - auto * cached_object_storage = assert_cast(current_object_storage.get()); - cache_layers.insert(cached_object_storage->getCacheConfigName()); - current_object_storage = cached_object_storage->getWrappedObjectStorage(); - } - return cache_layers; -} - std::unique_ptr DiskObjectStorage::readFile( const String & path, const ReadSettings & settings, diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.h b/src/Disks/ObjectStorages/DiskObjectStorage.h index 4372bc75950..2d942eda1d2 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.h +++ b/src/Disks/ObjectStorages/DiskObjectStorage.h @@ -184,20 +184,22 @@ public: /// MergeTree table on this disk. bool isWriteOnce() const override; - /// Add a cache layer. 
- /// Example: DiskObjectStorage(S3ObjectStorage) -> DiskObjectStorage(CachedObjectStorage(S3ObjectStorage)) - /// There can be any number of cache layers: - /// DiskObjectStorage(CachedObjectStorage(...CacheObjectStorage(S3ObjectStorage)...)) - void wrapWithCache(FileCachePtr cache, const FileCacheSettings & cache_settings, const String & layer_name); - /// Get structure of object storage this disk works with. Examples: /// DiskObjectStorage(S3ObjectStorage) /// DiskObjectStorage(CachedObjectStorage(S3ObjectStorage)) /// DiskObjectStorage(CachedObjectStorage(CachedObjectStorage(S3ObjectStorage))) String getStructure() const { return fmt::format("DiskObjectStorage-{}({})", getName(), object_storage->getName()); } +#ifndef CLICKHOUSE_PROGRAM_STANDALONE_BUILD + /// Add a cache layer. + /// Example: DiskObjectStorage(S3ObjectStorage) -> DiskObjectStorage(CachedObjectStorage(S3ObjectStorage)) + /// There can be any number of cache layers: + /// DiskObjectStorage(CachedObjectStorage(...CacheObjectStorage(S3ObjectStorage)...)) + void wrapWithCache(FileCachePtr cache, const FileCacheSettings & cache_settings, const String & layer_name); + /// Get names of all cache layers. Name is how cache is defined in configuration file. NameSet getCacheLayersNames() const override; +#endif static std::shared_ptr getAsyncExecutor(const std::string & log_name, size_t size); diff --git a/src/Disks/ObjectStorages/DiskObjectStorageCache.cpp b/src/Disks/ObjectStorages/DiskObjectStorageCache.cpp new file mode 100644 index 00000000000..9e5012dec54 --- /dev/null +++ b/src/Disks/ObjectStorages/DiskObjectStorageCache.cpp @@ -0,0 +1,28 @@ +#include + +#include + +#include + +namespace DB +{ + +void DiskObjectStorage::wrapWithCache(FileCachePtr cache, const FileCacheSettings & cache_settings, const String & layer_name) +{ + object_storage = std::make_shared(object_storage, cache, cache_settings, layer_name); +} + +NameSet DiskObjectStorage::getCacheLayersNames() const +{ + NameSet cache_layers; + auto current_object_storage = object_storage; + while (current_object_storage->supportsCache()) + { + auto * cached_object_storage = assert_cast(current_object_storage.get()); + cache_layers.insert(cached_object_storage->getCacheConfigName()); + current_object_storage = cached_object_storage->getWrappedObjectStorage(); + } + return cache_layers; +} + +} diff --git a/src/Disks/registerDisks.cpp b/src/Disks/registerDisks.cpp index 48d5a19fb61..676744a8e79 100644 --- a/src/Disks/registerDisks.cpp +++ b/src/Disks/registerDisks.cpp @@ -32,6 +32,8 @@ void registerDiskCache(DiskFactory & factory, bool global_skip_access_check); void registerDiskLocalObjectStorage(DiskFactory & factory, bool global_skip_access_check); +#ifndef CLICKHOUSE_PROGRAM_STANDALONE_BUILD + void registerDisks(bool global_skip_access_check) { auto & factory = DiskFactory::instance(); @@ -61,4 +63,19 @@ void registerDisks(bool global_skip_access_check) registerDiskLocalObjectStorage(factory, global_skip_access_check); } +#else + +void registerDisks(bool global_skip_access_check) +{ + auto & factory = DiskFactory::instance(); + + registerDiskLocal(factory, global_skip_access_check); + +#if USE_AWS_S3 + registerDiskS3(factory, global_skip_access_check); +#endif +} + +#endif + } diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 87843a458e8..23f3281330a 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -1,5 +1,7 @@ #pragma once +#ifndef CLICKHOUSE_PROGRAM_STANDALONE_BUILD + #include #include #include @@ -1226,3 +1228,9 
@@ struct HTTPContext : public IHTTPContext
 };

 }
+
+#else
+
+#include
+
+#endif

From f459ac5517bcac49c7e6d583fca827504d2b0aa1 Mon Sep 17 00:00:00 2001
From: zvonand
Date: Sat, 20 May 2023 01:38:35 +0200
Subject: [PATCH 0156/1997] resolve ambiguity by adding a section to docs

---
 docs/en/operations/settings/settings.md | 20 ++++++++++++++++++++
 docs/ru/operations/settings/settings.md | 20 ++++++++++++++++++++
 2 files changed, 40 insertions(+)

diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index cf9209e182f..9cca1ee5ec3 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -4077,6 +4077,26 @@ SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Europe/Zuric
 1999-12-13 07:23:23.123
 ```

+:::warning
+The way this setting affects parsing of Date or DateTime types may seem non-obvious; see the example and explanation below:
+:::
+
+```sql
+CREATE TABLE test_tz (`d` DateTime('UTC')) ENGINE = Memory AS SELECT toDateTime('2000-01-01 00:00:00', 'UTC');
+
+SELECT *, timezone() FROM test_tz WHERE d = toDateTime('2000-01-01 00:00:00') SETTINGS session_timezone = 'Asia/Novosibirsk'
+0 rows in set.
+
+SELECT *, timezone() FROM test_tz WHERE d = '2000-01-01 00:00:00' SETTINGS session_timezone = 'Asia/Novosibirsk'
+┌───────────────────d─┬─timezone()───────┐
+│ 2000-01-01 00:00:00 │ Asia/Novosibirsk │
+└─────────────────────┴──────────────────┘
+```
+
+This happens due to different parsing pipelines:
+ - `toDateTime('2000-01-01 00:00:00')` creates a new DateTime in the usual way, so the `session_timezone` setting from the query context is applied.
+ - `2000-01-01 00:00:00` is parsed to a DateTime inheriting the type of the `d` column, including its time zone, so `session_timezone` has no impact on this value.
+
 Possible values:

 - Any timezone name from `system.time_zones`, e.g. `Europe/Berlin`, `UTC` or `Zulu`

diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index 98486847fd9..607082054cc 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -4105,6 +4105,26 @@ SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Europe/Zuric
 Значение по умолчанию: `''`.

+:::warning
+То, как этот параметр влияет на парсинг значений типа Date или DateTime, может показаться неочевидным. Пример и пояснение см. ниже:
+:::
+
+```sql
+CREATE TABLE test_tz (`d` DateTime('UTC')) ENGINE = Memory AS SELECT toDateTime('2000-01-01 00:00:00', 'UTC');
+
+SELECT *, timezone() FROM test_tz WHERE d = toDateTime('2000-01-01 00:00:00') SETTINGS session_timezone = 'Asia/Novosibirsk'
+0 rows in set.
+
+SELECT *, timezone() FROM test_tz WHERE d = '2000-01-01 00:00:00' SETTINGS session_timezone = 'Asia/Novosibirsk'
+┌───────────────────d─┬─timezone()───────┐
+│ 2000-01-01 00:00:00 │ Asia/Novosibirsk │
+└─────────────────────┴──────────────────┘
+```
+
+Это происходит из-за различного происхождения значения, используемого для сравнения:
+- `toDateTime('2000-01-01 00:00:00')` создаёт значение типа `DateTime` как и в любом другом случае, в том числе применяет параметр `session_timezone` из контекста запроса,
+- `2000-01-01 00:00:00` парсится в `DateTime` того же типа, что и колонка `d` (в том числе с той же `timezone`), и параметр `session_timezone` в данном случае не учитывается.
+ **Смотрите также** - [timezone](../server-configuration-parameters/settings.md#server_configuration_parameters-timezone) \ No newline at end of file From af5793b26e0ee8fc02201f8d0439b5a15a019e7f Mon Sep 17 00:00:00 2001 From: zvonand Date: Sat, 20 May 2023 16:38:45 +0200 Subject: [PATCH 0157/1997] fix incode docs --- src/Functions/serverConstants.cpp | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/src/Functions/serverConstants.cpp b/src/Functions/serverConstants.cpp index 1460fc16265..9cd43be50c4 100644 --- a/src/Functions/serverConstants.cpp +++ b/src/Functions/serverConstants.cpp @@ -160,31 +160,33 @@ REGISTER_FUNCTION(TcpPort) REGISTER_FUNCTION(Timezone) { - factory.registerFunction({ - R"( + factory.registerFunction( + FunctionDocumentation{ + .description=R"( Returns the default timezone for current session. Used as default timezone for parsing DateTime|DateTime64 without explicitly specified timezone. Can be changed with SET timezone = 'New/Tz' [example:timezone] -)", - Documentation::Examples{{"timezone", "SELECT timezone();"}}, - Documentation::Categories{"Constant", "Miscellaneous"} - }); - factory.registerAlias("timeZone", "timezone"); + )", + .examples{{"timezone", "SELECT timezone();", ""}}, + .categories{"Constant", "Miscellaneous"} +}); +factory.registerAlias("timeZone", "timezone"); } REGISTER_FUNCTION(ServerTimezone) { - factory.registerFunction({ - R"( + factory.registerFunction( + FunctionDocumentation{ + .description=R"( Returns the timezone name in which server operates. [example:serverTimezone] -)", - Documentation::Examples{{"serverTimezone", "SELECT serverTimezone();"}}, - Documentation::Categories{"Constant", "Miscellaneous"} - }); + )", + .examples{{"serverTimezone", "SELECT serverTimezone();", ""}}, + .categories{"Constant", "Miscellaneous"} +}); factory.registerAlias("serverTimeZone", "serverTimezone"); factory.registerAlias("servertimezone", "serverTimezone"); } From 24320f8f93f56aa9a7088c4daf80a066facdc5b6 Mon Sep 17 00:00:00 2001 From: alekseygolub Date: Sun, 21 May 2023 15:58:29 +0000 Subject: [PATCH 0158/1997] fixed bad pattern in tests --- .../0_stateless/02722_database_filesystem.sh | 29 ++++++++++--------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/tests/queries/0_stateless/02722_database_filesystem.sh b/tests/queries/0_stateless/02722_database_filesystem.sh index 80f97af693e..7466141d3e3 100755 --- a/tests/queries/0_stateless/02722_database_filesystem.sh +++ b/tests/queries/0_stateless/02722_database_filesystem.sh @@ -1,5 +1,4 @@ #!/usr/bin/env bash -# Tags: no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -9,19 +8,21 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) CLICKHOUSE_USER_FILES_PATH=$(clickhouse-client --query "select _path, _file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') # Prepare data -mkdir -p ${CLICKHOUSE_USER_FILES_PATH}/tmp/ -echo '"id","str","int","text"' > ${CLICKHOUSE_USER_FILES_PATH}/tmp.csv -echo '1,"abc",123,"abacaba"' >> ${CLICKHOUSE_USER_FILES_PATH}/tmp.csv -echo '2,"def",456,"bacabaa"' >> ${CLICKHOUSE_USER_FILES_PATH}/tmp.csv -echo '3,"story",78912,"acabaab"' >> ${CLICKHOUSE_USER_FILES_PATH}/tmp.csv -echo '4,"history",21321321,"cabaaba"' >> ${CLICKHOUSE_USER_FILES_PATH}/tmp.csv +unique_name=${CLICKHOUSE_TEST_UNIQUE_NAME} +user_files_tmp_dir=${CLICKHOUSE_USER_FILES_PATH}/${unique_name} +mkdir -p ${user_files_tmp_dir}/tmp/ 
+echo '"id","str","int","text"' > ${user_files_tmp_dir}/tmp.csv +echo '1,"abc",123,"abacaba"' >> ${user_files_tmp_dir}/tmp.csv +echo '2,"def",456,"bacabaa"' >> ${user_files_tmp_dir}/tmp.csv +echo '3,"story",78912,"acabaab"' >> ${user_files_tmp_dir}/tmp.csv +echo '4,"history",21321321,"cabaaba"' >> ${user_files_tmp_dir}/tmp.csv tmp_dir=${CLICKHOUSE_TEST_UNIQUE_NAME} [[ -d $tmp_dir ]] && rm -rd $tmp_dir mkdir $tmp_dir -cp ${CLICKHOUSE_USER_FILES_PATH}/tmp.csv ${tmp_dir}/tmp.csv -cp ${CLICKHOUSE_USER_FILES_PATH}/tmp.csv ${CLICKHOUSE_USER_FILES_PATH}/tmp/tmp.csv -cp ${CLICKHOUSE_USER_FILES_PATH}/tmp.csv ${CLICKHOUSE_USER_FILES_PATH}/tmp.myext +cp ${user_files_tmp_dir}/tmp.csv ${tmp_dir}/tmp.csv +cp ${user_files_tmp_dir}/tmp.csv ${user_files_tmp_dir}/tmp/tmp.csv +cp ${user_files_tmp_dir}/tmp.csv ${user_files_tmp_dir}/tmp.myext ################# echo "Test 1: create filesystem database and check implicit calls" @@ -31,8 +32,8 @@ CREATE DATABASE test1 ENGINE = Filesystem; """ echo $? ${CLICKHOUSE_CLIENT} --query "SHOW DATABASES" | grep "test1" -${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`tmp.csv\`;" -${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`tmp/tmp.csv\`;" +${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`${unique_name}/tmp.csv\`;" +${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`${unique_name}/tmp/tmp.csv\`;" ${CLICKHOUSE_LOCAL} -q "SELECT COUNT(*) FROM \"${tmp_dir}/tmp.csv\"" ################# @@ -62,9 +63,9 @@ CREATE DATABASE test2 ENGINE = Filesystem('relative_unknown_dir'); ${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`tmp2.csv\`;" 2>&1| grep -F "Code: 107" > /dev/null && echo "OK" # BAD_ARGUMENTS: Cannot determine the file format by it's extension -${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`tmp.myext\`;" 2>&1| grep -F "Code: 36" > /dev/null && echo "OK" +${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`${unique_name}/tmp.myext\`;" 2>&1| grep -F "Code: 36" > /dev/null && echo "OK" # Clean ${CLICKHOUSE_CLIENT} --query "DROP DATABASE test1;" rm -rd $tmp_dir -rm -rd $CLICKHOUSE_USER_FILES_PATH +rm -rd $user_files_tmp_dir From 8c816a5c4a97044b1d3d902145ef4eefecd7beb8 Mon Sep 17 00:00:00 2001 From: zvonand Date: Mon, 22 May 2023 01:00:40 +0200 Subject: [PATCH 0159/1997] update --- src/Common/DateLUT.h | 37 ++++++++++++++++--------------- src/Core/Settings.h | 2 +- src/Functions/serverConstants.cpp | 4 +--- src/IO/ReadHelpers.h | 4 ++-- 4 files changed, 23 insertions(+), 24 deletions(-) diff --git a/src/Common/DateLUT.h b/src/Common/DateLUT.h index 59b280240ea..23698331afe 100644 --- a/src/Common/DateLUT.h +++ b/src/Common/DateLUT.h @@ -17,30 +17,24 @@ class DateLUT : private boost::noncopyable { public: - /// Return singleton DateLUTImpl instance for server's (native) time zone. - static ALWAYS_INLINE const DateLUTImpl & serverTimezoneInstance() - { - const auto & date_lut = getInstance(); - return *date_lut.default_impl.load(std::memory_order_acquire); - } - - /// Return singleton DateLUTImpl instance for timezone set by `timezone` setting for current session is used. - /// If it is not set, server's timezone (the one which server has) is being used. + /// Return singleton DateLUTImpl instance for session timezone. + /// The session timezone is configured by a session setting. + /// If not set (empty string), it is the server timezone. 
static ALWAYS_INLINE const DateLUTImpl & instance() { const auto & date_lut = getInstance(); if (DB::CurrentThread::isInitialized()) { - std::string effective_time_zone; - const auto query_context = DB::CurrentThread::get().getQueryContext(); + std::string context_timezone; + const DB::ContextPtr query_context = DB::CurrentThread::get().getQueryContext(); if (query_context) { - effective_time_zone = extractTimezoneFromContext(query_context); + context_timezone = extractTimezoneFromContext(query_context); - if (!effective_time_zone.empty()) - return date_lut.getImplementation(effective_time_zone); + if (!context_timezone.empty()) + return date_lut.getImplementation(context_timezone); } /// Timezone is passed in query_context, but on CH-Client we have no query context, @@ -48,10 +42,10 @@ public: const auto global_context = DB::CurrentThread::get().getGlobalContext(); if (global_context) { - effective_time_zone = extractTimezoneFromContext(global_context); + context_timezone = extractTimezoneFromContext(global_context); - if (!effective_time_zone.empty()) - return date_lut.getImplementation(effective_time_zone); + if (!context_timezone.empty()) + return date_lut.getImplementation(context_timezone); } } @@ -67,6 +61,13 @@ public: return date_lut.getImplementation(time_zone); } + // Return singleton DateLUTImpl for the server time zone. + static ALWAYS_INLINE const DateLUTImpl & serverTimezoneInstance() + { + const auto & date_lut = getInstance(); + return *date_lut.default_impl.load(std::memory_order_acquire); + } + static void setDefaultTimezone(const std::string & time_zone) { auto & date_lut = getInstance(); @@ -80,7 +81,7 @@ protected: private: static DateLUT & getInstance(); - static std::string extractTimezoneFromContext(const DB::ContextPtr query_context); + static std::string extractTimezoneFromContext(DB::ContextPtr query_context); const DateLUTImpl & getImplementation(const std::string & time_zone) const; diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 90063f8efd4..2d766e8e18f 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -747,7 +747,7 @@ class IColumn; M(Bool, allow_experimental_undrop_table_query, false, "Allow to use undrop query to restore dropped table in a limited time", 0) \ M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \ M(UInt64, extract_kvp_max_pairs_per_row, 1000, "Max number pairs that can be produced by extractKeyValuePairs function. Used to safeguard against consuming too much memory.", 0) \ - M(Timezone, session_timezone, "", "Use specified timezone for interpreting Date and DateTime instead of server's timezone in current session.", 0) \ + M(Timezone, session_timezone, "", "The default timezone for the current session. The server default timezone, if empty.", 0) \ // End of COMMON_SETTINGS // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS. diff --git a/src/Functions/serverConstants.cpp b/src/Functions/serverConstants.cpp index 9cd43be50c4..d3e1e6e10fe 100644 --- a/src/Functions/serverConstants.cpp +++ b/src/Functions/serverConstants.cpp @@ -60,7 +60,7 @@ namespace }; - /// Returns default timezone for current session. + /// Returns timezone for current session. class FunctionTimezone : public FunctionConstantBase { public: @@ -187,8 +187,6 @@ Returns the timezone name in which server operates. 
.examples{{"serverTimezone", "SELECT serverTimezone();", ""}}, .categories{"Constant", "Miscellaneous"} }); - factory.registerAlias("serverTimeZone", "serverTimezone"); - factory.registerAlias("servertimezone", "serverTimezone"); } REGISTER_FUNCTION(Uptime) diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index 3bd9275322e..cbe18e11c9a 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -706,7 +706,7 @@ inline void convertToDayNum(DayNum & date, ExtendedDayNum & from) } template -inline ReturnType readDateTextImpl(DayNum & date, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance()) +inline ReturnType readDateTextImpl(DayNum & date, ReadBuffer & buf, const DateLUTImpl & date_lut) { static constexpr bool throw_exception = std::is_same_v; @@ -723,7 +723,7 @@ inline ReturnType readDateTextImpl(DayNum & date, ReadBuffer & buf, const DateLU } template -inline ReturnType readDateTextImpl(ExtendedDayNum & date, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance()) +inline ReturnType readDateTextImpl(ExtendedDayNum & date, ReadBuffer & buf, const DateLUTImpl & date_lut) { static constexpr bool throw_exception = std::is_same_v; From e1bf96a786be0883993d2d9e8a5d2c1fcd89095c Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 22 May 2023 14:29:15 +0800 Subject: [PATCH 0160/1997] finish dev --- src/Functions/geohashEncode.cpp | 76 ++++++++++++++++++++++++++++----- 1 file changed, 66 insertions(+), 10 deletions(-) diff --git a/src/Functions/geohashEncode.cpp b/src/Functions/geohashEncode.cpp index bc0c8b8fc5f..a05fa7fc8d6 100644 --- a/src/Functions/geohashEncode.cpp +++ b/src/Functions/geohashEncode.cpp @@ -37,7 +37,7 @@ public: bool isVariadic() const override { return true; } size_t getNumberOfArguments() const override { return 0; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {2}; } + // ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {2}; } bool useDefaultImplementationForConstants() const override { return true; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } @@ -59,7 +59,50 @@ public: } template - bool tryExecute(const IColumn * lon_column, const IColumn * lat_column, UInt64 precision_value, ColumnPtr & result) const + bool tryVectorVector(const IColumn * lon_column, const IColumn * lat_column, const IColumn * precision_column, ColumnPtr & result) const + { + const ColumnVector * longitude = checkAndGetColumn>(lon_column); + const ColumnVector * latitude = checkAndGetColumn>(lat_column); + if (!latitude || !longitude) + return false; + + auto col_str = ColumnString::create(); + ColumnString::Chars & out_vec = col_str->getChars(); + ColumnString::Offsets & out_offsets = col_str->getOffsets(); + + const size_t size = lat_column->size(); + + out_offsets.resize(size); + out_vec.resize(size * (GEOHASH_MAX_TEXT_LENGTH + 1)); + + char * begin = reinterpret_cast(out_vec.data()); + char * pos = begin; + + for (size_t i = 0; i < size; ++i) + { + const Float64 longitude_value = longitude->getElement(i); + const Float64 latitude_value = latitude->getElement(i); + const UInt64 precision_value = std::min(precision_column->get64(i), GEOHASH_MAX_TEXT_LENGTH); + + const size_t encoded_size = geohashEncode(longitude_value, latitude_value, precision_value, pos); + + pos += encoded_size; + *pos = '\0'; + out_offsets[i] = ++pos - begin; + } + out_vec.resize(pos - begin); + + if (!out_offsets.empty() && 
out_offsets.back() != out_vec.size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Column size mismatch (internal logical error)"); + + result = std::move(col_str); + + return true; + + } + + template + bool tryVectorConstant(const IColumn * lon_column, const IColumn * lat_column, UInt64 precision_value, ColumnPtr & result) const { const ColumnVector * longitude = checkAndGetColumn>(lon_column); const ColumnVector * latitude = checkAndGetColumn>(lat_column); @@ -105,16 +148,29 @@ public: const IColumn * longitude = arguments[0].column.get(); const IColumn * latitude = arguments[1].column.get(); - const UInt64 precision_value = std::min(GEOHASH_MAX_TEXT_LENGTH, - arguments.size() == 3 ? arguments[2].column->get64(0) : GEOHASH_MAX_TEXT_LENGTH); + if (arguments.size() < 3 || isColumnConst(*arguments[3].column)) + { + const UInt64 precision_value = std::min( + GEOHASH_MAX_TEXT_LENGTH, arguments.size() == 3 ? arguments[2].column->get64(0) : GEOHASH_MAX_TEXT_LENGTH); - ColumnPtr res_column; + ColumnPtr res_column; + if (tryVectorConstant(longitude, latitude, precision_value, res_column) + || tryVectorConstant(longitude, latitude, precision_value, res_column) + || tryVectorConstant(longitude, latitude, precision_value, res_column) + || tryVectorConstant(longitude, latitude, precision_value, res_column)) + return res_column; + } + else + { + const IColumn * precision = arguments[2].column.get(); + ColumnPtr res_column; + if (tryVectorVector(longitude, latitude, precision, res_column) + || tryVectorVector(longitude, latitude, precision, res_column) + || tryVectorVector(longitude, latitude, precision, res_column) + || tryVectorVector(longitude, latitude, precision, res_column)) + return res_column; - if (tryExecute(longitude, latitude, precision_value, res_column) || - tryExecute(longitude, latitude, precision_value, res_column) || - tryExecute(longitude, latitude, precision_value, res_column) || - tryExecute(longitude, latitude, precision_value, res_column)) - return res_column; + } std::string arguments_description; for (size_t i = 0; i < arguments.size(); ++i) From 1f91a75b5472f3f1321aac9a76c3078880ba5dc9 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 22 May 2023 15:52:58 +0800 Subject: [PATCH 0161/1997] fix bugs of geoHashEncode --- src/Functions/geohashEncode.cpp | 114 ++++++------------------------- src/Storages/HDFS/HDFSCommon.cpp | 4 +- 2 files changed, 22 insertions(+), 96 deletions(-) diff --git a/src/Functions/geohashEncode.cpp b/src/Functions/geohashEncode.cpp index a05fa7fc8d6..5f225a96c2b 100644 --- a/src/Functions/geohashEncode.cpp +++ b/src/Functions/geohashEncode.cpp @@ -4,6 +4,7 @@ #include #include +#include #include @@ -58,14 +59,25 @@ public: return std::make_shared(); } - template - bool tryVectorVector(const IColumn * lon_column, const IColumn * lat_column, const IColumn * precision_column, ColumnPtr & result) const + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { - const ColumnVector * longitude = checkAndGetColumn>(lon_column); - const ColumnVector * latitude = checkAndGetColumn>(lat_column); - if (!latitude || !longitude) - return false; + const IColumn * longitude = arguments[0].column.get(); + const IColumn * latitude = arguments[1].column.get(); + ColumnPtr precision; + if (arguments.size() < 3) + precision = DataTypeUInt8().createColumnConst(longitude->size(), GEOHASH_MAX_TEXT_LENGTH); + else + precision = arguments[2].column; + + ColumnPtr res_column; + 
vector(longitude, latitude, precision.get(), res_column); + return res_column; + } + +private: + void vector(const IColumn * lon_column, const IColumn * lat_column, const IColumn * precision_column, ColumnPtr & result) const + { auto col_str = ColumnString::create(); ColumnString::Chars & out_vec = col_str->getChars(); ColumnString::Offsets & out_offsets = col_str->getOffsets(); @@ -80,8 +92,8 @@ public: for (size_t i = 0; i < size; ++i) { - const Float64 longitude_value = longitude->getElement(i); - const Float64 latitude_value = latitude->getElement(i); + const Float64 longitude_value = lon_column->getFloat64(i); + const Float64 latitude_value = lat_column->getFloat64(i); const UInt64 precision_value = std::min(precision_column->get64(i), GEOHASH_MAX_TEXT_LENGTH); const size_t encoded_size = geohashEncode(longitude_value, latitude_value, precision_value, pos); @@ -96,92 +108,6 @@ public: throw Exception(ErrorCodes::LOGICAL_ERROR, "Column size mismatch (internal logical error)"); result = std::move(col_str); - - return true; - - } - - template - bool tryVectorConstant(const IColumn * lon_column, const IColumn * lat_column, UInt64 precision_value, ColumnPtr & result) const - { - const ColumnVector * longitude = checkAndGetColumn>(lon_column); - const ColumnVector * latitude = checkAndGetColumn>(lat_column); - if (!latitude || !longitude) - return false; - - auto col_str = ColumnString::create(); - ColumnString::Chars & out_vec = col_str->getChars(); - ColumnString::Offsets & out_offsets = col_str->getOffsets(); - - const size_t size = lat_column->size(); - - out_offsets.resize(size); - out_vec.resize(size * (GEOHASH_MAX_TEXT_LENGTH + 1)); - - char * begin = reinterpret_cast(out_vec.data()); - char * pos = begin; - - for (size_t i = 0; i < size; ++i) - { - const Float64 longitude_value = longitude->getElement(i); - const Float64 latitude_value = latitude->getElement(i); - - const size_t encoded_size = geohashEncode(longitude_value, latitude_value, precision_value, pos); - - pos += encoded_size; - *pos = '\0'; - out_offsets[i] = ++pos - begin; - } - out_vec.resize(pos - begin); - - if (!out_offsets.empty() && out_offsets.back() != out_vec.size()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Column size mismatch (internal logical error)"); - - result = std::move(col_str); - - return true; - - } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override - { - const IColumn * longitude = arguments[0].column.get(); - const IColumn * latitude = arguments[1].column.get(); - - if (arguments.size() < 3 || isColumnConst(*arguments[3].column)) - { - const UInt64 precision_value = std::min( - GEOHASH_MAX_TEXT_LENGTH, arguments.size() == 3 ? 
arguments[2].column->get64(0) : GEOHASH_MAX_TEXT_LENGTH); - - ColumnPtr res_column; - if (tryVectorConstant(longitude, latitude, precision_value, res_column) - || tryVectorConstant(longitude, latitude, precision_value, res_column) - || tryVectorConstant(longitude, latitude, precision_value, res_column) - || tryVectorConstant(longitude, latitude, precision_value, res_column)) - return res_column; - } - else - { - const IColumn * precision = arguments[2].column.get(); - ColumnPtr res_column; - if (tryVectorVector(longitude, latitude, precision, res_column) - || tryVectorVector(longitude, latitude, precision, res_column) - || tryVectorVector(longitude, latitude, precision, res_column) - || tryVectorVector(longitude, latitude, precision, res_column)) - return res_column; - - } - - std::string arguments_description; - for (size_t i = 0; i < arguments.size(); ++i) - { - if (i != 0) - arguments_description += ", "; - arguments_description += arguments[i].column->getName(); - } - - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unsupported argument types: {} for function {}", - arguments_description, getName()); } }; diff --git a/src/Storages/HDFS/HDFSCommon.cpp b/src/Storages/HDFS/HDFSCommon.cpp index 932e80831fe..7b149518c0a 100644 --- a/src/Storages/HDFS/HDFSCommon.cpp +++ b/src/Storages/HDFS/HDFSCommon.cpp @@ -38,8 +38,8 @@ HDFSFileInfo::~HDFSFileInfo() } -void HDFSBuilderWrapper::loadFromConfig(const Poco::Util::AbstractConfiguration & config, - const String & prefix, bool isUser) +void HDFSBuilderWrapper::loadFromConfig( + const Poco::Util::AbstractConfiguration & config, const String & prefix, [[maybe_unused]] bool isUser) { Poco::Util::AbstractConfiguration::Keys keys; From 39806657711f933c7e0d0fa04e8cc0e8cd769eaa Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 22 May 2023 15:58:28 +0800 Subject: [PATCH 0162/1997] fix uts --- tests/queries/0_stateless/00932_geohash_support.reference | 4 ++++ tests/queries/0_stateless/00932_geohash_support.sql | 5 ++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00932_geohash_support.reference b/tests/queries/0_stateless/00932_geohash_support.reference index ffc290681c7..0dcb084eb6d 100644 --- a/tests/queries/0_stateless/00932_geohash_support.reference +++ b/tests/queries/0_stateless/00932_geohash_support.reference @@ -9,6 +9,10 @@ default precision: ezs42d000000 mixing const and non-const-columns: ezs42d000000 +ezs42d000000 +ezs42d000000 +ezs42d000000 +ezs42d000000 from table (with const precision): 1 6 Ok 1 6 Ok diff --git a/tests/queries/0_stateless/00932_geohash_support.sql b/tests/queries/0_stateless/00932_geohash_support.sql index aeed72176b9..89f8eba9ca2 100644 --- a/tests/queries/0_stateless/00932_geohash_support.sql +++ b/tests/queries/0_stateless/00932_geohash_support.sql @@ -24,7 +24,10 @@ select geohashEncode(-5.60302734375, 42.593994140625); select 'mixing const and non-const-columns:'; select geohashEncode(materialize(-5.60302734375), materialize(42.593994140625), 0); -select geohashEncode(materialize(-5.60302734375), materialize(42.593994140625), materialize(0)); -- { serverError 44 } +select geohashEncode(materialize(-5.60302734375), materialize(42.593994140625), materialize(0)); +select geohashEncode(-5.60302734375, materialize(42.593994140625), 0); +select geohashEncode(materialize(-5.60302734375), 42.593994140625, 0); +select geohashEncode(-5.60302734375, 42.593994140625, 0); select 'from table (with const precision):'; From 056e5824b57a78314b7ae565585ef0afea1bd836 Mon Sep 
17 00:00:00 2001
From: taiyang-li <654010905@qq.com>
Date: Mon, 22 May 2023 16:02:42 +0800
Subject: [PATCH 0163/1997] remove useless code

---
 src/Functions/geohashEncode.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Functions/geohashEncode.cpp b/src/Functions/geohashEncode.cpp
index 5f225a96c2b..ff61bf7d27c 100644
--- a/src/Functions/geohashEncode.cpp
+++ b/src/Functions/geohashEncode.cpp
@@ -38,7 +38,6 @@ public:
 
     bool isVariadic() const override { return true; }
     size_t getNumberOfArguments() const override { return 0; }
-    // ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {2}; }
     bool useDefaultImplementationForConstants() const override { return true; }
     bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
 

From ceecb1488af0fe4413053c87d9042b9d79602371 Mon Sep 17 00:00:00 2001
From: FFFFFFFHHHHHHH <916677625@qq.com>
Date: Mon, 22 May 2023 17:29:52 +0800
Subject: [PATCH 0164/1997] add function arrayJaccardIndex

---
 src/Functions/array/arrayJaccardIndex.cpp | 161 ++++++++++++++++++
 .../02737_arrayJaccardIndex.reference | 32 ++++
 .../0_stateless/02737_arrayJaccardIndex.sql | 26 +++
 3 files changed, 219 insertions(+)
 create mode 100644 src/Functions/array/arrayJaccardIndex.cpp
 create mode 100644 tests/queries/0_stateless/02737_arrayJaccardIndex.reference
 create mode 100644 tests/queries/0_stateless/02737_arrayJaccardIndex.sql

diff --git a/src/Functions/array/arrayJaccardIndex.cpp b/src/Functions/array/arrayJaccardIndex.cpp
new file mode 100644
index 00000000000..8cce98ab64d
--- /dev/null
+++ b/src/Functions/array/arrayJaccardIndex.cpp
@@ -0,0 +1,161 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int ILLEGAL_COLUMN;
+    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+    extern const int LOGICAL_ERROR;
+}
+
+class FunctionArrayJaccardIndex : public IFunction
+{
+public:
+    using ResultType = Float64;
+    static constexpr auto name = "arrayJaccardIndex";
+    String getName() const override { return name; }
+    static FunctionPtr create(ContextPtr context_) { return std::make_shared(context_); }
+    explicit FunctionArrayJaccardIndex(ContextPtr context_) : context(context_) {}
+    size_t getNumberOfArguments() const override { return 2; }
+    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo &) const override { return true; }
+    bool useDefaultImplementationForConstants() const override { return true; }
+
+    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
+    {
+        DataTypes types;
+        for (size_t i = 0; i < 2; ++i)
+        {
+            const auto * array_type = checkAndGetDataType(arguments[i].get());
+            if (!array_type)
+                throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument {} of function {} must be an array, but it has type {}.", i + 1, getName(), arguments[i]->getName());
+        }
+        return std::make_shared>();
+    }
+
+    template
+    static void vector(const ColumnArray::Offsets & intersect_offsets, const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, PaddedPODArray & res)
+    {
+        size_t left_size;
+        size_t right_size;
+        for (size_t i = 0; i < res.size(); ++i)
+        {
+            if constexpr (is_const_left)
+                left_size = left_offsets[0];
+            else
+                left_size = left_offsets[i] - left_offsets[i - 1];
+            if constexpr (is_const_right)
+                right_size = right_offsets[0];
+            else
+                right_size = right_offsets[i] - right_offsets[i - 1];
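+            // Jaccard index J(A, B) = |A ∩ B| / |A ∪ B|, computed below as
+            // |A ∩ B| / (|A| + |B| - |A ∩ B|); e.g. [1, 2] vs [1, 2, 3] gives
+            // 2 / (2 + 3 - 2) ≈ 0.67, matching the .reference expectations further down.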
+
+            size_t intersect_size = intersect_offsets[i] - intersect_offsets[i - 1];
+            res[i] = static_cast(intersect_size) / (left_size + right_size - intersect_size);
+            if (unlikely(isnan(res[i])))
+                res[i] = 1;
+        }
+    }
+
+    template
+    static void vectorWithEmptyIntersect(const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, PaddedPODArray & res)
+    {
+        size_t left_size;
+        size_t right_size;
+        for (size_t i = 0; i < res.size(); ++i)
+        {
+            if constexpr (is_const_left)
+                left_size = left_offsets[0];
+            else
+                left_size = left_offsets[i] - left_offsets[i - 1];
+            if constexpr (is_const_right)
+                right_size = right_offsets[0];
+            else
+                right_size = right_offsets[i] - right_offsets[i - 1];
+
+            res[i] = static_cast(left_size + right_size == 0);
+        }
+    }
+
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
+    {
+        bool is_const_left;
+        bool is_const_right;
+        const ColumnArray * left_array;
+        const ColumnArray * right_array;
+
+        auto cast_array = [&](const ColumnWithTypeAndName & col)
+        {
+            const ColumnArray * res;
+            bool is_const = false;
+            if (typeid_cast(col.column.get()))
+            {
+                res = checkAndGetColumn(checkAndGetColumnConst(col.column.get())->getDataColumnPtr().get());
+                is_const = true;
+            }
+            else if (!(res = checkAndGetColumn(col.column.get())))
+                throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Argument for function {} must be an array, but it has type {}.",
+                    getName(), col.column->getName());
+            return std::make_pair(res, is_const);
+        };
+
+        std::tie(left_array, is_const_left) = cast_array(arguments[0]);
+        std::tie(right_array, is_const_right) = cast_array(arguments[1]);
+
+        auto intersect_array = FunctionFactory::instance().get("arrayIntersect", context)->build(arguments);
+        ColumnWithTypeAndName intersect_column;
+        intersect_column.type = intersect_array->getResultType();
+        intersect_column.column = intersect_array->execute(arguments, intersect_column.type, input_rows_count);
+        const auto * return_type_intersect = checkAndGetDataType(intersect_column.type.get());
+        if (!return_type_intersect)
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected return type for function arrayIntersect");
+
+        auto col_res = ColumnVector::create();
+        typename ColumnVector::Container & vec_res = col_res->getData();
+        vec_res.resize(input_rows_count);
+
+#define EXECUTE_VECTOR(is_const_left, is_const_right) \
+    if (typeid_cast(return_type_intersect->getNestedType().get())) \
+        vectorWithEmptyIntersect(left_array->getOffsets(), right_array->getOffsets(), vec_res); \
+    else \
+    { \
+        const ColumnArray * col_array = checkAndGetColumn(intersect_column.column.get()); \
+        vector(col_array->getOffsets(), left_array->getOffsets(), right_array->getOffsets(), vec_res); \
+    }
+
+        if (!is_const_left && !is_const_right)
+            EXECUTE_VECTOR(false, false)
+        else if (!is_const_left && is_const_right)
+            EXECUTE_VECTOR(false, true)
+        else if (is_const_left && !is_const_right)
+            EXECUTE_VECTOR(true, false)
+        else
+            EXECUTE_VECTOR(true, true)
+
+#undef EXECUTE_VECTOR
+
+        return col_res;
+    }
+
+private:
+    ContextPtr context;
+};
+
+REGISTER_FUNCTION(ArrayJaccardIndex)
+{
+    factory.registerFunction();
+}
+
+}
diff --git a/tests/queries/0_stateless/02737_arrayJaccardIndex.reference b/tests/queries/0_stateless/02737_arrayJaccardIndex.reference
new file mode 100644
index 00000000000..e6934bfe092
--- /dev/null
+++ b/tests/queries/0_stateless/02737_arrayJaccardIndex.reference
@@ -0,0 +1,32 @@
+0
+0.5
+1
+0.67
+1
+0
+0
+0
+1
+0
+0 +0 +0 +0.5 +1 +0.67 +0.5 +0.5 +0.5 +0.5 +1 +1 +1 +1 +1 +1 +1 +1 +0.33 +0.2 +1 +1 diff --git a/tests/queries/0_stateless/02737_arrayJaccardIndex.sql b/tests/queries/0_stateless/02737_arrayJaccardIndex.sql new file mode 100644 index 00000000000..c3f04ba0b10 --- /dev/null +++ b/tests/queries/0_stateless/02737_arrayJaccardIndex.sql @@ -0,0 +1,26 @@ +drop table if exists array_jaccard_index; + +create table array_jaccard_index (arr Array(UInt8)) engine=MergeTree partition by arr order by arr; + +insert into array_jaccard_index values ([1,2,3]); +insert into array_jaccard_index values ([1,2]); +insert into array_jaccard_index values ([1]); +insert into array_jaccard_index values ([]); + +select round(arrayJaccardIndex(arr, [1,2]), 2) from array_jaccard_index order by arr; +select round(arrayJaccardIndex(arr, []), 2) from array_jaccard_index order by arr; +select round(arrayJaccardIndex([], arr), 2) from array_jaccard_index order by arr; +select round(arrayJaccardIndex([1,2], arr), 2) from array_jaccard_index order by arr; +select round(arrayJaccardIndex([1,2], [1,2,3,4]), 2) from array_jaccard_index order by arr; +select round(arrayJaccardIndex([], []), 2) from array_jaccard_index order by arr; +select round(arrayJaccardIndex(arr, arr), 2) from array_jaccard_index order by arr; + +drop table if exists array_jaccard_index; + +select round(arrayJaccardIndex(['a'], ['a', 'aa', 'aaa']), 2); + +select round(arrayJaccardIndex([1, 1.1, 2.2], [2.2, 3.3, 444]), 2); + +select round(arrayJaccardIndex([], []), 2); + +select round(arrayJaccardIndex([toUInt16(1)], [toUInt32(1)]), 2); From bde2cf96b135235908b71cc0bf071a175321dc4d Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 22 May 2023 12:24:16 +0000 Subject: [PATCH 0165/1997] Better --- programs/keeper/CMakeLists.txt | 7 +- programs/keeper/Keeper.cpp | 39 +++++--- programs/keeper/Keeper.h | 7 -- src/Coordination/Changelog.cpp | 55 +++++++---- src/Coordination/Changelog.h | 10 +- .../KeeperAsynchronousMetrics.cpp | 6 +- src/Coordination/KeeperAsynchronousMetrics.h | 6 +- src/Coordination/KeeperContext.cpp | 91 +++++++++++++++---- src/Coordination/KeeperContext.h | 16 +++- src/Coordination/KeeperLogStore.cpp | 9 +- src/Coordination/KeeperLogStore.h | 6 +- src/Coordination/KeeperServer.cpp | 21 +---- src/Coordination/KeeperSnapshotManager.cpp | 38 +++----- src/Coordination/KeeperSnapshotManager.h | 15 +-- src/Coordination/KeeperStateMachine.cpp | 2 - src/Coordination/KeeperStateMachine.h | 1 - src/Coordination/KeeperStateManager.cpp | 32 ++++--- src/Coordination/KeeperStateManager.h | 15 +-- .../{Context => Standalone}/Context.cpp | 58 ++++++++++++ .../{Context => Standalone}/Context.h | 9 ++ .../{Context => Standalone}/Settings.cpp | 0 .../ThreadStatusExt.cpp | 0 src/Coordination/TinyContext.cpp | 87 ------------------ src/Coordination/TinyContext.h | 36 -------- src/Interpreters/Context.h | 2 +- 25 files changed, 282 insertions(+), 286 deletions(-) rename src/Coordination/{Context => Standalone}/Context.cpp (80%) rename src/Coordination/{Context => Standalone}/Context.h (88%) rename src/Coordination/{Context => Standalone}/Settings.cpp (100%) rename src/Coordination/{Context => Standalone}/ThreadStatusExt.cpp (100%) delete mode 100644 src/Coordination/TinyContext.cpp delete mode 100644 src/Coordination/TinyContext.h diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index c0c0a6dd1b0..1b5b9e6a7b2 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -50,7 +50,6 @@ if 
(BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperStateManager.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperStorage.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperAsynchronousMetrics.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/TinyContext.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/pathUtils.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/SessionExpiryQueue.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/SummingStateMachine.cpp @@ -157,9 +156,9 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/GraphiteWriter.cpp ${CMAKE_CURRENT_BINARY_DIR}/../../src/Daemon/GitHash.generated.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/Context/Context.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/Context/Settings.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/Context/ThreadStatusExt.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/Standalone/Context.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/Standalone/Settings.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/Standalone/ThreadStatusExt.cpp Keeper.cpp clickhouse-keeper.cpp diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 996c4678450..d5cf61daa6e 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -44,6 +44,8 @@ #include +#include + int mainEntryClickHouseKeeper(int argc, char ** argv) { @@ -280,9 +282,12 @@ void Keeper::defineOptions(Poco::Util::OptionSet & options) BaseDaemon::defineOptions(options); } -struct Keeper::KeeperHTTPContext : public IHTTPContext +namespace { - explicit KeeperHTTPContext(TinyContextPtr context_) + +struct KeeperHTTPContext : public IHTTPContext +{ + explicit KeeperHTTPContext(ContextPtr context_) : context(std::move(context_)) {} @@ -326,12 +331,14 @@ struct Keeper::KeeperHTTPContext : public IHTTPContext return {context->getConfigRef().getInt64("keeper_server.http_send_timeout", DBMS_DEFAULT_SEND_TIMEOUT_SEC), 0}; } - TinyContextPtr context; + ContextPtr context; }; -HTTPContextPtr Keeper::httpContext() +HTTPContextPtr httpContext() { - return std::make_shared(tiny_context); + return std::make_shared(Context::getGlobalContextInstance()); +} + } int Keeper::main(const std::vector & /*args*/) @@ -419,12 +426,14 @@ try global_context->setPath(path); global_context->setRemoteHostFilter(config()); + if (config().has("macros")) + global_context->setMacros(std::make_unique(config(), "macros", log)); + registerDisks(/*global_skip_access_check=*/false); - tiny_context = std::make_shared(); /// This object will periodically calculate some metrics. KeeperAsynchronousMetrics async_metrics( - tiny_context, + global_context, config().getUInt("asynchronous_metrics_update_period_s", 1), [&]() -> std::vector { @@ -449,12 +458,12 @@ try } /// Initialize keeper RAFT. Do nothing if no keeper_server in config. 
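     /// (TinyContext is deleted in this change, as the file list above shows, so the
     /// dispatcher below is owned by the shared global Context instead.)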
- tiny_context->initializeKeeperDispatcher(/* start_async = */ true); - FourLetterCommandFactory::registerCommands(*tiny_context->getKeeperDispatcher()); + global_context->initializeKeeperDispatcher(/* start_async = */ true); + FourLetterCommandFactory::registerCommands(*global_context->getKeeperDispatcher()); - auto config_getter = [this] () -> const Poco::Util::AbstractConfiguration & + auto config_getter = [&] () -> const Poco::Util::AbstractConfiguration & { - return tiny_context->getConfigRef(); + return global_context->getConfigRef(); }; auto tcp_receive_timeout = config().getInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC); @@ -476,7 +485,7 @@ try "Keeper (tcp): " + address.toString(), std::make_unique( new KeeperTCPHandlerFactory( - config_getter, tiny_context->getKeeperDispatcher(), + config_getter, global_context->getKeeperDispatcher(), tcp_receive_timeout, tcp_send_timeout, false), server_pool, socket)); }); @@ -494,7 +503,7 @@ try "Keeper with secure protocol (tcp_secure): " + address.toString(), std::make_unique( new KeeperTCPHandlerFactory( - config_getter, tiny_context->getKeeperDispatcher(), + config_getter, global_context->getKeeperDispatcher(), tcp_receive_timeout, tcp_send_timeout, true), server_pool, socket)); #else UNUSED(port); @@ -546,7 +555,7 @@ try [&](ConfigurationPtr config, bool /* initial_loading */) { if (config->has("keeper_server")) - tiny_context->updateKeeperConfiguration(*config); + global_context->updateKeeperConfiguration(*config); }, /* already_loaded = */ false); /// Reload it right now (initial loading) @@ -577,7 +586,7 @@ try else LOG_INFO(log, "Closed connections to Keeper."); - tiny_context->shutdownKeeperDispatcher(); + global_context->shutdownKeeperDispatcher(); /// Wait server pool to avoid use-after-free of destroyed context in the handlers server_pool.joinAll(); diff --git a/programs/keeper/Keeper.h b/programs/keeper/Keeper.h index 8a7724acb85..f889ffa595b 100644 --- a/programs/keeper/Keeper.h +++ b/programs/keeper/Keeper.h @@ -1,9 +1,7 @@ #pragma once #include -#include #include -#include namespace Poco { @@ -68,11 +66,6 @@ protected: std::string getDefaultConfigFileName() const override; private: - TinyContextPtr tiny_context; - - struct KeeperHTTPContext; - HTTPContextPtr httpContext(); - Poco::Net::SocketAddress socketBindListen(Poco::Net::ServerSocket & socket, const std::string & host, UInt16 port, [[maybe_unused]] bool secure = false) const; using CreateServerFunc = std::function; diff --git a/src/Coordination/Changelog.cpp b/src/Coordination/Changelog.cpp index 1fda760cab0..852a21c1c45 100644 --- a/src/Coordination/Changelog.cpp +++ b/src/Coordination/Changelog.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -85,17 +86,19 @@ class ChangelogWriter public: ChangelogWriter( std::map & existing_changelogs_, - DiskPtr disk_, + KeeperContextPtr keeper_context_, LogFileSettings log_file_settings_) : existing_changelogs(existing_changelogs_) , log_file_settings(log_file_settings_) - , disk(disk_) + , keeper_context(std::move(keeper_context_)) , log(&Poco::Logger::get("Changelog")) { } void setFile(ChangelogFileDescriptionPtr file_description, WriteMode mode) { + auto disk = getDisk(); + try { if (mode == WriteMode::Append && file_description->expectedEntriesCountInLog() != log_file_settings.rotate_interval) @@ -146,7 +149,7 @@ public: /// There is bug when compressed_buffer has value, file_buf's ownership transfer to compressed_buffer bool isFileSet() const { - return 
compressed_buffer.get() != nullptr || file_buf.get() != nullptr;
+        return compressed_buffer != nullptr || file_buf != nullptr;
     }
 
     bool appendRecord(ChangelogRecord && record)
@@ -276,7 +279,7 @@ private:
 
         const auto * file_buffer = tryGetFileBuffer();
 
-        if (log_file_settings.max_size != 0)
+        if (log_file_settings.max_size != 0 && isLocalDisk())
         {
             int res = -1;
             do
@@ -354,11 +357,12 @@ private:
         {
             initial_file_size = 0;
             prealloc_done = true;
-            LOG_WARNING(log, "Could not preallocate space on disk {} using fallocate", disk->getName());
+            LOG_WARNING(log, "Could not preallocate space on disk {} using fallocate", getDisk()->getName());
             return;
         }
 
 #ifdef OS_LINUX
+        if (isLocalDisk())
        {
             int res = -1;
             do
@@ -383,6 +387,16 @@ private:
         prealloc_done = true;
     }
 
+    DiskPtr getDisk() const
+    {
+        return keeper_context->getLogDisk();
+    }
+
+    bool isLocalDisk() const
+    {
+        return dynamic_cast(getDisk().get()) != nullptr;
+    }
+
     std::map & existing_changelogs;
 
     ChangelogFileDescriptionPtr current_file_description{nullptr};
@@ -396,7 +410,7 @@ private:
 
     LogFileSettings log_file_settings;
 
-    DiskPtr disk;
+    KeeperContextPtr keeper_context;
 
     Poco::Logger * const log;
 };
@@ -533,18 +547,20 @@ private:
 };
 
 Changelog::Changelog(
-    DiskPtr disk_,
     Poco::Logger * log_,
-    LogFileSettings log_file_settings)
-    : disk(disk_)
-    , changelogs_detached_dir("detached")
+    LogFileSettings log_file_settings,
+    KeeperContextPtr keeper_context_)
+    : changelogs_detached_dir("detached")
     , rotate_interval(log_file_settings.rotate_interval)
     , log(log_)
     , write_operations(std::numeric_limits::max())
     , append_completion_queue(std::numeric_limits::max())
+    , keeper_context(std::move(keeper_context_))
 {
     /// Load all files in changelog directory
 
+    auto disk = getDisk();
+
     for (auto it = disk->iterateDirectory(""); it->isValid(); it->next())
     {
         if (it->name() == changelogs_detached_dir)
@@ -564,7 +580,7 @@ Changelog::Changelog(
     append_completion_thread = ThreadFromGlobalPool([this] { appendCompletionThread(); });
 
     current_writer = std::make_unique(
-        existing_changelogs, disk, log_file_settings);
+        existing_changelogs, keeper_context, log_file_settings);
 }
 
 void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uint64_t logs_to_keep)
@@ -636,7 +652,7 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin
                 break;
             }
 
-            ChangelogReader reader(disk, changelog_description.path);
+            ChangelogReader reader(getDisk(), changelog_description.path);
             last_log_read_result = reader.readChangelog(logs, start_to_read_from, log);
 
             last_log_read_result->log_start_index = changelog_description.from_log_index;
@@ -703,7 +719,7 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin
         if (last_log_read_result->last_read_index == 0 || last_log_read_result->error) /// If it's broken log then remove it
         {
             LOG_INFO(log, "Removing changelog {} because it's empty or reading finished with an error", description->path);
-            disk->removeFile(description->path);
+            getDisk()->removeFile(description->path);
             existing_changelogs.erase(last_log_read_result->log_start_index);
             std::erase_if(logs, [last_log_read_result](const auto & item) { return item.first >= last_log_read_result->log_start_index; });
         }
@@ -748,8 +764,15 @@ std::string getCurrentTimestampFolder()
 
 }
 
+DiskPtr Changelog::getDisk() const
+{
+    return keeper_context->getLogDisk();
+}
+
 void Changelog::removeExistingLogs(ChangelogIter begin, ChangelogIter end)
 {
+    auto disk = getDisk();
+
     const auto timestamp_folder = (fs::path(changelogs_detached_dir) /
getCurrentTimestampFolder()).generic_string(); for (auto itr = begin; itr != end;) @@ -920,7 +943,7 @@ void Changelog::writeAt(uint64_t index, const LogEntryPtr & log_entry) auto to_remove_itr = existing_changelogs.upper_bound(index); for (auto itr = to_remove_itr; itr != existing_changelogs.end();) { - disk->removeFile(itr->second->path); + getDisk()->removeFile(itr->second->path); itr = existing_changelogs.erase(itr); } } @@ -974,7 +997,7 @@ void Changelog::compact(uint64_t up_to_log_index) { try { - disk->removeFile(itr->second->path); + getDisk()->removeFile(itr->second->path); LOG_INFO(log, "Removed changelog {} because of compaction.", itr->second->path); } catch (Exception & e) @@ -1179,7 +1202,7 @@ void Changelog::cleanLogThread() { try { - disk->removeFile(path); + getDisk()->removeFile(path); LOG_INFO(log, "Removed changelog {} because of compaction.", path); } catch (Exception & e) diff --git a/src/Coordination/Changelog.h b/src/Coordination/Changelog.h index 7204c80a356..6f0c4e45605 100644 --- a/src/Coordination/Changelog.h +++ b/src/Coordination/Changelog.h @@ -11,6 +11,7 @@ #include #include #include +#include namespace DB { @@ -87,9 +88,9 @@ class Changelog { public: Changelog( - DiskPtr disk_, Poco::Logger * log_, - LogFileSettings log_file_settings); + LogFileSettings log_file_settings, + KeeperContextPtr keeper_context_); Changelog(Changelog &&) = delete; @@ -152,6 +153,8 @@ private: /// Pack log_entry into changelog record static ChangelogRecord buildRecord(uint64_t index, const LogEntryPtr & log_entry); + DiskPtr getDisk() const; + /// Currently existing changelogs std::map existing_changelogs; @@ -169,7 +172,6 @@ private: /// Clean useless log files in a background thread void cleanLogThread(); - DiskPtr disk; const String changelogs_detached_dir; const uint64_t rotate_interval; Poco::Logger * log; @@ -223,6 +225,8 @@ private: nuraft::wptr raft_server; + KeeperContextPtr keeper_context; + bool initialized = false; }; diff --git a/src/Coordination/KeeperAsynchronousMetrics.cpp b/src/Coordination/KeeperAsynchronousMetrics.cpp index 2d523a26dcc..1427130b184 100644 --- a/src/Coordination/KeeperAsynchronousMetrics.cpp +++ b/src/Coordination/KeeperAsynchronousMetrics.cpp @@ -108,8 +108,8 @@ void updateKeeperInformation(KeeperDispatcher & keeper_dispatcher, AsynchronousM } KeeperAsynchronousMetrics::KeeperAsynchronousMetrics( - TinyContextPtr tiny_context_, int update_period_seconds, const ProtocolServerMetricsFunc & protocol_server_metrics_func_) - : AsynchronousMetrics(update_period_seconds, protocol_server_metrics_func_), tiny_context(std::move(tiny_context_)) + ContextPtr context_, int update_period_seconds, const ProtocolServerMetricsFunc & protocol_server_metrics_func_) + : AsynchronousMetrics(update_period_seconds, protocol_server_metrics_func_), context(std::move(context_)) { } @@ -117,7 +117,7 @@ void KeeperAsynchronousMetrics::updateImpl(AsynchronousMetricValues & new_values { #if USE_NURAFT { - auto keeper_dispatcher = tiny_context->tryGetKeeperDispatcher(); + auto keeper_dispatcher = context->tryGetKeeperDispatcher(); if (keeper_dispatcher) updateKeeperInformation(*keeper_dispatcher, new_values); } diff --git a/src/Coordination/KeeperAsynchronousMetrics.h b/src/Coordination/KeeperAsynchronousMetrics.h index 8fa27336bc5..14092c11c15 100644 --- a/src/Coordination/KeeperAsynchronousMetrics.h +++ b/src/Coordination/KeeperAsynchronousMetrics.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include namespace DB @@ -13,10 +13,10 @@ class KeeperAsynchronousMetrics : 
public AsynchronousMetrics { public: KeeperAsynchronousMetrics( - TinyContextPtr tiny_context_, int update_period_seconds, const ProtocolServerMetricsFunc & protocol_server_metrics_func_); + ContextPtr context_, int update_period_seconds, const ProtocolServerMetricsFunc & protocol_server_metrics_func_); private: - TinyContextPtr tiny_context; + ContextPtr context; void updateImpl(AsynchronousMetricValues & new_values, TimePoint update_time, TimePoint current_time) override; }; diff --git a/src/Coordination/KeeperContext.cpp b/src/Coordination/KeeperContext.cpp index 9e504f5aa07..ff3f96f199b 100644 --- a/src/Coordination/KeeperContext.cpp +++ b/src/Coordination/KeeperContext.cpp @@ -20,10 +20,10 @@ void KeeperContext::initialize(const Poco::Util::AbstractConfiguration & config) disk_selector->initialize(config, "storage_configuration.disks", Context::getGlobalContextInstance()); - log_storage_path = getLogsPathFromConfig(config); - snapshot_storage_path = getSnapshotsPathFromConfig(config); + log_storage = getLogsPathFromConfig(config); + snapshot_storage = getSnapshotsPathFromConfig(config); - state_file_path = getStateFilePathFromConfig(config); + state_file_storage = getStatePathFromConfig(config); } KeeperContext::Phase KeeperContext::getServerState() const @@ -51,54 +51,109 @@ void KeeperContext::setDigestEnabled(bool digest_enabled_) digest_enabled = digest_enabled_; } +DiskPtr KeeperContext::getDisk(const Storage & storage) const +{ + if (const auto * storage_disk = std::get_if(&storage)) + return *storage_disk; + + const auto & disk_name = std::get(storage); + + return disk_selector->get(disk_name); +} + +DiskPtr KeeperContext::getLogDisk() const +{ + return getDisk(log_storage); +} + +DiskPtr KeeperContext::getSnapshotsDisk() const +{ + return getDisk(snapshot_storage); +} + +DiskPtr KeeperContext::getStateFileDisk() const +{ + return getDisk(state_file_storage); +} + KeeperContext::Storage KeeperContext::getLogsPathFromConfig(const Poco::Util::AbstractConfiguration & config) const { + const auto create_local_disk = [](const auto & path) + { + if (!fs::exists(path)) + fs::create_directories(path); + + return std::make_shared("LogDisk", path, 0); + }; + /// the most specialized path if (config.has("keeper_server.log_storage_path")) - return std::make_shared("LogDisk", config.getString("keeper_server.log_storage_path"), 0); + return create_local_disk(config.getString("keeper_server.log_storage_path")); if (config.has("keeper_server.log_storage_disk")) return config.getString("keeper_server.log_storage_disk"); if (config.has("keeper_server.storage_path")) - return std::make_shared("LogDisk", std::filesystem::path{config.getString("keeper_server.storage_path")} / "logs", 0); + return create_local_disk(std::filesystem::path{config.getString("keeper_server.storage_path")} / "logs"); if (standalone_keeper) - return std::make_shared("LogDisk", std::filesystem::path{config.getString("path", KEEPER_DEFAULT_PATH)} / "logs", 0); + return create_local_disk(std::filesystem::path{config.getString("path", KEEPER_DEFAULT_PATH)} / "logs"); else - return std::make_shared("LogDisk", std::filesystem::path{config.getString("path", DBMS_DEFAULT_PATH)} / "coordination/logs", 0); + return create_local_disk(std::filesystem::path{config.getString("path", DBMS_DEFAULT_PATH)} / "coordination/logs"); } -std::string KeeperContext::getSnapshotsPathFromConfig(const Poco::Util::AbstractConfiguration & config) +KeeperContext::Storage KeeperContext::getSnapshotsPathFromConfig(const Poco::Util::AbstractConfiguration & 
config) const { + const auto create_local_disk = [](const auto & path) + { + if (!fs::exists(path)) + fs::create_directories(path); + + return std::make_shared("SnapshotDisk", path, 0); + }; + /// the most specialized path if (config.has("keeper_server.snapshot_storage_path")) - return config.getString("keeper_server.snapshot_storage_path"); + return create_local_disk(config.getString("keeper_server.snapshot_storage_path")); + + if (config.has("keeper_server.snapshot_storage_disk")) + return config.getString("keeper_server.snapshot_storage_disk"); if (config.has("keeper_server.storage_path")) - return std::filesystem::path{config.getString("keeper_server.storage_path")} / "snapshots"; + return create_local_disk(std::filesystem::path{config.getString("keeper_server.storage_path")} / "snapshots"); if (standalone_keeper) - return std::filesystem::path{config.getString("path", KEEPER_DEFAULT_PATH)} / "snapshots"; + return create_local_disk(std::filesystem::path{config.getString("path", KEEPER_DEFAULT_PATH)} / "snapshots"); else - return std::filesystem::path{config.getString("path", DBMS_DEFAULT_PATH)} / "coordination/snapshots"; + return create_local_disk(std::filesystem::path{config.getString("path", DBMS_DEFAULT_PATH)} / "coordination/snapshots"); } -std::string KeeperContext::getStateFilePathFromConfig(const Poco::Util::AbstractConfiguration & config) +KeeperContext::Storage KeeperContext::getStatePathFromConfig(const Poco::Util::AbstractConfiguration & config) const { + const auto create_local_disk = [](const auto & path) + { + if (!fs::exists(path)) + fs::create_directories(path); + + return std::make_shared("SnapshotDisk", path, 0); + }; + + if (config.has("keeper_server.state_storage_disk")) + return config.getString("keeper_server.state_storage_disk"); + if (config.has("keeper_server.storage_path")) - return std::filesystem::path{config.getString("keeper_server.storage_path")} / "state"; + return create_local_disk(std::filesystem::path{config.getString("keeper_server.storage_path")}); if (config.has("keeper_server.snapshot_storage_path")) - return std::filesystem::path(config.getString("keeper_server.snapshot_storage_path")).parent_path() / "state"; + return create_local_disk(std::filesystem::path(config.getString("keeper_server.snapshot_storage_path")).parent_path()); if (config.has("keeper_server.log_storage_path")) - return std::filesystem::path(config.getString("keeper_server.log_storage_path")).parent_path() / "state"; + return create_local_disk(std::filesystem::path(config.getString("keeper_server.log_storage_path")).parent_path()); if (standalone_keeper) - return std::filesystem::path{config.getString("path", KEEPER_DEFAULT_PATH)} / "state"; + return create_local_disk(std::filesystem::path{config.getString("path", KEEPER_DEFAULT_PATH)}); else - return std::filesystem::path{config.getString("path", DBMS_DEFAULT_PATH)} / "coordination/state"; + return create_local_disk(std::filesystem::path{config.getString("path", DBMS_DEFAULT_PATH)} / "coordination"); } } diff --git a/src/Coordination/KeeperContext.h b/src/Coordination/KeeperContext.h index 2a215f9d58f..1fc01f12bba 100644 --- a/src/Coordination/KeeperContext.h +++ b/src/Coordination/KeeperContext.h @@ -31,13 +31,19 @@ public: bool digestEnabled() const; void setDigestEnabled(bool digest_enabled_); + + DiskPtr getLogDisk() const; + DiskPtr getSnapshotsDisk() const; + DiskPtr getStateFileDisk() const; private: /// local disk defined using path or disk name using Storage = std::variant; Storage getLogsPathFromConfig(const 
Poco::Util::AbstractConfiguration & config) const; - std::string getSnapshotsPathFromConfig(const Poco::Util::AbstractConfiguration & config); - std::string getStateFilePathFromConfig(const Poco::Util::AbstractConfiguration & config); + Storage getSnapshotsPathFromConfig(const Poco::Util::AbstractConfiguration & config) const; + Storage getStatePathFromConfig(const Poco::Util::AbstractConfiguration & config) const; + + DiskPtr getDisk(const Storage & storage) const; Phase server_state{Phase::INIT}; @@ -46,9 +52,9 @@ private: std::shared_ptr disk_selector; - Storage log_storage_path; - Storage snapshot_storage_path; - Storage state_file_path; + Storage log_storage; + Storage snapshot_storage; + Storage state_file_storage; bool standalone_keeper; }; diff --git a/src/Coordination/KeeperLogStore.cpp b/src/Coordination/KeeperLogStore.cpp index 7c4f76e0180..a9153475dbc 100644 --- a/src/Coordination/KeeperLogStore.cpp +++ b/src/Coordination/KeeperLogStore.cpp @@ -6,9 +6,9 @@ namespace DB { -KeeperLogStore::KeeperLogStore(DiskPtr disk_, LogFileSettings log_file_settings) +KeeperLogStore::KeeperLogStore(LogFileSettings log_file_settings, KeeperContextPtr keeper_context) : log(&Poco::Logger::get("KeeperLogStore")) - , changelog(disk_, log, log_file_settings) + , changelog(log, log_file_settings, keeper_context) { if (log_file_settings.force_sync) LOG_INFO(log, "force_sync enabled"); @@ -16,11 +16,6 @@ KeeperLogStore::KeeperLogStore(DiskPtr disk_, LogFileSettings log_file_settings) LOG_INFO(log, "force_sync disabled"); } -KeeperLogStore::KeeperLogStore(const std::string & changelogs_path, LogFileSettings log_file_settings) - : KeeperLogStore(std::make_shared("Keeper-logs", changelogs_path, 0), log_file_settings) -{ -} - uint64_t KeeperLogStore::start_index() const { std::lock_guard lock(changelog_lock); diff --git a/src/Coordination/KeeperLogStore.h b/src/Coordination/KeeperLogStore.h index 2902de129e7..6e71d8c55cf 100644 --- a/src/Coordination/KeeperLogStore.h +++ b/src/Coordination/KeeperLogStore.h @@ -4,6 +4,7 @@ #include #include #include +#include #include namespace DB @@ -13,10 +14,7 @@ namespace DB class KeeperLogStore : public nuraft::log_store { public: - KeeperLogStore(DiskPtr disk_, LogFileSettings log_file_settings); - - /// For gtest - KeeperLogStore(const std::string & changelogs_path, LogFileSettings log_file_settings); + KeeperLogStore(LogFileSettings log_file_settings, KeeperContextPtr keeper_context); /// Read log storage from filesystem starting from last_commited_log_index void init(uint64_t last_commited_log_index, uint64_t logs_to_keep); diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index d63593436f4..4cacf566df6 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -122,39 +122,22 @@ KeeperServer::KeeperServer( keeper_context->initialize(config); - //if (!fs::exists(keeper_context->snapshot_storage_path)) - // fs::create_directories(keeper_context->snapshot_storage_path); - auto snapshots_disk = std::make_shared("Keeper-snapshots", "", 0); - state_machine = nuraft::cs_new( responses_queue_, snapshots_queue_, - snapshots_disk, coordination_settings, keeper_context, config.getBool("keeper_server.upload_snapshot_on_exit", true) ? 
&snapshot_manager_s3 : nullptr, commit_callback, checkAndGetSuperdigest(configuration_and_settings_->super_digest)); - //auto state_path = fs::path(keeper_context->state_file_path).parent_path().generic_string(); - //auto state_file_name = fs::path(configuration_and_settings_->state_file_path).filename().generic_string(); - - //if (!fs::exists(state_path)) - // fs::create_directories(state_path); - auto state_disk = std::make_shared("Keeper-state", "", 0); - - //if (!fs::exists(configuration_and_settings_->log_storage_path)) - // fs::create_directories(configuration_and_settings_->log_storage_path); - auto logs_disk = std::make_shared("Keeper-logs", "", 0); - state_manager = nuraft::cs_new( server_id, "keeper_server", - logs_disk, - state_disk, "state", config, - coordination_settings); + coordination_settings, + keeper_context); } /** diff --git a/src/Coordination/KeeperSnapshotManager.cpp b/src/Coordination/KeeperSnapshotManager.cpp index e1c0c034cff..d47ea475c42 100644 --- a/src/Coordination/KeeperSnapshotManager.cpp +++ b/src/Coordination/KeeperSnapshotManager.cpp @@ -508,36 +508,18 @@ KeeperStorageSnapshot::~KeeperStorageSnapshot() } KeeperSnapshotManager::KeeperSnapshotManager( - const std::string & snapshots_path_, size_t snapshots_to_keep_, const KeeperContextPtr & keeper_context_, bool compress_snapshots_zstd_, const std::string & superdigest_, size_t storage_tick_time_) - : KeeperSnapshotManager( - std::make_shared("Keeper-snapshots", snapshots_path_, 0), - snapshots_to_keep_, - keeper_context_, - compress_snapshots_zstd_, - superdigest_, - storage_tick_time_) -{ -} - -KeeperSnapshotManager::KeeperSnapshotManager( - DiskPtr disk_, - size_t snapshots_to_keep_, - const KeeperContextPtr & keeper_context_, - bool compress_snapshots_zstd_, - const std::string & superdigest_, - size_t storage_tick_time_) - : disk(disk_) - , snapshots_to_keep(snapshots_to_keep_) + : snapshots_to_keep(snapshots_to_keep_) , compress_snapshots_zstd(compress_snapshots_zstd_) , superdigest(superdigest_) , storage_tick_time(storage_tick_time_) , keeper_context(keeper_context_) { + auto disk = getDisk(); for (auto it = disk->iterateDirectory(""); it->isValid(); it->next()) { const auto & name = it->name(); @@ -566,6 +548,7 @@ std::string KeeperSnapshotManager::serializeSnapshotBufferToDisk(nuraft::buffer auto snapshot_file_name = getSnapshotFileName(up_to_log_idx, compress_snapshots_zstd); auto tmp_snapshot_file_name = "tmp_" + snapshot_file_name; + auto disk = getDisk(); auto plain_buf = disk->writeFile(tmp_snapshot_file_name); copyData(reader, *plain_buf); plain_buf->sync(); @@ -589,7 +572,7 @@ nuraft::ptr KeeperSnapshotManager::deserializeLatestSnapshotBuff } catch (const DB::Exception &) { - disk->removeFile(latest_itr->second); + getDisk()->removeFile(latest_itr->second); existing_snapshots.erase(latest_itr->first); tryLogCurrentException(__PRETTY_FUNCTION__); } @@ -602,7 +585,7 @@ nuraft::ptr KeeperSnapshotManager::deserializeSnapshotBufferFrom { const std::string & snapshot_path = existing_snapshots.at(up_to_log_idx); WriteBufferFromNuraftBuffer writer; - auto reader = disk->readFile(snapshot_path); + auto reader = getDisk()->readFile(snapshot_path); copyData(*reader, writer); return writer.getBuffer(); } @@ -664,6 +647,11 @@ SnapshotDeserializationResult KeeperSnapshotManager::restoreFromLatestSnapshot() return deserializeSnapshotFromBuffer(buffer); } +DiskPtr KeeperSnapshotManager::getDisk() const +{ + return keeper_context->getSnapshotsDisk(); +} + void 
KeeperSnapshotManager::removeOutdatedSnapshotsIfNeeded()
 {
     while (existing_snapshots.size() > snapshots_to_keep)
@@ -675,7 +663,7 @@ void KeeperSnapshotManager::removeSnapshot(uint64_t log_idx)
     auto itr = existing_snapshots.find(log_idx);
     if (itr == existing_snapshots.end())
         throw Exception(ErrorCodes::UNKNOWN_SNAPSHOT, "Unknown snapshot with log index {}", log_idx);
-    disk->removeFile(itr->second);
+    getDisk()->removeFile(itr->second);
     existing_snapshots.erase(itr);
 }
 
@@ -685,7 +673,7 @@ std::pair KeeperSnapshotManager::serializeSnapshot
     auto snapshot_file_name = getSnapshotFileName(up_to_log_idx, compress_snapshots_zstd);
     auto tmp_snapshot_file_name = "tmp_" + snapshot_file_name;
 
-    auto writer = disk->writeFile(tmp_snapshot_file_name);
+    auto writer = getDisk()->writeFile(tmp_snapshot_file_name);
     std::unique_ptr compressed_writer;
     if (compress_snapshots_zstd)
         compressed_writer = wrapWriteBufferWithCompressionMethod(std::move(writer), CompressionMethod::Zstd, 3);
@@ -700,7 +688,7 @@ std::pair KeeperSnapshotManager::serializeSnapshot
 
     try
     {
-        disk->moveFile(tmp_snapshot_file_name, snapshot_file_name);
+        getDisk()->moveFile(tmp_snapshot_file_name, snapshot_file_name);
     }
     catch (fs::filesystem_error & e)
    {
diff --git a/src/Coordination/KeeperSnapshotManager.h b/src/Coordination/KeeperSnapshotManager.h
index 9babad9ed98..7b1129018d8 100644
--- a/src/Coordination/KeeperSnapshotManager.h
+++ b/src/Coordination/KeeperSnapshotManager.h
@@ -99,16 +99,6 @@ class KeeperSnapshotManager
 {
 public:
     KeeperSnapshotManager(
-        DiskPtr disk_,
-        size_t snapshots_to_keep_,
-        const KeeperContextPtr & keeper_context_,
-        bool compress_snapshots_zstd_ = true,
-        const std::string & superdigest_ = "",
-        size_t storage_tick_time_ = 500);
-
-    /// For gtest
-    KeeperSnapshotManager(
-        const std::string & snapshots_path_,
         size_t snapshots_to_keep_,
         const KeeperContextPtr & keeper_context_,
         bool compress_snapshots_zstd_ = true,
@@ -157,7 +147,7 @@ public:
 
         try
         {
-            if (disk->exists(path))
+            if (getDisk()->exists(path))
                 return path;
         }
         catch (...)
@@ -170,11 +160,12 @@ public:
 private:
     void removeOutdatedSnapshotsIfNeeded();
 
+    DiskPtr getDisk() const;
+
    /// Checks the first 4 buffer bytes to make sure that the snapshot is compressed with the
    /// ZSTD codec.
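    /// (A ZSTD frame starts with the little-endian magic number 0xFD2FB528; comparing
    /// those 4 bytes against it is presumably how this helper decides.)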
static bool isZstdCompressed(nuraft::ptr buffer); - DiskPtr disk; /// How many snapshots to keep before remove const size_t snapshots_to_keep; /// All existing snapshots in our path (log_index -> path) diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index a7c845e5017..69c15db51da 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -43,7 +43,6 @@ namespace KeeperStateMachine::KeeperStateMachine( ResponsesQueue & responses_queue_, SnapshotsQueue & snapshots_queue_, - DiskPtr disk_, const CoordinationSettingsPtr & coordination_settings_, const KeeperContextPtr & keeper_context_, KeeperSnapshotManagerS3 * snapshot_manager_s3_, @@ -52,7 +51,6 @@ KeeperStateMachine::KeeperStateMachine( : commit_callback(commit_callback_) , coordination_settings(coordination_settings_) , snapshot_manager( - disk_, coordination_settings->snapshots_to_keep, keeper_context_, coordination_settings->compress_snapshots_with_zstd_format, diff --git a/src/Coordination/KeeperStateMachine.h b/src/Coordination/KeeperStateMachine.h index 1fe4db4de68..afe11150f36 100644 --- a/src/Coordination/KeeperStateMachine.h +++ b/src/Coordination/KeeperStateMachine.h @@ -26,7 +26,6 @@ public: KeeperStateMachine( ResponsesQueue & responses_queue_, SnapshotsQueue & snapshots_queue_, - DiskPtr disk_, const CoordinationSettingsPtr & coordination_settings_, const KeeperContextPtr & keeper_context_, KeeperSnapshotManagerS3 * snapshot_manager_s3_, diff --git a/src/Coordination/KeeperStateManager.cpp b/src/Coordination/KeeperStateManager.cpp index bcd5ce89e49..352e67e7c43 100644 --- a/src/Coordination/KeeperStateManager.cpp +++ b/src/Coordination/KeeperStateManager.cpp @@ -212,12 +212,14 @@ KeeperStateManager::parseServersConfiguration(const Poco::Util::AbstractConfigur return result; } -KeeperStateManager::KeeperStateManager( - int server_id_, const std::string & host, int port, const std::string & logs_path, const std::string & state_file_path) +KeeperStateManager::KeeperStateManager(int server_id_, const std::string & host, int port, KeeperContextPtr keeper_context_) : my_server_id(server_id_) , secure(false) - , log_store(nuraft::cs_new(std::make_shared("Keeper-logs", logs_path, 0), LogFileSettings{.force_sync =false, .compress_logs = false, .rotate_interval = 5000})) - , server_state_file_name(fs::path(state_file_path).filename().generic_string()) + , log_store(nuraft::cs_new( + LogFileSettings{.force_sync = false, .compress_logs = false, .rotate_interval = 5000}, + keeper_context_)) + , server_state_file_name("state") + , keeper_context(keeper_context_) , logger(&Poco::Logger::get("KeeperStateManager")) { auto peer_config = nuraft::cs_new(my_server_id, host + ":" + std::to_string(port)); @@ -230,17 +232,15 @@ KeeperStateManager::KeeperStateManager( KeeperStateManager::KeeperStateManager( int my_server_id_, const std::string & config_prefix_, - DiskPtr log_disk_, - DiskPtr state_disk_, const std::string & server_state_file_name_, const Poco::Util::AbstractConfiguration & config, - const CoordinationSettingsPtr & coordination_settings) + const CoordinationSettingsPtr & coordination_settings, + KeeperContextPtr keeper_context_) : my_server_id(my_server_id_) , secure(config.getBool(config_prefix_ + ".raft_configuration.secure", false)) , config_prefix(config_prefix_) , configuration_wrapper(parseServersConfiguration(config, false)) , log_store(nuraft::cs_new( - log_disk_, LogFileSettings { .force_sync = coordination_settings->force_sync, @@ -248,9 
+248,10 @@ KeeperStateManager::KeeperStateManager( .rotate_interval = coordination_settings->rotate_log_storage_interval, .max_size = coordination_settings->max_log_file_size, .overallocate_size = coordination_settings->log_file_overallocate_size - })) - , disk(state_disk_) + }, + keeper_context_)) , server_state_file_name(server_state_file_name_) + , keeper_context(keeper_context_) , logger(&Poco::Logger::get("KeeperStateManager")) { } @@ -299,6 +300,11 @@ const String & KeeperStateManager::getOldServerStatePath() return old_path; } +DiskPtr KeeperStateManager::getStateFileDisk() const +{ + return keeper_context->getStateFileDisk(); +} + namespace { enum ServerStateVersion : uint8_t @@ -314,6 +320,8 @@ void KeeperStateManager::save_state(const nuraft::srv_state & state) { const auto & old_path = getOldServerStatePath(); + auto disk = getStateFileDisk(); + if (disk->exists(server_state_file_name)) disk->moveFile(server_state_file_name, old_path); @@ -338,7 +346,9 @@ nuraft::ptr KeeperStateManager::read_state() { const auto & old_path = getOldServerStatePath(); - const auto try_read_file = [this](const auto & path) -> nuraft::ptr + auto disk = getStateFileDisk(); + + const auto try_read_file = [&](const auto & path) -> nuraft::ptr { try { diff --git a/src/Coordination/KeeperStateManager.h b/src/Coordination/KeeperStateManager.h index d8369100d1c..f24f0c2b1e5 100644 --- a/src/Coordination/KeeperStateManager.h +++ b/src/Coordination/KeeperStateManager.h @@ -39,19 +39,17 @@ public: KeeperStateManager( int server_id_, const std::string & config_prefix_, - DiskPtr logs_disk_, - DiskPtr state_disk_, - const std::string & state_file_path, + const std::string & server_state_file_name_, const Poco::Util::AbstractConfiguration & config, - const CoordinationSettingsPtr & coordination_settings); + const CoordinationSettingsPtr & coordination_settings, + KeeperContextPtr keeper_context_); /// Constructor for tests KeeperStateManager( int server_id_, const std::string & host, int port, - const std::string & logs_path, - const std::string & state_file_path); + KeeperContextPtr keeper_context_); void loadLogStore(uint64_t last_commited_index, uint64_t logs_to_keep); @@ -114,6 +112,8 @@ public: private: const String & getOldServerStatePath(); + DiskPtr getStateFileDisk() const; + /// Wrapper struct for Keeper cluster config. We parse this /// info from XML files. struct KeeperConfigurationWrapper @@ -137,9 +137,10 @@ private: nuraft::ptr log_store; - DiskPtr disk; const String server_state_file_name; + KeeperContextPtr keeper_context; + Poco::Logger * logger; public: diff --git a/src/Coordination/Context/Context.cpp b/src/Coordination/Standalone/Context.cpp similarity index 80% rename from src/Coordination/Context/Context.cpp rename to src/Coordination/Standalone/Context.cpp index aeb4e405938..667fabf4d66 100644 --- a/src/Coordination/Context/Context.cpp +++ b/src/Coordination/Standalone/Context.cpp @@ -38,6 +38,9 @@ struct ContextSharedPart : boost::noncopyable /// For access of most of shared objects. Recursive mutex. mutable std::recursive_mutex mutex; + mutable std::mutex keeper_dispatcher_mutex; + mutable std::shared_ptr keeper_dispatcher TSA_GUARDED_BY(keeper_dispatcher_mutex); + ServerSettings server_settings; String path; /// Path to the data directory, with a slash at the end. 
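The two members added above — a dispatcher shared_ptr guarded by its own mutex and annotated with TSA_GUARDED_BY — give the standalone Context a lazily initialized, thread-safe singleton with explicit initialize/get/shutdown steps. A minimal self-contained sketch of that pattern, assuming nothing beyond the standard library (Dispatcher and DispatcherHolder are invented stand-ins for KeeperDispatcher and ContextSharedPart, not names from this patch):

#include <memory>
#include <mutex>
#include <stdexcept>

struct Dispatcher
{
    void shutdown() { /* stop background threads, flush state */ }
};

class DispatcherHolder
{
public:
    void initialize()
    {
        std::lock_guard lock(mutex);
        if (dispatcher)
            throw std::logic_error("trying to initialize the dispatcher multiple times");
        dispatcher = std::make_shared<Dispatcher>();
    }

    std::shared_ptr<Dispatcher> get() const
    {
        std::lock_guard lock(mutex);
        if (!dispatcher)
            throw std::logic_error("dispatcher must be initialized before use");
        return dispatcher;
    }

    std::shared_ptr<Dispatcher> tryGet() const
    {
        std::lock_guard lock(mutex);
        return dispatcher; // may be null, callers must check
    }

    void shutdown()
    {
        std::lock_guard lock(mutex);
        if (dispatcher)
        {
            dispatcher->shutdown();
            dispatcher.reset();
        }
    }

private:
    mutable std::mutex mutex; // guards the pointer, not the dispatcher's internals
    mutable std::shared_ptr<Dispatcher> dispatcher;
};

int main()
{
    DispatcherHolder holder;
    holder.initialize();
    auto d = holder.get(); // the copy keeps the dispatcher alive even if shutdown() runs concurrently
    holder.shutdown();
}

Returning a copy of the shared_ptr rather than a reference is the point of the pattern: a caller that obtained the dispatcher can keep holding it without risking a use-after-free while another thread resets the member under the mutex.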
@@ -129,6 +132,11 @@ MultiVersion::Version Context::getMacros() const return shared->macros.get(); } +void Context::setMacros(std::unique_ptr && macros) +{ + shared->macros.set(std::move(macros)); +} + BackgroundSchedulePool & Context::getSchedulePool() const { auto lock = getLock(); @@ -256,4 +264,54 @@ ReadSettings Context::getReadSettings() const return ReadSettings{}; } +void Context::initializeKeeperDispatcher([[maybe_unused]] bool start_async) const +{ + const auto & config_ref = getConfigRef(); + + std::lock_guard lock(shared->keeper_dispatcher_mutex); + + if (shared->keeper_dispatcher) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to initialize Keeper multiple times"); + + if (config_ref.has("keeper_server")) + { + shared->keeper_dispatcher = std::make_shared(); + shared->keeper_dispatcher->initialize(config_ref, true, start_async, getMacros()); + } +} + +std::shared_ptr Context::getKeeperDispatcher() const +{ + std::lock_guard lock(shared->keeper_dispatcher_mutex); + if (!shared->keeper_dispatcher) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Keeper must be initialized before requests"); + + return shared->keeper_dispatcher; +} + +std::shared_ptr Context::tryGetKeeperDispatcher() const +{ + std::lock_guard lock(shared->keeper_dispatcher_mutex); + return shared->keeper_dispatcher; +} + +void Context::shutdownKeeperDispatcher() const +{ + std::lock_guard lock(shared->keeper_dispatcher_mutex); + if (shared->keeper_dispatcher) + { + shared->keeper_dispatcher->shutdown(); + shared->keeper_dispatcher.reset(); + } +} + +void Context::updateKeeperConfiguration([[maybe_unused]] const Poco::Util::AbstractConfiguration & config_) +{ + std::lock_guard lock(shared->keeper_dispatcher_mutex); + if (!shared->keeper_dispatcher) + return; + + shared->keeper_dispatcher->updateConfiguration(getConfigRef(), getMacros()); +} + } diff --git a/src/Coordination/Context/Context.h b/src/Coordination/Standalone/Context.h similarity index 88% rename from src/Coordination/Context/Context.h rename to src/Coordination/Standalone/Context.h index 683209d942e..c2eee981aaa 100644 --- a/src/Coordination/Context/Context.h +++ b/src/Coordination/Standalone/Context.h @@ -2,6 +2,8 @@ #include +#include + #include #include @@ -85,6 +87,7 @@ public: void setPath(const String & path); MultiVersion::Version getMacros() const; + void setMacros(std::unique_ptr && macros); BackgroundSchedulePool & getSchedulePool() const; @@ -106,6 +109,12 @@ public: ThrottlerPtr getLocalWriteThrottler() const; ReadSettings getReadSettings() const; + + std::shared_ptr getKeeperDispatcher() const; + std::shared_ptr tryGetKeeperDispatcher() const; + void initializeKeeperDispatcher(bool start_async) const; + void shutdownKeeperDispatcher() const; + void updateKeeperConfiguration(const Poco::Util::AbstractConfiguration & config); }; } diff --git a/src/Coordination/Context/Settings.cpp b/src/Coordination/Standalone/Settings.cpp similarity index 100% rename from src/Coordination/Context/Settings.cpp rename to src/Coordination/Standalone/Settings.cpp diff --git a/src/Coordination/Context/ThreadStatusExt.cpp b/src/Coordination/Standalone/ThreadStatusExt.cpp similarity index 100% rename from src/Coordination/Context/ThreadStatusExt.cpp rename to src/Coordination/Standalone/ThreadStatusExt.cpp diff --git a/src/Coordination/TinyContext.cpp b/src/Coordination/TinyContext.cpp deleted file mode 100644 index 47b0a48dcda..00000000000 --- a/src/Coordination/TinyContext.cpp +++ /dev/null @@ -1,87 +0,0 @@ -#include - -#include -#include - -namespace DB -{ 
- -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -void TinyContext::setConfig(const ConfigurationPtr & config_) -{ - std::lock_guard lock(keeper_dispatcher_mutex); - config = config_; -} - -const Poco::Util::AbstractConfiguration & TinyContext::getConfigRef() const -{ - std::lock_guard lock(keeper_dispatcher_mutex); - return config ? *config : Poco::Util::Application::instance().config(); -} - - -void TinyContext::initializeKeeperDispatcher([[maybe_unused]] bool start_async) const -{ - const auto & config_ref = getConfigRef(); - - std::lock_guard lock(keeper_dispatcher_mutex); - - if (keeper_dispatcher) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to initialize Keeper multiple times"); - - if (config_ref.has("keeper_server")) - { - keeper_dispatcher = std::make_shared(); - - MultiVersion::Version macros; - - if (config_ref.has("macros")) - macros = std::make_unique(config_ref, "macros", &Poco::Logger::get("TinyContext")); - keeper_dispatcher->initialize(config_ref, true, start_async, macros); - } -} - -std::shared_ptr TinyContext::getKeeperDispatcher() const -{ - std::lock_guard lock(keeper_dispatcher_mutex); - if (!keeper_dispatcher) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Keeper must be initialized before requests"); - - return keeper_dispatcher; -} - -std::shared_ptr TinyContext::tryGetKeeperDispatcher() const -{ - std::lock_guard lock(keeper_dispatcher_mutex); - return keeper_dispatcher; -} - -void TinyContext::shutdownKeeperDispatcher() const -{ - std::lock_guard lock(keeper_dispatcher_mutex); - if (keeper_dispatcher) - { - keeper_dispatcher->shutdown(); - keeper_dispatcher.reset(); - } -} - -void TinyContext::updateKeeperConfiguration([[maybe_unused]] const Poco::Util::AbstractConfiguration & config_) -{ - std::lock_guard lock(keeper_dispatcher_mutex); - if (!keeper_dispatcher) - return; - - MultiVersion::Version macros; - - if (config_.has("macros")) - macros = std::make_unique(config_, "macros", &Poco::Logger::get("TinyContext")); - - keeper_dispatcher->updateConfiguration(config_, macros); -} - -} diff --git a/src/Coordination/TinyContext.h b/src/Coordination/TinyContext.h deleted file mode 100644 index b966d445004..00000000000 --- a/src/Coordination/TinyContext.h +++ /dev/null @@ -1,36 +0,0 @@ -#pragma once -#include -#include - -#include -#include - -namespace DB -{ - -class KeeperDispatcher; - -class TinyContext : public std::enable_shared_from_this -{ -public: - std::shared_ptr getKeeperDispatcher() const; - std::shared_ptr tryGetKeeperDispatcher() const; - void initializeKeeperDispatcher(bool start_async) const; - void shutdownKeeperDispatcher() const; - void updateKeeperConfiguration(const Poco::Util::AbstractConfiguration & config); - - using ConfigurationPtr = Poco::AutoPtr; - - void setConfig(const ConfigurationPtr & config); - const Poco::Util::AbstractConfiguration & getConfigRef() const; - -private: - mutable std::mutex keeper_dispatcher_mutex; - mutable std::shared_ptr keeper_dispatcher TSA_GUARDED_BY(keeper_dispatcher_mutex); - - ConfigurationPtr config TSA_GUARDED_BY(keeper_dispatcher_mutex); -}; - -using TinyContextPtr = std::shared_ptr; - -} diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index b2aee8d9ec0..0ec39f18757 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -1237,6 +1237,6 @@ struct HTTPContext : public IHTTPContext #else -#include +#include #endif From 656d6abb2ee2d221df01367bcb53465289e4981c Mon Sep 17 00:00:00 2001 From: FFFFFFFHHHHHHH <916677625@qq.com> Date: Mon, 22 May 
2023 22:49:02 +0800 Subject: [PATCH 0166/1997] fix style --- src/Functions/array/arrayJaccardIndex.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/array/arrayJaccardIndex.cpp b/src/Functions/array/arrayJaccardIndex.cpp index 8cce98ab64d..c1ec8b53d25 100644 --- a/src/Functions/array/arrayJaccardIndex.cpp +++ b/src/Functions/array/arrayJaccardIndex.cpp @@ -100,7 +100,7 @@ public: { const ColumnArray * res; bool is_const = false; - if (typeid_cast(col.column.get())) + if (typeid_cast(col.column.get())) { res = checkAndGetColumn(checkAndGetColumnConst(col.column.get())->getDataColumnPtr().get()); is_const = true; From b8305503d89783b6700ae2c43f69b96798181b03 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 22 May 2023 19:07:18 +0200 Subject: [PATCH 0167/1997] more flexible cleanup thread scheduling --- base/base/interpolate.h | 5 + src/Storages/MergeTree/IMergeTreeDataPart.cpp | 10 +- src/Storages/MergeTree/IMergeTreeDataPart.h | 3 + src/Storages/MergeTree/MergeTreeData.cpp | 26 ++- src/Storages/MergeTree/MergeTreeData.h | 6 + src/Storages/MergeTree/MergeTreeSettings.h | 4 +- .../ReplicatedMergeTreeCleanupThread.cpp | 171 +++++++++++++++--- .../ReplicatedMergeTreeCleanupThread.h | 28 ++- .../MergeTree/SimpleMergeSelector.cpp | 8 +- src/Storages/StorageReplicatedMergeTree.cpp | 9 +- src/Storages/StorageReplicatedMergeTree.h | 4 +- tests/config/config.d/merge_tree.xml | 2 + .../test.py | 3 +- .../test_broken_part_during_merge/test.py | 2 +- .../test.py | 6 +- tests/integration/test_drop_replica/test.py | 15 +- tests/integration/test_jbod_balancer/test.py | 1 + tests/integration/test_jbod_ha/test.py | 1 + tests/integration/test_lost_part/test.py | 8 +- tests/integration/test_multiple_disks/test.py | 6 +- .../test_old_parts_finally_removed/test.py | 5 +- .../test_parts_delete_zookeeper/test.py | 2 +- .../integration/test_recovery_replica/test.py | 2 +- tests/integration/test_storage_nats/test.py | 3 +- .../integration/test_storage_rabbitmq/test.py | 6 +- tests/integration/test_system_metrics/test.py | 4 +- tests/integration/test_ttl_replicated/test.py | 3 +- ..._replace_partition_from_table_zookeeper.sh | 8 +- .../00652_replicated_mutations_zookeeper.sh | 6 +- ...ated_minimalistic_part_header_zookeeper.sh | 6 +- ...0953_zookeeper_suetin_deduplication_bug.sh | 2 +- .../00988_parallel_parts_removal.sql | 4 +- ...tem_parts_race_condition_zookeeper_long.sh | 10 +- ...tem_parts_race_condition_drop_zookeeper.sh | 3 +- ...034_move_partition_from_table_zookeeper.sh | 6 +- ...ent_move_partition_from_table_zookeeper.sh | 3 +- ...076_parallel_alter_replicated_zookeeper.sh | 3 +- ...9_parallel_alter_detach_table_zookeeper.sh | 5 +- .../01103_optimize_drop_race_zookeeper.sh | 4 +- .../0_stateless/01158_zookeeper_log_long.sql | 2 +- ...nactive_replica_cleanup_nodes_zookeeper.sh | 6 +- ...e_condition_rename_clear_zookeeper_long.sh | 4 +- .../01509_parallel_quorum_and_merge_long.sh | 3 +- ...nt_ttl_and_normal_merges_zookeeper_long.sh | 3 +- .../0_stateless/02067_lost_part_s3.sql | 12 +- .../02370_lost_part_intersecting_merges.sh | 2 +- .../02396_system_parts_race_condition_rm.sh | 8 +- ...397_system_parts_race_condition_drop_rm.sh | 4 +- .../02432_s3_parallel_parts_cleanup.sql | 4 +- .../02448_clone_replica_lost_part.sql | 6 +- ..._projection_and_mutation_work_together.sql | 6 +- .../02515_cleanup_async_insert_block_ids.sh | 2 +- 52 files changed, 353 insertions(+), 112 deletions(-) diff --git a/base/base/interpolate.h b/base/base/interpolate.h index 
1d4fc0b6257..4c27f70c95b 100644 --- a/base/base/interpolate.h +++ b/base/base/interpolate.h @@ -11,3 +11,8 @@ constexpr double interpolateExponential(double min, double max, double ratio) assert(min > 0 && ratio >= 0 && ratio <= 1); return min * std::pow(max / min, ratio); } + +constexpr double interpolateLinear(double min, double max, double ratio) +{ + return std::lerp(min, max, ratio); +} diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index d27b03fff44..3d2b6ecc540 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -211,9 +211,9 @@ void IMergeTreeDataPart::MinMaxIndex::appendFiles(const MergeTreeData & data, St } -static void incrementStateMetric(MergeTreeDataPartState state) +void IMergeTreeDataPart::incrementStateMetric(MergeTreeDataPartState state_) const { - switch (state) + switch (state_) { case MergeTreeDataPartState::Temporary: CurrentMetrics::add(CurrentMetrics::PartsTemporary); @@ -227,6 +227,7 @@ static void incrementStateMetric(MergeTreeDataPartState state) CurrentMetrics::add(CurrentMetrics::PartsCommitted); return; case MergeTreeDataPartState::Outdated: + storage.total_outdated_parts_count.fetch_add(1, std::memory_order_relaxed); CurrentMetrics::add(CurrentMetrics::PartsOutdated); return; case MergeTreeDataPartState::Deleting: @@ -238,9 +239,9 @@ static void incrementStateMetric(MergeTreeDataPartState state) } } -static void decrementStateMetric(MergeTreeDataPartState state) +void IMergeTreeDataPart::decrementStateMetric(MergeTreeDataPartState state_) const { - switch (state) + switch (state_) { case MergeTreeDataPartState::Temporary: CurrentMetrics::sub(CurrentMetrics::PartsTemporary); @@ -254,6 +255,7 @@ static void decrementStateMetric(MergeTreeDataPartState state) CurrentMetrics::sub(CurrentMetrics::PartsCommitted); return; case MergeTreeDataPartState::Outdated: + storage.total_outdated_parts_count.fetch_sub(1, std::memory_order_relaxed); CurrentMetrics::sub(CurrentMetrics::PartsOutdated); return; case MergeTreeDataPartState::Deleting: diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 388d96314c0..ecc1523b6c0 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -623,6 +623,9 @@ private: /// for this column with default parameters. 
CompressionCodecPtr detectDefaultCompressionCodec() const; + void incrementStateMetric(MergeTreeDataPartState state) const; + void decrementStateMetric(MergeTreeDataPartState state) const; + mutable MergeTreeDataPartState state{MergeTreeDataPartState::Temporary}; /// This ugly flag is needed for debug assertions only diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index b21f44baeb5..5cfc4c577dc 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -81,6 +81,7 @@ #include #include +#include #include #include @@ -4311,6 +4312,29 @@ size_t MergeTreeData::getActivePartsCount() const } +size_t MergeTreeData::getOutdatedPartsCount() const +{ + return total_outdated_parts_count.load(std::memory_order_relaxed); +} + +size_t MergeTreeData::getNumberOfOutdatedPartsWithExpiredRemovalTime() const +{ + size_t res = 0; + + auto time_now = time(nullptr); + + auto parts_lock = lockParts(); + auto outdated_parts_range = getDataPartsStateRange(DataPartState::Outdated); + for (const auto & part : outdated_parts_range) + { + auto part_remove_time = part->remove_time.load(std::memory_order_relaxed); + if (part_remove_time <= time_now && time_now - part_remove_time >= getSettings()->old_parts_lifetime.totalSeconds() && part.unique()) + ++res; + } + + return res; +} + std::pair MergeTreeData::getMaxPartsCountAndSizeForPartitionWithState(DataPartState state) const { auto lock = lockParts(); @@ -4519,7 +4543,7 @@ void MergeTreeData::delayMutationOrThrowIfNeeded(Poco::Event * until, const Cont size_t allowed_mutations_over_threshold = num_mutations_to_throw - num_mutations_to_delay; double delay_factor = std::min(static_cast(mutations_over_threshold) / allowed_mutations_over_threshold, 1.0); - size_t delay_milliseconds = static_cast(std::lerp(settings->min_delay_to_mutate_ms, settings->max_delay_to_mutate_ms, delay_factor)); + size_t delay_milliseconds = static_cast(interpolateLinear(settings->min_delay_to_mutate_ms, settings->max_delay_to_mutate_ms, delay_factor)); ProfileEvents::increment(ProfileEvents::DelayedMutations); ProfileEvents::increment(ProfileEvents::DelayedMutationsMilliseconds, delay_milliseconds); diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 5488ce72631..4a71c24e6d3 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -532,6 +532,10 @@ public: size_t getActivePartsCount() const; + size_t getOutdatedPartsCount() const; + + size_t getNumberOfOutdatedPartsWithExpiredRemovalTime() const; + /// Returns a pair with: max number of parts in partition across partitions; sum size of parts inside that partition. /// (if there are multiple partitions with max number of parts, the sum size of parts is returned for arbitrary of them) std::pair getMaxPartsCountAndSizeForPartitionWithState(DataPartState state) const; @@ -1491,6 +1495,8 @@ private: std::atomic total_active_size_rows = 0; std::atomic total_active_size_parts = 0; + mutable std::atomic total_outdated_parts_count = 0; + // Record all query ids which access the table. It's guarded by `query_id_set_mutex` and is always mutable. 
mutable std::set query_id_set TSA_GUARDED_BY(query_id_set_mutex); mutable std::mutex query_id_set_mutex; diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index c9e81ce9103..78d703e795c 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -120,8 +120,10 @@ struct Settings; \ /** Check delay of replicas settings. */ \ M(UInt64, min_relative_delay_to_measure, 120, "Calculate relative replica delay only if absolute delay is not less that this value.", 0) \ - M(UInt64, cleanup_delay_period, 30, "Period to clean old queue logs, blocks hashes and parts.", 0) \ + M(UInt64, cleanup_delay_period, 30, "Minimum period to clean old queue logs, blocks hashes and parts.", 0) \ + M(UInt64, max_cleanup_delay_period, 300, "Maximum period to clean old queue logs, blocks hashes and parts.", 0) \ M(UInt64, cleanup_delay_period_random_add, 10, "Add uniformly distributed value from 0 to x seconds to cleanup_delay_period to avoid thundering herd effect and subsequent DoS of ZooKeeper in case of very large number of tables.", 0) \ + M(UInt64, cleanup_thread_preferred_points_per_iteration, 150, "Preferred batch size for background cleanup (points are abstract but 1 point is approximately equivalent to 1 inserted block).", 0) \ M(UInt64, min_relative_delay_to_close, 300, "Minimal delay from other replicas to close, stop serving requests and not return Ok during status check.", 0) \ M(UInt64, min_absolute_delay_to_close, 0, "Minimal absolute delay to close, stop serving requests and not return Ok during status check.", 0) \ M(UInt64, enable_vertical_merge_algorithm, 1, "Enable usage of Vertical merge algorithm.", 0) \ diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index 0409cadc1e9..35a860ebb42 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -25,19 +25,22 @@ ReplicatedMergeTreeCleanupThread::ReplicatedMergeTreeCleanupThread(StorageReplic : storage(storage_) , log_name(storage.getStorageID().getFullTableName() + " (ReplicatedMergeTreeCleanupThread)") , log(&Poco::Logger::get(log_name)) + , sleep_ms(storage.getSettings()->cleanup_delay_period * 1000) { task = storage.getContext()->getSchedulePool().createTask(log_name, [this]{ run(); }); } void ReplicatedMergeTreeCleanupThread::run() { - auto storage_settings = storage.getSettings(); - const auto sleep_ms = storage_settings->cleanup_delay_period * 1000 - + std::uniform_int_distribution(0, storage_settings->cleanup_delay_period_random_add * 1000)(rng); + SCOPE_EXIT({ is_running.store(false, std::memory_order_relaxed); }); + is_running.store(true, std::memory_order_relaxed); + auto storage_settings = storage.getSettings(); + + Float32 cleanup_points = 0; try { - iterate(); + cleanup_points = iterate(); } catch (const Coordination::Exception & e) { @@ -51,39 +54,144 @@ void ReplicatedMergeTreeCleanupThread::run() tryLogCurrentException(log, __PRETTY_FUNCTION__); } + UInt64 prev_timestamp = prev_cleanup_timestamp_ms.load(std::memory_order_relaxed); + UInt64 now_ms = clock_gettime_ns_adjusted(prev_timestamp * 1'000'000) / 1'000'000; + + /// Do not adjust sleep_ms on the first run after starting the server + if (prev_timestamp && storage_settings->cleanup_thread_preferred_points_per_iteration) + { + /// We don't want to run the task too often when the table was barely changed and there's 
almost nothing to clean up. + /// But we cannot simply sleep max_cleanup_delay_period (300s) when nothing was cleaned up and cleanup_delay_period (30s) + /// when we removed something, because inserting one part per 30s will lead to running cleanup each 30s just to remove one part. + /// So we need some interpolation based on preferred batch size. + auto expected_cleanup_points = storage_settings->cleanup_thread_preferred_points_per_iteration; + + /// How long should we sleep to remove cleanup_thread_preferred_points_per_iteration on the next iteration? + Float32 ratio = cleanup_points / expected_cleanup_points; + if (ratio == 0) + sleep_ms = storage_settings->max_cleanup_delay_period * 1000; + else + sleep_ms = static_cast(sleep_ms / ratio); + + if (sleep_ms < storage_settings->cleanup_delay_period * 1000) + sleep_ms = storage_settings->cleanup_delay_period * 1000; + if (storage_settings->max_cleanup_delay_period * 1000 < sleep_ms) + sleep_ms = storage_settings->max_cleanup_delay_period * 1000; + + UInt64 interval_ms = now_ms - prev_timestamp; + LOG_TRACE(log, "Scheduling next cleanup after {}ms (points: {}, interval: {}ms, ratio: {}, points per minute: {})", + sleep_ms, cleanup_points, interval_ms, ratio, cleanup_points / interval_ms * 60'000); + } + prev_cleanup_timestamp_ms.store(now_ms, std::memory_order_relaxed); + + sleep_ms += std::uniform_int_distribution(0, storage_settings->cleanup_delay_period_random_add * 1000)(rng); task->scheduleAfter(sleep_ms); } - -void ReplicatedMergeTreeCleanupThread::iterate() +void ReplicatedMergeTreeCleanupThread::wakeupEarlierIfNeeded() { - storage.clearOldPartsAndRemoveFromZK(); + /// It may happen that the table was idle for a long time, but then a user started to aggressively insert (or mutate) data. + /// In this case, sleep_ms was set to the highest possible value, the task is not going to wake up soon, + /// but the number of objects to clean up is growing. We need to wake up the task earlier. + auto storage_settings = storage.getSettings(); + if (!storage_settings->cleanup_thread_preferred_points_per_iteration) + return; + + /// The number of other objects (logs, blocks, etc) is usually correlated with the number of Outdated parts. + /// Do not wake up unless we have too many. + size_t number_of_outdated_objects = storage.getOutdatedPartsCount(); + if (number_of_outdated_objects < storage_settings->cleanup_thread_preferred_points_per_iteration * 2) + return; + + /// A race condition is possible here, but it's okay + if (is_running.load(std::memory_order_relaxed)) + return; + + /// Do not re-check all parts too often (avoid constantly calling getNumberOfOutdatedPartsWithExpiredRemovalTime()) + if (!wakeup_check_timer.compareAndRestart(storage_settings->cleanup_delay_period / 4)) + return; + + UInt64 prev_run_timestamp_ms = prev_cleanup_timestamp_ms.load(std::memory_order_relaxed); + UInt64 now_ms = clock_gettime_ns_adjusted(prev_run_timestamp_ms * 1'000'000) / 1'000'000; + if (!prev_run_timestamp_ms || now_ms <= prev_run_timestamp_ms) + return; + + /// Don't run it more often than cleanup_delay_period + UInt64 seconds_passed = (now_ms - prev_run_timestamp_ms) / 1000; + if (seconds_passed < storage_settings->cleanup_delay_period) + return; + + /// Do not count parts that cannot be removed anyway. Do not wake up unless we have too many.
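Condensed into one function, the early-wakeup rule that these checks implement looks roughly like the sketch below. The function and parameter names are invented for illustration; only the thresholds (twice the preferred batch size, the minimum delay period) come from the patch itself:

#include <cstdint>

bool shouldWakeupCleanupEarly(
    uint64_t outdated_parts,          // cheap counter, may include parts that are not yet removable
    uint64_t expired_outdated_parts,  // exact recount of parts that can actually be removed
    uint64_t preferred_points,        // cleanup_thread_preferred_points_per_iteration
    uint64_t seconds_since_last_run,
    uint64_t cleanup_delay_period_s)
{
    if (preferred_points == 0)
        return false; // adaptive scheduling is disabled
    if (outdated_parts < preferred_points * 2)
        return false; // not enough work has piled up yet
    if (seconds_since_last_run < cleanup_delay_period_s)
        return false; // never run more often than the minimum period
    return expired_outdated_parts >= preferred_points * 2; // count only removable parts
}

The cheap counter is consulted first so that the expensive exact recount runs only when a wakeup is already plausible.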
+ number_of_outdated_objects = storage.getNumberOfOutdatedPartsWithExpiredRemovalTime(); + if (number_of_outdated_objects < storage_settings->cleanup_thread_preferred_points_per_iteration * 2) + return; + + LOG_TRACE(log, "Waking up cleanup thread because there are {} outdated objects and previous cleanup finished {}s ago", + number_of_outdated_objects, seconds_passed); + + wakeup(); +} + + +Float32 ReplicatedMergeTreeCleanupThread::iterate() +{ + size_t cleaned_logs = 0; + Float32 cleaned_blocks = 0; + size_t cleaned_other = 0; + size_t cleaned_part_like = 0; + size_t cleaned_parts = storage.clearOldPartsAndRemoveFromZK(); + + auto storage_settings = storage.getSettings(); { auto lock = storage.lockForShare(RWLockImpl::NO_QUERY, storage.getSettings()->lock_acquire_timeout_for_background_operations); /// Both use relative_data_path which changes during rename, so we /// do it under share lock - storage.clearOldWriteAheadLogs(); - storage.clearOldTemporaryDirectories(storage.getSettings()->temporary_directories_lifetime.totalSeconds()); + cleaned_other += storage.clearOldWriteAheadLogs(); + cleaned_part_like += storage.clearOldTemporaryDirectories(storage.getSettings()->temporary_directories_lifetime.totalSeconds()); if (storage.getSettings()->merge_tree_enable_clear_old_broken_detached) - storage.clearOldBrokenPartsFromDetachedDirectory(); + cleaned_part_like += storage.clearOldBrokenPartsFromDetachedDirectory(); } /// This is loose condition: no problem if we actually had lost leadership at this moment /// and two replicas will try to do cleanup simultaneously. if (storage.is_leader) { - clearOldLogs(); - auto storage_settings = storage.getSettings(); - clearOldBlocks("blocks", storage_settings->replicated_deduplication_window_seconds, storage_settings->replicated_deduplication_window, cached_block_stats_for_sync_inserts); - clearOldBlocks("async_blocks", storage_settings->replicated_deduplication_window_seconds_for_async_inserts, storage_settings->replicated_deduplication_window_for_async_inserts, cached_block_stats_for_async_inserts); - clearOldMutations(); - storage.clearEmptyParts(); + cleaned_logs = clearOldLogs(); + size_t normal_blocks = clearOldBlocks("blocks", storage_settings->replicated_deduplication_window_seconds, + storage_settings->replicated_deduplication_window, cached_block_stats_for_sync_inserts); + + size_t async_blocks = clearOldBlocks("async_blocks", + storage_settings->replicated_deduplication_window_seconds_for_async_inserts, + storage_settings->replicated_deduplication_window_for_async_inserts, + cached_block_stats_for_async_inserts); + + /// Many async blocks are transformed into one ordinary block + Float32 async_blocks_per_block = static_cast(storage_settings->replicated_deduplication_window) / + (storage_settings->replicated_deduplication_window_for_async_inserts + 1); + cleaned_blocks = (normal_blocks + async_blocks * async_blocks_per_block) / 2; + + cleaned_other += clearOldMutations(); + cleaned_part_like += storage.clearEmptyParts(); } + + /// We need to measure the number of removed objects somehow (for better scheduling), + /// but just summing the number of removed async blocks, logs, and empty parts does not make any sense. + /// So we are trying to (approximately) measure the number of inserted blocks/parts, so we will be able to compare apples to apples. + + /// Each inserted block produces 3 objects that have to be cleaned up: one block, one log entry and one part. + /// A few new parts get merged together producing one log entry and one part. 
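A small worked example may make this accounting concrete. The constants (three objects per inserted block, the 1.3 parts amplification factor) are the ones used in the code just below; the input counts are made up for illustration:

#include <cstdio>

int main()
{
    // Hypothetical results of one cleanup iteration.
    float cleaned_blocks = 90.0f;  // deduplication block hashes removed
    float cleaned_logs = 100.0f;   // replication log entries removed
    float cleaned_parts = 120.0f;  // Outdated parts removed

    // Merges inflate the number of parts and log entries relative to inserts.
    float parts_number_amplification = 1.3f;

    // Each inserted block eventually costs one block hash, one log entry and one part.
    float cleanup_points = (cleaned_blocks + (cleaned_logs + cleaned_parts) / parts_number_amplification) / 3;

    std::printf("cleanup points this iteration: %.1f\n", cleanup_points); // prints ~86.4
}

With these inputs the thread is credited with roughly 86 points, i.e. it behaved as if it removed the traces of about 86 inserted blocks, and the next sleep interval is scaled against cleanup_thread_preferred_points_per_iteration accordingly.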
+ + /// Other objects (like mutations and WALs) are much rarer than Outdated parts (because mutations usually produce + /// many Outdated parts, and WALs usually contain many parts too). We count them as one part for simplicity. + + constexpr Float32 parts_number_amplification = 1.3f; /// Assuming we merge 4-5 parts each time + Float32 cleaned_inserted_parts = (cleaned_blocks + (cleaned_logs + cleaned_parts) / parts_number_amplification) / 3; + return cleaned_inserted_parts + cleaned_part_like + cleaned_other; } -void ReplicatedMergeTreeCleanupThread::clearOldLogs() +size_t ReplicatedMergeTreeCleanupThread::clearOldLogs() { auto zookeeper = storage.getZooKeeper(); auto storage_settings = storage.getSettings(); @@ -102,7 +210,7 @@ void ReplicatedMergeTreeCleanupThread::clearOldLogs() size_t min_replicated_logs_to_keep = static_cast(storage_settings->min_replicated_logs_to_keep * ratio); if (static_cast(children_count) < min_replicated_logs_to_keep) - return; + return 0; Strings replicas = zookeeper->getChildren(storage.zookeeper_path + "/replicas", &stat); @@ -114,7 +222,7 @@ void ReplicatedMergeTreeCleanupThread::clearOldLogs() Strings entries = zookeeper->getChildren(storage.zookeeper_path + "/log"); if (entries.empty()) - return; + return 0; ::sort(entries.begin(), entries.end()); @@ -227,7 +335,7 @@ void ReplicatedMergeTreeCleanupThread::clearOldLogs() entries.erase(std::lower_bound(entries.begin(), entries.end(), "log-" + padIndex(min_saved_log_pointer)), entries.end()); if (entries.empty()) - return; + return 0; markLostReplicas( host_versions_lost_replicas, @@ -268,6 +376,8 @@ void ReplicatedMergeTreeCleanupThread::clearOldLogs() if (i != 0) LOG_DEBUG(log, "Removed {} old log entries: {} - {}", i, entries[0], entries[i - 1]); + + return i; } @@ -323,7 +433,7 @@ struct ReplicatedMergeTreeCleanupThread::NodeWithStat } }; -void ReplicatedMergeTreeCleanupThread::clearOldBlocks(const String & blocks_dir_name, UInt64 window_seconds, UInt64 window_size, NodeCTimeAndVersionCache & cached_block_stats) +size_t ReplicatedMergeTreeCleanupThread::clearOldBlocks(const String & blocks_dir_name, UInt64 window_seconds, UInt64 window_size, NodeCTimeAndVersionCache & cached_block_stats) { auto zookeeper = storage.getZooKeeper(); @@ -331,7 +441,7 @@ void ReplicatedMergeTreeCleanupThread::clearOldBlocks(const String & blocks_dir_ getBlocksSortedByTime(blocks_dir_name, *zookeeper, timed_blocks, cached_block_stats); if (timed_blocks.empty()) - return; + return 0; /// Use ZooKeeper's first node (last according to time) timestamp as "current" time.
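The comment above deserves a note: taking "now" from the newest block's ctime (a timestamp assigned by ZooKeeper, in milliseconds) keeps the retention window independent of the local server clock, so skew between the server and ZooKeeper cannot make blocks expire early or late. A deliberately simplified sketch of such a cutoff over blocks sorted newest-first; the names and types are illustrative, and the real code computes an iterator (first_outdated_block) rather than a count:

#include <cstdint>
#include <cstdio>
#include <vector>

struct TimedBlock
{
    int64_t ctime; // ZooKeeper creation time, in milliseconds
};

// A block survives only while it is inside both the time window and the
// size window; everything after the first violation is removable.
size_t countRemovable(const std::vector<TimedBlock> & blocks, int64_t window_seconds, size_t window_size)
{
    if (blocks.empty())
        return 0;
    const int64_t current_time = blocks.front().ctime; // the newest node defines "now"
    const int64_t time_threshold = current_time - window_seconds * 1000;
    size_t kept = 0;
    for (const auto & block : blocks)
    {
        if (block.ctime >= time_threshold && kept < window_size)
            ++kept;
        else
            break;
    }
    return blocks.size() - kept;
}

int main()
{
    std::vector<TimedBlock> blocks{{10'000}, {9'000}, {2'000}};
    std::printf("%zu\n", countRemovable(blocks, 5, 10)); // prints 1: the oldest block falls outside the 5 s window
}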
Int64 current_time = timed_blocks.front().ctime; @@ -350,7 +460,7 @@ void ReplicatedMergeTreeCleanupThread::clearOldBlocks(const String & blocks_dir_ auto num_nodes_to_delete = timed_blocks.end() - first_outdated_block; if (!num_nodes_to_delete) - return; + return 0; auto last_outdated_block = timed_blocks.end() - 1; LOG_TRACE(log, "Will clear {} old blocks from {} (ctime {}) to {} (ctime {})", num_nodes_to_delete, @@ -388,6 +498,7 @@ void ReplicatedMergeTreeCleanupThread::clearOldBlocks(const String & blocks_dir_ } LOG_TRACE(log, "Cleared {} old blocks from ZooKeeper", num_nodes_to_delete); + return num_nodes_to_delete; } @@ -456,17 +567,17 @@ void ReplicatedMergeTreeCleanupThread::getBlocksSortedByTime(const String & bloc } -void ReplicatedMergeTreeCleanupThread::clearOldMutations() +size_t ReplicatedMergeTreeCleanupThread::clearOldMutations() { auto storage_settings = storage.getSettings(); if (!storage_settings->finished_mutations_to_keep) - return; + return 0; if (storage.queue.countFinishedMutations() <= storage_settings->finished_mutations_to_keep) { /// Not strictly necessary, but helps to avoid unnecessary ZooKeeper requests. /// If even this replica hasn't finished enough mutations yet, then we don't need to clean anything. - return; + return 0; } auto zookeeper = storage.getZooKeeper(); @@ -481,7 +592,7 @@ void ReplicatedMergeTreeCleanupThread::clearOldMutations() // No need to check the return value when deleting mutations. zookeeper->tryGet(storage.zookeeper_path + "/replicas/" + replica + "/mutation_pointer", pointer); if (pointer.empty()) - return; /// One replica hasn't done anything yet so we can't delete any mutations. + return 0; /// One replica hasn't done anything yet so we can't delete any mutations. min_pointer = std::min(parse(pointer), min_pointer); } @@ -492,11 +603,11 @@ void ReplicatedMergeTreeCleanupThread::clearOldMutations() entries.erase(std::upper_bound(entries.begin(), entries.end(), padIndex(min_pointer)), entries.end()); /// Do not remove last `storage_settings->finished_mutations_to_keep` entries. if (entries.size() <= storage_settings->finished_mutations_to_keep) - return; + return 0; entries.erase(entries.end() - storage_settings->finished_mutations_to_keep, entries.end()); if (entries.empty()) - return; + return 0; Coordination::Requests ops; size_t batch_start_i = 0; @@ -526,6 +637,8 @@ void ReplicatedMergeTreeCleanupThread::clearOldMutations() ops.clear(); } } + + return entries.size(); } } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h index 76b9ee4a575..57de7944970 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -31,6 +32,8 @@ public: void stop() { task->deactivate(); } + void wakeupEarlierIfNeeded(); + private: StorageReplicatedMergeTree & storage; String log_name; @@ -38,11 +41,20 @@ private: BackgroundSchedulePool::TaskHolder task; pcg64 rng{randomSeed()}; - void run(); - void iterate(); + UInt64 sleep_ms; - /// Remove old records from ZooKeeper. - void clearOldLogs(); + std::atomic prev_cleanup_timestamp_ms = 0; + std::atomic is_running = false; + + AtomicStopwatch wakeup_check_timer; + + void run(); + + /// Returns a number that is directly proportional to the number of cleaned up blocks + Float32 iterate(); + + /// Remove old records from ZooKeeper.
Returns the number of removed logs + size_t clearOldLogs(); /// The replica is marked as "lost" if it is inactive and its log pointer /// is far behind and we are not going to keep logs for it. @@ -52,11 +64,11 @@ private: size_t replicas_count, const zkutil::ZooKeeperPtr & zookeeper); using NodeCTimeAndVersionCache = std::map>; - /// Remove old block hashes from ZooKeeper. This is done by the leader replica. - void clearOldBlocks(const String & blocks_dir_name, UInt64 window_seconds, UInt64 window_size, NodeCTimeAndVersionCache & cached_block_stats); + /// Remove old block hashes from ZooKeeper. This is done by the leader replica. Returns the number of removed blocks + size_t clearOldBlocks(const String & blocks_dir_name, UInt64 window_seconds, UInt64 window_size, NodeCTimeAndVersionCache & cached_block_stats); - /// Remove old mutations that are done from ZooKeeper. This is done by the leader replica. - void clearOldMutations(); + /// Remove old mutations that are done from ZooKeeper. This is done by the leader replica. Returns the number of removed mutations + size_t clearOldMutations(); NodeCTimeAndVersionCache cached_block_stats_for_sync_inserts; NodeCTimeAndVersionCache cached_block_stats_for_async_inserts; diff --git a/src/Storages/MergeTree/SimpleMergeSelector.cpp b/src/Storages/MergeTree/SimpleMergeSelector.cpp index af3373fd175..7e7539f71d5 100644 --- a/src/Storages/MergeTree/SimpleMergeSelector.cpp +++ b/src/Storages/MergeTree/SimpleMergeSelector.cpp @@ -28,7 +28,7 @@ struct Estimator { double difference = std::abs(log2(static_cast(sum_size) / size_prev_at_left)); if (difference < settings.heuristic_to_align_parts_max_absolute_difference_in_powers_of_two) - current_score *= std::lerp(settings.heuristic_to_align_parts_max_score_adjustment, 1, + current_score *= interpolateLinear(settings.heuristic_to_align_parts_max_score_adjustment, 1, difference / settings.heuristic_to_align_parts_max_absolute_difference_in_powers_of_two); } @@ -115,8 +115,8 @@ bool allow( // std::cerr << "size_normalized: " << size_normalized << "\n"; /// Calculate boundaries for age - double min_age_to_lower_base = std::lerp(settings.min_age_to_lower_base_at_min_size, settings.min_age_to_lower_base_at_max_size, size_normalized); - double max_age_to_lower_base = std::lerp(settings.max_age_to_lower_base_at_min_size, settings.max_age_to_lower_base_at_max_size, size_normalized); + double min_age_to_lower_base = interpolateLinear(settings.min_age_to_lower_base_at_min_size, settings.min_age_to_lower_base_at_max_size, size_normalized); + double max_age_to_lower_base = interpolateLinear(settings.max_age_to_lower_base_at_min_size, settings.max_age_to_lower_base_at_max_size, size_normalized); // std::cerr << "min_age_to_lower_base: " << min_age_to_lower_base << "\n"; // std::cerr << "max_age_to_lower_base: " << max_age_to_lower_base << "\n"; @@ -137,7 +137,7 @@ bool allow( // std::cerr << "combined_ratio: " << combined_ratio << "\n"; - double lowered_base = std::lerp(settings.base, 2.0, combined_ratio); + double lowered_base = interpolateLinear(settings.base, 2.0, combined_ratio); // std::cerr << "------- lowered_base: " << lowered_base << "\n"; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index d9c8f09ccf1..2b948e1fd60 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -3147,6 +3147,8 @@ bool StorageReplicatedMergeTree::processQueueEntry(ReplicatedMergeTreeQueue::Sel bool 
StorageReplicatedMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assignee) { + cleanup_thread.wakeupEarlierIfNeeded(); + /// If replication queue is stopped exit immediately as we successfully executed the task if (queue.actions_blocker.isCancelled()) return false; @@ -6589,7 +6591,7 @@ bool StorageReplicatedMergeTree::hasLightweightDeletedMask() const return has_lightweight_delete_parts.load(std::memory_order_relaxed); } -void StorageReplicatedMergeTree::clearOldPartsAndRemoveFromZK() +size_t StorageReplicatedMergeTree::clearOldPartsAndRemoveFromZK() { auto table_lock = lockForShare( RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations); @@ -6598,8 +6600,9 @@ void StorageReplicatedMergeTree::clearOldPartsAndRemoveFromZK() /// Now these parts are in Deleting state. If we fail to remove some of them we must roll them back to Outdated state. /// Otherwise they will not be deleted. DataPartsVector parts = grabOldParts(); + size_t total_parts_to_remove = parts.size(); if (parts.empty()) - return; + return total_parts_to_remove; DataPartsVector parts_to_delete_only_from_filesystem; // Only duplicates DataPartsVector parts_to_delete_completely; // All parts except duplicates @@ -6707,6 +6710,8 @@ void StorageReplicatedMergeTree::clearOldPartsAndRemoveFromZK() /// Otherwise nobody will try to remove them again (see grabOldParts). delete_parts_from_fs_and_rollback_in_case_of_error(parts_to_remove_from_filesystem, "old"); } + + return total_parts_to_remove; } diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 29b6a4d6817..01b86dd1425 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -342,8 +342,8 @@ public: private: std::atomic_bool are_restoring_replica {false}; - /// Delete old parts from disk and from ZooKeeper. - void clearOldPartsAndRemoveFromZK(); + /// Delete old parts from disk and from ZooKeeper. 
Returns the number of removed parts + size_t clearOldPartsAndRemoveFromZK(); template friend class ReplicatedMergeTreeSinkImpl; diff --git a/tests/config/config.d/merge_tree.xml b/tests/config/config.d/merge_tree.xml index 43bdb6aa07b..5521e5ba515 100644 --- a/tests/config/config.d/merge_tree.xml +++ b/tests/config/config.d/merge_tree.xml @@ -1,5 +1,7 @@ 8 + 60 + 10 diff --git a/tests/integration/test_broken_detached_part_clean_up/test.py b/tests/integration/test_broken_detached_part_clean_up/test.py index 5b18fa34494..9a70ebe0d48 100644 --- a/tests/integration/test_broken_detached_part_clean_up/test.py +++ b/tests/integration/test_broken_detached_part_clean_up/test.py @@ -141,7 +141,8 @@ def test_remove_broken_detached_part_replicated_merge_tree(started_cluster): merge_tree_enable_clear_old_broken_detached=1, merge_tree_clear_old_broken_detached_parts_ttl_timeout_seconds=5, cleanup_delay_period=1, - cleanup_delay_period_random_add=0; + cleanup_delay_period_random_add=0, + cleanup_thread_preferred_points_per_iteration=0; """ ) diff --git a/tests/integration/test_broken_part_during_merge/test.py b/tests/integration/test_broken_part_during_merge/test.py index f4110844466..26962236869 100644 --- a/tests/integration/test_broken_part_during_merge/test.py +++ b/tests/integration/test_broken_part_during_merge/test.py @@ -25,7 +25,7 @@ def test_merge_and_part_corruption(started_cluster): """ CREATE TABLE replicated_mt(date Date, id UInt32, value Int32) ENGINE = ReplicatedMergeTree('/clickhouse/tables/replicated_mt', '{replica}') ORDER BY id - SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1; + SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0; """.format( replica=node1.name ) diff --git a/tests/integration/test_consistant_parts_after_move_partition/test.py b/tests/integration/test_consistant_parts_after_move_partition/test.py index 63a51472773..91fa884c093 100644 --- a/tests/integration/test_consistant_parts_after_move_partition/test.py +++ b/tests/integration/test_consistant_parts_after_move_partition/test.py @@ -14,11 +14,13 @@ def initialize_database(nodes, shard): CREATE TABLE `{database}`.src (p UInt64, d UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/{database}/tables/test_consistent_shard1{shard}/replicated', '{replica}') ORDER BY d PARTITION BY p - SETTINGS min_replicated_logs_to_keep=3, max_replicated_logs_to_keep=5, cleanup_delay_period=0, cleanup_delay_period_random_add=0; + SETTINGS min_replicated_logs_to_keep=3, max_replicated_logs_to_keep=5, + cleanup_delay_period=0, cleanup_delay_period_random_add=0, cleanup_thread_preferred_points_per_iteration=0; CREATE TABLE `{database}`.dest (p UInt64, d UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/{database}/tables/test_consistent_shard2{shard}/replicated', '{replica}') ORDER BY d PARTITION BY p - SETTINGS min_replicated_logs_to_keep=3, max_replicated_logs_to_keep=5, cleanup_delay_period=0, cleanup_delay_period_random_add=0; + SETTINGS min_replicated_logs_to_keep=3, max_replicated_logs_to_keep=5, + cleanup_delay_period=0, cleanup_delay_period_random_add=0, cleanup_thread_preferred_points_per_iteration=0; """.format( shard=shard, replica=node.name, database=CLICKHOUSE_DATABASE ) diff --git a/tests/integration/test_drop_replica/test.py b/tests/integration/test_drop_replica/test.py index e87edb0a578..0941e664982 100644 --- a/tests/integration/test_drop_replica/test.py +++ b/tests/integration/test_drop_replica/test.py @@ -11,7 +11,8 @@ def fill_nodes(nodes, 
shard): CREATE DATABASE test; CREATE TABLE test.test_table(date Date, id UInt32) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/{shard}/replicated/test_table', '{replica}') ORDER BY id PARTITION BY toYYYYMM(date) SETTINGS min_replicated_logs_to_keep=3, max_replicated_logs_to_keep=5, cleanup_delay_period=0, cleanup_delay_period_random_add=0; + ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/{shard}/replicated/test_table', '{replica}') ORDER BY id PARTITION BY toYYYYMM(date) + SETTINGS min_replicated_logs_to_keep=3, max_replicated_logs_to_keep=5, cleanup_delay_period=0, cleanup_delay_period_random_add=0, cleanup_thread_preferred_points_per_iteration=0; """.format( shard=shard, replica=node.name ) @@ -22,7 +23,8 @@ def fill_nodes(nodes, shard): CREATE DATABASE test1; CREATE TABLE test1.test_table(date Date, id UInt32) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/test1/{shard}/replicated/test_table', '{replica}') ORDER BY id PARTITION BY toYYYYMM(date) SETTINGS min_replicated_logs_to_keep=3, max_replicated_logs_to_keep=5, cleanup_delay_period=0, cleanup_delay_period_random_add=0; + ENGINE = ReplicatedMergeTree('/clickhouse/tables/test1/{shard}/replicated/test_table', '{replica}') ORDER BY id PARTITION BY toYYYYMM(date) + SETTINGS min_replicated_logs_to_keep=3, max_replicated_logs_to_keep=5, cleanup_delay_period=0, cleanup_delay_period_random_add=0, cleanup_thread_preferred_points_per_iteration=0; """.format( shard=shard, replica=node.name ) @@ -33,7 +35,8 @@ def fill_nodes(nodes, shard): CREATE DATABASE test2; CREATE TABLE test2.test_table(date Date, id UInt32) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/test2/{shard}/replicated/test_table', '{replica}') ORDER BY id PARTITION BY toYYYYMM(date) SETTINGS min_replicated_logs_to_keep=3, max_replicated_logs_to_keep=5, cleanup_delay_period=0, cleanup_delay_period_random_add=0; + ENGINE = ReplicatedMergeTree('/clickhouse/tables/test2/{shard}/replicated/test_table', '{replica}') ORDER BY id PARTITION BY toYYYYMM(date) + SETTINGS min_replicated_logs_to_keep=3, max_replicated_logs_to_keep=5, cleanup_delay_period=0, cleanup_delay_period_random_add=0, cleanup_thread_preferred_points_per_iteration=0; """.format( shard=shard, replica=node.name ) @@ -44,7 +47,8 @@ def fill_nodes(nodes, shard): CREATE DATABASE test3; CREATE TABLE test3.test_table(date Date, id UInt32) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/test3/{shard}/replicated/test_table', '{replica}') ORDER BY id PARTITION BY toYYYYMM(date) SETTINGS min_replicated_logs_to_keep=3, max_replicated_logs_to_keep=5, cleanup_delay_period=0, cleanup_delay_period_random_add=0; + ENGINE = ReplicatedMergeTree('/clickhouse/tables/test3/{shard}/replicated/test_table', '{replica}') ORDER BY id PARTITION BY toYYYYMM(date) + SETTINGS min_replicated_logs_to_keep=3, max_replicated_logs_to_keep=5, cleanup_delay_period=0, cleanup_delay_period_random_add=0, cleanup_thread_preferred_points_per_iteration=0; """.format( shard=shard, replica=node.name ) @@ -55,7 +59,8 @@ def fill_nodes(nodes, shard): CREATE DATABASE test4; CREATE TABLE test4.test_table(date Date, id UInt32) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/test4/{shard}/replicated/test_table', '{replica}') ORDER BY id PARTITION BY toYYYYMM(date) SETTINGS min_replicated_logs_to_keep=3, max_replicated_logs_to_keep=5, cleanup_delay_period=0, cleanup_delay_period_random_add=0; + ENGINE = ReplicatedMergeTree('/clickhouse/tables/test4/{shard}/replicated/test_table', '{replica}') ORDER BY id PARTITION BY toYYYYMM(date) + 
SETTINGS min_replicated_logs_to_keep=3, max_replicated_logs_to_keep=5, cleanup_delay_period=0, cleanup_delay_period_random_add=0, cleanup_thread_preferred_points_per_iteration=0; """.format( shard=shard, replica=node.name ) diff --git a/tests/integration/test_jbod_balancer/test.py b/tests/integration/test_jbod_balancer/test.py index df34a075d5a..4797eec5381 100644 --- a/tests/integration/test_jbod_balancer/test.py +++ b/tests/integration/test_jbod_balancer/test.py @@ -134,6 +134,7 @@ def test_replicated_balanced_merge_fetch(start_cluster): old_parts_lifetime = 1, cleanup_delay_period = 1, cleanup_delay_period_random_add = 2, + cleanup_thread_preferred_points_per_iteration=0, min_bytes_to_rebalance_partition_over_jbod = 1024, max_bytes_to_merge_at_max_space_in_pool = 4096 """.format( diff --git a/tests/integration/test_jbod_ha/test.py b/tests/integration/test_jbod_ha/test.py index 5cbb5989ff3..033d751912a 100644 --- a/tests/integration/test_jbod_ha/test.py +++ b/tests/integration/test_jbod_ha/test.py @@ -58,6 +58,7 @@ def test_jbod_ha(start_cluster): old_parts_lifetime = 1, cleanup_delay_period = 1, cleanup_delay_period_random_add = 2, + cleanup_thread_preferred_points_per_iteration=0, max_bytes_to_merge_at_max_space_in_pool = 4096 """.format( i diff --git a/tests/integration/test_lost_part/test.py b/tests/integration/test_lost_part/test.py index dd4c2105d55..44cd19fd1fb 100644 --- a/tests/integration/test_lost_part/test.py +++ b/tests/integration/test_lost_part/test.py @@ -42,7 +42,7 @@ def test_lost_part_same_replica(start_cluster): for node in [node1, node2]: node.query( f"CREATE TABLE mt0 (id UInt64, date Date) ENGINE ReplicatedMergeTree('/clickhouse/tables/t', '{node.name}') ORDER BY tuple() PARTITION BY date " - "SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1" + "SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0" ) node1.query("SYSTEM STOP MERGES mt0") @@ -109,7 +109,7 @@ def test_lost_part_other_replica(start_cluster): for node in [node1, node2]: node.query( f"CREATE TABLE mt1 (id UInt64) ENGINE ReplicatedMergeTree('/clickhouse/tables/t1', '{node.name}') ORDER BY tuple() " - "SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1" + "SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0" ) node1.query("SYSTEM STOP MERGES mt1") @@ -178,7 +178,7 @@ def test_lost_part_mutation(start_cluster): for node in [node1, node2]: node.query( f"CREATE TABLE mt2 (id UInt64) ENGINE ReplicatedMergeTree('/clickhouse/tables/t2', '{node.name}') ORDER BY tuple() " - "SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1" + "SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0" ) node1.query("SYSTEM STOP MERGES mt2") @@ -241,7 +241,7 @@ def test_lost_last_part(start_cluster): for node in [node1, node2]: node.query( f"CREATE TABLE mt3 (id UInt64, p String) ENGINE ReplicatedMergeTree('/clickhouse/tables/t3', '{node.name}') " - "ORDER BY tuple() PARTITION BY p SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1" + "ORDER BY tuple() PARTITION BY p SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0" ) node1.query("SYSTEM STOP MERGES mt3") diff --git a/tests/integration/test_multiple_disks/test.py b/tests/integration/test_multiple_disks/test.py index 0e51df017b2..54e7f6dd8ee 100644 --- 
a/tests/integration/test_multiple_disks/test.py +++ b/tests/integration/test_multiple_disks/test.py @@ -1528,7 +1528,8 @@ def test_simple_replication_and_moves(start_cluster): s1 String ) ENGINE = ReplicatedMergeTree('/clickhouse/replicated_table_for_moves', '{}') ORDER BY tuple() - SETTINGS storage_policy='moving_jbod_with_external', old_parts_lifetime=1, cleanup_delay_period=1, cleanup_delay_period_random_add=2 + SETTINGS storage_policy='moving_jbod_with_external', old_parts_lifetime=1, + cleanup_delay_period=1, cleanup_delay_period_random_add=2, cleanup_thread_preferred_points_per_iteration=0 """.format( i + 1 ) @@ -1609,7 +1610,8 @@ def test_download_appropriate_disk(start_cluster): s1 String ) ENGINE = ReplicatedMergeTree('/clickhouse/replicated_table_for_download', '{}') ORDER BY tuple() - SETTINGS storage_policy='moving_jbod_with_external', old_parts_lifetime=1, cleanup_delay_period=1, cleanup_delay_period_random_add=2 + SETTINGS storage_policy='moving_jbod_with_external', old_parts_lifetime=1, + cleanup_delay_period=1, cleanup_delay_period_random_add=2, cleanup_thread_preferred_points_per_iteration=0 """.format( i + 1 ) diff --git a/tests/integration/test_old_parts_finally_removed/test.py b/tests/integration/test_old_parts_finally_removed/test.py index 5347d433419..cbd701588d5 100644 --- a/tests/integration/test_old_parts_finally_removed/test.py +++ b/tests/integration/test_old_parts_finally_removed/test.py @@ -27,7 +27,8 @@ def started_cluster(): def test_part_finally_removed(started_cluster): node1.query( - "CREATE TABLE drop_outdated_part (Key UInt64) ENGINE = ReplicatedMergeTree('/table/d', '1') ORDER BY tuple() SETTINGS old_parts_lifetime=10, cleanup_delay_period=10, cleanup_delay_period_random_add=1" + "CREATE TABLE drop_outdated_part (Key UInt64) ENGINE = ReplicatedMergeTree('/table/d', '1') ORDER BY tuple() " + "SETTINGS old_parts_lifetime=10, cleanup_delay_period=10, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0" ) node1.query("INSERT INTO drop_outdated_part VALUES (1)") @@ -44,7 +45,7 @@ def test_part_finally_removed(started_cluster): ) node1.query( - "ALTER TABLE drop_outdated_part MODIFY SETTING old_parts_lifetime=1, cleanup_delay_period=1, cleanup_delay_period_random_add=1" + "ALTER TABLE drop_outdated_part MODIFY SETTING old_parts_lifetime=1, cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0" ) for i in range(60): diff --git a/tests/integration/test_parts_delete_zookeeper/test.py b/tests/integration/test_parts_delete_zookeeper/test.py index a78aefa4595..9fd07e7b65d 100644 --- a/tests/integration/test_parts_delete_zookeeper/test.py +++ b/tests/integration/test_parts_delete_zookeeper/test.py @@ -21,7 +21,7 @@ def start_cluster(): CREATE DATABASE test; CREATE TABLE test_table(date Date, id UInt32) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/replicated', 'node1') - ORDER BY id PARTITION BY toYYYYMM(date) SETTINGS old_parts_lifetime=4, cleanup_delay_period=1; + ORDER BY id PARTITION BY toYYYYMM(date) SETTINGS old_parts_lifetime=4, cleanup_delay_period=1, cleanup_thread_preferred_points_per_iteration=0; """ ) diff --git a/tests/integration/test_recovery_replica/test.py b/tests/integration/test_recovery_replica/test.py index 0a63da4db22..582e018f5d2 100644 --- a/tests/integration/test_recovery_replica/test.py +++ b/tests/integration/test_recovery_replica/test.py @@ -4,7 +4,7 @@ import pytest from helpers.cluster import ClickHouseCluster from helpers.test_tools import 
assert_eq_with_retry -SETTINGS = "SETTINGS min_replicated_logs_to_keep=3, max_replicated_logs_to_keep=5, cleanup_delay_period=0, cleanup_delay_period_random_add=0" +SETTINGS = "SETTINGS min_replicated_logs_to_keep=3, max_replicated_logs_to_keep=5, cleanup_delay_period=0, cleanup_delay_period_random_add=0, cleanup_thread_preferred_points_per_iteration=0" def fill_nodes(nodes): diff --git a/tests/integration/test_storage_nats/test.py b/tests/integration/test_storage_nats/test.py index 1d7e046864b..4d7e4cf813d 100644 --- a/tests/integration/test_storage_nats/test.py +++ b/tests/integration/test_storage_nats/test.py @@ -931,7 +931,8 @@ def test_nats_overloaded_insert(nats_cluster): CREATE TABLE test.view_overload (key UInt64, value UInt64) ENGINE = MergeTree ORDER BY key - SETTINGS old_parts_lifetime=5, cleanup_delay_period=2, cleanup_delay_period_random_add=3; + SETTINGS old_parts_lifetime=5, cleanup_delay_period=2, cleanup_delay_period_random_add=3, + cleanup_thread_preferred_points_per_iteration=0; CREATE MATERIALIZED VIEW test.consumer_overload TO test.view_overload AS SELECT * FROM test.nats_consume; """ diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 4e1e28373e3..b4dcf86e0ba 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -642,7 +642,8 @@ def test_rabbitmq_sharding_between_queues_publish(rabbitmq_cluster): CREATE TABLE test.view (key UInt64, value UInt64, channel_id String) ENGINE = MergeTree ORDER BY key - SETTINGS old_parts_lifetime=5, cleanup_delay_period=2, cleanup_delay_period_random_add=3; + SETTINGS old_parts_lifetime=5, cleanup_delay_period=2, cleanup_delay_period_random_add=3, + cleanup_thread_preferred_points_per_iteration=0; CREATE MATERIALIZED VIEW test.consumer TO test.view AS SELECT *, _channel_id AS channel_id FROM test.rabbitmq; """ @@ -1116,7 +1117,8 @@ def test_rabbitmq_direct_exchange(rabbitmq_cluster): CREATE TABLE test.destination(key UInt64, value UInt64) ENGINE = MergeTree() ORDER BY key - SETTINGS old_parts_lifetime=5, cleanup_delay_period=2, cleanup_delay_period_random_add=3; + SETTINGS old_parts_lifetime=5, cleanup_delay_period=2, cleanup_delay_period_random_add=3, + cleanup_thread_preferred_points_per_iteration=0; """ ) diff --git a/tests/integration/test_system_metrics/test.py b/tests/integration/test_system_metrics/test.py index 9ebe198a109..338622b824e 100644 --- a/tests/integration/test_system_metrics/test.py +++ b/tests/integration/test_system_metrics/test.py @@ -13,7 +13,9 @@ def fill_nodes(nodes, shard): CREATE DATABASE test; CREATE TABLE test.test_table(date Date, id UInt32) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/test{shard}/replicated', '{replica}') ORDER BY id PARTITION BY toYYYYMM(date) SETTINGS min_replicated_logs_to_keep=3, max_replicated_logs_to_keep=5, cleanup_delay_period=0, cleanup_delay_period_random_add=0; + ENGINE = ReplicatedMergeTree('/clickhouse/tables/test{shard}/replicated', '{replica}') ORDER BY id PARTITION BY toYYYYMM(date) + SETTINGS min_replicated_logs_to_keep=3, max_replicated_logs_to_keep=5, + cleanup_delay_period=0, cleanup_delay_period_random_add=0, cleanup_thread_preferred_points_per_iteration=0; """.format( shard=shard, replica=node.name ) diff --git a/tests/integration/test_ttl_replicated/test.py b/tests/integration/test_ttl_replicated/test.py index a3e7d6e4b8b..4ea4472b812 100644 --- a/tests/integration/test_ttl_replicated/test.py +++ 
b/tests/integration/test_ttl_replicated/test.py @@ -422,7 +422,8 @@ def test_ttl_empty_parts(started_cluster): ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/test_ttl_empty_parts', '{replica}') ORDER BY id SETTINGS max_bytes_to_merge_at_min_space_in_pool = 1, max_bytes_to_merge_at_max_space_in_pool = 1, - cleanup_delay_period = 1, cleanup_delay_period_random_add = 0, old_parts_lifetime = 1 + cleanup_delay_period = 1, cleanup_delay_period_random_add = 0, + cleanup_thread_preferred_points_per_iteration=0, old_parts_lifetime = 1 """.format( replica=node.name diff --git a/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh b/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh index a0a3416e406..399511db701 100755 --- a/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh +++ b/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh @@ -36,8 +36,12 @@ $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS dst_r1;" $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS dst_r2;" $CLICKHOUSE_CLIENT --query="CREATE TABLE src (p UInt64, k String, d UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY k;" -$CLICKHOUSE_CLIENT --query="CREATE TABLE dst_r1 (p UInt64, k String, d UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/dst_1', '1') PARTITION BY p ORDER BY k SETTINGS old_parts_lifetime=1, cleanup_delay_period=1, cleanup_delay_period_random_add=0;" -$CLICKHOUSE_CLIENT --query="CREATE TABLE dst_r2 (p UInt64, k String, d UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/dst_1', '2') PARTITION BY p ORDER BY k SETTINGS old_parts_lifetime=1, cleanup_delay_period=1, cleanup_delay_period_random_add=0;" +$CLICKHOUSE_CLIENT --query="CREATE TABLE dst_r1 (p UInt64, k String, d UInt64) +ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/dst_1', '1') PARTITION BY p ORDER BY k +SETTINGS old_parts_lifetime=1, cleanup_delay_period=1, cleanup_delay_period_random_add=0, cleanup_thread_preferred_points_per_iteration=0;" +$CLICKHOUSE_CLIENT --query="CREATE TABLE dst_r2 (p UInt64, k String, d UInt64) +ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/dst_1', '2') PARTITION BY p ORDER BY k +SETTINGS old_parts_lifetime=1, cleanup_delay_period=1, cleanup_delay_period_random_add=0, cleanup_thread_preferred_points_per_iteration=0;" $CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (0, '0', 1);" $CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (1, '0', 1);" diff --git a/tests/queries/0_stateless/00652_replicated_mutations_zookeeper.sh b/tests/queries/0_stateless/00652_replicated_mutations_zookeeper.sh index 1f5bcbdc0d0..d8b1bdec328 100755 --- a/tests/queries/0_stateless/00652_replicated_mutations_zookeeper.sh +++ b/tests/queries/0_stateless/00652_replicated_mutations_zookeeper.sh @@ -56,11 +56,13 @@ ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS mutations_cleaner_r2 SYNC" ${CLICKHOUSE_CLIENT} --query="CREATE TABLE mutations_cleaner_r1(x UInt32) ENGINE ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/mutations_cleaner', 'r1') ORDER BY x SETTINGS \ finished_mutations_to_keep = 2, cleanup_delay_period = 1, - cleanup_delay_period_random_add = 0" + cleanup_delay_period_random_add = 0, + cleanup_thread_preferred_points_per_iteration=0" ${CLICKHOUSE_CLIENT} --query="CREATE TABLE mutations_cleaner_r2(x UInt32) ENGINE 
ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/mutations_cleaner', 'r2') ORDER BY x SETTINGS \ finished_mutations_to_keep = 2, cleanup_delay_period = 1, - cleanup_delay_period_random_add = 0" + cleanup_delay_period_random_add = 0, + cleanup_thread_preferred_points_per_iteration=0" # Insert some data ${CLICKHOUSE_CLIENT} --insert_keeper_fault_injection_probability=0 --query="INSERT INTO mutations_cleaner_r1(x) VALUES (1), (2), (3), (4), (5)" diff --git a/tests/queries/0_stateless/00814_replicated_minimalistic_part_header_zookeeper.sh b/tests/queries/0_stateless/00814_replicated_minimalistic_part_header_zookeeper.sh index 5fc3fa460e6..bab2304cec2 100755 --- a/tests/queries/0_stateless/00814_replicated_minimalistic_part_header_zookeeper.sh +++ b/tests/queries/0_stateless/00814_replicated_minimalistic_part_header_zookeeper.sh @@ -20,13 +20,15 @@ CREATE TABLE part_header_r1(x UInt32, y UInt32) SETTINGS use_minimalistic_part_header_in_zookeeper = 0, old_parts_lifetime = 1, cleanup_delay_period = 0, - cleanup_delay_period_random_add = 0; + cleanup_delay_period_random_add = 0, + cleanup_thread_preferred_points_per_iteration=0; CREATE TABLE part_header_r2(x UInt32, y UInt32) ENGINE ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test_00814/part_header/{shard}', '2{replica}') ORDER BY x SETTINGS use_minimalistic_part_header_in_zookeeper = 1, old_parts_lifetime = 1, cleanup_delay_period = 0, - cleanup_delay_period_random_add = 0; + cleanup_delay_period_random_add = 0, + cleanup_thread_preferred_points_per_iteration=0; SELECT '*** Test fetches ***'; INSERT INTO part_header_r1 VALUES (1, 1); diff --git a/tests/queries/0_stateless/00953_zookeeper_suetin_deduplication_bug.sh b/tests/queries/0_stateless/00953_zookeeper_suetin_deduplication_bug.sh index c713c7c4926..ad0146b9d99 100755 --- a/tests/queries/0_stateless/00953_zookeeper_suetin_deduplication_bug.sh +++ b/tests/queries/0_stateless/00953_zookeeper_suetin_deduplication_bug.sh @@ -22,7 +22,7 @@ CREATE TABLE elog ( ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/elog/{shard}', '{replica}') PARTITION BY date ORDER BY (engine_id) -SETTINGS replicated_deduplication_window = 2, cleanup_delay_period=4, cleanup_delay_period_random_add=0;" +SETTINGS replicated_deduplication_window = 2, cleanup_delay_period=4, cleanup_delay_period_random_add=0, cleanup_thread_preferred_points_per_iteration=0;" $CLICKHOUSE_CLIENT --query="INSERT INTO elog VALUES (toDate('2018-10-01'), 1, 'hello')" $CLICKHOUSE_CLIENT --query="INSERT INTO elog VALUES (toDate('2018-10-01'), 2, 'hello')" diff --git a/tests/queries/0_stateless/00988_parallel_parts_removal.sql b/tests/queries/0_stateless/00988_parallel_parts_removal.sql index bff9bbe6d8d..5bd31ba1baa 100644 --- a/tests/queries/0_stateless/00988_parallel_parts_removal.sql +++ b/tests/queries/0_stateless/00988_parallel_parts_removal.sql @@ -1,6 +1,8 @@ DROP TABLE IF EXISTS mt; -CREATE TABLE mt (x UInt64) ENGINE = MergeTree ORDER BY x SETTINGS max_part_removal_threads = 16, cleanup_delay_period = 1, cleanup_delay_period_random_add = 0, old_parts_lifetime = 1, parts_to_delay_insert = 100000, parts_to_throw_insert = 100000; +CREATE TABLE mt (x UInt64) ENGINE = MergeTree ORDER BY x + SETTINGS max_part_removal_threads = 16, cleanup_delay_period = 1, cleanup_delay_period_random_add = 0, + cleanup_thread_preferred_points_per_iteration=0, old_parts_lifetime = 1, parts_to_delay_insert = 100000, parts_to_throw_insert = 100000; SYSTEM STOP MERGES mt; diff 
--git a/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.sh b/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.sh index 5b1c50262bf..e0b7ab29924 100755 --- a/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.sh +++ b/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.sh @@ -13,8 +13,14 @@ $CLICKHOUSE_CLIENT -n -q " DROP TABLE IF EXISTS alter_table0; DROP TABLE IF EXISTS alter_table1; - CREATE TABLE alter_table0 (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r1') ORDER BY a PARTITION BY b % 10 SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 1, cleanup_delay_period_random_add = 0, replicated_max_mutations_in_one_entry = $(($RANDOM / 50 + 100)); - CREATE TABLE alter_table1 (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r2') ORDER BY a PARTITION BY b % 10 SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 1, cleanup_delay_period_random_add = 0, replicated_max_mutations_in_one_entry = $(($RANDOM / 50 + 200)); + CREATE TABLE alter_table0 (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r1') ORDER BY a PARTITION BY b % 10 + SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 1, cleanup_delay_period_random_add = 0, + cleanup_thread_preferred_points_per_iteration=0, replicated_max_mutations_in_one_entry = $(($RANDOM / 50 + 100)); + CREATE TABLE alter_table1 (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r2') ORDER BY a PARTITION BY b % 10 + SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 1, cleanup_delay_period_random_add = 0, + cleanup_thread_preferred_points_per_iteration=0, replicated_max_mutations_in_one_entry = $(($RANDOM / 50 + 200)); " function thread1() diff --git a/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh b/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh index f4f38ad9c83..811681794a5 100755 --- a/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh +++ b/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh @@ -58,7 +58,8 @@ function thread6() $CLICKHOUSE_CLIENT -n -q "DROP TABLE IF EXISTS alter_table_$REPLICA; CREATE TABLE alter_table_$REPLICA (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r_$REPLICA') ORDER BY a PARTITION BY b % 10 - SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 0, cleanup_delay_period_random_add = 0, replicated_max_mutations_in_one_entry = $(($RANDOM / 50));"; + SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 0, cleanup_delay_period_random_add = 0, + cleanup_thread_preferred_points_per_iteration=0, replicated_max_mutations_in_one_entry = $(($RANDOM / 50));"; sleep 0.$RANDOM; done } diff --git a/tests/queries/0_stateless/01034_move_partition_from_table_zookeeper.sh 
b/tests/queries/0_stateless/01034_move_partition_from_table_zookeeper.sh index 5e9e69d999d..e0a84323dbd 100755 --- a/tests/queries/0_stateless/01034_move_partition_from_table_zookeeper.sh +++ b/tests/queries/0_stateless/01034_move_partition_from_table_zookeeper.sh @@ -28,7 +28,8 @@ $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS src;" $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS dst;" $CLICKHOUSE_CLIENT --query="CREATE TABLE src (p UInt64, k String, d UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/src1', '1') PARTITION BY p ORDER BY k;" -$CLICKHOUSE_CLIENT --query="CREATE TABLE dst (p UInt64, k String, d UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/dst1', '1') PARTITION BY p ORDER BY k SETTINGS old_parts_lifetime=1, cleanup_delay_period=1, cleanup_delay_period_random_add=0;" +$CLICKHOUSE_CLIENT --query="CREATE TABLE dst (p UInt64, k String, d UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/dst1', '1') PARTITION BY p ORDER BY k +SETTINGS old_parts_lifetime=1, cleanup_delay_period=1, cleanup_delay_period_random_add=0, cleanup_thread_preferred_points_per_iteration=0;" $CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (0, '0', 1);" $CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (1, '0', 1);" @@ -58,7 +59,8 @@ $CLICKHOUSE_CLIENT --query="DROP TABLE dst;" $CLICKHOUSE_CLIENT --query="SELECT 'MOVE incompatible schema missing column';" $CLICKHOUSE_CLIENT --query="CREATE TABLE src (p UInt64, k String, d UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/src2', '1') PARTITION BY p ORDER BY (d, p);" -$CLICKHOUSE_CLIENT --query="CREATE TABLE dst (p UInt64, d UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/dst2', '1') PARTITION BY p ORDER BY (d, p) SETTINGS old_parts_lifetime=1, cleanup_delay_period=1, cleanup_delay_period_random_add=0;" +$CLICKHOUSE_CLIENT --query="CREATE TABLE dst (p UInt64, d UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/dst2', '1') PARTITION BY p ORDER BY (d, p) +SETTINGS old_parts_lifetime=1, cleanup_delay_period=1, cleanup_delay_period_random_add=0, cleanup_thread_preferred_points_per_iteration=0;" $CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (0, '0', 1);" $CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (1, '0', 1);" diff --git a/tests/queries/0_stateless/01035_concurrent_move_partition_from_table_zookeeper.sh b/tests/queries/0_stateless/01035_concurrent_move_partition_from_table_zookeeper.sh index 8ef03be02b6..06a460f3600 100755 --- a/tests/queries/0_stateless/01035_concurrent_move_partition_from_table_zookeeper.sh +++ b/tests/queries/0_stateless/01035_concurrent_move_partition_from_table_zookeeper.sh @@ -11,7 +11,8 @@ $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS src;" $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS dst;" $CLICKHOUSE_CLIENT --query="CREATE TABLE src (p UInt64, k String) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/src', '1') PARTITION BY p ORDER BY k;" -$CLICKHOUSE_CLIENT --query="CREATE TABLE dst (p UInt64, k String) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/dst', '1') PARTITION BY p ORDER BY k SETTINGS old_parts_lifetime=1, cleanup_delay_period=1, cleanup_delay_period_random_add=0;" +$CLICKHOUSE_CLIENT --query="CREATE TABLE dst (p UInt64, k String) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/dst', '1') PARTITION BY p ORDER BY k 
+SETTINGS old_parts_lifetime=1, cleanup_delay_period=1, cleanup_delay_period_random_add=0, cleanup_thread_preferred_points_per_iteration=0;" function thread1() { diff --git a/tests/queries/0_stateless/01076_parallel_alter_replicated_zookeeper.sh b/tests/queries/0_stateless/01076_parallel_alter_replicated_zookeeper.sh index 7f53bf2a627..5f69427c0cd 100755 --- a/tests/queries/0_stateless/01076_parallel_alter_replicated_zookeeper.sh +++ b/tests/queries/0_stateless/01076_parallel_alter_replicated_zookeeper.sh @@ -31,7 +31,8 @@ for i in $(seq $REPLICAS); do max_replicated_merges_in_queue = 1000, temporary_directories_lifetime = 10, cleanup_delay_period = 3, - cleanup_delay_period_random_add = 0" + cleanup_delay_period_random_add = 0, + cleanup_thread_preferred_points_per_iteration=0" done $CLICKHOUSE_CLIENT --query "INSERT INTO concurrent_mutate_mt_1 SELECT number, number + 10, toString(number) from numbers(10)" diff --git a/tests/queries/0_stateless/01079_parallel_alter_detach_table_zookeeper.sh b/tests/queries/0_stateless/01079_parallel_alter_detach_table_zookeeper.sh index aec27792603..e508b77a0c2 100755 --- a/tests/queries/0_stateless/01079_parallel_alter_detach_table_zookeeper.sh +++ b/tests/queries/0_stateless/01079_parallel_alter_detach_table_zookeeper.sh @@ -12,7 +12,10 @@ for i in $(seq $REPLICAS); do done for i in $(seq $REPLICAS); do - $CLICKHOUSE_CLIENT --query "CREATE TABLE concurrent_alter_detach_$i (key UInt64, value1 UInt8, value2 UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/concurrent_alter_detach', '$i') ORDER BY key SETTINGS max_replicated_mutations_in_queue=1000, number_of_free_entries_in_pool_to_execute_mutation=0,max_replicated_merges_in_queue=1000,temporary_directories_lifetime=10,cleanup_delay_period=3,cleanup_delay_period_random_add=0" + $CLICKHOUSE_CLIENT --query "CREATE TABLE concurrent_alter_detach_$i (key UInt64, value1 UInt8, value2 UInt8) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/concurrent_alter_detach', '$i') ORDER BY key + SETTINGS max_replicated_mutations_in_queue=1000, number_of_free_entries_in_pool_to_execute_mutation=0,max_replicated_merges_in_queue=1000, + temporary_directories_lifetime=10,cleanup_delay_period=3,cleanup_delay_period_random_add=0,cleanup_thread_preferred_points_per_iteration=0" done $CLICKHOUSE_CLIENT --query "INSERT INTO concurrent_alter_detach_1 SELECT number, number + 10, number from numbers(10)" diff --git a/tests/queries/0_stateless/01103_optimize_drop_race_zookeeper.sh b/tests/queries/0_stateless/01103_optimize_drop_race_zookeeper.sh index 95f8dfc0377..3461283b5ea 100755 --- a/tests/queries/0_stateless/01103_optimize_drop_race_zookeeper.sh +++ b/tests/queries/0_stateless/01103_optimize_drop_race_zookeeper.sh @@ -27,7 +27,9 @@ function thread3() { while true; do $CLICKHOUSE_CLIENT -n -q "DROP TABLE IF EXISTS concurrent_optimize_table; - CREATE TABLE concurrent_optimize_table (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/concurrent_optimize_table', '1') ORDER BY a PARTITION BY b % 10 SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 0, cleanup_delay_period_random_add = 0;"; + CREATE TABLE concurrent_optimize_table (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) + ENGINE = 
ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/concurrent_optimize_table', '1') ORDER BY a PARTITION BY b % 10 + SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 0, cleanup_delay_period_random_add = 0, cleanup_thread_preferred_points_per_iteration=0;"; sleep 0.$RANDOM; sleep 0.$RANDOM; sleep 0.$RANDOM; diff --git a/tests/queries/0_stateless/01158_zookeeper_log_long.sql b/tests/queries/0_stateless/01158_zookeeper_log_long.sql index 45771494af6..9b5ae7ad7c6 100644 --- a/tests/queries/0_stateless/01158_zookeeper_log_long.sql +++ b/tests/queries/0_stateless/01158_zookeeper_log_long.sql @@ -6,7 +6,7 @@ SET insert_keeper_fault_injection_probability=0; -- disable fault injection; par drop table if exists rmt sync; -- cleanup code will perform extra Exists -- (so the .reference will not match) -create table rmt (n int) engine=ReplicatedMergeTree('/test/01158/{database}/rmt', '1') order by n settings cleanup_delay_period=86400, replicated_can_become_leader=0; +create table rmt (n int) engine=ReplicatedMergeTree('/test/01158/{database}/rmt', '1') order by n settings cleanup_delay_period=86400, max_cleanup_delay_period=86400, replicated_can_become_leader=0; system sync replica rmt; insert into rmt values (1); insert into rmt values (1); diff --git a/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh b/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh index 411705e0469..2d761df998e 100755 --- a/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh +++ b/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh @@ -13,8 +13,10 @@ SCALE=5000 $CLICKHOUSE_CLIENT -n --query " DROP TABLE IF EXISTS r1; DROP TABLE IF EXISTS r2; - CREATE TABLE r1 (x UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/{shard}', '1{replica}') ORDER BY x SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 0, cleanup_delay_period_random_add = 1, parts_to_throw_insert = 100000, max_replicated_logs_to_keep = 10; - CREATE TABLE r2 (x UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/{shard}', '2{replica}') ORDER BY x SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 0, cleanup_delay_period_random_add = 1, parts_to_throw_insert = 100000, max_replicated_logs_to_keep = 10; + CREATE TABLE r1 (x UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/{shard}', '1{replica}') ORDER BY x + SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 0, cleanup_delay_period_random_add = 1, cleanup_thread_preferred_points_per_iteration=0, parts_to_throw_insert = 100000, max_replicated_logs_to_keep = 10; + CREATE TABLE r2 (x UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/{shard}', '2{replica}') ORDER BY x + SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 0, cleanup_delay_period_random_add = 1, cleanup_thread_preferred_points_per_iteration=0, parts_to_throw_insert = 100000, max_replicated_logs_to_keep = 10; DETACH TABLE r2; " diff --git a/tests/queries/0_stateless/01508_race_condition_rename_clear_zookeeper_long.sh b/tests/queries/0_stateless/01508_race_condition_rename_clear_zookeeper_long.sh index 80318ba67fb..c3c87eeaf8b 100755 --- a/tests/queries/0_stateless/01508_race_condition_rename_clear_zookeeper_long.sh +++ b/tests/queries/0_stateless/01508_race_condition_rename_clear_zookeeper_long.sh @@ -8,7 +8,9 @@ CURDIR=$(cd "$(dirname 
"${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS table_for_renames0" $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS table_for_renames50" -$CLICKHOUSE_CLIENT --query "CREATE TABLE table_for_renames0 (value UInt64, data String) ENGINE ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/concurrent_rename', '1') ORDER BY tuple() SETTINGS cleanup_delay_period = 1, cleanup_delay_period_random_add = 0" +$CLICKHOUSE_CLIENT --query "CREATE TABLE table_for_renames0 (value UInt64, data String) +ENGINE ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/concurrent_rename', '1') ORDER BY tuple() +SETTINGS cleanup_delay_period = 1, cleanup_delay_period_random_add = 0, cleanup_thread_preferred_points_per_iteration=0" $CLICKHOUSE_CLIENT --query "INSERT INTO table_for_renames0 SELECT number, toString(number) FROM numbers(1000)" diff --git a/tests/queries/0_stateless/01509_parallel_quorum_and_merge_long.sh b/tests/queries/0_stateless/01509_parallel_quorum_and_merge_long.sh index 445706e35bf..bf88ad0e0b2 100755 --- a/tests/queries/0_stateless/01509_parallel_quorum_and_merge_long.sh +++ b/tests/queries/0_stateless/01509_parallel_quorum_and_merge_long.sh @@ -13,7 +13,8 @@ $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS parallel_q1 SYNC" $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS parallel_q2 SYNC" -$CLICKHOUSE_CLIENT -q "CREATE TABLE parallel_q1 (x UInt64) ENGINE=ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/parallel_q', 'r1') ORDER BY tuple() SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 0, cleanup_delay_period_random_add = 0" +$CLICKHOUSE_CLIENT -q "CREATE TABLE parallel_q1 (x UInt64) ENGINE=ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/parallel_q', 'r1') ORDER BY tuple() +SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 0, cleanup_delay_period_random_add = 0, cleanup_thread_preferred_points_per_iteration=0" $CLICKHOUSE_CLIENT -q "CREATE TABLE parallel_q2 (x UInt64) ENGINE=ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/parallel_q', 'r2') ORDER BY tuple() SETTINGS always_fetch_merged_part = 1" diff --git a/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.sh b/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.sh index a3682a3a74b..5e1600a0673 100755 --- a/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.sh +++ b/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.sh @@ -24,7 +24,8 @@ for i in $(seq 1 $NUM_REPLICAS); do ENGINE ReplicatedMergeTree('/test/01921_concurrent_ttl_and_normal_merges/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/ttl_table', '$i') ORDER BY tuple() TTL key + INTERVAL 1 SECOND - SETTINGS merge_with_ttl_timeout=1, max_replicated_merges_with_ttl_in_queue=100, max_number_of_merges_with_ttl_in_pool=100, cleanup_delay_period=1, cleanup_delay_period_random_add=0;" + SETTINGS merge_with_ttl_timeout=1, max_replicated_merges_with_ttl_in_queue=100, max_number_of_merges_with_ttl_in_pool=100, + cleanup_delay_period=1, cleanup_delay_period_random_add=0, cleanup_thread_preferred_points_per_iteration=0;" done function optimize_thread diff --git a/tests/queries/0_stateless/02067_lost_part_s3.sql b/tests/queries/0_stateless/02067_lost_part_s3.sql index 12afdcd4421..7df15ab33c4 100644 --- a/tests/queries/0_stateless/02067_lost_part_s3.sql +++ b/tests/queries/0_stateless/02067_lost_part_s3.sql @@ -4,11 +4,17 @@ DROP TABLE IF EXISTS 
partslost_0; DROP TABLE IF EXISTS partslost_1; DROP TABLE IF EXISTS partslost_2; -CREATE TABLE partslost_0 (x String) ENGINE=ReplicatedMergeTree('/clickhouse/table/{database}_02067_lost/partslost', '0') ORDER BY tuple() SETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, old_parts_lifetime = 1, cleanup_delay_period = 1, cleanup_delay_period_random_add = 1; +CREATE TABLE partslost_0 (x String) ENGINE=ReplicatedMergeTree('/clickhouse/table/{database}_02067_lost/partslost', '0') ORDER BY tuple() + SETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, old_parts_lifetime = 1, + cleanup_delay_period = 1, cleanup_delay_period_random_add = 1, cleanup_thread_preferred_points_per_iteration=0; -CREATE TABLE partslost_1 (x String) ENGINE=ReplicatedMergeTree('/clickhouse/table/{database}_02067_lost/partslost', '1') ORDER BY tuple() SETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, old_parts_lifetime = 1, cleanup_delay_period = 1, cleanup_delay_period_random_add = 1; +CREATE TABLE partslost_1 (x String) ENGINE=ReplicatedMergeTree('/clickhouse/table/{database}_02067_lost/partslost', '1') ORDER BY tuple() + SETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, old_parts_lifetime = 1, + cleanup_delay_period = 1, cleanup_delay_period_random_add = 1, cleanup_thread_preferred_points_per_iteration=0; -CREATE TABLE partslost_2 (x String) ENGINE=ReplicatedMergeTree('/clickhouse/table/{database}_02067_lost/partslost', '2') ORDER BY tuple() SETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, old_parts_lifetime = 1, cleanup_delay_period = 1, cleanup_delay_period_random_add = 1; +CREATE TABLE partslost_2 (x String) ENGINE=ReplicatedMergeTree('/clickhouse/table/{database}_02067_lost/partslost', '2') ORDER BY tuple() + SETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, old_parts_lifetime = 1, + cleanup_delay_period = 1, cleanup_delay_period_random_add = 1, cleanup_thread_preferred_points_per_iteration=0; INSERT INTO partslost_0 SELECT toString(number) AS x from system.numbers LIMIT 10000; diff --git a/tests/queries/0_stateless/02370_lost_part_intersecting_merges.sh b/tests/queries/0_stateless/02370_lost_part_intersecting_merges.sh index bc297cbb963..e34163d0502 100755 --- a/tests/queries/0_stateless/02370_lost_part_intersecting_merges.sh +++ b/tests/queries/0_stateless/02370_lost_part_intersecting_merges.sh @@ -9,7 +9,7 @@ $CLICKHOUSE_CLIENT -q "drop table if exists rmt1 sync;" $CLICKHOUSE_CLIENT -q "drop table if exists rmt2 sync;" $CLICKHOUSE_CLIENT -q "create table rmt1 (n int) engine=ReplicatedMergeTree('/test/02369/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/{database}', '1') order by n - settings cleanup_delay_period=0, cleanup_delay_period_random_add=0, old_parts_lifetime=0" + settings cleanup_delay_period=0, cleanup_delay_period_random_add=0, cleanup_thread_preferred_points_per_iteration=0, old_parts_lifetime=0" $CLICKHOUSE_CLIENT -q "create table rmt2 (n int) engine=ReplicatedMergeTree('/test/02369/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/{database}', '2') order by n" $CLICKHOUSE_CLIENT -q "system stop replicated sends rmt2" diff --git a/tests/queries/0_stateless/02396_system_parts_race_condition_rm.sh b/tests/queries/0_stateless/02396_system_parts_race_condition_rm.sh index 5df1a9ba095..e31a091ff45 100755 --- a/tests/queries/0_stateless/02396_system_parts_race_condition_rm.sh +++ b/tests/queries/0_stateless/02396_system_parts_race_condition_rm.sh @@ -15,8 +15,12 @@ $CLICKHOUSE_CLIENT -n -q " DROP TABLE IF EXISTS alter_table0; DROP TABLE IF 
EXISTS alter_table1; - CREATE TABLE alter_table0 (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r1') ORDER BY a PARTITION BY b % 10 SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 1, cleanup_delay_period_random_add = 0; - CREATE TABLE alter_table1 (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r2') ORDER BY a PARTITION BY b % 10 SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 1, cleanup_delay_period_random_add = 0 + CREATE TABLE alter_table0 (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r1') ORDER BY a PARTITION BY b % 10 + SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 1, cleanup_delay_period_random_add = 0, cleanup_thread_preferred_points_per_iteration=0; + CREATE TABLE alter_table1 (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r2') ORDER BY a PARTITION BY b % 10 + SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 1, cleanup_delay_period_random_add = 0, cleanup_thread_preferred_points_per_iteration=0 " function thread1() diff --git a/tests/queries/0_stateless/02397_system_parts_race_condition_drop_rm.sh b/tests/queries/0_stateless/02397_system_parts_race_condition_drop_rm.sh index 548179b94c9..39e513f6be4 100755 --- a/tests/queries/0_stateless/02397_system_parts_race_condition_drop_rm.sh +++ b/tests/queries/0_stateless/02397_system_parts_race_condition_drop_rm.sh @@ -58,7 +58,9 @@ function thread6() while true; do REPLICA=$(($RANDOM % 10)) $CLICKHOUSE_CLIENT -n -q "DROP TABLE IF EXISTS alter_table_$REPLICA; - CREATE TABLE alter_table_$REPLICA (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r_$REPLICA') ORDER BY a PARTITION BY b % 10 SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 0, cleanup_delay_period_random_add = 0;"; + CREATE TABLE alter_table_$REPLICA (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r_$REPLICA') ORDER BY a PARTITION BY b % 10 + SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 0, cleanup_delay_period_random_add = 0, cleanup_thread_preferred_points_per_iteration=0;"; sleep 0.$RANDOM; done } diff --git a/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql b/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql index 88fb2cdf9b1..bab4bf7881c 100644 --- a/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql +++ b/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql @@ -8,7 +8,7 @@ drop table if exists rmt2; -- Disable compact parts, because we need hardlinks in mutations. 
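Every test hunk in this patch, including the 02432_s3_parallel_parts_cleanup.sql change that continues just below, applies the same recipe: keep cleanup_delay_period (and its random add) small so the cleanup thread is scheduled promptly, and pin the new cleanup_thread_preferred_points_per_iteration to 0, which appears to opt the table out of the cleanup thread's adaptive pacing so tests can still rely on outdated parts disappearing almost immediately. Pulled out of the diffs, the pattern looks like this minimal sketch (table name and ZooKeeper path are illustrative, not taken from any test above):

CREATE TABLE eager_cleanup (n UInt64)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/eager_cleanup', '1')
ORDER BY n
SETTINGS old_parts_lifetime = 1,                        -- outdated parts become removable after about a second
    cleanup_delay_period = 0,                           -- schedule the cleanup thread without the usual delay
    cleanup_delay_period_random_add = 0,                -- and without random jitter
    cleanup_thread_preferred_points_per_iteration = 0;  -- do not let the cleanup thread pace itself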
create table rmt (n int, m int, k int) engine=ReplicatedMergeTree('/test/02432/{database}', '1') order by tuple() settings storage_policy = 's3_cache', allow_remote_fs_zero_copy_replication=1, - max_part_removal_threads=10, concurrent_part_removal_threshold=1, cleanup_delay_period=1, cleanup_delay_period_random_add=1, + max_part_removal_threads=10, concurrent_part_removal_threshold=1, cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0, max_replicated_merges_in_queue=0, max_replicated_mutations_in_queue=0, min_bytes_for_wide_part=0, min_rows_for_wide_part=0; insert into rmt(n, m) values (1, 42); @@ -38,7 +38,7 @@ select count(), sum(n), sum(m) from rmt; -- New table can assign merges/mutations and can remove old parts create table rmt2 (n int, m int, k String) engine=ReplicatedMergeTree('/test/02432/{database}', '2') order by tuple() settings storage_policy = 's3_cache', allow_remote_fs_zero_copy_replication=1, - max_part_removal_threads=10, concurrent_part_removal_threshold=1, cleanup_delay_period=1, cleanup_delay_period_random_add=1, + max_part_removal_threads=10, concurrent_part_removal_threshold=1, cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0, min_bytes_for_wide_part=0, min_rows_for_wide_part=0, max_replicated_merges_in_queue=1, old_parts_lifetime=0; diff --git a/tests/queries/0_stateless/02448_clone_replica_lost_part.sql b/tests/queries/0_stateless/02448_clone_replica_lost_part.sql index 4befe952a14..44303a1c532 100644 --- a/tests/queries/0_stateless/02448_clone_replica_lost_part.sql +++ b/tests/queries/0_stateless/02448_clone_replica_lost_part.sql @@ -5,9 +5,11 @@ SET insert_keeper_fault_injection_probability=0; -- disable fault injection; par drop table if exists rmt1; drop table if exists rmt2; create table rmt1 (n int) engine=ReplicatedMergeTree('/test/02448/{database}/rmt', '1') order by tuple() - settings min_replicated_logs_to_keep=1, max_replicated_logs_to_keep=2, cleanup_delay_period=0, cleanup_delay_period_random_add=1, old_parts_lifetime=0, max_parts_to_merge_at_once=4; + settings min_replicated_logs_to_keep=1, max_replicated_logs_to_keep=2, cleanup_delay_period=0, cleanup_delay_period_random_add=1, + cleanup_thread_preferred_points_per_iteration=0, old_parts_lifetime=0, max_parts_to_merge_at_once=4; create table rmt2 (n int) engine=ReplicatedMergeTree('/test/02448/{database}/rmt', '2') order by tuple() - settings min_replicated_logs_to_keep=1, max_replicated_logs_to_keep=2, cleanup_delay_period=0, cleanup_delay_period_random_add=1, old_parts_lifetime=0, max_parts_to_merge_at_once=4; + settings min_replicated_logs_to_keep=1, max_replicated_logs_to_keep=2, cleanup_delay_period=0, cleanup_delay_period_random_add=1, + cleanup_thread_preferred_points_per_iteration=0, old_parts_lifetime=0, max_parts_to_merge_at_once=4; -- insert part only on one replica system stop replicated sends rmt1; diff --git a/tests/queries/0_stateless/02494_zero_copy_and_projection_and_mutation_work_together.sql b/tests/queries/0_stateless/02494_zero_copy_and_projection_and_mutation_work_together.sql index 98427874160..b4504a55643 100644 --- a/tests/queries/0_stateless/02494_zero_copy_and_projection_and_mutation_work_together.sql +++ b/tests/queries/0_stateless/02494_zero_copy_and_projection_and_mutation_work_together.sql @@ -24,7 +24,8 @@ CREATE TABLE wikistat1 ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/02494_zero_copy_and_projection', '1') ORDER BY (path, time) 
-SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 0, cleanup_delay_period_random_add = 0, allow_remote_fs_zero_copy_replication=1, min_bytes_for_wide_part=0;
+SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 0, cleanup_delay_period_random_add = 0,
+    cleanup_thread_preferred_points_per_iteration=0, allow_remote_fs_zero_copy_replication=1, min_bytes_for_wide_part=0;
 CREATE TABLE wikistat2
 (
@@ -49,7 +50,8 @@ CREATE TABLE wikistat2
 )
 ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/02494_zero_copy_and_projection', '2')
 ORDER BY (path, time)
-SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 0, cleanup_delay_period_random_add = 0, allow_remote_fs_zero_copy_replication=1, min_bytes_for_wide_part=0;
+SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 0, cleanup_delay_period_random_add = 0,
+    cleanup_thread_preferred_points_per_iteration=0, allow_remote_fs_zero_copy_replication=1, min_bytes_for_wide_part=0;
 INSERT INTO wikistat1 SELECT toDateTime('2020-10-01 00:00:00'), 'hello', 'world', '/data/path', 10 from numbers(100);
diff --git a/tests/queries/0_stateless/02515_cleanup_async_insert_block_ids.sh b/tests/queries/0_stateless/02515_cleanup_async_insert_block_ids.sh
index 458a5e95faa..bc6e7eeb214 100755
--- a/tests/queries/0_stateless/02515_cleanup_async_insert_block_ids.sh
+++ b/tests/queries/0_stateless/02515_cleanup_async_insert_block_ids.sh
@@ -13,7 +13,7 @@ $CLICKHOUSE_CLIENT -n --query "
     CREATE TABLE t_async_insert_cleanup (
         KeyID UInt32
     ) Engine = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/t_async_insert_cleanup', '{replica}')
-    ORDER BY (KeyID) SETTINGS cleanup_delay_period = 1, cleanup_delay_period_random_add = 1, replicated_deduplication_window_for_async_inserts=10
+    ORDER BY (KeyID) SETTINGS cleanup_delay_period = 1, cleanup_delay_period_random_add = 1, cleanup_thread_preferred_points_per_iteration=0, replicated_deduplication_window_for_async_inserts=10
 "

for i in {1..100}; do

From dbf08b25fb8f569a33dc3a8b05862af9e61eb72a Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov
Date: Tue, 23 May 2023 01:25:17 +0200
Subject: [PATCH 0168/1997] better scheduling of merge selecting task

---
 src/Storages/MergeTree/MergeTreeSettings.cpp |  24 +++
 src/Storages/MergeTree/MergeTreeSettings.h   |   4 +-
 .../ReplicatedMergeTreeCleanupThread.cpp     |   2 +-
 src/Storages/StorageMergeTree.cpp            |   3 +-
 src/Storages/StorageReplicatedMergeTree.cpp  | 198 +++++++++++-------
 src/Storages/StorageReplicatedMergeTree.h    |   2 +
 .../test.py                                  |   3 +-
 .../test_merge_tree_empty_parts/test.py      |   2 +-
 8 files changed, 157 insertions(+), 81 deletions(-)

diff --git a/src/Storages/MergeTree/MergeTreeSettings.cpp b/src/Storages/MergeTree/MergeTreeSettings.cpp
index 479e50fdebb..6df841059b9 100644
--- a/src/Storages/MergeTree/MergeTreeSettings.cpp
+++ b/src/Storages/MergeTree/MergeTreeSettings.cpp
@@ -175,5 +175,29 @@ void MergeTreeSettings::sanityCheck(size_t background_pool_tasks) const
         min_bytes_to_rebalance_partition_over_jbod,
         max_bytes_to_merge_at_max_space_in_pool / 1024);
     }
+
+    if (max_cleanup_delay_period < cleanup_delay_period)
+    {
+        throw Exception(
+            ErrorCodes::BAD_ARGUMENTS,
+            "The value of max_cleanup_delay_period setting ({}) must be greater than or equal to the value of cleanup_delay_period setting ({})",
+            max_cleanup_delay_period, cleanup_delay_period);
+    }
+
+    if (max_merge_selecting_sleep_ms < merge_selecting_sleep_ms)
+    {
+        throw Exception(
+            ErrorCodes::BAD_ARGUMENTS,
+            "The value of max_merge_selecting_sleep_ms setting ({}) must be greater than or equal to the value of merge_selecting_sleep_ms setting ({})",
+            max_merge_selecting_sleep_ms, merge_selecting_sleep_ms);
+    }
+
+    if (merge_selecting_sleep_slowdown_factor < 1.f)
+    {
+        throw Exception(
+            ErrorCodes::BAD_ARGUMENTS,
+            "The value of merge_selecting_sleep_slowdown_factor setting ({}) cannot be less than 1.0",
+            merge_selecting_sleep_slowdown_factor);
+    }
 }
 }
diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h
index 78d703e795c..56860342038 100644
--- a/src/Storages/MergeTree/MergeTreeSettings.h
+++ b/src/Storages/MergeTree/MergeTreeSettings.h
@@ -57,7 +57,9 @@ struct Settings;
 M(Bool, fsync_part_directory, false, "Do fsync for part directory after all part operations (writes, renames, etc.).", 0) \
 M(UInt64, non_replicated_deduplication_window, 0, "How many last blocks of hashes should be kept on disk (0 - disabled).", 0) \
 M(UInt64, max_parts_to_merge_at_once, 100, "Max amount of parts which can be merged at once (0 - disabled). Doesn't affect OPTIMIZE FINAL query.", 0) \
- M(UInt64, merge_selecting_sleep_ms, 5000, "Sleep time for merge selecting when no part selected, a lower setting will trigger selecting tasks in background_schedule_pool frequently which result in large amount of requests to zookeeper in large-scale clusters", 0) \
+ M(UInt64, merge_selecting_sleep_ms, 5000, "Minimum sleep time for merge selecting, a lower setting will trigger selecting tasks in background_schedule_pool frequently, which results in a large amount of requests to zookeeper in large-scale clusters", 0) \
+ M(UInt64, max_merge_selecting_sleep_ms, 60000, "Maximum sleep time for merge selecting, a lower setting will trigger selecting tasks in background_schedule_pool frequently, which results in a large amount of requests to zookeeper in large-scale clusters", 0) \
+ M(Float, merge_selecting_sleep_slowdown_factor, 1.2f, "The sleep time for merge selecting task is multiplied by this factor when there's nothing to merge and divided when a merge was assigned", 0) \
 M(UInt64, merge_tree_clear_old_temporary_directories_interval_seconds, 60, "The period of executing the clear old temporary directories operation in background.", 0) \
 M(UInt64, merge_tree_clear_old_parts_interval_seconds, 1, "The period of executing the clear old parts operation in background.", 0) \
 M(UInt64, merge_tree_clear_old_broken_detached_parts_ttl_timeout_seconds, 1ULL * 3600 * 24 * 30, "Remove old broken detached parts in the background if they remained intouched for a specified by this setting period of time.", 0) \
diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp
index 35a860ebb42..bcc4dc749fb 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp
+++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp
@@ -108,7 +108,7 @@ void ReplicatedMergeTreeCleanupThread::wakeupEarlierIfNeeded()
         return;
     /// Do not re-check all parts too often (avoid constantly calling getNumberOfOutdatedPartsWithExpiredRemovalTime())
-    if (!wakeup_check_timer.compareAndRestart(storage_settings->cleanup_delay_period / 4))
+    if (!wakeup_check_timer.compareAndRestart(storage_settings->cleanup_delay_period / 4.0))
         return;
     UInt64 prev_run_timestamp_ms = prev_cleanup_timestamp_ms.load(std::memory_order_relaxed);
diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp
index 2c19d3ba122..cb8b78b4e0a 100644
--- a/src/Storages/StorageMergeTree.cpp
+++ b/src/Storages/StorageMergeTree.cpp
@@ -1298,8 +1298,7 
@@ bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assign /// which is equal or more fresh than commands themselves. In extremely rare case it can happen that we will have alter /// in between we took snapshot above and selected commands. That is why we take new snapshot here. auto task = std::make_shared(*this, getInMemoryMetadataPtr(), mutate_entry, shared_lock, common_assignee_trigger); - assignee.scheduleMergeMutateTask(task); - return true; + return assignee.scheduleMergeMutateTask(task); } if (has_mutations) { diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 0698ab7bf38..a6152c22148 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -4,6 +4,7 @@ #include #include +#include #include #include #include @@ -324,6 +325,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( /// Will be activated if we will achieve leader state. merge_selecting_task->deactivate(); + merge_selecting_sleep_ms = getSettings()->merge_selecting_sleep_ms; mutations_finalizing_task = getContext()->getSchedulePool().createTask( getStorageID().getFullTableName() + " (StorageReplicatedMergeTree::mutationsFinalizingTask)", [this] { mutationsFinalizingTask(); }); @@ -414,6 +416,19 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( loadDataParts(skip_sanity_checks); + if (attach) + { + /// Provide better initial value of merge_selecting_sleep_ms on server startup + auto settings = getSettings(); + size_t max_parts_in_partition = getMaxPartsCountAndSizeForPartition().first; + if (settings->parts_to_delay_insert && max_parts_in_partition < settings->parts_to_delay_insert) + { + Float64 ratio = 1.0 - static_cast(max_parts_in_partition) / settings->parts_to_delay_insert; + merge_selecting_sleep_ms = static_cast(interpolateLinear(settings->merge_selecting_sleep_ms, + settings->max_merge_selecting_sleep_ms, ratio)); + } + } + if (!current_zookeeper) { if (!attach) @@ -3237,7 +3252,15 @@ void StorageReplicatedMergeTree::mergeSelectingTask() const bool cleanup = (storage_settings_ptr->clean_deleted_rows != CleanDeletedRows::Never); CreateMergeEntryResult create_result = CreateMergeEntryResult::Other; - try + enum class AttemptStatus + { + EntryCreated, + NeedRetry, + Limited, + CannotSelect, + }; + + auto try_assign_merge = [&]() -> AttemptStatus { /// We must select parts for merge under merge_selecting_mutex because other threads /// (OPTIMIZE queries) can assign new merges. 
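The hunk below moves the selection logic into the try_assign_merge lambda introduced above and replaces the fixed retry delay with multiplicative backoff driven by AttemptStatus. Reduced to its core, the new rescheduling policy is roughly the following free-standing sketch (an illustration, not the actual member function; in the real code EntryCreated additionally reschedules the task immediately):

#include <algorithm>
#include <cstdint>

enum class AttemptStatus { EntryCreated, NeedRetry, Limited, CannotSelect };

// How the outcome of one selection attempt updates the task's sleep time; the result
// stays within the [merge_selecting_sleep_ms, max_merge_selecting_sleep_ms] range
// that sanityCheck() enforces above.
std::uint64_t nextSleepMs(AttemptStatus result, std::uint64_t sleep_ms,
                          std::uint64_t min_ms, std::uint64_t max_ms, double slowdown_factor)
{
    if (result == AttemptStatus::EntryCreated || result == AttemptStatus::NeedRetry)
        sleep_ms = static_cast<std::uint64_t>(sleep_ms / slowdown_factor);  // work was found: poll faster
    else if (result == AttemptStatus::CannotSelect)
        sleep_ms = static_cast<std::uint64_t>(sleep_ms * slowdown_factor);  // nothing to merge: back off
    // AttemptStatus::Limited leaves the delay unchanged.
    return std::clamp(sleep_ms, min_ms, max_ms);
}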
@@ -3259,108 +3282,133 @@ void StorageReplicatedMergeTree::mergeSelectingTask() "Current background tasks memory usage: {}.", formatReadableSizeWithBinarySuffix(background_memory_tracker.getSoftLimit()), formatReadableSizeWithBinarySuffix(background_memory_tracker.get())); + return AttemptStatus::Limited; } - else if (merges_and_mutations_sum >= storage_settings_ptr->max_replicated_merges_in_queue) + + if (merges_and_mutations_sum >= storage_settings_ptr->max_replicated_merges_in_queue) { LOG_TRACE(log, "Number of queued merges ({}) and part mutations ({})" " is greater than max_replicated_merges_in_queue ({}), so won't select new parts to merge or mutate.", merges_and_mutations_queued.merges, merges_and_mutations_queued.mutations, storage_settings_ptr->max_replicated_merges_in_queue); + return AttemptStatus::Limited; } - else + + UInt64 max_source_parts_size_for_merge = merger_mutator.getMaxSourcePartsSizeForMerge( + storage_settings_ptr->max_replicated_merges_in_queue, merges_and_mutations_sum); + + UInt64 max_source_part_size_for_mutation = merger_mutator.getMaxSourcePartSizeForMutation(); + + bool merge_with_ttl_allowed = merges_and_mutations_queued.merges_with_ttl < storage_settings_ptr->max_replicated_merges_with_ttl_in_queue && + getTotalMergesWithTTLInMergeList() < storage_settings_ptr->max_number_of_merges_with_ttl_in_pool; + + auto future_merged_part = std::make_shared(); + if (storage_settings.get()->assign_part_uuids) + future_merged_part->uuid = UUIDHelpers::generateV4(); + + bool can_assign_merge = max_source_parts_size_for_merge > 0; + PartitionIdsHint partitions_to_merge_in; + if (can_assign_merge) { - UInt64 max_source_parts_size_for_merge = merger_mutator.getMaxSourcePartsSizeForMerge( - storage_settings_ptr->max_replicated_merges_in_queue, merges_and_mutations_sum); + auto lightweight_merge_pred = LocalMergePredicate(queue); + partitions_to_merge_in = merger_mutator.getPartitionsThatMayBeMerged( + max_source_parts_size_for_merge, lightweight_merge_pred, merge_with_ttl_allowed, NO_TRANSACTION_PTR); + if (partitions_to_merge_in.empty()) + can_assign_merge = false; + else + merge_pred.emplace(queue.getMergePredicate(zookeeper, partitions_to_merge_in)); + } - UInt64 max_source_part_size_for_mutation = merger_mutator.getMaxSourcePartSizeForMutation(); + if (can_assign_merge && + merger_mutator.selectPartsToMerge(future_merged_part, false, max_source_parts_size_for_merge, *merge_pred, + merge_with_ttl_allowed, NO_TRANSACTION_PTR, nullptr, &partitions_to_merge_in) == SelectPartsDecision::SELECTED) + { + create_result = createLogEntryToMergeParts( + zookeeper, + future_merged_part->parts, + future_merged_part->name, + future_merged_part->uuid, + future_merged_part->part_format, + deduplicate, + deduplicate_by_columns, + cleanup, + nullptr, + merge_pred->getVersion(), + future_merged_part->merge_type); - bool merge_with_ttl_allowed = merges_and_mutations_queued.merges_with_ttl < storage_settings_ptr->max_replicated_merges_with_ttl_in_queue && - getTotalMergesWithTTLInMergeList() < storage_settings_ptr->max_number_of_merges_with_ttl_in_pool; - auto future_merged_part = std::make_shared(); - if (storage_settings.get()->assign_part_uuids) - future_merged_part->uuid = UUIDHelpers::generateV4(); + if (create_result == CreateMergeEntryResult::Ok) + return AttemptStatus::EntryCreated; + if (create_result == CreateMergeEntryResult::LogUpdated) + return AttemptStatus::NeedRetry; + } - bool can_assign_merge = max_source_parts_size_for_merge > 0; - PartitionIdsHint partitions_to_merge_in; - 
if (can_assign_merge) + /// If there are many mutations in queue, it may happen, that we cannot enqueue enough merges to merge all new parts + if (max_source_part_size_for_mutation == 0 || merges_and_mutations_queued.mutations >= storage_settings_ptr->max_replicated_mutations_in_queue) + return AttemptStatus::Limited; + + if (queue.countMutations() > 0) + { + /// We don't need the list of committing blocks to choose a part to mutate + if (!merge_pred) + merge_pred.emplace(queue.getMergePredicate(zookeeper, PartitionIdsHint{})); + + /// Choose a part to mutate. + DataPartsVector data_parts = getDataPartsVectorForInternalUsage(); + for (const auto & part : data_parts) { - auto lightweight_merge_pred = LocalMergePredicate(queue); - partitions_to_merge_in = merger_mutator.getPartitionsThatMayBeMerged( - max_source_parts_size_for_merge, lightweight_merge_pred, merge_with_ttl_allowed, NO_TRANSACTION_PTR); - if (partitions_to_merge_in.empty()) - can_assign_merge = false; - else - merge_pred.emplace(queue.getMergePredicate(zookeeper, partitions_to_merge_in)); - } + if (part->getBytesOnDisk() > max_source_part_size_for_mutation) + continue; - if (can_assign_merge && - merger_mutator.selectPartsToMerge(future_merged_part, false, max_source_parts_size_for_merge, *merge_pred, - merge_with_ttl_allowed, NO_TRANSACTION_PTR, nullptr, &partitions_to_merge_in) == SelectPartsDecision::SELECTED) - { - create_result = createLogEntryToMergeParts( - zookeeper, - future_merged_part->parts, - future_merged_part->name, + std::optional> desired_mutation_version = merge_pred->getDesiredMutationVersion(part); + if (!desired_mutation_version) + continue; + + create_result = createLogEntryToMutatePart( + *part, future_merged_part->uuid, - future_merged_part->part_format, - deduplicate, - deduplicate_by_columns, - cleanup, - nullptr, - merge_pred->getVersion(), - future_merged_part->merge_type); - } - /// If there are many mutations in queue, it may happen, that we cannot enqueue enough merges to merge all new parts - else if (max_source_part_size_for_mutation > 0 && queue.countMutations() > 0 - && merges_and_mutations_queued.mutations < storage_settings_ptr->max_replicated_mutations_in_queue) - { - /// We don't need the list of committing blocks to choose a part to mutate - if (!merge_pred) - merge_pred.emplace(queue.getMergePredicate(zookeeper, PartitionIdsHint{})); + desired_mutation_version->first, + desired_mutation_version->second, + merge_pred->getVersion()); - /// Choose a part to mutate. - DataPartsVector data_parts = getDataPartsVectorForInternalUsage(); - for (const auto & part : data_parts) - { - if (part->getBytesOnDisk() > max_source_part_size_for_mutation) - continue; - - std::optional> desired_mutation_version = merge_pred->getDesiredMutationVersion(part); - if (!desired_mutation_version) - continue; - - create_result = createLogEntryToMutatePart( - *part, - future_merged_part->uuid, - desired_mutation_version->first, - desired_mutation_version->second, - merge_pred->getVersion()); - - if (create_result == CreateMergeEntryResult::Ok || - create_result == CreateMergeEntryResult::LogUpdated) - break; - } + if (create_result == CreateMergeEntryResult::Ok) + return AttemptStatus::EntryCreated; + if (create_result == CreateMergeEntryResult::LogUpdated) + return AttemptStatus::NeedRetry; } } + + return AttemptStatus::CannotSelect; + }; + + AttemptStatus result = AttemptStatus::CannotSelect; + try + { + result = try_assign_merge(); } catch (...) 
{ tryLogCurrentException(log, __PRETTY_FUNCTION__); } - if (!is_leader) - return; - if (create_result != CreateMergeEntryResult::Ok - && create_result != CreateMergeEntryResult::LogUpdated) - { - merge_selecting_task->scheduleAfter(storage_settings_ptr->merge_selecting_sleep_ms); - } + if (result == AttemptStatus::EntryCreated || result == AttemptStatus::NeedRetry) + merge_selecting_sleep_ms = static_cast(merge_selecting_sleep_ms / storage_settings_ptr->merge_selecting_sleep_slowdown_factor); + else if (result == AttemptStatus::CannotSelect) + merge_selecting_sleep_ms = static_cast(merge_selecting_sleep_ms * storage_settings_ptr->merge_selecting_sleep_slowdown_factor); + + if (merge_selecting_sleep_ms < storage_settings_ptr->merge_selecting_sleep_ms) + merge_selecting_sleep_ms = storage_settings_ptr->merge_selecting_sleep_ms; + if (merge_selecting_sleep_ms > storage_settings_ptr->max_merge_selecting_sleep_ms) + merge_selecting_sleep_ms = storage_settings_ptr->max_merge_selecting_sleep_ms; + + if (result == AttemptStatus::EntryCreated) + merge_selecting_task->schedule(); else { - merge_selecting_task->schedule(); + LOG_TRACE(log, "Scheduling next merge selecting task after {}ms", merge_selecting_sleep_ms); + merge_selecting_task->scheduleAfter(merge_selecting_sleep_ms); } } diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 01b86dd1425..5d877e4b7fa 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -456,6 +456,8 @@ private: /// It is acquired for each iteration of the selection of parts to merge or each OPTIMIZE query. std::mutex merge_selecting_mutex; + UInt64 merge_selecting_sleep_ms; + /// A task that marks finished mutations as done. BackgroundSchedulePool::TaskHolder mutations_finalizing_task; diff --git a/tests/integration/test_consistent_parts_after_clone_replica/test.py b/tests/integration/test_consistent_parts_after_clone_replica/test.py index 0c907340090..2771a874d68 100644 --- a/tests/integration/test_consistent_parts_after_clone_replica/test.py +++ b/tests/integration/test_consistent_parts_after_clone_replica/test.py @@ -13,7 +13,8 @@ def fill_nodes(nodes, shard): CREATE TABLE test_table(date Date, id UInt32) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test{shard}/replicated', '{replica}') ORDER BY id PARTITION BY toYYYYMM(date) - SETTINGS min_replicated_logs_to_keep=3, max_replicated_logs_to_keep=5, cleanup_delay_period=0, cleanup_delay_period_random_add=0; + SETTINGS min_replicated_logs_to_keep=3, max_replicated_logs_to_keep=5, cleanup_delay_period=0, + cleanup_delay_period_random_add=0, cleanup_thread_preferred_points_per_iteration=0; """.format( shard=shard, replica=node.name ) diff --git a/tests/integration/test_merge_tree_empty_parts/test.py b/tests/integration/test_merge_tree_empty_parts/test.py index 0f611408a67..212c0577c13 100644 --- a/tests/integration/test_merge_tree_empty_parts/test.py +++ b/tests/integration/test_merge_tree_empty_parts/test.py @@ -27,7 +27,7 @@ def test_empty_parts_alter_delete(started_cluster): "CREATE TABLE empty_parts_delete (d Date, key UInt64, value String) " "ENGINE = ReplicatedMergeTree('/clickhouse/tables/empty_parts_delete', 'r1') " "PARTITION BY toYYYYMM(d) ORDER BY key " - "SETTINGS old_parts_lifetime = 1" + "SETTINGS old_parts_lifetime = 1, cleanup_delay_period=0, cleanup_thread_preferred_points_per_iteration=0" ) node1.query("INSERT INTO empty_parts_delete VALUES (toDate('2020-10-10'), 1, 'a')") From 
c9aa3042b50ae1b691149ec9012c1521b01705ac Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 23 May 2023 02:28:23 +0200 Subject: [PATCH 0169/1997] fix --- .../02427_mutate_and_zero_copy_replication_zookeeper.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02427_mutate_and_zero_copy_replication_zookeeper.sql b/tests/queries/0_stateless/02427_mutate_and_zero_copy_replication_zookeeper.sql index 9b0a52b8dbd..e7e0f2f6c59 100644 --- a/tests/queries/0_stateless/02427_mutate_and_zero_copy_replication_zookeeper.sql +++ b/tests/queries/0_stateless/02427_mutate_and_zero_copy_replication_zookeeper.sql @@ -9,7 +9,7 @@ CREATE TABLE mutate_and_zero_copy_replication1 ) ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_02427_mutate_and_zero_copy_replication/alter', '1') ORDER BY tuple() -SETTINGS old_parts_lifetime=0, cleanup_delay_period=300, cleanup_delay_period_random_add=300, min_bytes_for_wide_part = 0; +SETTINGS old_parts_lifetime=0, cleanup_delay_period=300, max_cleanup_delay_period=300, cleanup_delay_period_random_add=300, min_bytes_for_wide_part = 0; CREATE TABLE mutate_and_zero_copy_replication2 ( From b0b9f2a037918b8f745df952a9491b97de6fdada Mon Sep 17 00:00:00 2001 From: FFFFFFFHHHHHHH <916677625@qq.com> Date: Tue, 23 May 2023 10:39:23 +0800 Subject: [PATCH 0170/1997] fix test --- .../02415_all_new_functions_must_be_documented.reference | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index 79a6ad1fa2d..5ef83a57ecf 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -112,6 +112,7 @@ arrayFirstIndex arrayFirstOrNull arrayFlatten arrayIntersect +arrayJaccardIndex arrayJoin arrayLast arrayLastIndex From 84a97ca04a0f22becab1459bb1e557fe1a6104a8 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 23 May 2023 12:18:41 +0200 Subject: [PATCH 0171/1997] fix --- .../02427_mutate_and_zero_copy_replication_zookeeper.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02427_mutate_and_zero_copy_replication_zookeeper.sql b/tests/queries/0_stateless/02427_mutate_and_zero_copy_replication_zookeeper.sql index e7e0f2f6c59..e3c8583ccf4 100644 --- a/tests/queries/0_stateless/02427_mutate_and_zero_copy_replication_zookeeper.sql +++ b/tests/queries/0_stateless/02427_mutate_and_zero_copy_replication_zookeeper.sql @@ -19,7 +19,7 @@ CREATE TABLE mutate_and_zero_copy_replication2 ) ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_02427_mutate_and_zero_copy_replication/alter', '2') ORDER BY tuple() -SETTINGS old_parts_lifetime=0, cleanup_delay_period=300, cleanup_delay_period_random_add=300; +SETTINGS old_parts_lifetime=0, cleanup_delay_period=300, max_cleanup_delay_period=300, cleanup_delay_period_random_add=300; INSERT INTO mutate_and_zero_copy_replication1 VALUES (1, '1', 1.0); From 616904cd790473ca8075a8175a6334dd837b5bca Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Tue, 23 May 2023 15:50:52 +0000 Subject: [PATCH 0172/1997] Add encryptConfig() --- src/Common/Config/ConfigProcessor.cpp | 34 +++++++++++++++++++++++++++ src/Common/Config/ConfigProcessor.h | 5 ++++ src/Common/Config/ConfigReloader.cpp | 1 + src/Daemon/BaseDaemon.cpp | 1 + 4 files changed, 41 insertions(+) diff --git 
a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index 5bbc8eae0de..76e4ea1ebd1 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -171,6 +171,33 @@ static void mergeAttributes(Element & config_element, Element & with_element) with_element_attributes->release(); } +void ConfigProcessor::encryptRecursive(Poco::XML::Node * config_root) +{ + for (Node * node = config_root->firstChild(); node;) + { + if (node->nodeType() == Node::ELEMENT_NODE) + { + // NamedNodeMapPtr attributes = node->attributes(); + Element & element = dynamic_cast(*node); + if (element.hasAttribute("enc_codec")) + { + LOG_DEBUG(log, "Encrypted node {} value '{}'.", node->nodeName(), element.getNodeValue()); + // for (Node * child_node = node->firstChild(); child_node;) + // { + // LOG_DEBUG(log, " Child node {} value '{}'.", child_node->nodeName(), child_node->getNodeValue()); + // child_node = child_node->nextSibling(); + // } + Node * child_node = node->firstChild(); + child_node->setNodeValue("encrypted_" + child_node->getNodeValue() + "_encrypted"); + } + } + + encryptRecursive(node); + + node = node->nextSibling(); + } +} + void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root, const Node * with_root) { const NodeListPtr with_nodes = with_root->childNodes(); @@ -700,6 +727,13 @@ ConfigProcessor::LoadedConfig ConfigProcessor::loadConfigWithZooKeeperIncludes( return LoadedConfig{configuration, has_zk_includes, !processed_successfully, config_xml, path}; } +void ConfigProcessor::encryptConfig(LoadedConfig & loaded_config) +{ + Node * config_root = getRootNode(loaded_config.preprocessed_xml.get()); + encryptRecursive(config_root); + loaded_config.configuration = new Poco::Util::XMLConfiguration(loaded_config.preprocessed_xml); +} + void ConfigProcessor::savePreprocessedConfig(const LoadedConfig & loaded_config, std::string preprocessed_dir) { try diff --git a/src/Common/Config/ConfigProcessor.h b/src/Common/Config/ConfigProcessor.h index 0ca3e46db88..2f0046bc39c 100644 --- a/src/Common/Config/ConfigProcessor.h +++ b/src/Common/Config/ConfigProcessor.h @@ -92,6 +92,9 @@ public: const zkutil::EventPtr & zk_changed_event, bool fallback_to_preprocessed = false); + /// Encrypt nodes in config with specified encryption attributes + void encryptConfig(LoadedConfig & loaded_config); + /// Save preprocessed config to specified directory. 
/// If preprocessed_dir is empty - calculate from loaded_config.path + /preprocessed_configs/ void savePreprocessedConfig(const LoadedConfig & loaded_config, std::string preprocessed_dir); @@ -124,6 +127,8 @@ private: using NodePtr = Poco::AutoPtr; + void encryptRecursive(Poco::XML::Node * config_root); + void mergeRecursive(XMLDocumentPtr config, Poco::XML::Node * config_root, const Poco::XML::Node * with_root); void merge(XMLDocumentPtr config, XMLDocumentPtr with); diff --git a/src/Common/Config/ConfigReloader.cpp b/src/Common/Config/ConfigReloader.cpp index de7011b67bf..896bd5949d9 100644 --- a/src/Common/Config/ConfigReloader.cpp +++ b/src/Common/Config/ConfigReloader.cpp @@ -130,6 +130,7 @@ void ConfigReloader::reloadIfNewer(bool force, bool throw_on_error, bool fallbac return; } config_processor.savePreprocessedConfig(loaded_config, preprocessed_dir); + config_processor.encryptConfig(loaded_config); /** We should remember last modification time if and only if config was successfully loaded * Otherwise a race condition could occur during config files update: diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index 4780dfed4b2..2634439ee14 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -663,6 +663,7 @@ void BaseDaemon::initialize(Application & self) umask(umask_num); DB::ConfigProcessor(config_path).savePreprocessedConfig(loaded_config, ""); + DB::ConfigProcessor(config_path).encryptConfig(loaded_config); /// Write core dump on crash. { From 6de52e9fced5c71c24a2f40a2b13b9c3d5656b14 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 23 May 2023 18:36:02 +0000 Subject: [PATCH 0173/1997] Fixing some tests. --- src/Interpreters/InterpreterSelectQuery.cpp | 15 ++- src/Interpreters/PreparedSets.cpp | 36 ++++- src/Interpreters/PreparedSets.h | 32 +---- src/Planner/Planner.cpp | 15 ++- src/Processors/QueryPlan/CreatingSetsStep.h | 3 + .../Optimizations/filterPushDown.cpp | 13 ++ .../QueryPlan/ReadFromMergeTree.cpp | 107 +++++++++------ src/Processors/QueryPlan/ReadFromMergeTree.h | 44 +++++- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 127 ++++++------------ .../MergeTree/MergeTreeDataSelectExecutor.h | 2 +- .../MergeTreeIndexConditionBloomFilter.cpp | 10 ++ .../MergeTree/MergeTreeIndexInverted.cpp | 5 + src/Storages/MergeTree/RPNBuilder.cpp | 20 ++- src/Storages/SelectQueryInfo.h | 2 + 14 files changed, 259 insertions(+), 172 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index bd96ba693fe..8d305c07ce9 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -3088,12 +3088,17 @@ void InterpreterSelectQuery::executeExtremes(QueryPlan & query_plan) void InterpreterSelectQuery::executeSubqueriesInSetsAndJoins(QueryPlan & query_plan) { - auto step = std::make_unique( - query_plan.getCurrentDataStream(), - prepared_sets->detachSubqueries(context), - context); + auto subqueries = prepared_sets->detachSubqueries(context); - query_plan.addStep(std::move(step)); + if (!subqueries.empty()) + { + auto step = std::make_unique( + query_plan.getCurrentDataStream(), + std::move(subqueries), + context); + + query_plan.addStep(std::move(step)); + } } diff --git a/src/Interpreters/PreparedSets.cpp b/src/Interpreters/PreparedSets.cpp index 1d7d90432b0..cd6b2a81ba0 100644 --- a/src/Interpreters/PreparedSets.cpp +++ b/src/Interpreters/PreparedSets.cpp @@ -226,7 +226,7 @@ std::unique_ptr FutureSetFromSubquery::buildPlan(const ContextPtr & c 
if (set) return nullptr; - // std::cerr << StackTrace().toString() << std::endl; + //std::cerr << StackTrace().toString() << std::endl; auto set_cache = context->getPreparedSetsCache(); if (set_cache) @@ -294,4 +294,38 @@ FutureSetFromSubquery::FutureSetFromSubquery(SubqueryForSet subquery_) : subquer FutureSetFromStorage::FutureSetFromStorage(SetPtr set_) : set(std::move(set_)) {} +SetPtr FutureSetFromTuple::buildOrderedSetInplace(const ContextPtr & context) +{ + const auto & settings = context->getSettingsRef(); + auto size_limits = getSizeLimitsForSet(settings, true); + fill(size_limits, settings.transform_null_in, true); + return set; +} + +std::unique_ptr FutureSetFromTuple::build(const ContextPtr & context) +{ + const auto & settings = context->getSettingsRef(); + auto size_limits = getSizeLimitsForSet(settings, false); + fill(size_limits, settings.transform_null_in, false); + return nullptr; +} + +void FutureSetFromTuple::buildForTuple(SizeLimits size_limits, bool transform_null_in) +{ + fill(size_limits, transform_null_in, false); +} + +void FutureSetFromTuple::fill(SizeLimits size_limits, bool transform_null_in, bool create_ordered_set) +{ + //std::cerr << StackTrace().toString() << std::endl; + + if (set) + return; + + set = std::make_shared(size_limits, create_ordered_set, transform_null_in); + set->setHeader(block.cloneEmpty().getColumnsWithTypeAndName()); + set->insertFromBlock(block.getColumnsWithTypeAndName()); + set->finishInsert(); +} + }; diff --git a/src/Interpreters/PreparedSets.h b/src/Interpreters/PreparedSets.h index b4d01754ea8..ef7aba38f24 100644 --- a/src/Interpreters/PreparedSets.h +++ b/src/Interpreters/PreparedSets.h @@ -86,42 +86,18 @@ public: bool isFilled() const override { return true; } SetPtr get() const override { return set; } - SetPtr buildOrderedSetInplace(const ContextPtr & context) override - { - const auto & settings = context->getSettingsRef(); - auto size_limits = getSizeLimitsForSet(settings, true); - fill(size_limits, settings.transform_null_in, true); - return set; - } + SetPtr buildOrderedSetInplace(const ContextPtr & context) override; - std::unique_ptr build(const ContextPtr & context) override - { - const auto & settings = context->getSettingsRef(); - auto size_limits = getSizeLimitsForSet(settings, false); - fill(size_limits, settings.transform_null_in, false); - return nullptr; - } + std::unique_ptr build(const ContextPtr & context) override; - void buildForTuple(SizeLimits size_limits, bool transform_null_in) - { - fill(size_limits, transform_null_in, false); - } + void buildForTuple(SizeLimits size_limits, bool transform_null_in); private: Block block; SetPtr set; - void fill(SizeLimits size_limits, bool transform_null_in, bool create_ordered_set) - { - if (set) - return; - - set = std::make_shared(size_limits, create_ordered_set, transform_null_in); - set->setHeader(block.cloneEmpty().getColumnsWithTypeAndName()); - set->insertFromBlock(block.getColumnsWithTypeAndName()); - set->finishInsert(); - } + void fill(SizeLimits size_limits, bool transform_null_in, bool create_ordered_set); }; /// Information on how to build set for the [GLOBAL] IN section. 
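The two hunks above move FutureSetFromTuple's set-building out of the header and guard it with an early return once the set is filled. A minimal, self-contained sketch of that build-once-and-cache shape, with illustrative names rather than the real ClickHouse types:

    #include <memory>

    struct Set { bool ordered = false; };   /// stand-in for DB::Set
    using SetPtr = std::shared_ptr<Set>;

    class FutureSetExample
    {
    public:
        SetPtr build(bool ordered)
        {
            if (set)                        /// already filled: reuse the cached set
                return set;
            set = std::make_shared<Set>();
            set->ordered = ordered;         /// e.g. the ordered variant for PK analysis
            return set;
        }

    private:
        SetPtr set;                         /// cached so repeated builds are no-ops
    };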
diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index 38d0aa29d24..b1780212e51 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -1468,12 +1468,17 @@ void Planner::buildPlanForQueryNode() if (!select_query_options.only_analyze) { - auto step = std::make_unique( - query_plan.getCurrentDataStream(), - planner_context->getPreparedSets().detachSubqueries(planner_context->getQueryContext()), - planner_context->getQueryContext()); + auto subqueries = planner_context->getPreparedSets().detachSubqueries(planner_context->getQueryContext()); - query_plan.addStep(std::move(step)); + if (!subqueries.empty()) + { + auto step = std::make_unique( + query_plan.getCurrentDataStream(), + std::move(subqueries), + planner_context->getQueryContext()); + + query_plan.addStep(std::move(step)); + } //addCreatingSetsStep(query_plan, planner_context->getPreparedSets().detachSubqueries(planner_context->getQueryContext()), planner_context->getQueryContext()); //addBuildSubqueriesForSetsStepIfNeeded(query_plan, select_query_options, planner_context, result_actions_to_execute); diff --git a/src/Processors/QueryPlan/CreatingSetsStep.h b/src/Processors/QueryPlan/CreatingSetsStep.h index 96ab26077fc..244bb27ba78 100644 --- a/src/Processors/QueryPlan/CreatingSetsStep.h +++ b/src/Processors/QueryPlan/CreatingSetsStep.h @@ -58,6 +58,9 @@ public: static std::vector> makePlansForSets(DelayedCreatingSetsStep && step); + ContextPtr getContext() const { return context; } + PreparedSets::SubqueriesForSets detachSubqueries() { return std::move(subqueries_for_sets); } + private: PreparedSets::SubqueriesForSets subqueries_for_sets; ContextPtr context; diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp index 37bc894339f..63ba3d5b56c 100644 --- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp +++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp @@ -268,6 +268,19 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes return 2; } + if (auto * delayed = typeid_cast(child.get())) + { + /// CreatingSets does not change header. + /// We can push down filter and update header. + /// Filter - DelayedCreatingSets - Something + child = std::make_unique(filter->getOutputStream(), delayed->detachSubqueries(), delayed->getContext()); + std::swap(parent, child); + std::swap(parent_node->children, child_node->children); + std::swap(parent_node->children.front(), child_node->children.front()); + /// DelayedCreatingSets - Filter - Something + return 2; + } + if (auto * totals_having = typeid_cast(child.get())) { /// If totals step has HAVING expression, skip it for now. 
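The filterPushDown hunk above swaps a Filter step with the DelayedCreatingSets step beneath it, which is safe because creating sets leaves the stream header unchanged. A rough, self-contained sketch of that rotation on a toy plan tree, using simplified stand-ins rather than the real QueryPlan classes:

    #include <memory>
    #include <utility>
    #include <vector>

    struct Step { const char * name = ""; };

    struct Node
    {
        std::shared_ptr<Step> step;
        std::vector<Node *> children;
    };

    /// Filter -> DelayedCreatingSets -> X  becomes  DelayedCreatingSets -> Filter -> X
    void pushFilterBelow(Node & parent_node, Node & child_node)
    {
        std::swap(parent_node.step, child_node.step);                         /// exchange steps
        std::swap(parent_node.children, child_node.children);                 /// exchange subtrees
        std::swap(parent_node.children.front(), child_node.children.front()); /// re-link the pair
    }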
diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index ef013a36069..a096538a298 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1127,7 +1127,7 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToRead(Merge real_column_names, sample_factor_column_queried, log, - key_condition); + indexes); } static ActionsDAGPtr buildFilterDAG( @@ -1171,14 +1171,14 @@ static ActionsDAGPtr buildFilterDAG( return ActionsDAG::buildFilterActionsDAG(nodes, node_name_to_input_node_column, context); } -static void buildKeyCondition( - std::optional & key_condition, +static void buildIndexes( + std::optional & indexes, ActionsDAGPtr filter_actions_dag, const ContextPtr & context, const SelectQueryInfo & query_info, const StorageMetadataPtr & metadata_snapshot) { - key_condition.reset(); + indexes.reset(); // Build and check if primary key is used when necessary const auto & primary_key = metadata_snapshot->getPrimaryKey(); @@ -1191,16 +1191,58 @@ static void buildKeyCondition( if (query_info.syntax_analyzer_result) array_join_name_set = query_info.syntax_analyzer_result->getArrayJoinSourceNameSet(); - key_condition.emplace(filter_actions_dag, + indexes.emplace(ReadFromMergeTree::Indexes{{ + filter_actions_dag, context, primary_key_column_names, primary_key.expression, - array_join_name_set); + array_join_name_set}, {}, false}); } else { - key_condition.emplace(query_info, context, primary_key_column_names, primary_key.expression); + indexes.emplace(ReadFromMergeTree::Indexes{{ + query_info, + context, + primary_key_column_names, + primary_key.expression}, {}, false}); } + + indexes->use_skip_indexes = settings.use_skip_indexes; + bool final = query_info.isFinal(); + + if (final && !settings.use_skip_indexes_if_final) + indexes->use_skip_indexes = false; + + if (!indexes->use_skip_indexes) + return; + + UsefulSkipIndexes skip_indexes; + using Key = std::pair; + std::map merged; + + for (const auto & index : metadata_snapshot->getSecondaryIndices()) + { + auto index_helper = MergeTreeIndexFactory::instance().get(index); + if (index_helper->isMergeable()) + { + auto [it, inserted] = merged.emplace(Key{index_helper->index.type, index_helper->getGranularity()}, skip_indexes.merged_indices.size()); + if (inserted) + { + skip_indexes.merged_indices.emplace_back(); + skip_indexes.merged_indices.back().condition = index_helper->createIndexMergedCondition(query_info, metadata_snapshot); + } + + skip_indexes.merged_indices[it->second].addIndex(index_helper); + } + else + { + auto condition = index_helper->createIndexCondition(query_info, context); + if (!condition->alwaysUnknownOrTrue()) + skip_indexes.useful_indices.emplace_back(index_helper, condition); + } + } + + indexes->skip_indexes = std::move(skip_indexes); } void ReadFromMergeTree::onAddFilterFinish() @@ -1208,7 +1250,7 @@ void ReadFromMergeTree::onAddFilterFinish() if (!filter_nodes.nodes.empty()) { auto filter_actions_dag = buildFilterDAG(context, prewhere_info, filter_nodes, query_info); - buildKeyCondition(key_condition, filter_actions_dag, context, query_info, metadata_for_reading); + buildIndexes(indexes, filter_actions_dag, context, query_info, metadata_for_reading); } } @@ -1226,7 +1268,7 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToRead( const Names & real_column_names, bool sample_factor_column_queried, Poco::Logger * log, - std::optional & key_condition) + std::optional & indexes) 
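// (Sketch, not part of the patch: selectRangesToRead now threads a single
//  ReadFromMergeTree::Indexes value - bundling key_condition, skip_indexes and
//  use_skip_indexes - instead of a bare KeyCondition, so buildIndexes() computes
//  the skip-index conditions once and filterPartsByPrimaryKeyAndSkipIndexes
//  reuses them without re-deriving state from SelectQueryInfo.)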
{ const auto & settings = context->getSettingsRef(); if (settings.allow_experimental_analyzer || settings.query_plan_optimize_primary_key) @@ -1246,7 +1288,7 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToRead( real_column_names, sample_factor_column_queried, log, - key_condition); + indexes); } return selectRangesToReadImpl( @@ -1261,7 +1303,7 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToRead( real_column_names, sample_factor_column_queried, log, - key_condition); + indexes); } MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( @@ -1276,7 +1318,7 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( const Names & real_column_names, bool sample_factor_column_queried, Poco::Logger * log, - std::optional & key_condition) + std::optional & indexes) { AnalysisResult result; const auto & settings = context->getSettingsRef(); @@ -1323,10 +1365,10 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( // } // } - if (!key_condition) - buildKeyCondition(key_condition, query_info.filter_actions_dag, context, query_info, metadata_snapshot); + if (!indexes) + buildIndexes(indexes, query_info.filter_actions_dag, context, query_info, metadata_snapshot); - if (settings.force_primary_key && key_condition->alwaysUnknownOrTrue()) + if (settings.force_primary_key && indexes->key_condition.alwaysUnknownOrTrue()) { return std::make_shared(MergeTreeDataSelectAnalysisResult{ .result = std::make_exception_ptr(Exception( @@ -1334,9 +1376,9 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( "Primary key ({}) is not used and setting 'force_primary_key' is set", fmt::join(primary_key_column_names, ", ")))}); } - LOG_DEBUG(log, "Key condition: {}", key_condition->toString()); + LOG_DEBUG(log, "Key condition: {}", indexes->key_condition.toString()); - if (key_condition->alwaysFalse()) + if (indexes->key_condition.alwaysFalse()) return std::make_shared(MergeTreeDataSelectAnalysisResult{.result = std::move(result)}); size_t total_marks_pk = 0; @@ -1358,7 +1400,7 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( query_info, metadata_snapshot->getColumns().getAllPhysical(), parts, - *key_condition, + indexes->key_condition, data, metadata_snapshot, context, @@ -1374,23 +1416,17 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( auto reader_settings = getMergeTreeReaderSettings(context, query_info); - bool use_skip_indexes = settings.use_skip_indexes; - bool final = isFinal(query_info); - - if (final && !settings.use_skip_indexes_if_final) - use_skip_indexes = false; - result.parts_with_ranges = MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipIndexes( std::move(parts), metadata_snapshot, - query_info, context, - *key_condition, + indexes->key_condition, + indexes->skip_indexes, reader_settings, log, num_streams, result.index_stats, - use_skip_indexes); + indexes->use_skip_indexes); } catch (...) { @@ -1438,7 +1474,7 @@ bool ReadFromMergeTree::requestReadingInOrder(size_t prefix_size, int direction, /// Disable read-in-order optimization for reverse order with final. /// Otherwise, it can lead to incorrect final behavior because the implementation may rely on the reading in direct order). 
- if (direction != 1 && isFinal(query_info)) + if (direction != 1 && query_info.isFinal()) return false; auto order_info = std::make_shared(SortDescription{}, prefix_size, direction, limit); @@ -1561,11 +1597,7 @@ ReadFromMergeTree::AnalysisResult ReadFromMergeTree::getAnalysisResult() const bool ReadFromMergeTree::isQueryWithFinal() const { - const auto & select = query_info.query->as(); - if (query_info.table_expression_modifiers) - return query_info.table_expression_modifiers->hasFinal(); - else - return select.final(); + return query_info.isFinal(); } bool ReadFromMergeTree::isQueryWithSampling() const @@ -2009,15 +2041,6 @@ void ReadFromMergeTree::describeIndexes(JSONBuilder::JSONMap & map) const } } -bool ReadFromMergeTree::isFinal(const SelectQueryInfo & query_info) -{ - if (query_info.table_expression_modifiers) - return query_info.table_expression_modifiers->hasFinal(); - - const auto & select = query_info.query->as(); - return select.final(); -} - bool MergeTreeDataSelectAnalysisResult::error() const { return std::holds_alternative(result); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index f13f75bfebc..6610b463726 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -24,6 +24,35 @@ struct MergeTreeDataSelectSamplingData ActionsDAGPtr filter_expression; }; +struct UsefulSkipIndexes +{ + struct DataSkippingIndexAndCondition + { + MergeTreeIndexPtr index; + MergeTreeIndexConditionPtr condition; + + DataSkippingIndexAndCondition(MergeTreeIndexPtr index_, MergeTreeIndexConditionPtr condition_) + : index(index_), condition(condition_) + { + } + }; + + struct MergedDataSkippingIndexAndCondition + { + std::vector indices; + MergeTreeIndexMergedConditionPtr condition; + + void addIndex(const MergeTreeIndexPtr & index) + { + indices.push_back(index); + condition->addIndex(indices.back()); + } + }; + + std::vector useful_indices; + std::vector merged_indices; +}; + struct MergeTreeDataSelectAnalysisResult; using MergeTreeDataSelectAnalysisResultPtr = std::shared_ptr; @@ -132,6 +161,13 @@ public: UInt64 getSelectedRows() const { return selected_rows; } UInt64 getSelectedMarks() const { return selected_marks; } + struct Indexes + { + KeyCondition key_condition; + UsefulSkipIndexes skip_indexes; + bool use_skip_indexes; + }; + static MergeTreeDataSelectAnalysisResultPtr selectRangesToRead( MergeTreeData::DataPartsVector parts, const PrewhereInfoPtr & prewhere_info, @@ -146,7 +182,7 @@ public: const Names & real_column_names, bool sample_factor_column_queried, Poco::Logger * log, - std::optional & key_condition); + std::optional & indexes); MergeTreeDataSelectAnalysisResultPtr selectRangesToRead(MergeTreeData::DataPartsVector parts) const; @@ -159,8 +195,6 @@ public: bool requestReadingInOrder(size_t prefix_size, int direction, size_t limit); void updatePrewhereInfo(const PrewhereInfoPtr & prewhere_info_value); - - static bool isFinal(const SelectQueryInfo & query_info); bool isQueryWithFinal() const; bool isQueryWithSampling() const; @@ -193,7 +227,7 @@ private: const Names & real_column_names, bool sample_factor_column_queried, Poco::Logger * log, - std::optional & key_condition); + std::optional & indexes); int getSortDirection() const { @@ -233,7 +267,7 @@ private: std::shared_ptr max_block_numbers_to_read; /// Pre-computed value, needed to trigger sets creating for PK - mutable std::optional key_condition; + mutable std::optional indexes; Poco::Logger * log; UInt64 
selected_parts = 0; diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 55dfc56d8ac..f99e15c0fc1 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -887,9 +887,9 @@ void MergeTreeDataSelectExecutor::filterPartsByPartition( RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipIndexes( MergeTreeData::DataPartsVector && parts, StorageMetadataPtr metadata_snapshot, - const SelectQueryInfo & query_info, const ContextPtr & context, const KeyCondition & key_condition, + const UsefulSkipIndexes & skip_indexes, const MergeTreeReaderSettings & reader_settings, Poco::Logger * log, size_t num_streams, @@ -900,66 +900,6 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd parts_with_ranges.resize(parts.size()); const Settings & settings = context->getSettingsRef(); - /// Let's start analyzing all useful indices - - struct IndexStat - { - std::atomic total_granules{0}; - std::atomic granules_dropped{0}; - std::atomic total_parts{0}; - std::atomic parts_dropped{0}; - }; - - struct DataSkippingIndexAndCondition - { - MergeTreeIndexPtr index; - MergeTreeIndexConditionPtr condition; - IndexStat stat; - - DataSkippingIndexAndCondition(MergeTreeIndexPtr index_, MergeTreeIndexConditionPtr condition_) - : index(index_), condition(condition_) - { - } - }; - - struct MergedDataSkippingIndexAndCondition - { - std::vector indices; - MergeTreeIndexMergedConditionPtr condition; - IndexStat stat; - - void addIndex(const MergeTreeIndexPtr & index) - { - indices.push_back(index); - condition->addIndex(indices.back()); - } - }; - - std::list useful_indices; - std::map, MergedDataSkippingIndexAndCondition> merged_indices; - - if (use_skip_indexes) - { - for (const auto & index : metadata_snapshot->getSecondaryIndices()) - { - auto index_helper = MergeTreeIndexFactory::instance().get(index); - if (index_helper->isMergeable()) - { - auto [it, inserted] = merged_indices.try_emplace({index_helper->index.type, index_helper->getGranularity()}); - if (inserted) - it->second.condition = index_helper->createIndexMergedCondition(query_info, metadata_snapshot); - - it->second.addIndex(index_helper); - } - else - { - auto condition = index_helper->createIndexCondition(query_info, context); - if (!condition->alwaysUnknownOrTrue()) - useful_indices.emplace_back(index_helper, condition); - } - } - } - if (use_skip_indexes && settings.force_data_skipping_indices.changed) { const auto & indices = settings.force_data_skipping_indices.toString(); @@ -977,7 +917,7 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "No indices parsed from force_data_skipping_indices ('{}')", indices); std::unordered_set useful_indices_names; - for (const auto & useful_index : useful_indices) + for (const auto & useful_index : skip_indexes.useful_indices) useful_indices_names.insert(useful_index.index->index.name); for (const auto & index_name : forced_indices) @@ -992,6 +932,17 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd } } + struct IndexStat + { + std::atomic total_granules{0}; + std::atomic granules_dropped{0}; + std::atomic total_parts{0}; + std::atomic parts_dropped{0}; + }; + + std::vector useful_indices_stat(skip_indexes.useful_indices.size()); + std::vector merged_indices_stat(skip_indexes.merged_indices.size()); + 
std::atomic sum_marks_pk = 0; std::atomic sum_parts_pk = 0; @@ -1018,12 +969,14 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd if (!ranges.ranges.empty()) sum_parts_pk.fetch_add(1, std::memory_order_relaxed); - for (auto & index_and_condition : useful_indices) + for (size_t idx = 0; idx < skip_indexes.useful_indices.size(); ++idx) { if (ranges.ranges.empty()) break; - index_and_condition.stat.total_parts.fetch_add(1, std::memory_order_relaxed); + auto & index_and_condition = skip_indexes.useful_indices[idx]; + auto & stat = useful_indices_stat[idx]; + stat.total_parts.fetch_add(1, std::memory_order_relaxed); size_t total_granules = 0; size_t granules_dropped = 0; @@ -1040,19 +993,21 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd uncompressed_cache.get(), log); - index_and_condition.stat.total_granules.fetch_add(total_granules, std::memory_order_relaxed); - index_and_condition.stat.granules_dropped.fetch_add(granules_dropped, std::memory_order_relaxed); + stat.total_granules.fetch_add(total_granules, std::memory_order_relaxed); + stat.granules_dropped.fetch_add(granules_dropped, std::memory_order_relaxed); if (ranges.ranges.empty()) - index_and_condition.stat.parts_dropped.fetch_add(1, std::memory_order_relaxed); + stat.parts_dropped.fetch_add(1, std::memory_order_relaxed); } - for (auto & [_, indices_and_condition] : merged_indices) + for (size_t idx = 0; idx < skip_indexes.merged_indices.size(); ++idx) { if (ranges.ranges.empty()) break; - indices_and_condition.stat.total_parts.fetch_add(1, std::memory_order_relaxed); + auto & indices_and_condition = skip_indexes.merged_indices[idx]; + auto & stat = merged_indices_stat[idx]; + stat.total_parts.fetch_add(1, std::memory_order_relaxed); size_t total_granules = 0; size_t granules_dropped = 0; @@ -1063,11 +1018,11 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd total_granules, granules_dropped, mark_cache.get(), uncompressed_cache.get(), log); - indices_and_condition.stat.total_granules.fetch_add(total_granules, std::memory_order_relaxed); - indices_and_condition.stat.granules_dropped.fetch_add(granules_dropped, std::memory_order_relaxed); + stat.total_granules.fetch_add(total_granules, std::memory_order_relaxed); + stat.granules_dropped.fetch_add(granules_dropped, std::memory_order_relaxed); if (ranges.ranges.empty()) - indices_and_condition.stat.parts_dropped.fetch_add(1, std::memory_order_relaxed); + stat.parts_dropped.fetch_add(1, std::memory_order_relaxed); } if (!ranges.ranges.empty()) @@ -1134,15 +1089,17 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd .num_granules_after = sum_marks_pk.load(std::memory_order_relaxed)}); } - for (const auto & index_and_condition : useful_indices) + for (size_t idx = 0; idx < skip_indexes.useful_indices.size(); ++idx) { + const auto & index_and_condition = skip_indexes.useful_indices[idx]; + const auto & stat = useful_indices_stat[idx]; const auto & index_name = index_and_condition.index->index.name; LOG_DEBUG( log, "Index {} has dropped {}/{} granules.", backQuote(index_name), - index_and_condition.stat.granules_dropped, - index_and_condition.stat.total_granules); + stat.granules_dropped, + stat.total_granules); std::string description = index_and_condition.index->index.type + " GRANULARITY " + std::to_string(index_and_condition.index->index.granularity); @@ -1151,25 +1108,27 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd 
.type = ReadFromMergeTree::IndexType::Skip, .name = index_name, .description = std::move(description), - .num_parts_after = index_and_condition.stat.total_parts - index_and_condition.stat.parts_dropped, - .num_granules_after = index_and_condition.stat.total_granules - index_and_condition.stat.granules_dropped}); + .num_parts_after = stat.total_parts - stat.parts_dropped, + .num_granules_after = stat.total_granules - stat.granules_dropped}); } - for (const auto & [type_with_granularity, index_and_condition] : merged_indices) + for (size_t idx = 0; idx < skip_indexes.merged_indices.size(); ++idx) { + const auto & index_and_condition = skip_indexes.merged_indices[idx]; + const auto & stat = merged_indices_stat[idx]; const auto & index_name = "Merged"; LOG_DEBUG(log, "Index {} has dropped {}/{} granules.", backQuote(index_name), - index_and_condition.stat.granules_dropped, index_and_condition.stat.total_granules); + stat.granules_dropped, stat.total_granules); - std::string description = "MERGED GRANULARITY " + std::to_string(type_with_granularity.second); + std::string description = "MERGED GRANULARITY " + std::to_string(index_and_condition.indices.at(0)->index.granularity); index_stats.emplace_back(ReadFromMergeTree::IndexStat{ .type = ReadFromMergeTree::IndexType::Skip, .name = index_name, .description = std::move(description), - .num_parts_after = index_and_condition.stat.total_parts - index_and_condition.stat.parts_dropped, - .num_granules_after = index_and_condition.stat.total_granules - index_and_condition.stat.granules_dropped}); + .num_parts_after = stat.total_parts - stat.parts_dropped, + .num_granules_after = stat.total_granules - stat.granules_dropped}); } return parts_with_ranges; @@ -1291,7 +1250,7 @@ MergeTreeDataSelectAnalysisResultPtr MergeTreeDataSelectExecutor::estimateNumMar selectColumnNames(column_names_to_return, data, real_column_names, virt_column_names, sample_factor_column_queried); - std::optional key_condition; + std::optional indexes; return ReadFromMergeTree::selectRangesToRead( std::move(parts), prewhere_info, @@ -1306,7 +1265,7 @@ MergeTreeDataSelectAnalysisResultPtr MergeTreeDataSelectExecutor::estimateNumMar real_column_names, sample_factor_column_queried, log, - key_condition); + indexes); } QueryPlanStepPtr MergeTreeDataSelectExecutor::readFromParts( diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index a337574bb64..8c8ce59bebe 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -189,9 +189,9 @@ public: static RangesInDataParts filterPartsByPrimaryKeyAndSkipIndexes( MergeTreeData::DataPartsVector && parts, StorageMetadataPtr metadata_snapshot, - const SelectQueryInfo & query_info, const ContextPtr & context, const KeyCondition & key_condition, + const UsefulSkipIndexes & skip_indexes, const MergeTreeReaderSettings & reader_settings, Poco::Logger * log, size_t num_streams, diff --git a/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp b/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp index 5e186c25b83..2bd9db12b93 100644 --- a/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp @@ -310,14 +310,24 @@ bool MergeTreeIndexConditionBloomFilter::traverseFunction(const RPNBuilderTreeNo if (functionIsInOrGlobalInOperator(function_name)) { + //std::cerr << StackTrace().toString() << std::endl; + auto future_set = 
rhs_argument.tryGetPreparedSet(); + + //std::cerr << "==== Finding set for MergeTreeBF " << bool(future_set) << std::endl; + if (future_set && !future_set->isReady()) + { + //std::cerr << "==== not ready, building " << std::endl; future_set->buildOrderedSetInplace(rhs_argument.getTreeContext().getQueryContext()); + } ConstSetPtr prepared_set; if (future_set) prepared_set = future_set->get(); + //std::cerr << "==== Prep set for MergeTreeBF " << bool(prepared_set) << ' ' << (prepared_set ? prepared_set->hasExplicitSetElements() : false) << std::endl; + if (prepared_set && prepared_set->hasExplicitSetElements()) { const auto prepared_info = getPreparedSetInfo(prepared_set); diff --git a/src/Storages/MergeTree/MergeTreeIndexInverted.cpp b/src/Storages/MergeTree/MergeTreeIndexInverted.cpp index 6ffba0ad029..a64f81807ae 100644 --- a/src/Storages/MergeTree/MergeTreeIndexInverted.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexInverted.cpp @@ -655,7 +655,12 @@ bool MergeTreeConditionInverted::tryPrepareSetGinFilter( if (key_tuple_mapping.empty()) return false; + //std::cerr << "==== Finding set for MergeTreeConditionInverted\n"; + auto future_set = rhs.tryGetPreparedSet(); + + //std::cerr << "==== Set for MergeTreeConditionInverted" << bool(future_set) << std::endl; + if (future_set && !future_set->isReady()) future_set->buildOrderedSetInplace(rhs.getTreeContext().getQueryContext()); diff --git a/src/Storages/MergeTree/RPNBuilder.cpp b/src/Storages/MergeTree/RPNBuilder.cpp index e8843ff1489..cc7ec45be6a 100644 --- a/src/Storages/MergeTree/RPNBuilder.cpp +++ b/src/Storages/MergeTree/RPNBuilder.cpp @@ -298,11 +298,29 @@ FutureSetPtr RPNBuilderTreeNode::tryGetPreparedSet() const if (ast_node && prepared_sets) { - return prepared_sets->getFuture(PreparedSetKey::forSubquery(ast_node->getTreeHash())); + auto hash = ast_node->getTreeHash(); + auto key = PreparedSetKey::forSubquery(hash); + + // std::cerr << ".........Getting from AST \n" << ast_node->dumpTree() << std::endl + // << key.toString() << std::endl; + + for (const auto & [k, v] : prepared_sets->getSets()) + { + // std::cerr << "........... " << k.toString() << std::endl; + if (k.ast_hash == hash) + return v; + } + + //return prepared_sets->getFuture(PreparedSetKey::forSubquery(ast_node->getTreeHash())); } else if (dag_node) { + + // std::cerr << "...........Getting from DAG\n"; const auto * node_without_alias = getNodeWithoutAlias(dag_node); + // std::cerr << ".......... node_without_alias : " << node_without_alias->result_name + // << ' ' << node_without_alias->result_type->getName() + // << ' ' << (node_without_alias->column ? node_without_alias->column->getName() : "") << std::endl; return tryGetSetFromDAGNode(node_without_alias); } diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index fb895d04b8f..a8eb00adc87 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -264,5 +264,7 @@ struct SelectQueryInfo { return input_order_info ? input_order_info : (projection ? 
projection->input_order_info : nullptr); } + + bool isFinal() const; }; } From f3b4959e059640a9b786f421b3fe42f9a1fb4be6 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 23 May 2023 19:37:35 +0200 Subject: [PATCH 0174/1997] fix --- src/Storages/StorageReplicatedMergeTree.cpp | 7 +++++-- tests/integration/test_merge_tree_empty_parts/test.py | 2 +- .../queries/0_stateless/02448_clone_replica_lost_part.sql | 1 + 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index a6152c22148..fc90ff550c7 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -3393,10 +3393,13 @@ void StorageReplicatedMergeTree::mergeSelectingTask() } + Float32 new_sleep_ms = merge_selecting_sleep_ms; if (result == AttemptStatus::EntryCreated || result == AttemptStatus::NeedRetry) - merge_selecting_sleep_ms = static_cast(merge_selecting_sleep_ms / storage_settings_ptr->merge_selecting_sleep_slowdown_factor); + new_sleep_ms /= storage_settings_ptr->merge_selecting_sleep_slowdown_factor; else if (result == AttemptStatus::CannotSelect) - merge_selecting_sleep_ms = static_cast(merge_selecting_sleep_ms * storage_settings_ptr->merge_selecting_sleep_slowdown_factor); + new_sleep_ms *= storage_settings_ptr->merge_selecting_sleep_slowdown_factor; + new_sleep_ms *= std::uniform_real_distribution(1.f, 1.1f)(thread_local_rng); + merge_selecting_sleep_ms = static_cast(new_sleep_ms); if (merge_selecting_sleep_ms < storage_settings_ptr->merge_selecting_sleep_ms) merge_selecting_sleep_ms = storage_settings_ptr->merge_selecting_sleep_ms; diff --git a/tests/integration/test_merge_tree_empty_parts/test.py b/tests/integration/test_merge_tree_empty_parts/test.py index 212c0577c13..c6a96f3ed1b 100644 --- a/tests/integration/test_merge_tree_empty_parts/test.py +++ b/tests/integration/test_merge_tree_empty_parts/test.py @@ -48,7 +48,7 @@ def test_empty_parts_summing(started_cluster): "CREATE TABLE empty_parts_summing (d Date, key UInt64, value Int64) " "ENGINE = ReplicatedSummingMergeTree('/clickhouse/tables/empty_parts_summing', 'r1') " "PARTITION BY toYYYYMM(d) ORDER BY key " - "SETTINGS old_parts_lifetime = 1" + "SETTINGS old_parts_lifetime = 1, cleanup_delay_period=0, cleanup_thread_preferred_points_per_iteration=0" ) node1.query("INSERT INTO empty_parts_summing VALUES (toDate('2020-10-10'), 1, 1)") diff --git a/tests/queries/0_stateless/02448_clone_replica_lost_part.sql b/tests/queries/0_stateless/02448_clone_replica_lost_part.sql index 44303a1c532..7ad25d75fbe 100644 --- a/tests/queries/0_stateless/02448_clone_replica_lost_part.sql +++ b/tests/queries/0_stateless/02448_clone_replica_lost_part.sql @@ -144,6 +144,7 @@ select sleep(2) format Null; -- increases probability of reproducing the issue -- rmt1 will mimic rmt2, but will not be able to fetch parts for a while system stop replicated sends rmt2; attach table rmt1; +system sync replica rmt1; -- rmt1 should not show the value (200) from dropped part select throwIf(n = 200) from rmt1 format Null; select 11, arraySort(groupArray(n)) from rmt2; From 9db7e8ed62415aac040ee5d220d6dabc749ccc7b Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 23 May 2023 20:47:35 +0000 Subject: [PATCH 0175/1997] Fixing build. 
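Presumably this adds the missing out-of-line definition of SelectQueryInfo::isFinal(), which the previous commit declared in SelectQueryInfo.h, so the linker can resolve it. A hypothetical call site, mirroring the buildIndexes() hunk earlier in this series (not a real excerpt):

    if (query_info.isFinal() && !settings.use_skip_indexes_if_final)
        use_skip_indexes = false;   /// FINAL disables skip indexes unless opted in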
--- src/Storages/SelectQueryInfo.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 src/Storages/SelectQueryInfo.cpp diff --git a/src/Storages/SelectQueryInfo.cpp b/src/Storages/SelectQueryInfo.cpp new file mode 100644 index 00000000000..665da7fee70 --- /dev/null +++ b/src/Storages/SelectQueryInfo.cpp @@ -0,0 +1,16 @@ +#include +#include + +namespace DB +{ + +bool SelectQueryInfo::isFinal() const +{ + if (table_expression_modifiers) + return table_expression_modifiers->hasFinal(); + + const auto & select = query->as(); + return select.final(); +} + +} From 87907dafa7a8179382c98cb1718b58a002617e08 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 24 May 2023 14:27:37 +0800 Subject: [PATCH 0176/1997] fix code style --- src/Functions/geohashEncode.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Functions/geohashEncode.cpp b/src/Functions/geohashEncode.cpp index ff61bf7d27c..7c353b822aa 100644 --- a/src/Functions/geohashEncode.cpp +++ b/src/Functions/geohashEncode.cpp @@ -17,7 +17,6 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; - extern const int ILLEGAL_COLUMN; extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION; } From 161afea266bc8f3a13dacfb99f7333ab98f2ac46 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 24 May 2023 07:36:39 +0000 Subject: [PATCH 0177/1997] Add support for changelog --- programs/keeper/Keeper.cpp | 2 - src/Coordination/Changelog.cpp | 324 +++++++++++------- src/Coordination/Changelog.h | 4 +- src/Coordination/KeeperContext.cpp | 12 +- src/Coordination/KeeperContext.h | 2 + src/Coordination/KeeperSnapshotManager.cpp | 47 ++- src/Coordination/KeeperStateManager.cpp | 3 +- src/Coordination/Standalone/Context.cpp | 4 + src/Core/SettingsFields.cpp | 5 + .../MetadataStorageFromPlainObjectStorage.cpp | 1 + .../MetadataStorageFromPlainObjectStorage.h | 2 +- src/IO/ZstdDeflatingAppendableWriteBuffer.cpp | 11 +- src/IO/ZstdDeflatingAppendableWriteBuffer.h | 3 + src/Interpreters/Context.h | 2 +- 14 files changed, 277 insertions(+), 145 deletions(-) diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index d5cf61daa6e..002ce413095 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -44,8 +44,6 @@ #include -#include - int mainEntryClickHouseKeeper(int argc, char ** argv) { diff --git a/src/Coordination/Changelog.cpp b/src/Coordination/Changelog.cpp index 852a21c1c45..c94633d6dbd 100644 --- a/src/Coordination/Changelog.cpp +++ b/src/Coordination/Changelog.cpp @@ -3,17 +3,17 @@ #include #include #include +#include #include #include +#include #include #include #include -#include #include #include +#include #include -#include -#include namespace DB @@ -29,50 +29,58 @@ namespace ErrorCodes namespace { + void moveFileBetweenDisks(DiskPtr disk_from, ChangelogFileDescriptionPtr description, DiskPtr disk_to, const std::string & path_to) + { + disk_from->copyFile(description->path, *disk_to, path_to, {}); + disk_from->removeFile(description->path); + description->path = path_to; + description->disk = disk_to; + } -constexpr auto DEFAULT_PREFIX = "changelog"; + constexpr auto DEFAULT_PREFIX = "changelog"; -inline std::string formatChangelogPath(const std::string & name_prefix, uint64_t from_index, uint64_t to_index, const std::string & extension) -{ - return fmt::format("{}_{}_{}.{}", name_prefix, from_index, to_index, extension); -} + inline std::string + formatChangelogPath(const std::string & name_prefix, uint64_t from_index, uint64_t to_index, const std::string & 
extension) + { + return fmt::format("{}_{}_{}.{}", name_prefix, from_index, to_index, extension); + } -ChangelogFileDescriptionPtr getChangelogFileDescription(const std::filesystem::path & path) -{ - // we can have .bin.zstd so we cannot use std::filesystem stem and extension - std::string filename_with_extension = path.filename(); - std::string_view filename_with_extension_view = filename_with_extension; + ChangelogFileDescriptionPtr getChangelogFileDescription(const std::filesystem::path & path) + { + // we can have .bin.zstd so we cannot use std::filesystem stem and extension + std::string filename_with_extension = path.filename(); + std::string_view filename_with_extension_view = filename_with_extension; - auto first_dot = filename_with_extension.find('.'); - if (first_dot == std::string::npos) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid changelog file {}", path.generic_string()); + auto first_dot = filename_with_extension.find('.'); + if (first_dot == std::string::npos) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid changelog file {}", path.generic_string()); - Strings filename_parts; - boost::split(filename_parts, filename_with_extension_view.substr(0, first_dot), boost::is_any_of("_")); - if (filename_parts.size() < 3) - throw Exception(ErrorCodes::CORRUPTED_DATA, "Invalid changelog {}", path.generic_string()); + Strings filename_parts; + boost::split(filename_parts, filename_with_extension_view.substr(0, first_dot), boost::is_any_of("_")); + if (filename_parts.size() < 3) + throw Exception(ErrorCodes::CORRUPTED_DATA, "Invalid changelog {}", path.generic_string()); - auto result = std::make_shared(); - result->prefix = filename_parts[0]; - result->from_log_index = parse(filename_parts[1]); - result->to_log_index = parse(filename_parts[2]); - result->extension = std::string(filename_with_extension.substr(first_dot + 1)); - result->path = path.generic_string(); - return result; -} + auto result = std::make_shared(); + result->prefix = filename_parts[0]; + result->from_log_index = parse(filename_parts[1]); + result->to_log_index = parse(filename_parts[2]); + result->extension = std::string(filename_with_extension.substr(first_dot + 1)); + result->path = path.generic_string(); + return result; + } -Checksum computeRecordChecksum(const ChangelogRecord & record) -{ - SipHash hash; - hash.update(record.header.version); - hash.update(record.header.index); - hash.update(record.header.term); - hash.update(record.header.value_type); - hash.update(record.header.blob_size); - if (record.header.blob_size != 0) - hash.update(reinterpret_cast(record.blob->data_begin()), record.blob->size()); - return hash.get64(); -} + Checksum computeRecordChecksum(const ChangelogRecord & record) + { + SipHash hash; + hash.update(record.header.version); + hash.update(record.header.index); + hash.update(record.header.term); + hash.update(record.header.value_type); + hash.update(record.header.blob_size); + if (record.header.blob_size != 0) + hash.update(reinterpret_cast(record.blob->data_begin()), record.blob->size()); + return hash.get64(); + } } @@ -117,25 +125,55 @@ public: // if we wrote at least 1 log in the log file we can rename the file to reflect correctly the // contained logs // file can be deleted from disk earlier by compaction - if (!current_file_description->deleted && last_index_written - && *last_index_written != current_file_description->to_log_index) + if (!current_file_description->deleted) { - auto new_path = formatChangelogPath( - current_file_description->prefix, - 
current_file_description->from_log_index, - *last_index_written, - current_file_description->extension); - disk->moveFile(current_file_description->path, new_path); - current_file_description->path = std::move(new_path); + auto log_disk = current_file_description->disk; + const auto & path = current_file_description->path; + std::string new_path = path; + if (last_index_written && *last_index_written != current_file_description->to_log_index) + { + new_path = formatChangelogPath( + current_file_description->prefix, + current_file_description->from_log_index, + *last_index_written, + current_file_description->extension); + } + + if (disk == log_disk) + { + if (path != new_path) + { + try + { + disk->moveFile(path, new_path); + } + catch (...) + { + tryLogCurrentException(log, fmt::format("File rename failed on disk {}", disk->getName())); + } + current_file_description->path = std::move(new_path); + } + } + else + { + moveFileBetweenDisks(log_disk, current_file_description, disk, new_path); + } } } - file_buf = disk->writeFile(file_description->path, DBMS_DEFAULT_BUFFER_SIZE, mode); + auto current_log_disk = getCurrentLogDisk(); + assert(file_description->disk == current_log_disk); + file_buf = current_log_disk->writeFile(file_description->path, DBMS_DEFAULT_BUFFER_SIZE, mode); + assert(file_buf); last_index_written.reset(); current_file_description = std::move(file_description); if (log_file_settings.compress_logs) - compressed_buffer = std::make_unique(std::move(file_buf), /* compression level = */ 3, /* append_to_existing_file_ = */ mode == WriteMode::Append); + compressed_buffer = std::make_unique( + std::move(file_buf), + /* compression level = */ 3, + /* append_to_existing_file_ = */ mode == WriteMode::Append, + [current_log_disk, path = current_file_description->path] { return current_log_disk->readFile(path); }); prealloc_done = false; } @@ -147,10 +185,7 @@ public: } /// There is bug when compressed_buffer has value, file_buf's ownership transfer to compressed_buffer - bool isFileSet() const - { - return compressed_buffer != nullptr || file_buf != nullptr; - } + bool isFileSet() const { return compressed_buffer != nullptr || file_buf != nullptr; } bool appendRecord(ChangelogRecord && record) { @@ -236,6 +271,7 @@ public: new_description->from_log_index = new_start_log_index; new_description->to_log_index = new_start_log_index + log_file_settings.rotate_interval - 1; new_description->extension = "bin"; + new_description->disk = getCurrentLogDisk(); if (log_file_settings.compress_logs) new_description->extension += "." + toContentEncodingName(CompressionMethod::Zstd); @@ -259,7 +295,6 @@ public: } private: - void finalizeCurrentFile() { assert(prealloc_done); @@ -279,14 +314,13 @@ private: const auto * file_buffer = tryGetFileBuffer(); - if (log_file_settings.max_size != 0 && isLocalDisk()) + if (log_file_settings.max_size != 0 && file_buffer) { int res = -1; do { res = ftruncate(file_buffer->getFD(), initial_file_size + file_buffer->count()); - } - while (res < 0 && errno == EINTR); + } while (res < 0 && errno == EINTR); if (res != 0) LOG_WARNING(log, "Could not ftruncate file. 
Error: {}, errno: {}", errnoToString(), errno); @@ -321,10 +355,7 @@ private: return *file_buffer; } - const WriteBufferFromFile * tryGetFileBuffer() const - { - return const_cast(this)->tryGetFileBuffer(); - } + const WriteBufferFromFile * tryGetFileBuffer() const { return const_cast(this)->tryGetFileBuffer(); } WriteBufferFromFile * tryGetFileBuffer() { @@ -344,30 +375,22 @@ private: void tryPreallocateForFile() { - if (log_file_settings.max_size == 0) - { - initial_file_size = 0; - prealloc_done = true; - return; - } - const auto * file_buffer = tryGetFileBuffer(); - if (!file_buffer) + if (log_file_settings.max_size == 0 || !file_buffer) { initial_file_size = 0; prealloc_done = true; - LOG_WARNING(log, "Could not preallocate space on disk {} using fallocate", getDisk()->getName()); return; } #ifdef OS_LINUX - if (isLocalDisk()) { int res = -1; do { - res = fallocate(file_buffer->getFD(), FALLOC_FL_KEEP_SIZE, 0, log_file_settings.max_size + log_file_settings.overallocate_size); + res = fallocate( + file_buffer->getFD(), FALLOC_FL_KEEP_SIZE, 0, log_file_settings.max_size + log_file_settings.overallocate_size); } while (res < 0 && errno == EINTR); if (res != 0) @@ -387,15 +410,11 @@ private: prealloc_done = true; } - DiskPtr getDisk() const - { - return keeper_context->getLogDisk(); - } + DiskPtr getCurrentLogDisk() const { return keeper_context->getCurrentLogDisk(); } - bool isLocalDisk() const - { - return dynamic_cast(getDisk().get()) != nullptr; - } + DiskPtr getDisk() const { return keeper_context->getLogDisk(); } + + bool isLocalDisk() const { return dynamic_cast(getDisk().get()) != nullptr; } std::map & existing_changelogs; @@ -440,9 +459,7 @@ struct ChangelogReadResult class ChangelogReader { public: - explicit ChangelogReader(DiskPtr disk_, const std::string & filepath_) - : disk(disk_) - , filepath(filepath_) + explicit ChangelogReader(DiskPtr disk_, const std::string & filepath_) : disk(disk_), filepath(filepath_) { auto compression_method = chooseCompressionMethod(filepath, ""); auto read_buffer_from_file = disk->readFile(filepath); @@ -546,10 +563,7 @@ private: std::unique_ptr read_buf; }; -Changelog::Changelog( - Poco::Logger * log_, - LogFileSettings log_file_settings, - KeeperContextPtr keeper_context_) +Changelog::Changelog(Poco::Logger * log_, LogFileSettings log_file_settings, KeeperContextPtr keeper_context_) : changelogs_detached_dir("detached") , rotate_interval(log_file_settings.rotate_interval) , log(log_) @@ -557,18 +571,30 @@ Changelog::Changelog( , append_completion_queue(std::numeric_limits::max()) , keeper_context(std::move(keeper_context_)) { - /// Load all files in changelog directory + /// Load all files on changelog disks + + const auto load_from_disk = [&](const auto & disk) + { + for (auto it = disk->iterateDirectory(""); it->isValid(); it->next()) + { + if (it->name() == changelogs_detached_dir) + continue; + + auto file_description = getChangelogFileDescription(it->path()); + file_description->disk = disk; + + auto [changelog_it, inserted] = existing_changelogs.insert_or_assign(file_description->from_log_index, std::move(file_description)); + + if (!inserted) + LOG_WARNING(log, "Found duplicate entries for {}, will use the entry from {}", changelog_it->second->path, disk->getName()); + } + }; auto disk = getDisk(); + load_from_disk(disk); - for (auto it = disk->iterateDirectory(""); it->isValid(); it->next()) - { - if (it->name() == changelogs_detached_dir) - continue; - - auto file_description = getChangelogFileDescription(it->path()); - 
existing_changelogs[file_description->from_log_index] = std::move(file_description); - } + auto current_log_disk = getCurrentLogDisk(); + load_from_disk(current_log_disk); if (existing_changelogs.empty()) LOG_WARNING(log, "No logs exists in {}. It's Ok if it's the first run of clickhouse-keeper.", disk->getPath()); @@ -579,8 +605,7 @@ Changelog::Changelog( append_completion_thread = ThreadFromGlobalPool([this] { appendCompletionThread(); }); - current_writer = std::make_unique( - existing_changelogs, keeper_context, log_file_settings); + current_writer = std::make_unique(existing_changelogs, keeper_context, log_file_settings); } void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uint64_t logs_to_keep) @@ -652,7 +677,7 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin break; } - ChangelogReader reader(getDisk(), changelog_description.path); + ChangelogReader reader(changelog_description.disk, changelog_description.path); last_log_read_result = reader.readChangelog(logs, start_to_read_from, log); last_log_read_result->log_start_index = changelog_description.from_log_index; @@ -713,13 +738,13 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin assert(existing_changelogs.find(last_log_read_result->log_start_index) != existing_changelogs.end()); assert(existing_changelogs.find(last_log_read_result->log_start_index)->first == existing_changelogs.rbegin()->first); - /// Continue to write into incomplete existing log if it doesn't finished with error + /// Continue to write into incomplete existing log if it doesn't finish with error const auto & description = existing_changelogs[last_log_read_result->log_start_index]; if (last_log_read_result->last_read_index == 0 || last_log_read_result->error) /// If it's broken log then remove it { LOG_INFO(log, "Removing chagelog {} because it's empty or read finished with error", description->path); - getDisk()->removeFile(description->path); + description->disk->removeFile(description->path); existing_changelogs.erase(last_log_read_result->log_start_index); std::erase_if(logs, [last_log_read_result](const auto & item) { return item.first >= last_log_read_result->log_start_index; }); } @@ -728,6 +753,16 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin initWriter(description); } } + else if (last_log_read_result.has_value()) + { + /// check if we need to move it to another disk + auto current_log_disk = getCurrentLogDisk(); + auto disk = getDisk(); + + auto & description = existing_changelogs.at(last_log_read_result->log_start_index); + if (current_log_disk != disk && current_log_disk == description->disk) + moveFileBetweenDisks(current_log_disk, description, disk, description->path); + } /// Start new log if we don't initialize writer from previous log. All logs can be "complete". 
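// (Sketch of the invariant the moves above appear to maintain: only the tail
// changelog that is still being written lives on the "current" log disk; once a
// log is complete, moveFileBetweenDisks() returns it to the main log disk, and
// initWriter() first pulls a to-be-continued log onto the current disk.)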
if (!current_writer->isFileSet()) @@ -740,27 +775,37 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin void Changelog::initWriter(ChangelogFileDescriptionPtr description) { if (description->expectedEntriesCountInLog() != rotate_interval) - LOG_TRACE(log, "Looks like rotate_logs_interval was changed, current {}, expected entries in last log {}", rotate_interval, description->expectedEntriesCountInLog()); + LOG_TRACE( + log, + "Looks like rotate_logs_interval was changed, current {}, expected entries in last log {}", + rotate_interval, + description->expectedEntriesCountInLog()); LOG_TRACE(log, "Continue to write into {}", description->path); + + auto log_disk = description->disk; + auto current_log_disk = getCurrentLogDisk(); + if (log_disk != current_log_disk) + moveFileBetweenDisks(log_disk, description, current_log_disk, description->path); + current_writer->setFile(std::move(description), WriteMode::Append); } namespace { -std::string getCurrentTimestampFolder() -{ - const auto timestamp = LocalDateTime{std::time(nullptr)}; - return fmt::format( - "{:02}{:02}{:02}T{:02}{:02}{:02}", - timestamp.year(), - timestamp.month(), - timestamp.day(), - timestamp.hour(), - timestamp.minute(), - timestamp.second()); -} + std::string getCurrentTimestampFolder() + { + const auto timestamp = LocalDateTime{std::time(nullptr)}; + return fmt::format( + "{:02}{:02}{:02}T{:02}{:02}{:02}", + timestamp.year(), + timestamp.month(), + timestamp.day(), + timestamp.hour(), + timestamp.minute(), + timestamp.second()); + } } @@ -769,6 +814,11 @@ DiskPtr Changelog::getDisk() const return keeper_context->getLogDisk(); } +DiskPtr Changelog::getCurrentLogDisk() const +{ + return keeper_context->getCurrentLogDisk(); +} + void Changelog::removeExistingLogs(ChangelogIter begin, ChangelogIter end) { auto disk = getDisk(); @@ -786,7 +836,23 @@ void Changelog::removeExistingLogs(ChangelogIter begin, ChangelogIter end) LOG_WARNING(log, "Removing changelog {}", itr->second->path); const std::filesystem::path & path = itr->second->path; const auto new_path = timestamp_folder / path.filename(); - disk->moveFile(path.generic_string(), new_path.generic_string()); + + auto changelog_disk = itr->second->disk; + if (changelog_disk == disk) + { + try + { + disk->moveFile(path.generic_string(), new_path.generic_string()); + } + catch (const DB::Exception & e) + { + if (e.code() == DB::ErrorCodes::NOT_IMPLEMENTED) + moveFileBetweenDisks(changelog_disk, itr->second, disk, new_path); + } + } + else + moveFileBetweenDisks(changelog_disk, itr->second, disk, new_path); + itr = existing_changelogs.erase(itr); } } @@ -921,7 +987,6 @@ void Changelog::writeAt(uint64_t index, const LogEntryPtr & log_entry) throw Exception(ErrorCodes::LOGICAL_ERROR, "Changelog must be initialized before writing records"); { - std::lock_guard lock(writer_mutex); /// This write_at require to overwrite everything in this file and also in previous file(s) const bool go_to_previous_file = index < current_writer->getStartIndex(); @@ -937,13 +1002,18 @@ void Changelog::writeAt(uint64_t index, const LogEntryPtr & log_entry) else description = std::prev(index_changelog)->second; + auto log_disk = description->disk; + auto current_log_disk = getCurrentLogDisk(); + if (log_disk != current_log_disk) + moveFileBetweenDisks(log_disk, description, current_log_disk, description->path); + current_writer->setFile(std::move(description), WriteMode::Append); /// Remove all subsequent files if overwritten something in previous one auto to_remove_itr 
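/// removeExistingLogs() above prefers a cheap same-disk rename into
/// detached/<timestamp>/ and only falls back to a cross-disk copy when the
/// disk reports NOT_IMPLEMENTED for rename. The same pattern in isolation, as
/// a variant that also rethrows unexpected errors instead of swallowing them
/// (illustrative, not part of this patch; DB::Exception and
/// ErrorCodes::NOT_IMPLEMENTED as used in this file):
///
///     try
///     {
///         disk->moveFile(from_path, to_path); /// rename: cheap if the disk supports it
///     }
///     catch (const DB::Exception & e)
///     {
///         if (e.code() != DB::ErrorCodes::NOT_IMPLEMENTED)
///             throw;
///         moveFileBetweenDisks(disk, description, disk, to_path); /// copy + delete fallback
///     }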
= existing_changelogs.upper_bound(index); for (auto itr = to_remove_itr; itr != existing_changelogs.end();) { - getDisk()->removeFile(itr->second->path); + itr->second->disk->removeFile(itr->second->path); itr = existing_changelogs.erase(itr); } } @@ -993,16 +1063,17 @@ void Changelog::compact(uint64_t up_to_log_index) LOG_INFO(log, "Removing changelog {} because of compaction", changelog_description.path); /// If failed to push to queue for background removing, then we will remove it now - if (!log_files_to_delete_queue.tryPush(changelog_description.path, 1)) + if (!log_files_to_delete_queue.tryPush({changelog_description.path, changelog_description.disk}, 1)) { try { - getDisk()->removeFile(itr->second->path); - LOG_INFO(log, "Removed changelog {} because of compaction.", itr->second->path); + changelog_description.disk->removeFile(changelog_description.path); + LOG_INFO(log, "Removed changelog {} because of compaction.", changelog_description.path); } catch (Exception & e) { - LOG_WARNING(log, "Failed to remove changelog {} in compaction, error message: {}", itr->second->path, e.message()); + LOG_WARNING( + log, "Failed to remove changelog {} in compaction, error message: {}", changelog_description.path, e.message()); } catch (...) { @@ -1197,12 +1268,13 @@ Changelog::~Changelog() void Changelog::cleanLogThread() { - std::string path; - while (log_files_to_delete_queue.pop(path)) + std::pair path_with_disk; + while (log_files_to_delete_queue.pop(path_with_disk)) { + const auto & [path, disk] = path_with_disk; try { - getDisk()->removeFile(path); + disk->removeFile(path); LOG_INFO(log, "Removed changelog {} because of compaction.", path); } catch (Exception & e) diff --git a/src/Coordination/Changelog.h b/src/Coordination/Changelog.h index 6f0c4e45605..4054829ef19 100644 --- a/src/Coordination/Changelog.h +++ b/src/Coordination/Changelog.h @@ -60,6 +60,7 @@ struct ChangelogFileDescription uint64_t to_log_index; std::string extension; + DiskPtr disk; std::string path; bool deleted = false; @@ -154,6 +155,7 @@ private: static ChangelogRecord buildRecord(uint64_t index, const LogEntryPtr & log_entry); DiskPtr getDisk() const; + DiskPtr getCurrentLogDisk() const; /// Currently existing changelogs std::map existing_changelogs; @@ -187,7 +189,7 @@ private: uint64_t max_log_id = 0; /// For compaction, queue of delete not used logs /// 128 is enough, even if log is not removed, it's not a problem - ConcurrentBoundedQueue log_files_to_delete_queue{128}; + ConcurrentBoundedQueue> log_files_to_delete_queue{128}; ThreadFromGlobalPool clean_log_thread; struct AppendLog diff --git a/src/Coordination/KeeperContext.cpp b/src/Coordination/KeeperContext.cpp index ff3f96f199b..bacafe75d13 100644 --- a/src/Coordination/KeeperContext.cpp +++ b/src/Coordination/KeeperContext.cpp @@ -21,6 +21,12 @@ void KeeperContext::initialize(const Poco::Util::AbstractConfiguration & config) disk_selector->initialize(config, "storage_configuration.disks", Context::getGlobalContextInstance()); log_storage = getLogsPathFromConfig(config); + + if (config.has("keeper_server.current_log_storage_disk")) + current_log_storage = config.getString("keeper_server.current_log_storage_disk"); + else + current_log_storage = log_storage; + snapshot_storage = getSnapshotsPathFromConfig(config); state_file_storage = getStatePathFromConfig(config); @@ -57,7 +63,6 @@ DiskPtr KeeperContext::getDisk(const Storage & storage) const return *storage_disk; const auto & disk_name = std::get(storage); - return disk_selector->get(disk_name); } @@ -66,6 
+71,11 @@ DiskPtr KeeperContext::getLogDisk() const return getDisk(log_storage); } +DiskPtr KeeperContext::getCurrentLogDisk() const +{ + return getDisk(current_log_storage); +} + DiskPtr KeeperContext::getSnapshotsDisk() const { return getDisk(snapshot_storage); diff --git a/src/Coordination/KeeperContext.h b/src/Coordination/KeeperContext.h index 1fc01f12bba..aec2061bddf 100644 --- a/src/Coordination/KeeperContext.h +++ b/src/Coordination/KeeperContext.h @@ -32,6 +32,7 @@ public: bool digestEnabled() const; void setDigestEnabled(bool digest_enabled_); + DiskPtr getCurrentLogDisk() const; DiskPtr getLogDisk() const; DiskPtr getSnapshotsDisk() const; DiskPtr getStateFileDisk() const; @@ -53,6 +54,7 @@ private: std::shared_ptr disk_selector; Storage log_storage; + Storage current_log_storage; Storage snapshot_storage; Storage state_file_storage; diff --git a/src/Coordination/KeeperSnapshotManager.cpp b/src/Coordination/KeeperSnapshotManager.cpp index d47ea475c42..bfadf3af9aa 100644 --- a/src/Coordination/KeeperSnapshotManager.cpp +++ b/src/Coordination/KeeperSnapshotManager.cpp @@ -520,19 +520,41 @@ KeeperSnapshotManager::KeeperSnapshotManager( , keeper_context(keeper_context_) { auto disk = getDisk(); + + std::unordered_set invalid_snapshots; + /// collect invalid snapshots for (auto it = disk->iterateDirectory(""); it->isValid(); it->next()) { const auto & name = it->name(); if (name.empty()) continue; + if (startsWith(name, "tmp_")) + { + disk->removeFile(it->path()); + invalid_snapshots.insert(name.substr(4)); + continue; + } + + } + + /// process snapshots + for (auto it = disk->iterateDirectory(""); it->isValid(); it->next()) + { + const auto & name = it->name(); + if (name.empty()) + continue; + + /// Not snapshot file + if (!startsWith(name, "snapshot_")) + continue; + + if (invalid_snapshots.contains(name)) { disk->removeFile(it->path()); continue; } - /// Not snapshot file - if (!startsWith(name, "snapshot_")) - continue; + size_t snapshot_up_to = getSnapshotPathUpToLogIdx(name); existing_snapshots[snapshot_up_to] = it->path(); } @@ -549,11 +571,16 @@ std::string KeeperSnapshotManager::serializeSnapshotBufferToDisk(nuraft::buffer auto tmp_snapshot_file_name = "tmp_" + snapshot_file_name; auto disk = getDisk(); - auto plain_buf = disk->writeFile(tmp_snapshot_file_name); + + { + disk->writeFile(tmp_snapshot_file_name); + } + + auto plain_buf = disk->writeFile(snapshot_file_name); copyData(reader, *plain_buf); plain_buf->sync(); - disk->moveFile(tmp_snapshot_file_name, snapshot_file_name); + disk->removeFile(tmp_snapshot_file_name); existing_snapshots.emplace(up_to_log_idx, snapshot_file_name); removeOutdatedSnapshotsIfNeeded(); @@ -673,7 +700,12 @@ std::pair KeeperSnapshotManager::serializeSnapshot auto snapshot_file_name = getSnapshotFileName(up_to_log_idx, compress_snapshots_zstd); auto tmp_snapshot_file_name = "tmp_" + snapshot_file_name; - auto writer = getDisk()->writeFile(tmp_snapshot_file_name); + auto disk = getDisk(); + { + disk->writeFile(tmp_snapshot_file_name); + } + + auto writer = disk->writeFile(snapshot_file_name); std::unique_ptr compressed_writer; if (compress_snapshots_zstd) compressed_writer = wrapWriteBufferWithCompressionMethod(std::move(writer), CompressionMethod::Zstd, 3); @@ -688,7 +720,8 @@ std::pair KeeperSnapshotManager::serializeSnapshot try { - getDisk()->moveFile(tmp_snapshot_file_name, snapshot_file_name); + std::cout << "Removing file " << tmp_snapshot_file_name << std::endl; + disk->removeFile(tmp_snapshot_file_name); } catch (fs::filesystem_error & 
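/// Snapshot write protocol used above, summarized: (1) create an empty
/// "tmp_<name>" marker, (2) write the snapshot under its final name,
/// (3) sync it, (4) remove the marker. A crash between (1) and (4) leaves the
/// marker behind, and the directory scan in the constructor above then
/// deletes both the marker and the matching, possibly half-written, snapshot.
/// Condensed (IDisk calls as used in this file):
///
///     disk->writeFile(tmp_snapshot_file_name);              /// (1) marker only, buffer discarded
///     auto plain_buf = disk->writeFile(snapshot_file_name);
///     copyData(reader, *plain_buf);                         /// (2) real payload
///     plain_buf->sync();                                    /// (3) durable before the marker goes away
///     disk->removeFile(tmp_snapshot_file_name);             /// (4) commit point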
e) { diff --git a/src/Coordination/KeeperStateManager.cpp b/src/Coordination/KeeperStateManager.cpp index 352e67e7c43..6c1b14bc94f 100644 --- a/src/Coordination/KeeperStateManager.cpp +++ b/src/Coordination/KeeperStateManager.cpp @@ -385,7 +385,7 @@ nuraft::ptr KeeperStateManager::read_state() } auto state = nuraft::srv_state::deserialize(*state_buf); - LOG_INFO(logger, "Read state from {}", disk->getPath() + path); + LOG_INFO(logger, "Read state from {}", fs::path(disk->getPath()) / path); return state; } catch (const std::exception & e) @@ -408,7 +408,6 @@ nuraft::ptr KeeperStateManager::read_state() if (state) { disk->removeFileIfExists(old_path); - return state; } diff --git a/src/Coordination/Standalone/Context.cpp b/src/Coordination/Standalone/Context.cpp index 667fabf4d66..84083169df3 100644 --- a/src/Coordination/Standalone/Context.cpp +++ b/src/Coordination/Standalone/Context.cpp @@ -28,6 +28,10 @@ namespace CurrentMetrics namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} struct ContextSharedPart : boost::noncopyable { diff --git a/src/Core/SettingsFields.cpp b/src/Core/SettingsFields.cpp index c1f9fa00f2a..7711ed7465b 100644 --- a/src/Core/SettingsFields.cpp +++ b/src/Core/SettingsFields.cpp @@ -381,6 +381,11 @@ void SettingFieldMap::readBinary(ReadBuffer & in) #else +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + SettingFieldMap::SettingFieldMap(const Field &) : value(Map()) {} String SettingFieldMap::toString() const { diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp index a680a344746..650fde7bcd1 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp @@ -120,6 +120,7 @@ const IMetadataStorage & MetadataStorageFromPlainObjectStorageTransaction::getSt void MetadataStorageFromPlainObjectStorageTransaction::unlinkFile(const std::string & path) { auto object = StoredObject(metadata_storage.getAbsolutePath(path)); + std::cout << "Removing from plain " << path << std::endl; metadata_storage.object_storage->removeObject(object); } diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h index 0beed65879b..fb5b6d0757c 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h @@ -63,7 +63,7 @@ public: uint32_t getHardlinkCount(const std::string & /* path */) const override { - return 1; + return 0; } bool supportsChmod() const override { return false; } diff --git a/src/IO/ZstdDeflatingAppendableWriteBuffer.cpp b/src/IO/ZstdDeflatingAppendableWriteBuffer.cpp index 406de532216..81be8d8ce4d 100644 --- a/src/IO/ZstdDeflatingAppendableWriteBuffer.cpp +++ b/src/IO/ZstdDeflatingAppendableWriteBuffer.cpp @@ -1,5 +1,6 @@ #include #include +#include "IO/ReadBufferFromFileBase.h" #include namespace DB @@ -14,11 +15,13 @@ ZstdDeflatingAppendableWriteBuffer::ZstdDeflatingAppendableWriteBuffer( std::unique_ptr out_, int compression_level, bool append_to_existing_file_, + std::function()> read_buffer_creator_, size_t buf_size, char * existing_memory, size_t alignment) : BufferWithOwnMemory(buf_size, existing_memory, alignment) , out(std::move(out_)) + , read_buffer_creator(std::move(read_buffer_creator_)) , append_to_existing_file(append_to_existing_file_) { cctx = ZSTD_createCCtx(); @@ 
-194,13 +197,13 @@ void ZstdDeflatingAppendableWriteBuffer::addEmptyBlock() bool ZstdDeflatingAppendableWriteBuffer::isNeedToAddEmptyBlock() { - ReadBufferFromFile reader(out->getFileName()); - auto fsize = reader.getFileSize(); + auto reader = read_buffer_creator(); + auto fsize = reader->getFileSize(); if (fsize > 3) { std::array result; - reader.seek(fsize - 3, SEEK_SET); - reader.readStrict(result.data(), 3); + reader->seek(fsize - 3, SEEK_SET); + reader->readStrict(result.data(), 3); /// If we don't have correct block in the end, then we need to add it manually. /// NOTE: maybe we can have the same bytes in case of data corruption/unfinished write. diff --git a/src/IO/ZstdDeflatingAppendableWriteBuffer.h b/src/IO/ZstdDeflatingAppendableWriteBuffer.h index b64e5d5c4cf..d9c4f32d6da 100644 --- a/src/IO/ZstdDeflatingAppendableWriteBuffer.h +++ b/src/IO/ZstdDeflatingAppendableWriteBuffer.h @@ -5,6 +5,7 @@ #include #include #include +#include #include @@ -32,6 +33,7 @@ public: std::unique_ptr out_, int compression_level, bool append_to_existing_file_, + std::function()> read_buffer_creator_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, size_t alignment = 0); @@ -69,6 +71,7 @@ private: void addEmptyBlock(); std::unique_ptr out; + std::function()> read_buffer_creator; bool append_to_existing_file = false; ZSTD_CCtx * cctx; diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 0ec39f18757..84e42830dee 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -1235,7 +1235,7 @@ struct HTTPContext : public IHTTPContext } -#else +#else #include From 5db21607627cf244ce40cf12d8be3dd4430c218a Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 24 May 2023 09:04:12 +0000 Subject: [PATCH 0178/1997] Fix build --- programs/keeper-converter/KeeperConverter.cpp | 3 +- src/Coordination/KeeperContext.cpp | 18 +- src/Coordination/KeeperContext.h | 7 +- src/Coordination/KeeperDispatcher.cpp | 44 +- src/Coordination/KeeperServer.cpp | 3 +- src/Coordination/KeeperServer.h | 1 + src/Coordination/KeeperSnapshotManager.cpp | 2 +- src/Coordination/tests/gtest_coordination.cpp | 463 ++++++++++++------ 8 files changed, 358 insertions(+), 183 deletions(-) diff --git a/programs/keeper-converter/KeeperConverter.cpp b/programs/keeper-converter/KeeperConverter.cpp index 58d090ca8b9..f2389021cb6 100644 --- a/programs/keeper-converter/KeeperConverter.cpp +++ b/programs/keeper-converter/KeeperConverter.cpp @@ -42,6 +42,7 @@ int mainEntryClickHouseKeeperConverter(int argc, char ** argv) { auto keeper_context = std::make_shared(true); keeper_context->setDigestEnabled(true); + keeper_context->setSnapshotDisk(std::make_shared("Keeper-snapshots", options["output-dir"].as(), 0)); DB::KeeperStorage storage(/* tick_time_ms */ 500, /* superdigest */ "", keeper_context, /* initialize_system_nodes */ false); @@ -52,7 +53,7 @@ int mainEntryClickHouseKeeperConverter(int argc, char ** argv) DB::SnapshotMetadataPtr snapshot_meta = std::make_shared(storage.getZXID(), 1, std::make_shared()); DB::KeeperStorageSnapshot snapshot(&storage, snapshot_meta); - DB::KeeperSnapshotManager manager(std::make_shared("Keeper-snapshots", options["output-dir"].as(), 0), 1, keeper_context); + DB::KeeperSnapshotManager manager(1, keeper_context); auto snp = manager.serializeSnapshotToBuffer(snapshot); auto path = manager.serializeSnapshotBufferToDisk(*snp, storage.getZXID()); std::cout << "Snapshot serialized to path:" << path << std::endl; diff --git 
a/src/Coordination/KeeperContext.cpp b/src/Coordination/KeeperContext.cpp index bacafe75d13..3c6411a3a24 100644 --- a/src/Coordination/KeeperContext.cpp +++ b/src/Coordination/KeeperContext.cpp @@ -76,16 +76,32 @@ DiskPtr KeeperContext::getCurrentLogDisk() const return getDisk(current_log_storage); } -DiskPtr KeeperContext::getSnapshotsDisk() const +void KeeperContext::setLogDisk(DiskPtr disk) +{ + log_storage = disk; + current_log_storage = std::move(disk); +} + +DiskPtr KeeperContext::getSnapshotDisk() const { return getDisk(snapshot_storage); } +void KeeperContext::setSnapshotDisk(DiskPtr disk) +{ + snapshot_storage = std::move(disk); +} + DiskPtr KeeperContext::getStateFileDisk() const { return getDisk(state_file_storage); } +void KeeperContext::setStateFileDisk(DiskPtr disk) +{ + state_file_storage = std::move(disk); +} + KeeperContext::Storage KeeperContext::getLogsPathFromConfig(const Poco::Util::AbstractConfiguration & config) const { const auto create_local_disk = [](const auto & path) diff --git a/src/Coordination/KeeperContext.h b/src/Coordination/KeeperContext.h index aec2061bddf..4e538c99649 100644 --- a/src/Coordination/KeeperContext.h +++ b/src/Coordination/KeeperContext.h @@ -34,8 +34,13 @@ public: DiskPtr getCurrentLogDisk() const; DiskPtr getLogDisk() const; - DiskPtr getSnapshotsDisk() const; + void setLogDisk(DiskPtr disk); + + DiskPtr getSnapshotDisk() const; + void setSnapshotDisk(DiskPtr disk); + DiskPtr getStateFileDisk() const; + void setStateFileDisk(DiskPtr disk); private: /// local disk defined using path or disk name using Storage = std::variant; diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index 6632e58782f..17a15067301 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -336,28 +336,36 @@ void KeeperDispatcher::initialize(const Poco::Util::AbstractConfiguration & conf snapshot_s3.startup(config, macros); - server = std::make_unique(configuration_and_settings, config, responses_queue, snapshots_queue, snapshot_s3, [this](const KeeperStorage::RequestForSession & request_for_session) - { - /// check if we have queue of read requests depending on this request to be committed - std::lock_guard lock(read_request_queue_mutex); - if (auto it = read_request_queue.find(request_for_session.session_id); it != read_request_queue.end()) + server = std::make_unique( + configuration_and_settings, + config, + responses_queue, + snapshots_queue, + standalone_keeper, + snapshot_s3, + [this](const KeeperStorage::RequestForSession & request_for_session) { - auto & xid_to_request_queue = it->second; - - if (auto request_queue_it = xid_to_request_queue.find(request_for_session.request->xid); request_queue_it != xid_to_request_queue.end()) + /// check if we have queue of read requests depending on this request to be committed + std::lock_guard lock(read_request_queue_mutex); + if (auto it = read_request_queue.find(request_for_session.session_id); it != read_request_queue.end()) { - for (const auto & read_request : request_queue_it->second) - { - if (server->isLeaderAlive()) - server->putLocalReadRequest(read_request); - else - addErrorResponses({read_request}, Coordination::Error::ZCONNECTIONLOSS); - } + auto & xid_to_request_queue = it->second; - xid_to_request_queue.erase(request_queue_it); + if (auto request_queue_it = xid_to_request_queue.find(request_for_session.request->xid); + request_queue_it != xid_to_request_queue.end()) + { + for (const auto & read_request : 
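/// With the setters added above, tests and tools can hand KeeperContext
/// explicit disks instead of a parsed config. A minimal sketch — DiskLocal's
/// (name, path, keep_free_bytes) arguments follow the pattern used elsewhere
/// in this patch, and the paths are placeholders:
///
///     auto context = std::make_shared<KeeperContext>(/*standalone_keeper=*/true);
///     context->setLogDisk(std::make_shared<DiskLocal>("LogDisk", "./logs", 0)); /// also becomes the current log disk
///     context->setSnapshotDisk(std::make_shared<DiskLocal>("SnapshotDisk", "./snapshots", 0));
///     context->setStateFileDisk(std::make_shared<DiskLocal>("StateFile", "./state", 0));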
request_queue_it->second) + { + if (server->isLeaderAlive()) + server->putLocalReadRequest(read_request); + else + addErrorResponses({read_request}, Coordination::Error::ZCONNECTIONLOSS); + } + + xid_to_request_queue.erase(request_queue_it); + } } - } - }); + }); try { diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 4cacf566df6..31c91e2de80 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -108,13 +108,14 @@ KeeperServer::KeeperServer( const Poco::Util::AbstractConfiguration & config, ResponsesQueue & responses_queue_, SnapshotsQueue & snapshots_queue_, + bool standalone_keeper, KeeperSnapshotManagerS3 & snapshot_manager_s3, KeeperStateMachine::CommitCallback commit_callback) : server_id(configuration_and_settings_->server_id) , coordination_settings(configuration_and_settings_->coordination_settings) , log(&Poco::Logger::get("KeeperServer")) , is_recovering(config.getBool("keeper_server.force_recovery", false)) - , keeper_context{std::make_shared(true)} + , keeper_context{std::make_shared(standalone_keeper)} , create_snapshot_on_exit(config.getBool("keeper_server.create_snapshot_on_exit", true)) { if (coordination_settings->quorum_reads) diff --git a/src/Coordination/KeeperServer.h b/src/Coordination/KeeperServer.h index db4e9c1962e..63f9cc2bcea 100644 --- a/src/Coordination/KeeperServer.h +++ b/src/Coordination/KeeperServer.h @@ -72,6 +72,7 @@ public: const Poco::Util::AbstractConfiguration & config_, ResponsesQueue & responses_queue_, SnapshotsQueue & snapshots_queue_, + bool standalone_keeper, KeeperSnapshotManagerS3 & snapshot_manager_s3, KeeperStateMachine::CommitCallback commit_callback); diff --git a/src/Coordination/KeeperSnapshotManager.cpp b/src/Coordination/KeeperSnapshotManager.cpp index bfadf3af9aa..7d808e88b3d 100644 --- a/src/Coordination/KeeperSnapshotManager.cpp +++ b/src/Coordination/KeeperSnapshotManager.cpp @@ -676,7 +676,7 @@ SnapshotDeserializationResult KeeperSnapshotManager::restoreFromLatestSnapshot() DiskPtr KeeperSnapshotManager::getDisk() const { - return keeper_context->getSnapshotsDisk(); + return keeper_context->getSnapshotDisk(); } void KeeperSnapshotManager::removeOutdatedSnapshotsIfNeeded() diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index de5f2da262b..d3783a83bb0 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -2,35 +2,37 @@ #include #include "Common/ZooKeeper/IKeeper.h" -#include "Coordination/KeeperContext.h" -#include "Coordination/KeeperStorage.h" #include "Core/Defines.h" -#include "IO/WriteHelpers.h" #include "config.h" #if USE_NURAFT +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include #include #include -#include #include -#include -#include -#include -#include -#include -#include -#include + +#include #include @@ -39,9 +41,7 @@ struct ChangelogDirTest { std::string path; bool drop; - explicit ChangelogDirTest(std::string path_, bool drop_ = true) - : path(path_) - , drop(drop_) + explicit ChangelogDirTest(std::string path_, bool drop_ = true) : path(path_), drop(drop_) { if (fs::exists(path)) { @@ -66,8 +66,20 @@ struct CompressionParam class CoordinationTest : public 
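/// ChangelogDirTest above is a small RAII guard: the constructor creates the
/// test directory and the destructor removes it again (when drop is set), so
/// every test starts and ends with a clean directory. The fixture helpers
/// below pair it with a DiskLocal over the same path:
///
///     ChangelogDirTest test("./logs"); /// mkdir now, recursive remove at scope exit
///     setLogDirectory("./logs");       /// keeper_context now serves ./logs as the log disk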
::testing::TestWithParam { protected: - DB::KeeperContextPtr keeper_context = std::make_shared(); + DB::KeeperContextPtr keeper_context = std::make_shared(true); Poco::Logger * log{&Poco::Logger::get("CoordinationTest")}; + + void setLogDirectory(const std::string & path) { keeper_context->setLogDisk(std::make_shared("LogDisk", path, 0)); } + + void setSnapshotDirectory(const std::string & path) + { + keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", path, 0)); + } + + void setStateFileDirectory(const std::string & path) + { + keeper_context->setStateFileDisk(std::make_shared("StateFile", path, 0)); + } }; TEST_P(CoordinationTest, BuildTest) @@ -113,13 +125,14 @@ TEST_P(CoordinationTest, BufferSerde) template struct SimpliestRaftServer { - SimpliestRaftServer(int server_id_, const std::string & hostname_, int port_, const std::string & logs_path, const std::string & state_path) + SimpliestRaftServer( + int server_id_, const std::string & hostname_, int port_, DB::KeeperContextPtr keeper_context) : server_id(server_id_) , hostname(hostname_) , port(port_) , endpoint(hostname + ":" + std::to_string(port)) , state_machine(nuraft::cs_new()) - , state_manager(nuraft::cs_new(server_id, hostname, port, logs_path, state_path)) + , state_manager(nuraft::cs_new(server_id, hostname, port, keeper_context)) { state_manager->loadLogStore(1, 0); nuraft::raft_params params; @@ -135,8 +148,13 @@ struct SimpliestRaftServer nuraft::raft_server::init_options opts; opts.start_server_in_constructor_ = false; raft_instance = launcher.init( - state_machine, state_manager, nuraft::cs_new("ToyRaftLogger", DB::LogsLevel::trace), port, - nuraft::asio_service::options{}, params, opts); + state_machine, + state_manager, + nuraft::cs_new("ToyRaftLogger", DB::LogsLevel::trace), + port, + nuraft::asio_service::options{}, + params, + opts); if (!raft_instance) { @@ -195,15 +213,14 @@ nuraft::ptr getBuffer(int64_t number) return ret; } - TEST_P(CoordinationTest, TestSummingRaft1) { ChangelogDirTest test("./logs"); - SummingRaftServer s1(1, "localhost", 44444, "./logs", "./state"); - SCOPE_EXIT( - if (std::filesystem::exists("./state")) - std::filesystem::remove("./state"); - ); + setLogDirectory("./logs"); + setStateFileDirectory("."); + + SummingRaftServer s1(1, "localhost", 44444, keeper_context); + SCOPE_EXIT(if (std::filesystem::exists("./state")) std::filesystem::remove("./state");); /// Single node is leader EXPECT_EQ(s1.raft_instance->get_leader(), 1); @@ -235,8 +252,10 @@ TEST_P(CoordinationTest, ChangelogTestSimple) { auto params = GetParam(); ChangelogDirTest test("./logs"); + setLogDirectory("./logs"); - DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}); + DB::KeeperLogStore changelog( + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, keeper_context); changelog.init(1, 0); auto entry = getLogEntry("hello world", 77); changelog.append(entry); @@ -263,7 +282,10 @@ TEST_P(CoordinationTest, ChangelogTestFile) { auto params = GetParam(); ChangelogDirTest test("./logs"); - DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}); + setLogDirectory("./logs"); + + DB::KeeperLogStore changelog( + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, keeper_context); changelog.init(1, 0); auto entry = 
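/// The mechanical change repeated throughout this test file, before/after:
/// the log store no longer takes a path, it resolves its disk through
/// KeeperContext (the settings values below are illustrative):
///
///     // before
///     DB::KeeperLogStore changelog(
///         "./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = true, .rotate_interval = 5});
///     // after
///     setLogDirectory("./logs"); /// fixture helper installing a DiskLocal on keeper_context
///     DB::KeeperLogStore changelog(
///         DB::LogFileSettings{.force_sync = true, .compress_logs = true, .rotate_interval = 5}, keeper_context);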
getLogEntry("hello world", 77); changelog.append(entry); @@ -292,7 +314,10 @@ TEST_P(CoordinationTest, ChangelogReadWrite) { auto params = GetParam(); ChangelogDirTest test("./logs"); - DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 1000}); + setLogDirectory("./logs"); + + DB::KeeperLogStore changelog( + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 1000}, keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 10; ++i) @@ -306,7 +331,8 @@ TEST_P(CoordinationTest, ChangelogReadWrite) waitDurableLogs(changelog); - DB::KeeperLogStore changelog_reader("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 1000}); + DB::KeeperLogStore changelog_reader( + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 1000}, keeper_context); changelog_reader.init(1, 0); EXPECT_EQ(changelog_reader.size(), 10); EXPECT_EQ(changelog_reader.last_entry()->get_term(), changelog.last_entry()->get_term()); @@ -326,7 +352,10 @@ TEST_P(CoordinationTest, ChangelogWriteAt) { auto params = GetParam(); ChangelogDirTest test("./logs"); - DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 1000}); + setLogDirectory("./logs"); + + DB::KeeperLogStore changelog( + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 1000}, keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 10; ++i) { @@ -348,7 +377,8 @@ TEST_P(CoordinationTest, ChangelogWriteAt) EXPECT_EQ(changelog.entry_at(7)->get_term(), 77); EXPECT_EQ(changelog.next_slot(), 8); - DB::KeeperLogStore changelog_reader("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 1000}); + DB::KeeperLogStore changelog_reader( + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 1000}, keeper_context); changelog_reader.init(1, 0); EXPECT_EQ(changelog_reader.size(), changelog.size()); @@ -362,7 +392,10 @@ TEST_P(CoordinationTest, ChangelogTestAppendAfterRead) { auto params = GetParam(); ChangelogDirTest test("./logs"); - DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}); + setLogDirectory("./logs"); + + DB::KeeperLogStore changelog( + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 7; ++i) { @@ -378,7 +411,8 @@ TEST_P(CoordinationTest, ChangelogTestAppendAfterRead) EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); - DB::KeeperLogStore changelog_reader("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}); + DB::KeeperLogStore changelog_reader( + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, keeper_context); changelog_reader.init(1, 0); EXPECT_EQ(changelog_reader.size(), 7); @@ -395,7 +429,7 @@ TEST_P(CoordinationTest, ChangelogTestAppendAfterRead) EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); size_t logs_count = 
0; - for (const auto & _ [[maybe_unused]]: fs::directory_iterator("./logs")) + for (const auto & _ [[maybe_unused]] : fs::directory_iterator("./logs")) logs_count++; EXPECT_EQ(logs_count, 2); @@ -412,7 +446,7 @@ TEST_P(CoordinationTest, ChangelogTestAppendAfterRead) EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + params.extension)); logs_count = 0; - for (const auto & _ [[maybe_unused]]: fs::directory_iterator("./logs")) + for (const auto & _ [[maybe_unused]] : fs::directory_iterator("./logs")) logs_count++; EXPECT_EQ(logs_count, 3); @@ -440,7 +474,10 @@ TEST_P(CoordinationTest, ChangelogTestCompaction) { auto params = GetParam(); ChangelogDirTest test("./logs"); - DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}); + setLogDirectory("./logs"); + + DB::KeeperLogStore changelog( + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 3; ++i) @@ -489,7 +526,8 @@ TEST_P(CoordinationTest, ChangelogTestCompaction) EXPECT_EQ(changelog.next_slot(), 8); EXPECT_EQ(changelog.last_entry()->get_term(), 60); /// And we able to read it - DB::KeeperLogStore changelog_reader("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}); + DB::KeeperLogStore changelog_reader( + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, keeper_context); changelog_reader.init(7, 0); EXPECT_EQ(changelog_reader.size(), 1); @@ -502,7 +540,10 @@ TEST_P(CoordinationTest, ChangelogTestBatchOperations) { auto params = GetParam(); ChangelogDirTest test("./logs"); - DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}); + setLogDirectory("./logs"); + + DB::KeeperLogStore changelog( + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 10; ++i) { @@ -517,7 +558,8 @@ TEST_P(CoordinationTest, ChangelogTestBatchOperations) auto entries = changelog.pack(1, 5); - DB::KeeperLogStore apply_changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}); + DB::KeeperLogStore apply_changelog( + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, keeper_context); apply_changelog.init(1, 0); for (size_t i = 0; i < 10; ++i) @@ -549,23 +591,31 @@ TEST_P(CoordinationTest, ChangelogTestBatchOperationsEmpty) { auto params = GetParam(); ChangelogDirTest test("./logs"); - DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}); - changelog.init(1, 0); - for (size_t i = 0; i < 10; ++i) + setLogDirectory("./logs"); + + nuraft::ptr entries; { - auto entry = getLogEntry(std::to_string(i) + "_hello_world", i * 10); - changelog.append(entry); + DB::KeeperLogStore changelog( + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, keeper_context); + changelog.init(1, 0); + for (size_t i = 0; i < 10; ++i) + { + auto entry = getLogEntry(std::to_string(i) + "_hello_world", i * 10); + changelog.append(entry); + } + changelog.end_of_append_batch(0, 0); + + 
EXPECT_EQ(changelog.size(), 10); + + waitDurableLogs(changelog); + + entries = changelog.pack(5, 5); } - changelog.end_of_append_batch(0, 0); - - EXPECT_EQ(changelog.size(), 10); - - waitDurableLogs(changelog); - - auto entries = changelog.pack(5, 5); ChangelogDirTest test1("./logs1"); - DB::KeeperLogStore changelog_new("./logs1", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}); + setLogDirectory("./logs1"); + DB::KeeperLogStore changelog_new( + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, keeper_context); changelog_new.init(1, 0); EXPECT_EQ(changelog_new.size(), 0); @@ -587,7 +637,8 @@ TEST_P(CoordinationTest, ChangelogTestBatchOperationsEmpty) EXPECT_EQ(changelog_new.start_index(), 5); EXPECT_EQ(changelog_new.next_slot(), 11); - DB::KeeperLogStore changelog_reader("./logs1", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}); + DB::KeeperLogStore changelog_reader( + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, keeper_context); changelog_reader.init(5, 0); } @@ -596,7 +647,10 @@ TEST_P(CoordinationTest, ChangelogTestWriteAtPreviousFile) { auto params = GetParam(); ChangelogDirTest test("./logs"); - DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}); + setLogDirectory("./logs"); + + DB::KeeperLogStore changelog( + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 33; ++i) @@ -637,7 +691,8 @@ TEST_P(CoordinationTest, ChangelogTestWriteAtPreviousFile) EXPECT_FALSE(fs::exists("./logs/changelog_26_30.bin" + params.extension)); EXPECT_FALSE(fs::exists("./logs/changelog_31_35.bin" + params.extension)); - DB::KeeperLogStore changelog_read("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}); + DB::KeeperLogStore changelog_read( + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, keeper_context); changelog_read.init(1, 0); EXPECT_EQ(changelog_read.size(), 7); EXPECT_EQ(changelog_read.start_index(), 1); @@ -649,7 +704,10 @@ TEST_P(CoordinationTest, ChangelogTestWriteAtFileBorder) { auto params = GetParam(); ChangelogDirTest test("./logs"); - DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}); + setLogDirectory("./logs"); + + DB::KeeperLogStore changelog( + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 33; ++i) @@ -690,7 +748,8 @@ TEST_P(CoordinationTest, ChangelogTestWriteAtFileBorder) EXPECT_FALSE(fs::exists("./logs/changelog_26_30.bin" + params.extension)); EXPECT_FALSE(fs::exists("./logs/changelog_31_35.bin" + params.extension)); - DB::KeeperLogStore changelog_read("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}); + DB::KeeperLogStore changelog_read( + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, keeper_context); changelog_read.init(1, 0); EXPECT_EQ(changelog_read.size(), 11); 
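/// The scoping in ChangelogTestBatchOperationsEmpty above is the point of the
/// test: pack(5, 5) serializes entries 5..9 into one buffer, the source store
/// is destroyed, and only then is the buffer replayed into a second store on
/// a different directory — proving the packed buffer is self-contained.
/// Skeleton of that shape (the nuraft template arguments are elided in the
/// hunks above; log_file_settings stands for the settings used there):
///
///     nuraft::ptr<nuraft::buffer> entries;
///     {
///         DB::KeeperLogStore source(log_file_settings, keeper_context);
///         source.init(1, 0);
///         /// ... append 10 entries ...
///         entries = source.pack(5, 5);
///     } /// source closed before the buffer is used elsewhere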
EXPECT_EQ(changelog_read.start_index(), 1); @@ -702,7 +761,10 @@ TEST_P(CoordinationTest, ChangelogTestWriteAtAllFiles) { auto params = GetParam(); ChangelogDirTest test("./logs"); - DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}); + setLogDirectory("./logs"); + + DB::KeeperLogStore changelog( + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 33; ++i) { @@ -747,7 +809,10 @@ TEST_P(CoordinationTest, ChangelogTestStartNewLogAfterRead) { auto params = GetParam(); ChangelogDirTest test("./logs"); - DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}); + setLogDirectory("./logs"); + + DB::KeeperLogStore changelog( + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 35; ++i) @@ -768,7 +833,8 @@ TEST_P(CoordinationTest, ChangelogTestStartNewLogAfterRead) EXPECT_TRUE(fs::exists("./logs/changelog_31_35.bin" + params.extension)); EXPECT_FALSE(fs::exists("./logs/changelog_36_40.bin" + params.extension)); - DB::KeeperLogStore changelog_reader("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}); + DB::KeeperLogStore changelog_reader( + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, keeper_context); changelog_reader.init(1, 0); auto entry = getLogEntry("36_hello_world", 360); @@ -792,16 +858,16 @@ namespace { void assertBrokenLogRemoved(const fs::path & log_folder, const fs::path & filename) { - EXPECT_FALSE(fs::exists(log_folder / filename)); - // broken logs are sent to the detached/{timestamp} folder - // we don't know timestamp so we iterate all of them - for (const auto & dir_entry : fs::recursive_directory_iterator(log_folder / "detached")) - { - if (dir_entry.path().filename() == filename) - return; - } + EXPECT_FALSE(fs::exists(log_folder / filename)); + // broken logs are sent to the detached/{timestamp} folder + // we don't know timestamp so we iterate all of them + for (const auto & dir_entry : fs::recursive_directory_iterator(log_folder / "detached")) + { + if (dir_entry.path().filename() == filename) + return; + } - FAIL() << "Broken log " << filename << " was not moved to the detached folder"; + FAIL() << "Broken log " << filename << " was not moved to the detached folder"; } } @@ -812,8 +878,10 @@ TEST_P(CoordinationTest, ChangelogTestReadAfterBrokenTruncate) auto params = GetParam(); ChangelogDirTest test(log_folder); + setLogDirectory(log_folder); - DB::KeeperLogStore changelog(log_folder, DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}); + DB::KeeperLogStore changelog( + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 35; ++i) @@ -833,10 +901,12 @@ TEST_P(CoordinationTest, ChangelogTestReadAfterBrokenTruncate) EXPECT_TRUE(fs::exists("./logs/changelog_26_30.bin" + params.extension)); EXPECT_TRUE(fs::exists("./logs/changelog_31_35.bin" + params.extension)); - DB::WriteBufferFromFile plain_buf("./logs/changelog_11_15.bin" + params.extension, DBMS_DEFAULT_BUFFER_SIZE, 
O_APPEND | O_CREAT | O_WRONLY); + DB::WriteBufferFromFile plain_buf( + "./logs/changelog_11_15.bin" + params.extension, DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY); plain_buf.truncate(0); - DB::KeeperLogStore changelog_reader("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}); + DB::KeeperLogStore changelog_reader( + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, keeper_context); changelog_reader.init(1, 0); changelog_reader.end_of_append_batch(0, 0); @@ -869,7 +939,8 @@ TEST_P(CoordinationTest, ChangelogTestReadAfterBrokenTruncate) assertBrokenLogRemoved(log_folder, "changelog_26_30.bin" + params.extension); assertBrokenLogRemoved(log_folder, "changelog_31_35.bin" + params.extension); - DB::KeeperLogStore changelog_reader2("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}); + DB::KeeperLogStore changelog_reader2( + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, keeper_context); changelog_reader2.init(1, 0); EXPECT_EQ(changelog_reader2.size(), 11); EXPECT_EQ(changelog_reader2.last_entry()->get_term(), 7777); @@ -879,8 +950,10 @@ TEST_P(CoordinationTest, ChangelogTestReadAfterBrokenTruncate2) { auto params = GetParam(); ChangelogDirTest test("./logs"); + setLogDirectory("./logs"); - DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 20}); + DB::KeeperLogStore changelog( + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 20}, keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 35; ++i) @@ -894,10 +967,12 @@ TEST_P(CoordinationTest, ChangelogTestReadAfterBrokenTruncate2) EXPECT_TRUE(fs::exists("./logs/changelog_1_20.bin" + params.extension)); EXPECT_TRUE(fs::exists("./logs/changelog_21_40.bin" + params.extension)); - DB::WriteBufferFromFile plain_buf("./logs/changelog_1_20.bin" + params.extension, DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY); + DB::WriteBufferFromFile plain_buf( + "./logs/changelog_1_20.bin" + params.extension, DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY); plain_buf.truncate(30); - DB::KeeperLogStore changelog_reader("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 20}); + DB::KeeperLogStore changelog_reader( + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 20}, keeper_context); changelog_reader.init(1, 0); EXPECT_EQ(changelog_reader.size(), 0); @@ -912,7 +987,8 @@ TEST_P(CoordinationTest, ChangelogTestReadAfterBrokenTruncate2) EXPECT_EQ(changelog_reader.size(), 1); EXPECT_EQ(changelog_reader.last_entry()->get_term(), 7777); - DB::KeeperLogStore changelog_reader2("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 1}); + DB::KeeperLogStore changelog_reader2( + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 1}, keeper_context); changelog_reader2.init(1, 0); EXPECT_EQ(changelog_reader2.size(), 1); EXPECT_EQ(changelog_reader2.last_entry()->get_term(), 7777); @@ -922,8 +998,10 @@ TEST_P(CoordinationTest, ChangelogTestLostFiles) { auto params = GetParam(); ChangelogDirTest test("./logs"); + 
setLogDirectory("./logs"); - DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 20}); + DB::KeeperLogStore changelog( + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 20}, keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 35; ++i) @@ -939,7 +1017,8 @@ TEST_P(CoordinationTest, ChangelogTestLostFiles) fs::remove("./logs/changelog_1_20.bin" + params.extension); - DB::KeeperLogStore changelog_reader("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 20}); + DB::KeeperLogStore changelog_reader( + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 20}, keeper_context); /// It should print error message, but still able to start changelog_reader.init(5, 0); assertBrokenLogRemoved("./logs", "changelog_21_40.bin" + params.extension); @@ -949,8 +1028,10 @@ TEST_P(CoordinationTest, ChangelogTestLostFiles2) { auto params = GetParam(); ChangelogDirTest test("./logs"); + setLogDirectory("./logs"); - DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 10}); + DB::KeeperLogStore changelog( + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 10}, keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 35; ++i) @@ -970,7 +1051,8 @@ TEST_P(CoordinationTest, ChangelogTestLostFiles2) // we have a gap in our logs, we need to remove all the logs after the gap fs::remove("./logs/changelog_21_30.bin" + params.extension); - DB::KeeperLogStore changelog_reader("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 10}); + DB::KeeperLogStore changelog_reader( + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 10}, keeper_context); /// It should print error message, but still able to start changelog_reader.init(5, 0); EXPECT_TRUE(fs::exists("./logs/changelog_1_10.bin" + params.extension)); @@ -1155,7 +1237,7 @@ TEST_P(CoordinationTest, SnapshotableHashMapDataSize) EXPECT_EQ(world.getApproximateDataSize(), 0); } -void addNode(DB::KeeperStorage & storage, const std::string & path, const std::string & data, int64_t ephemeral_owner=0) +void addNode(DB::KeeperStorage & storage, const std::string & path, const std::string & data, int64_t ephemeral_owner = 0) { using Node = DB::KeeperStorage::Node; Node node{}; @@ -1163,19 +1245,23 @@ void addNode(DB::KeeperStorage & storage, const std::string & path, const std::s node.stat.ephemeralOwner = ephemeral_owner; storage.container.insertOrReplace(path, node); auto child_it = storage.container.find(path); - auto child_path = DB::getBaseName(child_it->key); - storage.container.updateValue(DB::parentPath(StringRef{path}), [&](auto & parent) - { - parent.addChild(child_path); - parent.stat.numChildren++; - }); + auto child_path = DB::getBaseNodeName(child_it->key); + storage.container.updateValue( + DB::parentNodePath(StringRef{path}), + [&](auto & parent) + { + parent.addChild(child_path); + parent.stat.numChildren++; + }); } TEST_P(CoordinationTest, TestStorageSnapshotSimple) { auto params = GetParam(); ChangelogDirTest test("./snapshots"); - DB::KeeperSnapshotManager manager("./snapshots", 3, keeper_context, params.enable_compression); + 
setSnapshotDirectory("./snapshots"); + + DB::KeeperSnapshotManager manager(3, keeper_context, params.enable_compression); DB::KeeperStorage storage(500, "", keeper_context); addNode(storage, "/hello", "world", 1); @@ -1223,7 +1309,9 @@ TEST_P(CoordinationTest, TestStorageSnapshotMoreWrites) { auto params = GetParam(); ChangelogDirTest test("./snapshots"); - DB::KeeperSnapshotManager manager("./snapshots", 3, keeper_context, params.enable_compression); + setSnapshotDirectory("./snapshots"); + + DB::KeeperSnapshotManager manager(3, keeper_context, params.enable_compression); DB::KeeperStorage storage(500, "", keeper_context); storage.getSessionID(130); @@ -1264,7 +1352,9 @@ TEST_P(CoordinationTest, TestStorageSnapshotManySnapshots) { auto params = GetParam(); ChangelogDirTest test("./snapshots"); - DB::KeeperSnapshotManager manager("./snapshots", 3, keeper_context, params.enable_compression); + setSnapshotDirectory("./snapshots"); + + DB::KeeperSnapshotManager manager(3, keeper_context, params.enable_compression); DB::KeeperStorage storage(500, "", keeper_context); storage.getSessionID(130); @@ -1303,7 +1393,9 @@ TEST_P(CoordinationTest, TestStorageSnapshotMode) { auto params = GetParam(); ChangelogDirTest test("./snapshots"); - DB::KeeperSnapshotManager manager("./snapshots", 3, keeper_context, params.enable_compression); + setSnapshotDirectory("./snapshots"); + + DB::KeeperSnapshotManager manager(3, keeper_context, params.enable_compression); DB::KeeperStorage storage(500, "", keeper_context); for (size_t i = 0; i < 50; ++i) { @@ -1349,14 +1441,15 @@ TEST_P(CoordinationTest, TestStorageSnapshotMode) { EXPECT_EQ(restored_storage->container.getValue("/hello_" + std::to_string(i)).getData(), "world_" + std::to_string(i)); } - } TEST_P(CoordinationTest, TestStorageSnapshotBroken) { auto params = GetParam(); ChangelogDirTest test("./snapshots"); - DB::KeeperSnapshotManager manager("./snapshots", 3, keeper_context, params.enable_compression); + setSnapshotDirectory("./snapshots"); + + DB::KeeperSnapshotManager manager(3, keeper_context, params.enable_compression); DB::KeeperStorage storage(500, "", keeper_context); for (size_t i = 0; i < 50; ++i) { @@ -1370,7 +1463,8 @@ TEST_P(CoordinationTest, TestStorageSnapshotBroken) EXPECT_TRUE(fs::exists("./snapshots/snapshot_50.bin" + params.extension)); /// Let's corrupt file - DB::WriteBufferFromFile plain_buf("./snapshots/snapshot_50.bin" + params.extension, DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY); + DB::WriteBufferFromFile plain_buf( + "./snapshots/snapshot_50.bin" + params.extension, DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY); plain_buf.truncate(34); plain_buf.sync(); @@ -1390,25 +1484,35 @@ nuraft::ptr getBufferFromZKRequest(int64_t session_id, int64_t z return buf.getBuffer(); } -nuraft::ptr getLogEntryFromZKRequest(size_t term, int64_t session_id, int64_t zxid, const Coordination::ZooKeeperRequestPtr & request) +nuraft::ptr +getLogEntryFromZKRequest(size_t term, int64_t session_id, int64_t zxid, const Coordination::ZooKeeperRequestPtr & request) { auto buffer = getBufferFromZKRequest(session_id, zxid, request); return nuraft::cs_new(term, buffer); } -void testLogAndStateMachine(Coordination::CoordinationSettingsPtr settings, uint64_t total_logs, bool enable_compression, Coordination::KeeperContextPtr keeper_context) +void testLogAndStateMachine( + Coordination::CoordinationSettingsPtr settings, + uint64_t total_logs, + bool enable_compression, + Coordination::KeeperContextPtr keeper_context) { using namespace 
Coordination; using namespace DB; ChangelogDirTest snapshots("./snapshots"); + keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", "./snapshots", 0)); ChangelogDirTest logs("./logs"); + keeper_context->setLogDisk(std::make_shared("LogDisk", "./logs", 0)); ResponsesQueue queue(std::numeric_limits::max()); SnapshotsQueue snapshots_queue{1}; - auto state_machine = std::make_shared(queue, snapshots_queue, "./snapshots", settings, keeper_context, nullptr); + auto state_machine = std::make_shared(queue, snapshots_queue, settings, keeper_context, nullptr); state_machine->init(); - DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = enable_compression, .rotate_interval = settings->rotate_log_storage_interval}); + DB::KeeperLogStore changelog( + DB::LogFileSettings{ + .force_sync = true, .compress_logs = enable_compression, .rotate_interval = settings->rotate_log_storage_interval}, + keeper_context); changelog.init(state_machine->last_commit_index() + 1, settings->reserved_log_items); for (size_t i = 1; i < total_logs + 1; ++i) { @@ -1426,7 +1530,8 @@ void testLogAndStateMachine(Coordination::CoordinationSettingsPtr settings, uint if (i % settings->snapshot_distance == 0) { nuraft::snapshot s(i, 0, std::make_shared()); - nuraft::async_result::handler_type when_done = [&snapshot_created] (bool & ret, nuraft::ptr &/*exception*/) + nuraft::async_result::handler_type when_done + = [&snapshot_created](bool & ret, nuraft::ptr & /*exception*/) { snapshot_created = ret; LOG_INFO(&Poco::Logger::get("CoordinationTest"), "Snapshot finished"); @@ -1444,17 +1549,21 @@ void testLogAndStateMachine(Coordination::CoordinationSettingsPtr settings, uint } SnapshotsQueue snapshots_queue1{1}; - auto restore_machine = std::make_shared(queue, snapshots_queue1, "./snapshots", settings, keeper_context, nullptr); + auto restore_machine = std::make_shared(queue, snapshots_queue1, settings, keeper_context, nullptr); restore_machine->init(); EXPECT_EQ(restore_machine->last_commit_index(), total_logs - total_logs % settings->snapshot_distance); - DB::KeeperLogStore restore_changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = enable_compression, .rotate_interval = settings->rotate_log_storage_interval}); + DB::KeeperLogStore restore_changelog( + DB::LogFileSettings{ + .force_sync = true, .compress_logs = enable_compression, .rotate_interval = settings->rotate_log_storage_interval}, + keeper_context); restore_changelog.init(restore_machine->last_commit_index() + 1, settings->reserved_log_items); EXPECT_EQ(restore_changelog.size(), std::min(settings->reserved_log_items + total_logs % settings->snapshot_distance, total_logs)); EXPECT_EQ(restore_changelog.next_slot(), total_logs + 1); if (total_logs > settings->reserved_log_items + 1) - EXPECT_EQ(restore_changelog.start_index(), total_logs - total_logs % settings->snapshot_distance - settings->reserved_log_items + 1); + EXPECT_EQ( + restore_changelog.start_index(), total_logs - total_logs % settings->snapshot_distance - settings->reserved_log_items + 1); else EXPECT_EQ(restore_changelog.start_index(), 1); @@ -1552,11 +1661,13 @@ TEST_P(CoordinationTest, TestEphemeralNodeRemove) using namespace DB; ChangelogDirTest snapshots("./snapshots"); + setSnapshotDirectory("./snapshots"); + CoordinationSettingsPtr settings = std::make_shared(); ResponsesQueue queue(std::numeric_limits::max()); SnapshotsQueue snapshots_queue{1}; - auto state_machine = std::make_shared(queue, snapshots_queue, "./snapshots", 
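/// The restore arithmetic asserted in testLogAndStateMachine above, worked
/// through once with hypothetical values total_logs = 37,
/// snapshot_distance = 10, reserved_log_items = 5:
///
///     last_commit_index = 37 - 37 % 10         = 30   /// last full snapshot
///     restored size     = min(5 + 37 % 10, 37) = 12
///     start_index       = 37 - 7 - 5 + 1       = 26
///
/// i.e. the restored changelog holds entries 26..37: the reserved tail kept
/// behind the snapshot plus everything appended after it.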
settings, keeper_context, nullptr); + auto state_machine = std::make_shared(queue, snapshots_queue, settings, keeper_context, nullptr); state_machine->init(); std::shared_ptr request_c = std::make_shared(); @@ -1585,11 +1696,12 @@ TEST_P(CoordinationTest, TestCreateNodeWithAuthSchemeForAclWhenAuthIsPrecommitte using namespace DB; ChangelogDirTest snapshots("./snapshots"); + setSnapshotDirectory("./snapshots"); CoordinationSettingsPtr settings = std::make_shared(); ResponsesQueue queue(std::numeric_limits::max()); SnapshotsQueue snapshots_queue{1}; - auto state_machine = std::make_shared(queue, snapshots_queue, "./snapshots", settings, keeper_context, nullptr); + auto state_machine = std::make_shared(queue, snapshots_queue, settings, keeper_context, nullptr); state_machine->init(); String user_auth_data = "test_user:test_password"; @@ -1635,11 +1747,13 @@ TEST_P(CoordinationTest, TestSetACLWithAuthSchemeForAclWhenAuthIsPrecommitted) using namespace DB; ChangelogDirTest snapshots("./snapshots"); + setSnapshotDirectory("./snapshots"); + CoordinationSettingsPtr settings = std::make_shared(); ResponsesQueue queue(std::numeric_limits::max()); SnapshotsQueue snapshots_queue{1}; - auto state_machine = std::make_shared(queue, snapshots_queue, "./snapshots", settings, keeper_context, nullptr); + auto state_machine = std::make_shared(queue, snapshots_queue, settings, keeper_context, nullptr); state_machine->init(); String user_auth_data = "test_user:test_password"; @@ -1691,8 +1805,10 @@ TEST_P(CoordinationTest, TestRotateIntervalChanges) using namespace Coordination; auto params = GetParam(); ChangelogDirTest snapshots("./logs"); + setLogDirectory("./logs"); { - DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}); + DB::KeeperLogStore changelog( + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, keeper_context); changelog.init(0, 3); for (size_t i = 1; i < 55; ++i) @@ -1710,7 +1826,8 @@ TEST_P(CoordinationTest, TestRotateIntervalChanges) EXPECT_TRUE(fs::exists("./logs/changelog_1_100.bin" + params.extension)); - DB::KeeperLogStore changelog_1("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 10}); + DB::KeeperLogStore changelog_1( + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 10}, keeper_context); changelog_1.init(0, 50); for (size_t i = 0; i < 55; ++i) { @@ -1726,7 +1843,8 @@ TEST_P(CoordinationTest, TestRotateIntervalChanges) EXPECT_TRUE(fs::exists("./logs/changelog_1_100.bin" + params.extension)); EXPECT_TRUE(fs::exists("./logs/changelog_101_110.bin" + params.extension)); - DB::KeeperLogStore changelog_2("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 7}); + DB::KeeperLogStore changelog_2( + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 7}, keeper_context); changelog_2.init(98, 55); for (size_t i = 0; i < 17; ++i) @@ -1749,7 +1867,8 @@ TEST_P(CoordinationTest, TestRotateIntervalChanges) EXPECT_TRUE(fs::exists("./logs/changelog_118_124.bin" + params.extension)); EXPECT_TRUE(fs::exists("./logs/changelog_125_131.bin" + params.extension)); - DB::KeeperLogStore changelog_3("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}); + DB::KeeperLogStore 
changelog_3( + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, keeper_context); changelog_3.init(116, 3); for (size_t i = 0; i < 17; ++i) { @@ -1796,8 +1915,10 @@ TEST_P(CoordinationTest, TestCompressedLogsMultipleRewrite) { using namespace Coordination; auto test_params = GetParam(); - ChangelogDirTest snapshots("./logs"); - DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = test_params.enable_compression, .rotate_interval = 100}); + ChangelogDirTest logs("./logs"); + setLogDirectory("./logs"); + DB::KeeperLogStore changelog( + DB::LogFileSettings{.force_sync = true, .compress_logs = test_params.enable_compression, .rotate_interval = 100}, keeper_context); changelog.init(0, 3); for (size_t i = 1; i < 55; ++i) @@ -1811,7 +1932,8 @@ TEST_P(CoordinationTest, TestCompressedLogsMultipleRewrite) waitDurableLogs(changelog); - DB::KeeperLogStore changelog1("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = test_params.enable_compression, .rotate_interval = 100}); + DB::KeeperLogStore changelog1( + DB::LogFileSettings{.force_sync = true, .compress_logs = test_params.enable_compression, .rotate_interval = 100}, keeper_context); changelog1.init(0, 3); for (size_t i = 55; i < 70; ++i) { @@ -1822,7 +1944,8 @@ TEST_P(CoordinationTest, TestCompressedLogsMultipleRewrite) changelog1.end_of_append_batch(0, 0); } - DB::KeeperLogStore changelog2("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = test_params.enable_compression, .rotate_interval = 100}); + DB::KeeperLogStore changelog2( + DB::LogFileSettings{.force_sync = true, .compress_logs = test_params.enable_compression, .rotate_interval = 100}, keeper_context); changelog2.init(0, 3); for (size_t i = 70; i < 80; ++i) { @@ -1839,7 +1962,9 @@ TEST_P(CoordinationTest, TestStorageSnapshotDifferentCompressions) auto params = GetParam(); ChangelogDirTest test("./snapshots"); - DB::KeeperSnapshotManager manager("./snapshots", 3, keeper_context, params.enable_compression); + setSnapshotDirectory("./snapshots"); + + DB::KeeperSnapshotManager manager(3, keeper_context, params.enable_compression); DB::KeeperStorage storage(500, "", keeper_context); addNode(storage, "/hello", "world", 1); @@ -1857,7 +1982,7 @@ TEST_P(CoordinationTest, TestStorageSnapshotDifferentCompressions) manager.serializeSnapshotBufferToDisk(*buf, 2); EXPECT_TRUE(fs::exists("./snapshots/snapshot_2.bin" + params.extension)); - DB::KeeperSnapshotManager new_manager("./snapshots", 3, keeper_context, !params.enable_compression); + DB::KeeperSnapshotManager new_manager(3, keeper_context, !params.enable_compression); auto debuf = new_manager.deserializeSnapshotBufferFromDisk(2); @@ -1883,9 +2008,10 @@ TEST_P(CoordinationTest, ChangelogInsertThreeTimesSmooth) { auto params = GetParam(); ChangelogDirTest test("./logs"); + setLogDirectory("./logs"); { LOG_INFO(log, "================First time====================="); - DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}); + DB::KeeperLogStore changelog(DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, keeper_context); changelog.init(1, 0); auto entry = getLogEntry("hello_world", 1000); changelog.append(entry); @@ -1896,7 +2022,8 @@ TEST_P(CoordinationTest, ChangelogInsertThreeTimesSmooth) { LOG_INFO(log, "================Second time====================="); - 
DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}); + DB::KeeperLogStore changelog( + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, keeper_context); changelog.init(1, 0); auto entry = getLogEntry("hello_world", 1000); changelog.append(entry); @@ -1907,7 +2034,8 @@ TEST_P(CoordinationTest, ChangelogInsertThreeTimesSmooth) { LOG_INFO(log, "================Third time====================="); - DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}); + DB::KeeperLogStore changelog( + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, keeper_context); changelog.init(1, 0); auto entry = getLogEntry("hello_world", 1000); changelog.append(entry); @@ -1918,7 +2046,8 @@ TEST_P(CoordinationTest, ChangelogInsertThreeTimesSmooth) { LOG_INFO(log, "================Fourth time====================="); - DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}); + DB::KeeperLogStore changelog( + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, keeper_context); changelog.init(1, 0); auto entry = getLogEntry("hello_world", 1000); changelog.append(entry); @@ -1933,10 +2062,12 @@ TEST_P(CoordinationTest, ChangelogInsertMultipleTimesSmooth) { auto params = GetParam(); ChangelogDirTest test("./logs"); + setLogDirectory("./logs"); for (size_t i = 0; i < 36; ++i) { LOG_INFO(log, "================First time====================="); - DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}); + DB::KeeperLogStore changelog( + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, keeper_context); changelog.init(1, 0); for (size_t j = 0; j < 7; ++j) { @@ -1947,7 +2078,8 @@ TEST_P(CoordinationTest, ChangelogInsertMultipleTimesSmooth) waitDurableLogs(changelog); } - DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}); + DB::KeeperLogStore changelog( + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, keeper_context); changelog.init(1, 0); EXPECT_EQ(changelog.next_slot(), 36 * 7 + 1); } @@ -1956,9 +2088,11 @@ TEST_P(CoordinationTest, ChangelogInsertThreeTimesHard) { auto params = GetParam(); ChangelogDirTest test("./logs"); + setLogDirectory("./logs"); { LOG_INFO(log, "================First time====================="); - DB::KeeperLogStore changelog1("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}); + DB::KeeperLogStore changelog1( + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, keeper_context); changelog1.init(1, 0); auto entry = getLogEntry("hello_world", 1000); changelog1.append(entry); @@ -1969,7 +2103,8 @@ TEST_P(CoordinationTest, ChangelogInsertThreeTimesHard) { LOG_INFO(log, "================Second time====================="); - DB::KeeperLogStore changelog2("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = 
params.enable_compression, .rotate_interval = 100}); + DB::KeeperLogStore changelog2( + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, keeper_context); changelog2.init(1, 0); auto entry = getLogEntry("hello_world", 1000); changelog2.append(entry); @@ -1980,7 +2115,8 @@ TEST_P(CoordinationTest, ChangelogInsertThreeTimesHard) { LOG_INFO(log, "================Third time====================="); - DB::KeeperLogStore changelog3("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}); + DB::KeeperLogStore changelog3( + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, keeper_context); changelog3.init(1, 0); auto entry = getLogEntry("hello_world", 1000); changelog3.append(entry); @@ -1991,7 +2127,8 @@ TEST_P(CoordinationTest, ChangelogInsertThreeTimesHard) { LOG_INFO(log, "================Fourth time====================="); - DB::KeeperLogStore changelog4("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}); + DB::KeeperLogStore changelog4( + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, keeper_context); changelog4.init(1, 0); auto entry = getLogEntry("hello_world", 1000); changelog4.append(entry); @@ -2005,10 +2142,12 @@ TEST_P(CoordinationTest, TestStorageSnapshotEqual) { auto params = GetParam(); ChangelogDirTest test("./snapshots"); + setSnapshotDirectory("./snapshots"); + std::optional snapshot_hash; for (size_t i = 0; i < 15; ++i) { - DB::KeeperSnapshotManager manager("./snapshots", 3, keeper_context, params.enable_compression); + DB::KeeperSnapshotManager manager(3, keeper_context, params.enable_compression); DB::KeeperStorage storage(500, "", keeper_context); addNode(storage, "/hello", ""); @@ -2048,7 +2187,10 @@ TEST_P(CoordinationTest, TestLogGap) using namespace Coordination; auto test_params = GetParam(); ChangelogDirTest logs("./logs"); - DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = test_params.enable_compression, .rotate_interval = 100}); + setLogDirectory("./logs"); + + DB::KeeperLogStore changelog( + DB::LogFileSettings{.force_sync = true, .compress_logs = test_params.enable_compression, .rotate_interval = 100}, keeper_context); changelog.init(0, 3); for (size_t i = 1; i < 55; ++i) @@ -2060,7 +2202,8 @@ TEST_P(CoordinationTest, TestLogGap) changelog.end_of_append_batch(0, 0); } - DB::KeeperLogStore changelog1("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = test_params.enable_compression, .rotate_interval = 100}); + DB::KeeperLogStore changelog1( + DB::LogFileSettings{.force_sync = true, .compress_logs = test_params.enable_compression, .rotate_interval = 100}, keeper_context); changelog1.init(61, 3); /// Logs discarded @@ -2220,13 +2363,13 @@ TEST_P(CoordinationTest, TestListRequestTypes) return create_response.path_created; }; - create_path(parentPath(StringRef{test_path}).toString(), false, false); + create_path(parentNodePath(StringRef{test_path}).toString(), false, false); static constexpr size_t persistent_num = 5; std::unordered_set expected_persistent_children; for (size_t i = 0; i < persistent_num; ++i) { - expected_persistent_children.insert(getBaseName(create_path(test_path, false)).toString()); + expected_persistent_children.insert(getBaseNodeName(create_path(test_path, false)).toString()); } 
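// parentNodePath/getBaseNodeName are the renamed Keeper path helpers
// (formerly parentPath/getBaseName); for a hypothetical path "/foo/bar",
// parentNodePath would return "/foo" and getBaseNodeName would return "bar".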
ASSERT_EQ(expected_persistent_children.size(), persistent_num); @@ -2234,7 +2377,7 @@ TEST_P(CoordinationTest, TestListRequestTypes) std::unordered_set expected_ephemeral_children; for (size_t i = 0; i < ephemeral_num; ++i) { - expected_ephemeral_children.insert(getBaseName(create_path(test_path, true)).toString()); + expected_ephemeral_children.insert(getBaseNodeName(create_path(test_path, true)).toString()); } ASSERT_EQ(expected_ephemeral_children.size(), ephemeral_num); @@ -2242,7 +2385,7 @@ TEST_P(CoordinationTest, TestListRequestTypes) { const auto list_request = std::make_shared(); int new_zxid = ++zxid; - list_request->path = parentPath(StringRef{test_path}).toString(); + list_request->path = parentNodePath(StringRef{test_path}).toString(); list_request->list_request_type = list_request_type; storage.preprocessRequest(list_request, 1, 0, new_zxid); auto responses = storage.processRequest(list_request, 1, new_zxid); @@ -2270,20 +2413,23 @@ TEST_P(CoordinationTest, TestListRequestTypes) EXPECT_EQ(all_children.size(), ephemeral_num + persistent_num); for (const auto & child : all_children) { - EXPECT_TRUE(expected_ephemeral_children.contains(child) || expected_persistent_children.contains(child)) << "Missing child " << child; + EXPECT_TRUE(expected_ephemeral_children.contains(child) || expected_persistent_children.contains(child)) + << "Missing child " << child; } } TEST_P(CoordinationTest, TestDurableState) { ChangelogDirTest logs("./logs"); + setLogDirectory("./logs"); + setStateFileDirectory("."); auto state = nuraft::cs_new(); std::optional state_manager; const auto reload_state_manager = [&] { - state_manager.emplace(1, "localhost", 9181, "./logs", "./state"); + state_manager.emplace(1, "localhost", 9181, keeper_context); state_manager->loadLogStore(1, 0); }; @@ -2318,11 +2464,11 @@ TEST_P(CoordinationTest, TestDurableState) write_buf.sync(); write_buf.close(); reload_state_manager(); -#ifdef NDEBUG +# ifdef NDEBUG ASSERT_EQ(state_manager->read_state(), nullptr); -#else +# else ASSERT_THROW(state_manager->read_state(), DB::Exception); -#endif +# endif } { @@ -2396,15 +2542,16 @@ TEST_P(CoordinationTest, ChangelogTestMaxLogSize) { auto params = GetParam(); ChangelogDirTest test("./logs"); + setLogDirectory("./logs"); uint64_t last_entry_index{0}; size_t i{0}; { SCOPED_TRACE("Small rotation interval, big size limit"); DB::KeeperLogStore changelog( - "./logs", DB::LogFileSettings{ - .force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 20, .max_size = 50 * 1024 * 1024}); + .force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 20, .max_size = 50 * 1024 * 1024}, + keeper_context); changelog.init(1, 0); for (; i < 100; ++i) @@ -2421,9 +2568,9 @@ TEST_P(CoordinationTest, ChangelogTestMaxLogSize) { SCOPED_TRACE("Large rotation interval, small size limit"); DB::KeeperLogStore changelog( - "./logs", DB::LogFileSettings{ - .force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100'000, .max_size = 4000}); + .force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100'000, .max_size = 4000}, + keeper_context); changelog.init(1, 0); ASSERT_EQ(changelog.entry_at(last_entry_index)->get_term(), (i - 1 + 44) * 10); @@ -2442,13 +2589,12 @@ TEST_P(CoordinationTest, ChangelogTestMaxLogSize) { SCOPED_TRACE("Final verify all logs"); DB::KeeperLogStore changelog( - "./logs", DB::LogFileSettings{ - .force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100'000, 
.max_size = 4000}); + .force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100'000, .max_size = 4000}, + keeper_context); changelog.init(1, 0); ASSERT_EQ(changelog.entry_at(last_entry_index)->get_term(), (i - 1 + 44) * 10); } - } TEST_P(CoordinationTest, TestCheckNotExistsRequest) @@ -2524,13 +2670,10 @@ TEST_P(CoordinationTest, TestCheckNotExistsRequest) } } -INSTANTIATE_TEST_SUITE_P(CoordinationTestSuite, +INSTANTIATE_TEST_SUITE_P( + CoordinationTestSuite, CoordinationTest, - ::testing::ValuesIn(std::initializer_list{ - CompressionParam{true, ".zstd"}, - CompressionParam{false, ""} - }) -); + ::testing::ValuesIn(std::initializer_list{CompressionParam{true, ".zstd"}, CompressionParam{false, ""}})); int main(int argc, char ** argv) { From 092cf99147293a8dbb45031cf361a03cbcf5777f Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 24 May 2023 09:46:07 +0000 Subject: [PATCH 0179/1997] Fix build --- programs/keeper/CMakeLists.txt | 2 +- src/Coordination/Changelog.cpp | 134 ++++++++++++------ src/Coordination/KeeperContext.cpp | 20 +++ src/Coordination/KeeperContext.h | 3 + src/Coordination/KeeperSnapshotManager.cpp | 1 - .../MetadataStorageFromPlainObjectStorage.cpp | 1 - utils/keeper-data-dumper/main.cpp | 10 +- 7 files changed, 118 insertions(+), 53 deletions(-) diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index a946ea06626..6e97ab324e3 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -143,9 +143,9 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ReadBufferFromRemoteFSGather.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/IOUringReader.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/WriteBufferFromTemporaryFile.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/WriteBufferWithFinalizeCallback.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/WriteIndirectBufferFromRemoteFS.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/getThreadPoolReader.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ThreadPoolRemoteFSReader.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ThreadPoolReader.cpp diff --git a/src/Coordination/Changelog.cpp b/src/Coordination/Changelog.cpp index c94633d6dbd..875b0758d27 100644 --- a/src/Coordination/Changelog.cpp +++ b/src/Coordination/Changelog.cpp @@ -24,63 +24,66 @@ namespace ErrorCodes extern const int CHECKSUM_DOESNT_MATCH; extern const int CORRUPTED_DATA; extern const int UNKNOWN_FORMAT_VERSION; + extern const int NOT_IMPLEMENTED; + extern const int BAD_ARGUMENTS; extern const int LOGICAL_ERROR; } namespace { - void moveFileBetweenDisks(DiskPtr disk_from, ChangelogFileDescriptionPtr description, DiskPtr disk_to, const std::string & path_to) - { - disk_from->copyFile(description->path, *disk_to, path_to, {}); - disk_from->removeFile(description->path); - description->path = path_to; - description->disk = disk_to; - } - constexpr auto DEFAULT_PREFIX = "changelog"; +void moveFileBetweenDisks(DiskPtr disk_from, ChangelogFileDescriptionPtr description, DiskPtr disk_to, const std::string & path_to) +{ + disk_from->copyFile(description->path, *disk_to, path_to, {}); + disk_from->removeFile(description->path); + description->path = path_to; + description->disk = disk_to; +} - inline std::string - formatChangelogPath(const std::string & name_prefix, 
uint64_t from_index, uint64_t to_index, const std::string & extension) - { - return fmt::format("{}_{}_{}.{}", name_prefix, from_index, to_index, extension); - } +constexpr auto DEFAULT_PREFIX = "changelog"; - ChangelogFileDescriptionPtr getChangelogFileDescription(const std::filesystem::path & path) - { - // we can have .bin.zstd so we cannot use std::filesystem stem and extension - std::string filename_with_extension = path.filename(); - std::string_view filename_with_extension_view = filename_with_extension; +inline std::string +formatChangelogPath(const std::string & name_prefix, uint64_t from_index, uint64_t to_index, const std::string & extension) +{ + return fmt::format("{}_{}_{}.{}", name_prefix, from_index, to_index, extension); +} - auto first_dot = filename_with_extension.find('.'); - if (first_dot == std::string::npos) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid changelog file {}", path.generic_string()); +ChangelogFileDescriptionPtr getChangelogFileDescription(const std::filesystem::path & path) +{ + // we can have .bin.zstd so we cannot use std::filesystem stem and extension + std::string filename_with_extension = path.filename(); + std::string_view filename_with_extension_view = filename_with_extension; - Strings filename_parts; - boost::split(filename_parts, filename_with_extension_view.substr(0, first_dot), boost::is_any_of("_")); - if (filename_parts.size() < 3) - throw Exception(ErrorCodes::CORRUPTED_DATA, "Invalid changelog {}", path.generic_string()); + auto first_dot = filename_with_extension.find('.'); + if (first_dot == std::string::npos) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid changelog file {}", path.generic_string()); - auto result = std::make_shared(); - result->prefix = filename_parts[0]; - result->from_log_index = parse(filename_parts[1]); - result->to_log_index = parse(filename_parts[2]); - result->extension = std::string(filename_with_extension.substr(first_dot + 1)); - result->path = path.generic_string(); - return result; - } + Strings filename_parts; + boost::split(filename_parts, filename_with_extension_view.substr(0, first_dot), boost::is_any_of("_")); + if (filename_parts.size() < 3) + throw Exception(ErrorCodes::CORRUPTED_DATA, "Invalid changelog {}", path.generic_string()); - Checksum computeRecordChecksum(const ChangelogRecord & record) - { - SipHash hash; - hash.update(record.header.version); - hash.update(record.header.index); - hash.update(record.header.term); - hash.update(record.header.value_type); - hash.update(record.header.blob_size); - if (record.header.blob_size != 0) - hash.update(reinterpret_cast(record.blob->data_begin()), record.blob->size()); - return hash.get64(); - } + auto result = std::make_shared(); + result->prefix = filename_parts[0]; + result->from_log_index = parse(filename_parts[1]); + result->to_log_index = parse(filename_parts[2]); + result->extension = std::string(filename_with_extension.substr(first_dot + 1)); + result->path = path.generic_string(); + return result; +} + +Checksum computeRecordChecksum(const ChangelogRecord & record) +{ + SipHash hash; + hash.update(record.header.version); + hash.update(record.header.index); + hash.update(record.header.term); + hash.update(record.header.value_type); + hash.update(record.header.blob_size); + if (record.header.blob_size != 0) + hash.update(reinterpret_cast(record.blob->data_begin()), record.blob->size()); + return hash.get64(); +} } @@ -571,6 +574,19 @@ Changelog::Changelog(Poco::Logger * log_, LogFileSettings log_file_settings, Kee , 
append_completion_queue(std::numeric_limits::max()) , keeper_context(std::move(keeper_context_)) { + if (auto current_log_disk = getCurrentLogDisk(); + log_file_settings.force_sync && dynamic_cast(current_log_disk.get()) == nullptr) + { + throw DB::Exception( + DB::ErrorCodes::BAD_ARGUMENTS, + "force_sync is set to true for logs but disk '{}' cannot satisfy such guarantee because it's not of type DiskLocal.\n" + "If you want to use force_sync and same disk for all logs, please set keeper_server.log_storage_disk to a local disk.\n" + "If you want to use force_sync and different disk only for old logs, please set 'keeper_server.log_storage_disk' to any " + "supported disk and 'keeper_server.current_log_storage_disk' to a local disk.\n" + "Otherwise, disable force_sync", + current_log_disk->getName()); + } + /// Load all files on changelog disks const auto load_from_disk = [&](const auto & disk) @@ -590,6 +606,12 @@ Changelog::Changelog(Poco::Logger * log_, LogFileSettings log_file_settings, Kee } }; + /// Load all files from old disks + for (const auto & disk : keeper_context->getOldLogDisks()) + { + load_from_disk(disk); + } + auto disk = getDisk(); load_from_disk(disk); @@ -738,7 +760,7 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin assert(existing_changelogs.find(last_log_read_result->log_start_index) != existing_changelogs.end()); assert(existing_changelogs.find(last_log_read_result->log_start_index)->first == existing_changelogs.rbegin()->first); - /// Continue to write into incomplete existing log if it doesn't finish with error + /// Continue to write into incomplete existing log if it didn't finish with error const auto & description = existing_changelogs[last_log_read_result->log_start_index]; if (last_log_read_result->last_read_index == 0 || last_log_read_result->error) /// If it's broken log then remove it @@ -755,7 +777,7 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin } else if (last_log_read_result.has_value()) { - /// check if we need to move it to another disk + /// check if we need to move completed log to another disk auto current_log_disk = getCurrentLogDisk(); auto disk = getDisk(); @@ -768,6 +790,24 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin if (!current_writer->isFileSet()) current_writer->rotate(max_log_id + 1); + /// Move files to correct disks + auto latest_start_index = current_writer->getStartIndex(); + auto current_log_disk = getCurrentLogDisk(); + auto disk = getDisk(); + for (const auto & [start_index, description] : existing_changelogs) + { + /// latest log should already be on current_log_disk + if (start_index == latest_start_index) + { + chassert(description->disk == current_log_disk); + continue; + } + + if (description->disk != disk) + moveFileBetweenDisks(description->disk, description, disk, description->path); + } + + initialized = true; } diff --git a/src/Coordination/KeeperContext.cpp b/src/Coordination/KeeperContext.cpp index 3c6411a3a24..e27cfc60cff 100644 --- a/src/Coordination/KeeperContext.cpp +++ b/src/Coordination/KeeperContext.cpp @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB { @@ -27,6 +28,14 @@ void KeeperContext::initialize(const Poco::Util::AbstractConfiguration & config) else current_log_storage = log_storage; + Poco::Util::AbstractConfiguration::Keys old_log_disk_name_keys; + config.keys("keeper_server", old_log_disk_name_keys); + for (const auto & key : old_log_disk_name_keys) + { + if 
(key.starts_with("old_log_storage_disk")) + old_log_disk_names.push_back(config.getString("keeper_server." + key)); + } + snapshot_storage = getSnapshotsPathFromConfig(config); state_file_storage = getStatePathFromConfig(config); @@ -71,6 +80,17 @@ DiskPtr KeeperContext::getLogDisk() const return getDisk(log_storage); } +std::vector<DiskPtr> KeeperContext::getOldLogDisks() const +{ + std::vector<DiskPtr> old_log_disks; + old_log_disks.reserve(old_log_disk_names.size()); + + for (const auto & disk_name : old_log_disk_names) + old_log_disks.push_back(disk_selector->get(disk_name)); + + return old_log_disks; +} + DiskPtr KeeperContext::getCurrentLogDisk() const { return getDisk(current_log_storage); diff --git a/src/Coordination/KeeperContext.h b/src/Coordination/KeeperContext.h index 4e538c99649..e04d1cd6b3d 100644 --- a/src/Coordination/KeeperContext.h +++ b/src/Coordination/KeeperContext.h @@ -34,6 +34,7 @@ public: DiskPtr getCurrentLogDisk() const; DiskPtr getLogDisk() const; + std::vector<DiskPtr> getOldLogDisks() const; void setLogDisk(DiskPtr disk); DiskPtr getSnapshotDisk() const; @@ -63,6 +64,8 @@ private: Storage snapshot_storage; Storage state_file_storage; + std::vector<std::string> old_log_disk_names; + bool standalone_keeper; }; diff --git a/src/Coordination/KeeperSnapshotManager.cpp b/src/Coordination/KeeperSnapshotManager.cpp index 7d808e88b3d..7f097c182a1 100644 --- a/src/Coordination/KeeperSnapshotManager.cpp +++ b/src/Coordination/KeeperSnapshotManager.cpp @@ -720,7 +720,6 @@ std::pair KeeperSnapshotManager::serializeSnapshot try { - std::cout << "Removing file " << tmp_snapshot_file_name << std::endl; disk->removeFile(tmp_snapshot_file_name); } catch (fs::filesystem_error & e) diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp index 650fde7bcd1..a680a344746 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp @@ -120,7 +120,6 @@ const IMetadataStorage & MetadataStorageFromPlainObjectStorageTransaction::getSt void MetadataStorageFromPlainObjectStorageTransaction::unlinkFile(const std::string & path) { auto object = StoredObject(metadata_storage.getAbsolutePath(path)); - std::cout << "Removing from plain " << path << std::endl; metadata_storage.object_storage->removeObject(object); } diff --git a/utils/keeper-data-dumper/main.cpp b/utils/keeper-data-dumper/main.cpp index e82b21079fe..5a6fd15d72c 100644 --- a/utils/keeper-data-dumper/main.cpp +++ b/utils/keeper-data-dumper/main.cpp @@ -8,6 +8,7 @@ #include #include #include +#include <Disks/DiskLocal.h> using namespace Coordination; using namespace DB; @@ -62,15 +63,18 @@ int main(int argc, char *argv[]) ResponsesQueue queue(std::numeric_limits<size_t>::max()); SnapshotsQueue snapshots_queue{1}; CoordinationSettingsPtr settings = std::make_shared<CoordinationSettings>(); - KeeperContextPtr keeper_context = std::make_shared<KeeperContext>(); - auto state_machine = std::make_shared<KeeperStateMachine>(queue, snapshots_queue, argv[1], settings, keeper_context, nullptr); + KeeperContextPtr keeper_context = std::make_shared<KeeperContext>(true); + keeper_context->setLogDisk(std::make_shared<DiskLocal>("LogDisk", argv[2], 0)); + keeper_context->setSnapshotDisk(std::make_shared<DiskLocal>("LogDisk", argv[1], 0)); + + auto state_machine = std::make_shared<KeeperStateMachine>(queue, snapshots_queue, settings, keeper_context, nullptr); state_machine->init(); size_t last_commited_index = state_machine->last_commit_index(); LOG_INFO(logger, "Last committed index: {}", last_commited_index); DB::KeeperLogStore changelog( -
argv[2], LogFileSettings{.force_sync = true, .compress_logs = settings->compress_logs, .rotate_interval = 10000000}); + LogFileSettings{.force_sync = true, .compress_logs = settings->compress_logs, .rotate_interval = 10000000}, keeper_context); changelog.init(last_commited_index, 10000000000UL); /// collect all logs if (changelog.size() == 0) LOG_INFO(logger, "Changelog empty"); From 242c3bc9a971b1f9b76df57b7df1ac5d176fe274 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 24 May 2023 16:01:28 +0200 Subject: [PATCH 0180/1997] fix --- tests/integration/test_ttl_replicated/test.py | 22 ++++++++----------- .../02448_clone_replica_lost_part.sql | 7 +++--- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/tests/integration/test_ttl_replicated/test.py b/tests/integration/test_ttl_replicated/test.py index 4ea4472b812..d78c00a9f9c 100644 --- a/tests/integration/test_ttl_replicated/test.py +++ b/tests/integration/test_ttl_replicated/test.py @@ -6,6 +6,7 @@ from helpers.cluster import ClickHouseCluster from helpers.test_tools import TSV, exec_query_with_retry from helpers.wait_for_helpers import wait_for_delete_inactive_parts from helpers.wait_for_helpers import wait_for_delete_empty_parts +from helpers.test_tools import assert_eq_with_retry cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance("node1", with_zookeeper=True) @@ -66,7 +67,8 @@ def test_ttl_columns(started_cluster): """ CREATE TABLE test_ttl(date DateTime, id UInt32, a Int32 TTL date + INTERVAL 1 DAY, b Int32 TTL date + INTERVAL 1 MONTH) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/test_ttl_columns', '{replica}') - ORDER BY id PARTITION BY toDayOfMonth(date) SETTINGS merge_with_ttl_timeout=0, min_bytes_for_wide_part=0; + ORDER BY id PARTITION BY toDayOfMonth(date) + SETTINGS merge_with_ttl_timeout=0, min_bytes_for_wide_part=0, , max_merge_selecting_sleep_ms=6000; """.format( replica=node.name ) @@ -99,7 +101,7 @@ def test_merge_with_ttl_timeout(started_cluster): CREATE TABLE {table}(date DateTime, id UInt32, a Int32 TTL date + INTERVAL 1 DAY, b Int32 TTL date + INTERVAL 1 MONTH) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/{table}', '{replica}') ORDER BY id PARTITION BY toDayOfMonth(date) - SETTINGS min_bytes_for_wide_part=0; + SETTINGS min_bytes_for_wide_part=0, max_merge_selecting_sleep_ms=6000; """.format( replica=node.name, table=table ) @@ -134,14 +136,8 @@ def test_merge_with_ttl_timeout(started_cluster): ) ) - time.sleep(15) # TTL merges shall not happen. 
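# The fixed 15-second sleep above, together with the one-shot asserts below,
# is the flaky part this patch removes: assert_eq_with_retry polls the query
# result until it matches, so the test waits only as long as the TTL merge
# actually takes.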
- - assert ( - node1.query("SELECT countIf(a = 0) FROM {table}".format(table=table)) == "3\n" - ) - assert ( - node2.query("SELECT countIf(a = 0) FROM {table}".format(table=table)) == "3\n" - ) + assert_eq_with_retry(node1, "SELECT countIf(a = 0) FROM {table}".format(table=table), "3\n") + assert_eq_with_retry(node2, "SELECT countIf(a = 0) FROM {table}".format(table=table), "3\n") def test_ttl_many_columns(started_cluster): @@ -155,7 +151,7 @@ def test_ttl_many_columns(started_cluster): _offset Int32 TTL date, _partition Int32 TTL date) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/test_ttl_2', '{replica}') - ORDER BY id PARTITION BY toDayOfMonth(date) SETTINGS merge_with_ttl_timeout=0; + ORDER BY id PARTITION BY toDayOfMonth(date) SETTINGS merge_with_ttl_timeout=0, max_merge_selecting_sleep_ms=6000; """.format( replica=node.name ) @@ -213,7 +209,7 @@ def test_ttl_table(started_cluster, delete_suffix): CREATE TABLE test_ttl(date DateTime, id UInt32) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/test_ttl', '{replica}') ORDER BY id PARTITION BY toDayOfMonth(date) - TTL date + INTERVAL 1 DAY {delete_suffix} SETTINGS merge_with_ttl_timeout=0; + TTL date + INTERVAL 1 DAY {delete_suffix} SETTINGS merge_with_ttl_timeout=0, max_merge_selecting_sleep_ms=6000; """.format( replica=node.name, delete_suffix=delete_suffix ) @@ -304,7 +300,7 @@ def test_ttl_double_delete_rule_returns_error(started_cluster): CREATE TABLE test_ttl(date DateTime, id UInt32) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/test_ttl_double_delete', '{replica}') ORDER BY id PARTITION BY toDayOfMonth(date) - TTL date + INTERVAL 1 DAY, date + INTERVAL 2 DAY SETTINGS merge_with_ttl_timeout=0 + TTL date + INTERVAL 1 DAY, date + INTERVAL 2 DAY SETTINGS merge_with_ttl_timeout=0, max_merge_selecting_sleep_ms=6000 """.format( replica=node1.name ) diff --git a/tests/queries/0_stateless/02448_clone_replica_lost_part.sql b/tests/queries/0_stateless/02448_clone_replica_lost_part.sql index 7ad25d75fbe..1e99e1869cc 100644 --- a/tests/queries/0_stateless/02448_clone_replica_lost_part.sql +++ b/tests/queries/0_stateless/02448_clone_replica_lost_part.sql @@ -6,10 +6,12 @@ drop table if exists rmt1; drop table if exists rmt2; create table rmt1 (n int) engine=ReplicatedMergeTree('/test/02448/{database}/rmt', '1') order by tuple() settings min_replicated_logs_to_keep=1, max_replicated_logs_to_keep=2, cleanup_delay_period=0, cleanup_delay_period_random_add=1, - cleanup_thread_preferred_points_per_iteration=0, old_parts_lifetime=0, max_parts_to_merge_at_once=4; + cleanup_thread_preferred_points_per_iteration=0, old_parts_lifetime=0, max_parts_to_merge_at_once=4, + merge_selecting_sleep_ms=100, max_merge_selecting_sleep_ms=500; create table rmt2 (n int) engine=ReplicatedMergeTree('/test/02448/{database}/rmt', '2') order by tuple() settings min_replicated_logs_to_keep=1, max_replicated_logs_to_keep=2, cleanup_delay_period=0, cleanup_delay_period_random_add=1, - cleanup_thread_preferred_points_per_iteration=0, old_parts_lifetime=0, max_parts_to_merge_at_once=4; + cleanup_thread_preferred_points_per_iteration=0, old_parts_lifetime=0, max_parts_to_merge_at_once=4, + merge_selecting_sleep_ms=100, max_merge_selecting_sleep_ms=500; -- insert part only on one replica system stop replicated sends rmt1; @@ -144,7 +146,6 @@ select sleep(2) format Null; -- increases probability of reproducing the issue -- rmt1 will mimic rmt2, but will not be able to fetch parts for a while system stop replicated sends rmt2; attach table rmt1; -system sync 
replica rmt1; -- rmt1 should not show the value (200) from dropped part select throwIf(n = 200) from rmt1 format Null; select 11, arraySort(groupArray(n)) from rmt2; From a237b8b83958abbb6976fdb72f67790c54442195 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 24 May 2023 14:19:37 +0000 Subject: [PATCH 0181/1997] Automatic style fix --- tests/integration/test_ttl_replicated/test.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_ttl_replicated/test.py b/tests/integration/test_ttl_replicated/test.py index d78c00a9f9c..d681e81df3a 100644 --- a/tests/integration/test_ttl_replicated/test.py +++ b/tests/integration/test_ttl_replicated/test.py @@ -136,8 +136,12 @@ def test_merge_with_ttl_timeout(started_cluster): ) ) - assert_eq_with_retry(node1, "SELECT countIf(a = 0) FROM {table}".format(table=table), "3\n") - assert_eq_with_retry(node2, "SELECT countIf(a = 0) FROM {table}".format(table=table), "3\n") + assert_eq_with_retry( + node1, "SELECT countIf(a = 0) FROM {table}".format(table=table), "3\n" + ) + assert_eq_with_retry( + node2, "SELECT countIf(a = 0) FROM {table}".format(table=table), "3\n" + ) def test_ttl_many_columns(started_cluster): From 4c94b3d6bce6bf34a52e83f98b6fec312e4ba79b Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 24 May 2023 20:13:37 +0300 Subject: [PATCH 0182/1997] Update test.py --- tests/integration/test_ttl_replicated/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_ttl_replicated/test.py b/tests/integration/test_ttl_replicated/test.py index d681e81df3a..7ba5a4359c7 100644 --- a/tests/integration/test_ttl_replicated/test.py +++ b/tests/integration/test_ttl_replicated/test.py @@ -68,7 +68,7 @@ def test_ttl_columns(started_cluster): CREATE TABLE test_ttl(date DateTime, id UInt32, a Int32 TTL date + INTERVAL 1 DAY, b Int32 TTL date + INTERVAL 1 MONTH) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/test_ttl_columns', '{replica}') ORDER BY id PARTITION BY toDayOfMonth(date) - SETTINGS merge_with_ttl_timeout=0, min_bytes_for_wide_part=0, , max_merge_selecting_sleep_ms=6000; + SETTINGS merge_with_ttl_timeout=0, min_bytes_for_wide_part=0, max_merge_selecting_sleep_ms=6000; """.format( replica=node.name ) From d8f39b8df1ef608b9f78844a6665495be3cbec33 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 24 May 2023 17:53:37 +0000 Subject: [PATCH 0183/1997] Fixing more tests. 
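The theme of this patch: the query's PreparedSets are now threaded through
every place a shard-local plan is built, so IN-subquery sets prepared for the
distributed query are reused instead of being re-analyzed on each shard. A
rough sketch of the resulting call shape, using only the signatures visible
in the diff below (variable names are illustrative, not a verbatim excerpt):

    // The stream factory now carries the outer query's prepared sets...
    ClusterProxy::SelectStreamFactory select_stream_factory(
        header,
        snapshot_data.objects_by_shard,
        storage_snapshot,
        processed_stage,
        query_info.prepared_sets);

    // ...and createLocalPlan() forwards them into the local
    // InterpreterSelectQuery, so the shard sees the same set entries.
    auto local_plan = createLocalPlan(
        query_ast, header, context, processed_stage,
        select_stream_factory.prepared_sets,
        shard_num, shard_count,
        /*replica_num=*/0, /*replica_count=*/0, /*coordinator=*/nullptr);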
--- src/Interpreters/ActionsVisitor.cpp | 8 ++++++ .../ClusterProxy/SelectStreamFactory.cpp | 8 +++--- .../ClusterProxy/SelectStreamFactory.h | 6 ++++- .../ClusterProxy/executeQuery.cpp | 1 + src/Interpreters/GlobalSubqueriesVisitor.h | 3 ++- src/Interpreters/PreparedSets.cpp | 6 ++++- .../QueryPlan/DistributedCreateLocalPlan.cpp | 3 ++- .../QueryPlan/DistributedCreateLocalPlan.h | 4 +++ src/Processors/QueryPlan/ReadFromRemote.cpp | 6 +++-- src/Processors/QueryPlan/ReadFromRemote.h | 2 ++ src/Storages/StorageDistributed.cpp | 3 ++- src/Storages/StorageMergeTree.cpp | 3 ++- src/Storages/StorageReplicatedMergeTree.cpp | 3 ++- src/Storages/VirtualColumnUtils.cpp | 27 +++++++++++++++++++ 14 files changed, 71 insertions(+), 12 deletions(-) diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index dcf6c4a461f..1405568aa71 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -3,6 +3,7 @@ #include #include #include +#include "Parsers/queryToString.h" #include #include @@ -1392,8 +1393,15 @@ FutureSetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool { if (no_subqueries) return {}; + //std::cerr << queryToString(right_in_operand) << std::endl; auto set_key = PreparedSetKey::forSubquery(right_in_operand->getTreeHash()); + // std::cerr << set_key.toString() << std::endl; + // std::cerr << data.prepared_sets->getSets().size() << std::endl; + // std::cerr << reinterpret_cast(data.prepared_sets.get()) << std::endl; + // for (const auto & [k, v] : data.prepared_sets->getSets()) + // std::cerr << "... " << k.toString(); + if (auto set = data.prepared_sets->getFuture(set_key)) return set; diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp index 0cf3f360994..0f8a725e144 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp @@ -92,11 +92,13 @@ SelectStreamFactory::SelectStreamFactory( const Block & header_, const ColumnsDescriptionByShardNum & objects_by_shard_, const StorageSnapshotPtr & storage_snapshot_, - QueryProcessingStage::Enum processed_stage_) + QueryProcessingStage::Enum processed_stage_, + PreparedSetsPtr prepared_sets_) : header(header_), objects_by_shard(objects_by_shard_), storage_snapshot(storage_snapshot_), - processed_stage(processed_stage_) + processed_stage(processed_stage_), + prepared_sets(std::move(prepared_sets_)) { } @@ -117,7 +119,7 @@ void SelectStreamFactory::createForShard( auto emplace_local_stream = [&]() { local_plans.emplace_back(createLocalPlan( - query_ast, header, context, processed_stage, shard_info.shard_num, shard_count, /*replica_num=*/0, /*replica_count=*/0, /*coordinator=*/nullptr)); + query_ast, header, context, processed_stage, prepared_sets, shard_info.shard_num, shard_count, /*replica_num=*/0, /*replica_count=*/0, /*coordinator=*/nullptr)); }; auto emplace_remote_stream = [&](bool lazy = false, time_t local_delay = 0) diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.h b/src/Interpreters/ClusterProxy/SelectStreamFactory.h index f1a8b3e0984..b19012ddba6 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.h +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.h @@ -26,6 +26,8 @@ using QueryPlanPtr = std::unique_ptr; struct StorageID; +class PreparedSets; +using PreparedSetsPtr = std::shared_ptr; namespace ClusterProxy { @@ -67,7 +69,8 @@ public: const Block & header_, const 
ColumnsDescriptionByShardNum & objects_by_shard_, const StorageSnapshotPtr & storage_snapshot_, - QueryProcessingStage::Enum processed_stage_); + QueryProcessingStage::Enum processed_stage_, + PreparedSetsPtr prepared_sets_); void createForShard( const Cluster::ShardInfo & shard_info, @@ -92,6 +95,7 @@ public: const ColumnsDescriptionByShardNum objects_by_shard; const StorageSnapshotPtr storage_snapshot; QueryProcessingStage::Enum processed_stage; + PreparedSetsPtr prepared_sets; }; } diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index e2f1dfe8ba7..a9cf3d55392 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -226,6 +226,7 @@ void executeQuery( std::move(remote_shards), header, processed_stage, + stream_factory.prepared_sets, main_table, table_func_ptr, new_context, diff --git a/src/Interpreters/GlobalSubqueriesVisitor.h b/src/Interpreters/GlobalSubqueriesVisitor.h index 3a846bb4bc3..a872af529aa 100644 --- a/src/Interpreters/GlobalSubqueriesVisitor.h +++ b/src/Interpreters/GlobalSubqueriesVisitor.h @@ -181,7 +181,7 @@ public: // auto & subquery_for_set = prepared_sets->getSubquery(external_table_name); // subquery_for_set.createSource(*interpreter, external_storage); auto key = subquery_or_table_name->getColumnName(); - auto set_key = PreparedSetKey::forSubquery(subquery_or_table_name->getTreeHash()); + auto set_key = PreparedSetKey::forSubquery(database_and_table_name->getTreeHash()); if (!prepared_sets->getFuture(set_key)) { @@ -190,6 +190,7 @@ public: subquery_for_set.table = std::move(external_storage); subquery_for_set.createSource(*interpreter); + //std::cerr << reinterpret_cast(prepared_sets.get()) << std::endl; prepared_sets->addFromSubquery(set_key, std::move(subquery_for_set)); } else diff --git a/src/Interpreters/PreparedSets.cpp b/src/Interpreters/PreparedSets.cpp index cd6b2a81ba0..3b63d942404 100644 --- a/src/Interpreters/PreparedSets.cpp +++ b/src/Interpreters/PreparedSets.cpp @@ -140,6 +140,10 @@ FutureSetPtr PreparedSets::addFromSubquery(const PreparedSetKey & key, SubqueryF if (!inserted) throw Exception(ErrorCodes::LOGICAL_ERROR, "Duplicate set: {}", key.toString()); + // std::cerr << key.toString() << std::endl; + // std::cerr << "========= PreparedSets::addFromSubquery\n"; + // std::cerr << StackTrace().toString() << std::endl; + subqueries.emplace(id, std::move(from_subquery)); return it->second; } @@ -226,7 +230,7 @@ std::unique_ptr FutureSetFromSubquery::buildPlan(const ContextPtr & c if (set) return nullptr; - //std::cerr << StackTrace().toString() << std::endl; + // std::cerr << StackTrace().toString() << std::endl; auto set_cache = context->getPreparedSetsCache(); if (set_cache) diff --git a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp index 9b9cc221ca8..62e369659d1 100644 --- a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp +++ b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp @@ -43,6 +43,7 @@ std::unique_ptr createLocalPlan( const Block & header, ContextPtr context, QueryProcessingStage::Enum processed_stage, + PreparedSetsPtr prepared_sets, size_t shard_num, size_t shard_count, size_t replica_num, @@ -98,7 +99,7 @@ std::unique_ptr createLocalPlan( } else { - auto interpreter = InterpreterSelectQuery(query_ast, new_context, select_query_options); + auto interpreter = InterpreterSelectQuery(query_ast, new_context, select_query_options, 
prepared_sets); interpreter.buildQueryPlan(*query_plan); } diff --git a/src/Processors/QueryPlan/DistributedCreateLocalPlan.h b/src/Processors/QueryPlan/DistributedCreateLocalPlan.h index 1afdc07fa4d..cf59027a33f 100644 --- a/src/Processors/QueryPlan/DistributedCreateLocalPlan.h +++ b/src/Processors/QueryPlan/DistributedCreateLocalPlan.h @@ -10,11 +10,15 @@ namespace DB { +class PreparedSets; +using PreparedSetsPtr = std::shared_ptr; + std::unique_ptr createLocalPlan( const ASTPtr & query_ast, const Block & header, ContextPtr context, QueryProcessingStage::Enum processed_stage, + PreparedSetsPtr prepared_sets, size_t shard_num, size_t shard_count, size_t replica_num, diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index ed740e3e242..16cb06a94d6 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -97,6 +97,7 @@ ReadFromRemote::ReadFromRemote( ClusterProxy::SelectStreamFactory::Shards shards_, Block header_, QueryProcessingStage::Enum stage_, + PreparedSetsPtr prepared_sets_, StorageID main_table_, ASTPtr table_func_ptr_, ContextMutablePtr context_, @@ -109,6 +110,7 @@ ReadFromRemote::ReadFromRemote( : ISourceStep(DataStream{.header = std::move(header_)}) , shards(std::move(shards_)) , stage(stage_) + , prepared_sets(std::move(prepared_sets_)) , main_table(std::move(main_table_)) , table_func_ptr(std::move(table_func_ptr_)) , context(std::move(context_)) @@ -150,7 +152,7 @@ void ReadFromRemote::addLazyPipe(Pipes & pipes, const ClusterProxy::SelectStream my_context = context, my_throttler = throttler, my_main_table = main_table, my_table_func_ptr = table_func_ptr, my_scalars = scalars, my_external_tables = external_tables, - my_stage = stage, local_delay = shard.local_delay, + my_stage = stage, my_prepared_sets = prepared_sets, local_delay = shard.local_delay, add_agg_info, add_totals, add_extremes, async_read, async_query_sending]() mutable -> QueryPipelineBuilder { @@ -185,7 +187,7 @@ void ReadFromRemote::addLazyPipe(Pipes & pipes, const ClusterProxy::SelectStream if (try_results.empty() || local_delay < max_remote_delay) { auto plan = createLocalPlan( - query, header, my_context, my_stage, my_shard.shard_info.shard_num, my_shard_count, 0, 0, /*coordinator=*/nullptr); + query, header, my_context, my_stage, my_prepared_sets, my_shard.shard_info.shard_num, my_shard_count, 0, 0, /*coordinator=*/nullptr); return std::move(*plan->buildQueryPipeline( QueryPlanOptimizationSettings::fromContext(my_context), diff --git a/src/Processors/QueryPlan/ReadFromRemote.h b/src/Processors/QueryPlan/ReadFromRemote.h index d4005d81f1b..405b5727ff2 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.h +++ b/src/Processors/QueryPlan/ReadFromRemote.h @@ -26,6 +26,7 @@ public: ClusterProxy::SelectStreamFactory::Shards shards_, Block header_, QueryProcessingStage::Enum stage_, + PreparedSetsPtr prepared_sets_, StorageID main_table_, ASTPtr table_func_ptr_, ContextMutablePtr context_, @@ -46,6 +47,7 @@ public: private: ClusterProxy::SelectStreamFactory::Shards shards; QueryProcessingStage::Enum stage; + PreparedSetsPtr prepared_sets; StorageID main_table; ASTPtr table_func_ptr; ContextMutablePtr context; diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 86a2599b49c..f45e247f8e7 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -1098,7 +1098,8 @@ void StorageDistributed::read( header, snapshot_data.objects_by_shard, 
storage_snapshot, - processed_stage); + processed_stage, + query_info.prepared_sets); auto settings = local_context->getSettingsRef(); diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 2c19d3ba122..4600532231f 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -231,7 +231,8 @@ void StorageMergeTree::read( header, {}, storage_snapshot, - processed_stage); + processed_stage, + query_info.prepared_sets); ClusterProxy::executeQueryWithParallelReplicas( query_plan, getStorageID(), /*remove_table_function_ptr*/ nullptr, diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index d9c8f09ccf1..ab78ea2f0a4 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -4663,7 +4663,8 @@ void StorageReplicatedMergeTree::read( header, {}, storage_snapshot, - processed_stage); + processed_stage, + query_info.prepared_sets); ClusterProxy::executeQueryWithParallelReplicas( query_plan, getStorageID(), /*remove_table_function_ptr*/ nullptr, diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 31fa1cc6a7e..be2206a78e9 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -23,6 +23,8 @@ #include #include #include +#include +#include #include @@ -202,6 +204,31 @@ void filterBlockWithQuery(const ASTPtr & query, Block & block, ContextPtr contex //buildSets(expression_ast, analyzer); ExpressionActionsPtr actions = analyzer.getActions(false /* add alises */, true /* project result */, CompileExpressions::yes); + for (const auto & node : actions->getNodes()) + { + if (node.type == ActionsDAG::ActionType::COLUMN) + { + const ColumnSet * column_set = checkAndGetColumnConstData(node.column.get()); + if (!column_set) + column_set = checkAndGetColumn(node.column.get()); + + if (column_set) + { + auto future_set = column_set->getData(); + if (!future_set->isFilled()) + { + auto plan = future_set->build(context); + auto builder = plan->buildQueryPipeline(QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context)); + auto pipeline = QueryPipelineBuilder::getPipeline(std::move(*builder)); + pipeline.complete(std::make_shared(Block())); + + CompletedPipelineExecutor executor(pipeline); + executor.execute(); + } + } + } + } + Block block_with_filter = block; actions->execute(block_with_filter); From 7e68f61df1671b0be0b30c8cef33f34c8c971d3b Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 25 May 2023 08:43:11 +0000 Subject: [PATCH 0184/1997] Fix snapshot recovery --- programs/keeper-converter/KeeperConverter.cpp | 4 +- programs/server/config.d/users.xml | 26 ----- src/Coordination/FourLetterCommand.cpp | 1 + src/Coordination/KeeperContext.cpp | 35 ++++++- src/Coordination/KeeperContext.h | 4 + src/Coordination/KeeperDiskSelector.h | 37 -------- src/Coordination/KeeperDispatcher.cpp | 11 ++- src/Coordination/KeeperDispatcher.h | 7 ++ src/Coordination/KeeperServer.cpp | 4 +- src/Coordination/KeeperServer.h | 2 +- src/Coordination/KeeperSnapshotManager.cpp | 20 +--- src/Coordination/KeeperSnapshotManager.h | 21 ++-- src/Coordination/KeeperSnapshotManagerS3.cpp | 24 ++--- src/Coordination/KeeperSnapshotManagerS3.h | 8 +- src/Coordination/KeeperStateMachine.cpp | 95 ++++++++++++++----- src/Coordination/KeeperStateMachine.h | 2 +- .../test_keeper_four_word_command/test.py | 2 + 17 files changed, 165 insertions(+), 138 deletions(-) 
delete mode 100644 programs/server/config.d/users.xml delete mode 100644 src/Coordination/KeeperDiskSelector.h diff --git a/programs/keeper-converter/KeeperConverter.cpp b/programs/keeper-converter/KeeperConverter.cpp index f2389021cb6..a049e6bc2b3 100644 --- a/programs/keeper-converter/KeeperConverter.cpp +++ b/programs/keeper-converter/KeeperConverter.cpp @@ -55,8 +55,8 @@ int mainEntryClickHouseKeeperConverter(int argc, char ** argv) DB::KeeperSnapshotManager manager(1, keeper_context); auto snp = manager.serializeSnapshotToBuffer(snapshot); - auto path = manager.serializeSnapshotBufferToDisk(*snp, storage.getZXID()); - std::cout << "Snapshot serialized to path:" << path << std::endl; + auto file_info = manager.serializeSnapshotBufferToDisk(*snp, storage.getZXID()); + std::cout << "Snapshot serialized to path:" << fs::path(file_info.disk->getPath()) / file_info.path << std::endl; } catch (...) { diff --git a/programs/server/config.d/users.xml b/programs/server/config.d/users.xml deleted file mode 100644 index d8a62b45baa..00000000000 --- a/programs/server/config.d/users.xml +++ /dev/null @@ -1,26 +0,0 @@ - - - - 10000000000 - 0 - 2 - - - 5000000000 - 20000000000 - - - - - - - - - - - - - default - - - diff --git a/src/Coordination/FourLetterCommand.cpp b/src/Coordination/FourLetterCommand.cpp index 7077e792fd8..b7419bcaccc 100644 --- a/src/Coordination/FourLetterCommand.cpp +++ b/src/Coordination/FourLetterCommand.cpp @@ -292,6 +292,7 @@ String ConfCommand::run() StringBuffer buf; keeper_dispatcher.getKeeperConfigurationAndSettings()->dump(buf); + keeper_dispatcher.getKeeperContext()->dumpConfiguration(buf); return buf.str(); } diff --git a/src/Coordination/KeeperContext.cpp b/src/Coordination/KeeperContext.cpp index e27cfc60cff..da49868f706 100644 --- a/src/Coordination/KeeperContext.cpp +++ b/src/Coordination/KeeperContext.cpp @@ -122,6 +122,35 @@ void KeeperContext::setStateFileDisk(DiskPtr disk) state_file_storage = std::move(disk); } +void KeeperContext::dumpConfiguration(WriteBufferFromOwnString & buf) const +{ + auto dump_disk_info = [&](const std::string_view prefix, const IDisk & disk) + { + writeText(fmt::format("{}_path=", prefix), buf); + writeText(disk.getPath(), buf); + buf.write('\n'); + + writeText(fmt::format("{}_disk=", prefix), buf); + writeText(disk.getName(), buf); + buf.write('\n'); + + }; + + { + auto log_disk = getDisk(log_storage); + dump_disk_info("log_storage", *log_disk); + + auto current_log_disk = getDisk(current_log_storage); + if (log_disk != current_log_disk) + dump_disk_info("current_log_storage", *current_log_disk); + } + + { + auto snapshot_disk = getDisk(snapshot_storage); + dump_disk_info("snapshot_storage", *snapshot_disk); + } +} + KeeperContext::Storage KeeperContext::getLogsPathFromConfig(const Poco::Util::AbstractConfiguration & config) const { const auto create_local_disk = [](const auto & path) @@ -129,7 +158,7 @@ KeeperContext::Storage KeeperContext::getLogsPathFromConfig(const Poco::Util::Ab if (!fs::exists(path)) fs::create_directories(path); - return std::make_shared("LogDisk", path, 0); + return std::make_shared("LocalLogDisk", path, 0); }; /// the most specialized path @@ -155,7 +184,7 @@ KeeperContext::Storage KeeperContext::getSnapshotsPathFromConfig(const Poco::Uti if (!fs::exists(path)) fs::create_directories(path); - return std::make_shared("SnapshotDisk", path, 0); + return std::make_shared("LocalSnapshotDisk", path, 0); }; /// the most specialized path @@ -181,7 +210,7 @@ KeeperContext::Storage 
KeeperContext::getStatePathFromConfig(const Poco::Util::A if (!fs::exists(path)) fs::create_directories(path); - return std::make_shared("SnapshotDisk", path, 0); + return std::make_shared("LocalStateFileDisk", path, 0); }; if (config.has("keeper_server.state_storage_disk")) diff --git a/src/Coordination/KeeperContext.h b/src/Coordination/KeeperContext.h index e04d1cd6b3d..6b7af3a60db 100644 --- a/src/Coordination/KeeperContext.h +++ b/src/Coordination/KeeperContext.h @@ -2,6 +2,8 @@ #include +#include + #include #include @@ -42,6 +44,8 @@ public: DiskPtr getStateFileDisk() const; void setStateFileDisk(DiskPtr disk); + + void dumpConfiguration(WriteBufferFromOwnString & buf) const; private: /// local disk defined using path or disk name using Storage = std::variant; diff --git a/src/Coordination/KeeperDiskSelector.h b/src/Coordination/KeeperDiskSelector.h deleted file mode 100644 index fa78fec1952..00000000000 --- a/src/Coordination/KeeperDiskSelector.h +++ /dev/null @@ -1,37 +0,0 @@ -#pragma once - -#include -#include - -namespace DB -{ - -class KeeperDiskSelector -{ -public: - void initialize(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context); - - DiskSelectorPtr updateFromConfig( - const Poco::Util::AbstractConfiguration & config, - const String & config_prefix, - ContextPtr context) const; - - /// Get disk by name - DiskPtr get(const String & name) const; - - DiskPtr tryGet(const String & name) const; - - /// Get all disks with names - const DisksMap & getDisksMap() const; - - void shutdown(); - -private: - mutable std::mutex disk_selector_mutex; - DiskSelectorPtr disk_selector; -}; - -using KeeperDiskSelectorPtr = std::shared_ptr; - - -} diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index 17a15067301..4a460777621 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -238,13 +238,13 @@ void KeeperDispatcher::snapshotThread() try { - auto snapshot_path = task.create_snapshot(std::move(task.snapshot)); + auto snapshot_file_info = task.create_snapshot(std::move(task.snapshot)); - if (snapshot_path.empty()) + if (snapshot_file_info.path.empty()) continue; if (isLeader()) - snapshot_s3.uploadSnapshot(snapshot_path); + snapshot_s3.uploadSnapshot(snapshot_file_info); } catch (...) 
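// create_snapshot now returns a SnapshotFileInfo {path, disk} pair instead
// of a bare path string; presumably so uploadSnapshot can read the snapshot
// through the owning IDisk rather than assuming a local filesystem path.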
{ @@ -336,12 +336,15 @@ void KeeperDispatcher::initialize(const Poco::Util::AbstractConfiguration & conf snapshot_s3.startup(config, macros); + keeper_context = std::make_shared(standalone_keeper); + keeper_context->initialize(config); + server = std::make_unique( configuration_and_settings, config, responses_queue, snapshots_queue, - standalone_keeper, + keeper_context, snapshot_s3, [this](const KeeperStorage::RequestForSession & request_for_session) { diff --git a/src/Coordination/KeeperDispatcher.h b/src/Coordination/KeeperDispatcher.h index 4b8b134cf8f..1b44f0f6ced 100644 --- a/src/Coordination/KeeperDispatcher.h +++ b/src/Coordination/KeeperDispatcher.h @@ -81,6 +81,8 @@ private: KeeperSnapshotManagerS3 snapshot_s3; + KeeperContextPtr keeper_context; + /// Thread put requests to raft void requestThread(); /// Thread put responses for subscribed sessions @@ -198,6 +200,11 @@ public: return configuration_and_settings; } + const KeeperContextPtr & getKeeperContext() const + { + return keeper_context; + } + void incrementPacketsSent() { keeper_stats.incrementPacketsSent(); diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 31c91e2de80..989455a5a79 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -108,14 +108,14 @@ KeeperServer::KeeperServer( const Poco::Util::AbstractConfiguration & config, ResponsesQueue & responses_queue_, SnapshotsQueue & snapshots_queue_, - bool standalone_keeper, + KeeperContextPtr keeper_context_, KeeperSnapshotManagerS3 & snapshot_manager_s3, KeeperStateMachine::CommitCallback commit_callback) : server_id(configuration_and_settings_->server_id) , coordination_settings(configuration_and_settings_->coordination_settings) , log(&Poco::Logger::get("KeeperServer")) , is_recovering(config.getBool("keeper_server.force_recovery", false)) - , keeper_context{std::make_shared(standalone_keeper)} + , keeper_context{std::move(keeper_context_)} , create_snapshot_on_exit(config.getBool("keeper_server.create_snapshot_on_exit", true)) { if (coordination_settings->quorum_reads) diff --git a/src/Coordination/KeeperServer.h b/src/Coordination/KeeperServer.h index 63f9cc2bcea..8f416b1f48c 100644 --- a/src/Coordination/KeeperServer.h +++ b/src/Coordination/KeeperServer.h @@ -72,7 +72,7 @@ public: const Poco::Util::AbstractConfiguration & config_, ResponsesQueue & responses_queue_, SnapshotsQueue & snapshots_queue_, - bool standalone_keeper, + KeeperContextPtr keeper_context_, KeeperSnapshotManagerS3 & snapshot_manager_s3, KeeperStateMachine::CommitCallback commit_callback); diff --git a/src/Coordination/KeeperSnapshotManager.cpp b/src/Coordination/KeeperSnapshotManager.cpp index 7f097c182a1..3bfe700bcd5 100644 --- a/src/Coordination/KeeperSnapshotManager.cpp +++ b/src/Coordination/KeeperSnapshotManager.cpp @@ -563,7 +563,7 @@ KeeperSnapshotManager::KeeperSnapshotManager( } -std::string KeeperSnapshotManager::serializeSnapshotBufferToDisk(nuraft::buffer & buffer, uint64_t up_to_log_idx) +SnapshotFileInfo KeeperSnapshotManager::serializeSnapshotBufferToDisk(nuraft::buffer & buffer, uint64_t up_to_log_idx) { ReadBufferFromNuraftBuffer reader(buffer); @@ -585,7 +585,7 @@ std::string KeeperSnapshotManager::serializeSnapshotBufferToDisk(nuraft::buffer existing_snapshots.emplace(up_to_log_idx, snapshot_file_name); removeOutdatedSnapshotsIfNeeded(); - return snapshot_file_name; + return {snapshot_file_name, disk}; } nuraft::ptr KeeperSnapshotManager::deserializeLatestSnapshotBufferFromDisk() @@ -694,7 +694,7 @@ void 
KeeperSnapshotManager::removeSnapshot(uint64_t log_idx) existing_snapshots.erase(itr); } -std::pair KeeperSnapshotManager::serializeSnapshotToDisk(const KeeperStorageSnapshot & snapshot) +SnapshotFileInfo KeeperSnapshotManager::serializeSnapshotToDisk(const KeeperStorageSnapshot & snapshot) { auto up_to_log_idx = snapshot.snapshot_meta->get_last_log_idx(); auto snapshot_file_name = getSnapshotFileName(up_to_log_idx, compress_snapshots_zstd); @@ -716,22 +716,12 @@ std::pair KeeperSnapshotManager::serializeSnapshot compressed_writer->finalize(); compressed_writer->sync(); - std::error_code ec; - - try - { - disk->removeFile(tmp_snapshot_file_name); - } - catch (fs::filesystem_error & e) - { - ec = e.code(); - return {snapshot_file_name, ec}; - } + disk->removeFile(tmp_snapshot_file_name); existing_snapshots.emplace(up_to_log_idx, snapshot_file_name); removeOutdatedSnapshotsIfNeeded(); - return {snapshot_file_name, ec}; + return {snapshot_file_name, disk}; } } diff --git a/src/Coordination/KeeperSnapshotManager.h b/src/Coordination/KeeperSnapshotManager.h index 7b1129018d8..036c0cab62b 100644 --- a/src/Coordination/KeeperSnapshotManager.h +++ b/src/Coordination/KeeperSnapshotManager.h @@ -87,8 +87,14 @@ public: uint64_t nodes_digest; }; +struct SnapshotFileInfo +{ + std::string path; + DiskPtr disk; +}; + using KeeperStorageSnapshotPtr = std::shared_ptr; -using CreateSnapshotCallback = std::function; +using CreateSnapshotCallback = std::function; using SnapshotMetaAndStorage = std::pair; @@ -112,10 +118,10 @@ public: nuraft::ptr serializeSnapshotToBuffer(const KeeperStorageSnapshot & snapshot) const; /// Serialize already compressed snapshot to disk (return path) - std::string serializeSnapshotBufferToDisk(nuraft::buffer & buffer, uint64_t up_to_log_idx); + SnapshotFileInfo serializeSnapshotBufferToDisk(nuraft::buffer & buffer, uint64_t up_to_log_idx); /// Serialize snapshot directly to disk - std::pair serializeSnapshotToDisk(const KeeperStorageSnapshot & snapshot); + SnapshotFileInfo serializeSnapshotToDisk(const KeeperStorageSnapshot & snapshot); SnapshotDeserializationResult deserializeSnapshotFromBuffer(nuraft::ptr buffer) const; @@ -139,7 +145,7 @@ public: return 0; } - std::string getLatestSnapshotPath() const + SnapshotFileInfo getLatestSnapshotInfo() const { if (!existing_snapshots.empty()) { @@ -147,14 +153,15 @@ public: try { - if (getDisk()->exists(path)) - return path; + auto disk = getDisk(); + if (disk->exists(path)) + return {path, disk}; } catch (...) 
{ } } - return ""; + return {"", nullptr}; } private: diff --git a/src/Coordination/KeeperSnapshotManagerS3.cpp b/src/Coordination/KeeperSnapshotManagerS3.cpp index 1afe0b352c5..580e166e302 100644 --- a/src/Coordination/KeeperSnapshotManagerS3.cpp +++ b/src/Coordination/KeeperSnapshotManagerS3.cpp @@ -132,8 +132,9 @@ std::shared_ptr KeeperSnapshotManagerS return snapshot_s3_client; } -void KeeperSnapshotManagerS3::uploadSnapshotImpl(const std::string & snapshot_path) +void KeeperSnapshotManagerS3::uploadSnapshotImpl(const SnapshotFileInfo & snapshot_file_info) { + const auto & [snapshot_path, snapshot_disk] = snapshot_file_info; try { auto s3_client = getSnapshotS3Client(); @@ -154,8 +155,9 @@ void KeeperSnapshotManagerS3::uploadSnapshotImpl(const std::string & snapshot_pa }; }; - LOG_INFO(log, "Will try to upload snapshot on {} to S3", snapshot_path); - ReadBufferFromFile snapshot_file(snapshot_path); + LOG_INFO(log, "Will try to upload snapshot on {} to S3", snapshot_file_info.path); + + auto snapshot_file = snapshot_disk->readFile(snapshot_file_info.path); auto snapshot_name = fs::path(snapshot_path).filename().string(); auto lock_file = fmt::format(".{}_LOCK", snapshot_name); @@ -222,7 +224,7 @@ void KeeperSnapshotManagerS3::uploadSnapshotImpl(const std::string & snapshot_pa }); WriteBufferFromS3 snapshot_writer = create_writer(snapshot_name); - copyData(snapshot_file, snapshot_writer); + copyData(*snapshot_file, snapshot_writer); snapshot_writer.finalize(); LOG_INFO(log, "Successfully uploaded {} to S3", snapshot_path); @@ -240,31 +242,31 @@ void KeeperSnapshotManagerS3::snapshotS3Thread() while (!shutdown_called) { - std::string snapshot_path; - if (!snapshots_s3_queue.pop(snapshot_path)) + SnapshotFileInfo snapshot_file_info; + if (!snapshots_s3_queue.pop(snapshot_file_info)) break; if (shutdown_called) break; - uploadSnapshotImpl(snapshot_path); + uploadSnapshotImpl(snapshot_file_info); } } -void KeeperSnapshotManagerS3::uploadSnapshot(const std::string & path, bool async_upload) +void KeeperSnapshotManagerS3::uploadSnapshot(const SnapshotFileInfo & file_info, bool async_upload) { if (getSnapshotS3Client() == nullptr) return; if (async_upload) { - if (!snapshots_s3_queue.push(path)) - LOG_WARNING(log, "Failed to add snapshot {} to S3 queue", path); + if (!snapshots_s3_queue.push(file_info)) + LOG_WARNING(log, "Failed to add snapshot {} to S3 queue", file_info.path); return; } - uploadSnapshotImpl(path); + uploadSnapshotImpl(file_info); } void KeeperSnapshotManagerS3::startup(const Poco::Util::AbstractConfiguration & config, const MultiVersion::Version & macros) diff --git a/src/Coordination/KeeperSnapshotManagerS3.h b/src/Coordination/KeeperSnapshotManagerS3.h index eff7868bba9..908deb76851 100644 --- a/src/Coordination/KeeperSnapshotManagerS3.h +++ b/src/Coordination/KeeperSnapshotManagerS3.h @@ -10,6 +10,8 @@ #include #include +#include + #include #endif @@ -24,13 +26,13 @@ public: /// 'macros' are used to substitute macros in endpoint of disks void updateS3Configuration(const Poco::Util::AbstractConfiguration & config, const MultiVersion::Version & macros); - void uploadSnapshot(const std::string & path, bool async_upload = true); + void uploadSnapshot(const SnapshotFileInfo & file_info, bool async_upload = true); /// 'macros' are used to substitute macros in endpoint of disks void startup(const Poco::Util::AbstractConfiguration & config, const MultiVersion::Version & macros); void shutdown(); private: - using SnapshotS3Queue = ConcurrentBoundedQueue; + using SnapshotS3Queue = 
ConcurrentBoundedQueue; SnapshotS3Queue snapshots_s3_queue; /// Upload new snapshots to S3 @@ -48,7 +50,7 @@ private: std::shared_ptr getSnapshotS3Client() const; - void uploadSnapshotImpl(const std::string & snapshot_path); + void uploadSnapshotImpl(const SnapshotFileInfo & snapshot_file_info); /// Thread upload snapshots to S3 in the background void snapshotS3Thread(); diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index 69c15db51da..49243541bc8 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -14,6 +14,8 @@ #include #include "Coordination/KeeperStorage.h" +#include + namespace ProfileEvents { @@ -66,6 +68,16 @@ KeeperStateMachine::KeeperStateMachine( { } +namespace +{ + +bool isLocalDisk(const IDisk & disk) +{ + return dynamic_cast(&disk) != nullptr; +} + +} + void KeeperStateMachine::init() { /// Do everything without mutexes, no other threads exist. @@ -80,9 +92,13 @@ void KeeperStateMachine::init() try { - auto snapshot_deserialization_result - = snapshot_manager.deserializeSnapshotFromBuffer(snapshot_manager.deserializeSnapshotBufferFromDisk(latest_log_index)); - latest_snapshot_path = snapshot_manager.getLatestSnapshotPath(); + latest_snapshot_buf = snapshot_manager.deserializeSnapshotBufferFromDisk(latest_log_index); + auto snapshot_deserialization_result = snapshot_manager.deserializeSnapshotFromBuffer(latest_snapshot_buf); + latest_snapshot_info = snapshot_manager.getLatestSnapshotInfo(); + + if (isLocalDisk(*latest_snapshot_info.disk)) + latest_snapshot_buf = nullptr; + storage = std::move(snapshot_deserialization_result.storage); latest_snapshot_meta = snapshot_deserialization_result.snapshot_meta; cluster_config = snapshot_deserialization_result.cluster_config; @@ -306,8 +322,14 @@ bool KeeperStateMachine::apply_snapshot(nuraft::snapshot & s) { /// deserialize and apply snapshot to storage std::lock_guard lock(storage_and_responses_lock); - auto snapshot_deserialization_result - = snapshot_manager.deserializeSnapshotFromBuffer(snapshot_manager.deserializeSnapshotBufferFromDisk(s.get_last_log_idx())); + + SnapshotDeserializationResult snapshot_deserialization_result; + if (latest_snapshot_ptr) + snapshot_deserialization_result = snapshot_manager.deserializeSnapshotFromBuffer(latest_snapshot_ptr); + else + snapshot_deserialization_result + = snapshot_manager.deserializeSnapshotFromBuffer(snapshot_manager.deserializeSnapshotBufferFromDisk(s.get_last_log_idx())); + storage = std::move(snapshot_deserialization_result.storage); latest_snapshot_meta = snapshot_deserialization_result.snapshot_meta; cluster_config = snapshot_deserialization_result.cluster_config; @@ -387,19 +409,22 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res } else { - auto [path, error_code] = snapshot_manager.serializeSnapshotToDisk(*snapshot); - if (error_code) + /// we rely on the fact that the snapshot disk cannot be changed during runtime + if (isLocalDisk(*keeper_context->getSnapshotDisk())) { - throw Exception( - ErrorCodes::SYSTEM_ERROR, - "Snapshot {} was created failed, error: {}", - snapshot->snapshot_meta->get_last_log_idx(), - error_code.message()); + latest_snapshot_info = snapshot_manager.serializeSnapshotToDisk(*snapshot); + latest_snapshot_meta = snapshot->snapshot_meta; + latest_snapshot_buf = nullptr; } - latest_snapshot_path = path; - latest_snapshot_meta = snapshot->snapshot_meta; + else + { + auto snapshot_buf = 
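// Note (editorial): in this else branch, taken when the snapshot disk is not
// local, the serialized buffer is kept in `latest_snapshot_buf` so that
// read_logical_snp_obj (further down in this file) can serve followers from
// memory instead of re-reading a remote object store; for a local disk the
// buffer is dropped and the file is re-read on demand. The comment just above
// this if/else states the assumption that makes this safe: the snapshot disk
// cannot be changed during runtime.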
snapshot_manager.serializeSnapshotToBuffer(*snapshot); + latest_snapshot_info = snapshot_manager.serializeSnapshotBufferToDisk(*snapshot_buf, snapshot->snapshot_meta->get_last_log_idx()); + latest_snapshot_buf = std::move(snapshot_buf); + } + ProfileEvents::increment(ProfileEvents::KeeperSnapshotCreations); - LOG_DEBUG(log, "Created persistent snapshot {} with path {}", latest_snapshot_meta->get_last_log_idx(), path); + LOG_DEBUG(log, "Created persistent snapshot {} with path {}", latest_snapshot_meta->get_last_log_idx(), latest_snapshot_info.path); } } @@ -423,19 +448,19 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res when_done(ret, exception); - return ret ? latest_snapshot_path : ""; + return ret ? latest_snapshot_info : SnapshotFileInfo{}; }; if (keeper_context->getServerState() == KeeperContext::Phase::SHUTDOWN) { LOG_INFO(log, "Creating a snapshot during shutdown because 'create_snapshot_on_exit' is enabled."); - auto snapshot_path = snapshot_task.create_snapshot(std::move(snapshot_task.snapshot)); + auto snapshot_file_info = snapshot_task.create_snapshot(std::move(snapshot_task.snapshot)); - if (!snapshot_path.empty() && snapshot_manager_s3) + if (!snapshot_file_info.path.empty() && snapshot_manager_s3) { - LOG_INFO(log, "Uploading snapshot {} during shutdown because 'upload_snapshot_on_exit' is enabled.", snapshot_path); - snapshot_manager_s3->uploadSnapshot(snapshot_path, /* asnyc_upload */ false); + LOG_INFO(log, "Uploading snapshot {} during shutdown because 'upload_snapshot_on_exit' is enabled.", snapshot_file_info.path); + snapshot_manager_s3->uploadSnapshot(snapshot_file_info, /* async_upload */ false); } return; @@ -456,14 +481,20 @@ void KeeperStateMachine::save_logical_snp_obj( nuraft::ptr snp_buf = s.serialize(); nuraft::ptr cloned_meta = nuraft::snapshot::deserialize(*snp_buf); + nuraft::ptr cloned_buffer; + + /// we rely on the fact that the snapshot disk cannot be changed during runtime + if (!isLocalDisk(*keeper_context->getSnapshotDisk())) + cloned_buffer = nuraft::buffer::clone(data); + try { std::lock_guard lock(snapshots_lock); /// Serialize snapshot to disk - auto result_path = snapshot_manager.serializeSnapshotBufferToDisk(data, s.get_last_log_idx()); - latest_snapshot_path = result_path; + latest_snapshot_info = snapshot_manager.serializeSnapshotBufferToDisk(data, s.get_last_log_idx()); latest_snapshot_meta = cloned_meta; - LOG_DEBUG(log, "Saved snapshot {} to path {}", s.get_last_log_idx(), result_path); + latest_snapshot_buf = std::move(cloned_buffer); + LOG_DEBUG(log, "Saved snapshot {} to path {}", s.get_last_log_idx(), latest_snapshot_info.path); obj_id++; ProfileEvents::increment(ProfileEvents::KeeperSaveSnapshot); } @@ -523,11 +554,23 @@ int KeeperStateMachine::read_logical_snp_obj( latest_snapshot_meta->get_last_log_idx()); return -1; } - if (bufferFromFile(log, latest_snapshot_path, data_out)) + + const auto & [path, disk] = latest_snapshot_info; + if (isLocalDisk(*disk)) { - LOG_WARNING(log, "Error reading snapshot {} from {}", s.get_last_log_idx(), latest_snapshot_path); - return -1; + auto full_path = fs::path(disk->getPath()) / path; + if (bufferFromFile(log, full_path, data_out)) + { + LOG_WARNING(log, "Error reading snapshot {} from {}", s.get_last_log_idx(), full_path); + return -1; + } } + else + { + chassert(latest_snapshot_buf); + data_out = nuraft::buffer::clone(*latest_snapshot_buf); + } + is_last_obj = true; ProfileEvents::increment(ProfileEvents::KeeperReadSnapshot); diff --git
a/src/Coordination/KeeperStateMachine.h b/src/Coordination/KeeperStateMachine.h index afe11150f36..834837314df 100644 --- a/src/Coordination/KeeperStateMachine.h +++ b/src/Coordination/KeeperStateMachine.h @@ -110,7 +110,7 @@ private: /// In our state machine we always have a single snapshot which is stored /// in memory in compressed (serialized) format. SnapshotMetadataPtr latest_snapshot_meta = nullptr; - std::string latest_snapshot_path; + SnapshotFileInfo latest_snapshot_info; nuraft::ptr latest_snapshot_buf = nullptr; CoordinationSettingsPtr coordination_settings; diff --git a/tests/integration/test_keeper_four_word_command/test.py b/tests/integration/test_keeper_four_word_command/test.py index 2098daea5fe..aab8a2ccb68 100644 --- a/tests/integration/test_keeper_four_word_command/test.py +++ b/tests/integration/test_keeper_four_word_command/test.py @@ -252,10 +252,12 @@ def test_cmd_conf(started_cluster): assert result["four_letter_word_allow_list"] == "*" assert result["log_storage_path"] == "/var/lib/clickhouse/coordination/log" + assert result["log_storage_disk"] == "LocalLogDisk" assert ( result["snapshot_storage_path"] == "/var/lib/clickhouse/coordination/snapshots" ) + assert result["snapshot_storage_disk"] == "LocalSnapshotDisk" assert result["session_timeout_ms"] == "30000" assert result["min_session_timeout_ms"] == "10000" From 4483602c50918e69d5a4d79c1628412b2c667c0d Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 25 May 2023 08:52:14 +0000 Subject: [PATCH 0185/1997] Remove double initialization --- src/Coordination/KeeperServer.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 989455a5a79..363e7f9bef1 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -121,8 +121,6 @@ KeeperServer::KeeperServer( if (coordination_settings->quorum_reads) LOG_WARNING(log, "Quorum reads enabled, Keeper will work slower."); - keeper_context->initialize(config); - state_machine = nuraft::cs_new( responses_queue_, snapshots_queue_, From dd78008c9ec586a213e0e541b70dfe5055f7df0e Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Thu, 25 May 2023 09:36:41 +0000 Subject: [PATCH 0186/1997] Rename encryptConfig() into decryptConfig() --- src/Common/Config/ConfigProcessor.cpp | 12 ++++++------ src/Common/Config/ConfigProcessor.h | 6 +++--- src/Common/Config/ConfigReloader.cpp | 2 +- src/Daemon/BaseDaemon.cpp | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index 76e4ea1ebd1..3f9535205d8 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -171,7 +171,7 @@ static void mergeAttributes(Element & config_element, Element & with_element) with_element_attributes->release(); } -void ConfigProcessor::encryptRecursive(Poco::XML::Node * config_root) +void ConfigProcessor::decryptRecursive(Poco::XML::Node * config_root) { for (Node * node = config_root->firstChild(); node;) { @@ -179,7 +179,7 @@ void ConfigProcessor::encryptRecursive(Poco::XML::Node * config_root) { // NamedNodeMapPtr attributes = node->attributes(); Element & element = dynamic_cast(*node); - if (element.hasAttribute("enc_codec")) + if (element.hasAttribute("encryption_codec")) { LOG_DEBUG(log, "Encrypted node {} value '{}'.", node->nodeName(), element.getNodeValue()); // for (Node * child_node = node->firstChild(); child_node;) @@ -188,11 +188,11 @@ void 
ConfigProcessor::encryptRecursive(Poco::XML::Node * config_root) // child_node = child_node->nextSibling(); // } Node * child_node = node->firstChild(); - child_node->setNodeValue("encrypted_" + child_node->getNodeValue() + "_encrypted"); + child_node->setNodeValue("decrypted_" + child_node->getNodeValue() + "_decrypted"); } } - encryptRecursive(node); + decryptRecursive(node); node = node->nextSibling(); } @@ -727,10 +727,10 @@ ConfigProcessor::LoadedConfig ConfigProcessor::loadConfigWithZooKeeperIncludes( return LoadedConfig{configuration, has_zk_includes, !processed_successfully, config_xml, path}; } -void ConfigProcessor::encryptConfig(LoadedConfig & loaded_config) +void ConfigProcessor::decryptConfig(LoadedConfig & loaded_config) { Node * config_root = getRootNode(loaded_config.preprocessed_xml.get()); - encryptRecursive(config_root); + decryptRecursive(config_root); loaded_config.configuration = new Poco::Util::XMLConfiguration(loaded_config.preprocessed_xml); } diff --git a/src/Common/Config/ConfigProcessor.h b/src/Common/Config/ConfigProcessor.h index 2f0046bc39c..bc2f923f705 100644 --- a/src/Common/Config/ConfigProcessor.h +++ b/src/Common/Config/ConfigProcessor.h @@ -92,8 +92,8 @@ public: const zkutil::EventPtr & zk_changed_event, bool fallback_to_preprocessed = false); - /// Encrypt nodes in config with specified encryption attributes - void encryptConfig(LoadedConfig & loaded_config); + /// Decrypt nodes in config with specified encryption attributes + void decryptConfig(LoadedConfig & loaded_config); /// Save preprocessed config to specified directory. /// If preprocessed_dir is empty - calculate from loaded_config.path + /preprocessed_configs/ @@ -127,7 +127,7 @@ private: using NodePtr = Poco::AutoPtr; - void encryptRecursive(Poco::XML::Node * config_root); + void decryptRecursive(Poco::XML::Node * config_root); void mergeRecursive(XMLDocumentPtr config, Poco::XML::Node * config_root, const Poco::XML::Node * with_root); diff --git a/src/Common/Config/ConfigReloader.cpp b/src/Common/Config/ConfigReloader.cpp index 896bd5949d9..a4d2cb3d305 100644 --- a/src/Common/Config/ConfigReloader.cpp +++ b/src/Common/Config/ConfigReloader.cpp @@ -130,7 +130,7 @@ void ConfigReloader::reloadIfNewer(bool force, bool throw_on_error, bool fallbac return; } config_processor.savePreprocessedConfig(loaded_config, preprocessed_dir); - config_processor.encryptConfig(loaded_config); + config_processor.decryptConfig(loaded_config); /** We should remember last modification time if and only if config was successfully loaded * Otherwise a race condition could occur during config files update: diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index 2634439ee14..4b1cd4e036e 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -663,7 +663,7 @@ void BaseDaemon::initialize(Application & self) umask(umask_num); DB::ConfigProcessor(config_path).savePreprocessedConfig(loaded_config, ""); - DB::ConfigProcessor(config_path).encryptConfig(loaded_config); + DB::ConfigProcessor(config_path).decryptConfig(loaded_config); /// Write core dump on crash. 
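// Note (editorial) on the call order above: the preprocessed config is written
// to disk first and decrypted afterwards, in memory only. Presumably this
// keeps the ciphertext rather than the clear-text values in the preprocessed
// file; that reading is an inference from the ordering, not a statement made
// by the patch itself.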
{ From f519aa4613a7f82b30bb39686412343d5a5a939d Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 25 May 2023 09:10:45 +0000 Subject: [PATCH 0187/1997] Calculate size --- .../KeeperAsynchronousMetrics.cpp | 12 +++---- src/Coordination/KeeperDispatcher.cpp | 32 +++++++++---------- src/Coordination/KeeperSnapshotManagerS3.h | 5 +-- src/Coordination/KeeperStateMachine.cpp | 5 --- 4 files changed, 25 insertions(+), 29 deletions(-) diff --git a/src/Coordination/KeeperAsynchronousMetrics.cpp b/src/Coordination/KeeperAsynchronousMetrics.cpp index 1427130b184..890079e98f7 100644 --- a/src/Coordination/KeeperAsynchronousMetrics.cpp +++ b/src/Coordination/KeeperAsynchronousMetrics.cpp @@ -28,8 +28,8 @@ void updateKeeperInformation(KeeperDispatcher & keeper_dispatcher, AsynchronousM size_t zxid = 0; size_t session_with_watches = 0; size_t paths_watched = 0; - size_t snapshot_dir_size = 0; - size_t log_dir_size = 0; + //size_t snapshot_dir_size = 0; + //size_t log_dir_size = 0; if (keeper_dispatcher.isServerActive()) { @@ -49,8 +49,8 @@ void updateKeeperInformation(KeeperDispatcher & keeper_dispatcher, AsynchronousM latest_snapshot_size = state_machine.getLatestSnapshotBufSize(); session_with_watches = state_machine.getSessionsWithWatchesCount(); paths_watched = state_machine.getWatchedPathsCount(); - snapshot_dir_size = keeper_dispatcher.getSnapDirSize(); - log_dir_size = keeper_dispatcher.getLogDirSize(); + //snapshot_dir_size = keeper_dispatcher.getSnapDirSize(); + //log_dir_size = keeper_dispatcher.getLogDirSize(); # if defined(__linux__) || defined(__APPLE__) open_file_descriptor_count = getCurrentProcessFDCount(); @@ -85,8 +85,8 @@ void updateKeeperInformation(KeeperDispatcher & keeper_dispatcher, AsynchronousM new_values["KeeperZxid"] = { zxid, "The current transaction id number (zxid) in ClickHouse Keeper." }; new_values["KeeperSessionWithWatches"] = { session_with_watches, "The number of client sessions of ClickHouse Keeper having watches." }; new_values["KeeperPathsWatched"] = { paths_watched, "The number of different paths watched by the clients of ClickHouse Keeper." }; - new_values["KeeperSnapshotDirSize"] = { snapshot_dir_size, "The size of the snapshots directory of ClickHouse Keeper, in bytes." }; - new_values["KeeperLogDirSize"] = { log_dir_size, "The size of the logs directory of ClickHouse Keeper, in bytes." }; + //new_values["KeeperSnapshotDirSize"] = { snapshot_dir_size, "The size of the snapshots directory of ClickHouse Keeper, in bytes." }; + //new_values["KeeperLogDirSize"] = { log_dir_size, "The size of the logs directory of ClickHouse Keeper, in bytes." 
}; auto keeper_log_info = keeper_dispatcher.getKeeperLogInfo(); diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index 4a460777621..6d1239c9210 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -777,37 +777,37 @@ void KeeperDispatcher::updateKeeperStatLatency(uint64_t process_time_ms) keeper_stats.updateLatency(process_time_ms); } -static uint64_t getDirSize(const fs::path & dir) +static uint64_t getTotalSize(const DiskPtr & disk, const std::string & path = "") { checkStackSize(); - if (!fs::exists(dir)) - return 0; - fs::directory_iterator it(dir); - fs::directory_iterator end; - - uint64_t size{0}; - while (it != end) + uint64_t size = 0; + for (auto it = disk->iterateDirectory(path); it->isValid(); it->next()) { - if (it->is_regular_file()) - size += fs::file_size(*it); + if (disk->isFile(it->path())) + size += disk->getFileSize(it->path()); else - size += getDirSize(it->path()); - ++it; + size += getTotalSize(disk, it->path()); } + return size; } uint64_t KeeperDispatcher::getLogDirSize() const { - //return getDirSize(configuration_and_settings->log_storage_path); - return 0; + auto log_disk = keeper_context->getLogDisk(); + auto size = getTotalSize(log_disk); + + auto current_log_disk = keeper_context->getCurrentLogDisk(); + if (log_disk != current_log_disk) + size += getTotalSize(current_log_disk); + + return size; } uint64_t KeeperDispatcher::getSnapDirSize() const { - //return getDirSize(configuration_and_settings->snapshot_storage_path); - return 0; + return getTotalSize(keeper_context->getSnapshotDisk()); } Keeper4LWInfo KeeperDispatcher::getKeeper4LWInfo() const diff --git a/src/Coordination/KeeperSnapshotManagerS3.h b/src/Coordination/KeeperSnapshotManagerS3.h index 908deb76851..e17cf5a1cfb 100644 --- a/src/Coordination/KeeperSnapshotManagerS3.h +++ b/src/Coordination/KeeperSnapshotManagerS3.h @@ -6,11 +6,12 @@ #include #include +#include + #if USE_AWS_S3 #include #include -#include #include #endif @@ -62,7 +63,7 @@ public: KeeperSnapshotManagerS3() = default; void updateS3Configuration(const Poco::Util::AbstractConfiguration &, const MultiVersion::Version &) {} - void uploadSnapshot(const std::string &, [[maybe_unused]] bool async_upload = true) {} + void uploadSnapshot(const SnapshotFileInfo &, [[maybe_unused]] bool async_upload = true) {} void startup(const Poco::Util::AbstractConfiguration &, const MultiVersion::Version &) {} diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index 49243541bc8..5cfc9333a66 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -35,11 +35,6 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; - extern const int SYSTEM_ERROR; -} - -namespace -{ } KeeperStateMachine::KeeperStateMachine( From bb77441acb488bb2a32f691ce724b2950ec0d9ba Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 25 May 2023 13:31:11 +0000 Subject: [PATCH 0188/1997] Add support for old storage disks --- src/Coordination/KeeperContext.cpp | 30 ++++-- src/Coordination/KeeperContext.h | 2 + src/Coordination/KeeperSnapshotManager.cpp | 118 +++++++++++++-------- src/Coordination/KeeperSnapshotManager.h | 5 +- src/Coordination/KeeperStateMachine.cpp | 2 +- 5 files changed, 103 insertions(+), 54 deletions(-) diff --git a/src/Coordination/KeeperContext.cpp b/src/Coordination/KeeperContext.cpp index da49868f706..1d6f1be9bfb 100644 --- a/src/Coordination/KeeperContext.cpp +++ 
b/src/Coordination/KeeperContext.cpp @@ -8,7 +8,6 @@ namespace DB { - KeeperContext::KeeperContext(bool standalone_keeper_) : disk_selector(std::make_shared()) , standalone_keeper(standalone_keeper_) @@ -28,13 +27,19 @@ void KeeperContext::initialize(const Poco::Util::AbstractConfiguration & config) else current_log_storage = log_storage; - Poco::Util::AbstractConfiguration::Keys old_log_disk_name_keys; - config.keys("keeper_server", old_log_disk_name_keys); - for (const auto & key : old_log_disk_name_keys) + const auto collect_old_disk_names = [&](const std::string_view key_prefix, std::vector & disk_names) { - if (key.starts_with("old_log_storage_disk")) - old_log_disk_names.push_back(config.getString("keeper_server." + key)); + Poco::Util::AbstractConfiguration::Keys disk_name_keys; + config.keys("keeper_server", disk_name_keys); + for (const auto & key : disk_name_keys) + { + if (key.starts_with(key_prefix)) + disk_names.push_back(config.getString(fmt::format("keeper_server.{}", key))); + } + }; + + collect_old_disk_names("old_log_storage_disk", old_log_disk_names); + collect_old_disk_names("old_snapshot_storage_disk", old_snapshot_disk_names); snapshot_storage = getSnapshotsPathFromConfig(config); @@ -107,6 +112,17 @@ DiskPtr KeeperContext::getSnapshotDisk() const return getDisk(snapshot_storage); } +std::vector KeeperContext::getOldSnapshotDisks() const +{ + std::vector old_snapshot_disks; + old_snapshot_disks.reserve(old_snapshot_disk_names.size()); + + for (const auto & disk_name : old_snapshot_disk_names) + old_snapshot_disks.push_back(disk_selector->get(disk_name)); + + return old_snapshot_disks; +} + void KeeperContext::setSnapshotDisk(DiskPtr disk) { snapshot_storage = std::move(disk); diff --git a/src/Coordination/KeeperContext.h b/src/Coordination/KeeperContext.h index 6b7af3a60db..e41d8e35032 100644 --- a/src/Coordination/KeeperContext.h +++ b/src/Coordination/KeeperContext.h @@ -40,6 +40,7 @@ public: void setLogDisk(DiskPtr disk); DiskPtr getSnapshotDisk() const; + std::vector getOldSnapshotDisks() const; void setSnapshotDisk(DiskPtr disk); DiskPtr getStateFileDisk() const; @@ -69,6 +70,7 @@ private: Storage state_file_storage; std::vector old_log_disk_names; + std::vector old_snapshot_disk_names; bool standalone_keeper; }; diff --git a/src/Coordination/KeeperSnapshotManager.cpp b/src/Coordination/KeeperSnapshotManager.cpp index 3bfe700bcd5..77abbfb2054 100644 --- a/src/Coordination/KeeperSnapshotManager.cpp +++ b/src/Coordination/KeeperSnapshotManager.cpp @@ -519,49 +519,79 @@ KeeperSnapshotManager::KeeperSnapshotManager( , storage_tick_time(storage_tick_time_) , keeper_context(keeper_context_) { + const auto load_snapshot_from_disk = [&](DiskPtr disk) + { + std::unordered_set invalid_snapshots; + /// collect invalid snapshots + for (auto it = disk->iterateDirectory(""); it->isValid(); it->next()) + { + const auto & name = it->name(); + if (name.empty()) + continue; + + if (startsWith(name, "tmp_")) + { + disk->removeFile(it->path()); + invalid_snapshots.insert(name.substr(4)); + continue; + } + } + + /// process snapshots + for (auto it = disk->iterateDirectory(""); it->isValid(); it->next()) + { + const auto & name = it->name(); + if (name.empty()) + continue; + + /// Not snapshot file + if (!startsWith(name, "snapshot_")) + continue; + + if (invalid_snapshots.contains(name)) + { + disk->removeFile(it->path()); + continue; + } + + size_t snapshot_up_to = getSnapshotPathUpToLogIdx(name); + auto [_, inserted] = existing_snapshots.insert_or_assign(snapshot_up_to, SnapshotFileInfo{it->path(), disk});
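/* Note (editorial): a configuration shape this constructor is meant to handle,
   using the old_* keys read by collect_old_disk_names in
   KeeperContext::initialize above (the disk names and the snapshot_storage_disk
   line are an illustrative sketch, not part of this patch):

       <keeper_server>
           <snapshot_storage_disk>snapshot_s3_plain</snapshot_storage_disk>
           <old_snapshot_storage_disk>snapshot_local</old_snapshot_storage_disk>
       </keeper_server>

   Snapshots are first collected from every old disk, then from the current
   one; the migration loop after removeOutdatedSnapshotsIfNeeded() moves any
   snapshot that still lives on an old disk onto the current disk. */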
+ + if (!inserted) + LOG_WARNING( + &Poco::Logger::get("KeeperSnapshotManager"), + "Found another snapshot with last log idx {}, will use snapshot from disk {}", + snapshot_up_to, + disk->getName()); + } + }; + + for (const auto & disk : keeper_context->getOldSnapshotDisks()) + load_snapshot_from_disk(disk); + auto disk = getDisk(); - - std::unordered_set invalid_snapshots; - /// collect invalid snapshots - for (auto it = disk->iterateDirectory(""); it->isValid(); it->next()) - { - const auto & name = it->name(); - if (name.empty()) - continue; - - if (startsWith(name, "tmp_")) - { - disk->removeFile(it->path()); - invalid_snapshots.insert(name.substr(4)); - continue; - } - - } - - /// process snapshots - for (auto it = disk->iterateDirectory(""); it->isValid(); it->next()) - { - const auto & name = it->name(); - if (name.empty()) - continue; - - /// Not snapshot file - if (!startsWith(name, "snapshot_")) - continue; - - if (invalid_snapshots.contains(name)) - { - disk->removeFile(it->path()); - continue; - } - - size_t snapshot_up_to = getSnapshotPathUpToLogIdx(name); - existing_snapshots[snapshot_up_to] = it->path(); - } + load_snapshot_from_disk(disk); removeOutdatedSnapshotsIfNeeded(); -} + /// move snapshots from old disks to new one + for (auto & [_, file_info] : existing_snapshots) + { + if (file_info.disk == disk) + continue; + + auto file_path = fs::path(file_info.path); + auto tmp_snapshot_path = file_path.parent_path() / ("tmp_" + file_path.filename().generic_string()); + + { + disk->writeFile(tmp_snapshot_path); + } + + file_info.disk->copyFile(file_info.path, *disk, file_info.path, {}); + disk->removeFile(tmp_snapshot_path); + file_info.disk = disk; + } +} SnapshotFileInfo KeeperSnapshotManager::serializeSnapshotBufferToDisk(nuraft::buffer & buffer, uint64_t up_to_log_idx) { @@ -599,7 +629,8 @@ nuraft::ptr KeeperSnapshotManager::deserializeLatestSnapshotBuff } catch (const DB::Exception &) { - getDisk()->removeFile(latest_itr->second); + const auto & [path, disk] = latest_itr->second; + disk->removeFile(path); existing_snapshots.erase(latest_itr->first); tryLogCurrentException(__PRETTY_FUNCTION__); } @@ -610,9 +641,9 @@ nuraft::ptr KeeperSnapshotManager::deserializeLatestSnapshotBuff nuraft::ptr KeeperSnapshotManager::deserializeSnapshotBufferFromDisk(uint64_t up_to_log_idx) const { - const std::string & snapshot_path = existing_snapshots.at(up_to_log_idx); + const auto & [snapshot_path, snapshot_disk] = existing_snapshots.at(up_to_log_idx); WriteBufferFromNuraftBuffer writer; - auto reader = getDisk()->readFile(snapshot_path); + auto reader = snapshot_disk->readFile(snapshot_path); copyData(*reader, writer); return writer.getBuffer(); } @@ -690,7 +721,8 @@ void KeeperSnapshotManager::removeSnapshot(uint64_t log_idx) existing_snapshots.erase(itr); } diff --git a/src/Coordination/KeeperSnapshotManager.h b/src/Coordination/KeeperSnapshotManager.h index 036c0cab62b..0afe582ef59 100644 --- a/src/Coordination/KeeperSnapshotManager.h +++ b/src/Coordination/KeeperSnapshotManager.h @@ -149,11 +149,10 @@ public: { if (!existing_snapshots.empty()) { - const auto & path = existing_snapshots.at(getLatestSnapshotIndex()); + const auto & [path, disk] =
existing_snapshots.at(getLatestSnapshotIndex()); try { - auto disk = getDisk(); if (disk->exists(path)) return {path, disk}; } @@ -176,7 +175,7 @@ private: /// How many snapshots to keep before remove const size_t snapshots_to_keep; /// All existing snapshots in our path (log_index -> path) - std::map existing_snapshots; + std::map existing_snapshots; /// Compress snapshots in common ZSTD format instead of custom ClickHouse block LZ4 format const bool compress_snapshots_zstd; /// Superdigest for deserialization of storage diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index 5cfc9333a66..9a1cb1941b7 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -404,11 +404,11 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res } else { + latest_snapshot_meta = snapshot->snapshot_meta; /// we rely on the fact that the snapshot disk cannot be changed during runtime if (isLocalDisk(*keeper_context->getSnapshotDisk())) { latest_snapshot_info = snapshot_manager.serializeSnapshotToDisk(*snapshot); - latest_snapshot_meta = snapshot->snapshot_meta; latest_snapshot_buf = nullptr; } else From 30ff5113d98141ab4adb05ad08b7ffb48888c33d Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 25 May 2023 13:33:52 +0000 Subject: [PATCH 0189/1997] Fixing more tests. --- src/Functions/in.cpp | 8 +- src/Interpreters/ActionsVisitor.cpp | 2 +- src/Interpreters/PreparedSets.cpp | 70 +++++++++++------ src/Interpreters/PreparedSets.h | 17 +++-- src/Interpreters/Set.cpp | 76 +++++++++++-------- src/Interpreters/Set.h | 18 ++++- src/Planner/CollectSets.cpp | 2 +- .../CreateSetAndFilterOnTheFlyTransform.cpp | 2 +- src/Storages/KVStorageUtils.cpp | 4 +- src/Storages/MergeTree/KeyCondition.cpp | 8 +- .../MergeTreeIndexConditionBloomFilter.cpp | 2 +- .../MergeTree/MergeTreeIndexFullText.cpp | 2 +- .../MergeTree/MergeTreeIndexInverted.cpp | 2 +- .../System/StorageSystemZooKeeper.cpp | 4 +- 14 files changed, 136 insertions(+), 81 deletions(-) diff --git a/src/Functions/in.cpp b/src/Functions/in.cpp index eb623951bf3..6a88a413c63 100644 --- a/src/Functions/in.cpp +++ b/src/Functions/in.cpp @@ -127,12 +127,12 @@ public: } auto future_set = column_set->getData(); - if (!future_set || !future_set->isFilled()) + if (!future_set || !future_set->isReady()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Not-ready Set passed as the second argument for function '{}'", getName()); - if (auto * for_tuple = typeid_cast(future_set.get())) - if (!for_tuple->isReady()) - for_tuple->buildForTuple(size_limits, transform_null_in); + // if (auto * for_tuple = typeid_cast(future_set.get())) + // if (!for_tuple->isReady()) + // for_tuple->buildForTuple(size_limits, transform_null_in); auto set = future_set->get(); auto set_types = set->getDataTypes(); diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 1405568aa71..da8666466fc 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -458,7 +458,7 @@ FutureSetPtr makeExplicitSet( else block = createBlockForSet(left_arg_type, right_arg, set_element_types, context); - return prepared_sets.addFromTuple(set_key, block); + return prepared_sets.addFromTuple(set_key, block, context->getSettings()); } class ScopeStack::Index diff --git a/src/Interpreters/PreparedSets.cpp b/src/Interpreters/PreparedSets.cpp index 3b63d942404..eebc59e85d5 100644 --- a/src/Interpreters/PreparedSets.cpp +++ 
b/src/Interpreters/PreparedSets.cpp @@ -14,6 +14,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int SET_SIZE_LIMIT_EXCEEDED; } PreparedSetKey PreparedSetKey::forLiteral(Hash hash, DataTypes types_) @@ -120,9 +121,9 @@ FutureSetPtr PreparedSets::addFromStorage(const PreparedSetKey & key, SetPtr set return it->second; } -FutureSetPtr PreparedSets::addFromTuple(const PreparedSetKey & key, Block block) +FutureSetPtr PreparedSets::addFromTuple(const PreparedSetKey & key, Block block, const Settings & settings) { - auto from_tuple = std::make_shared(std::move(block)); + auto from_tuple = std::make_shared(std::move(block), settings); auto [it, inserted] = sets.emplace(key, std::move(from_tuple)); if (!inserted) @@ -292,7 +293,25 @@ SizeLimits FutureSet::getSizeLimitsForSet(const Settings & settings, bool ordere return ordered_set ? getSizeLimitsForOrderedSet(settings) : getSizeLimitsForUnorderedSet(settings); } -FutureSetFromTuple::FutureSetFromTuple(Block block_) : block(std::move(block_)) {} +FutureSetFromTuple::FutureSetFromTuple(Block block, const Settings & settings) +{ + bool create_ordered_set = false; + auto size_limits = getSizeLimitsForSet(settings, create_ordered_set); + set = std::make_shared(size_limits, create_ordered_set, settings.transform_null_in); + set->setHeader(block.cloneEmpty().getColumnsWithTypeAndName()); + + Columns columns; + columns.reserve(block.columns()); + for (const auto & column : block) + columns.emplace_back(column.column); + + set_key_columns.filter = ColumnUInt8::create(block.rows()); + + set->initSetElements(); + set->insertFromColumns(columns, set_key_columns); + set->finishInsert(); + //block(std::move(block_)) +} FutureSetFromSubquery::FutureSetFromSubquery(SubqueryForSet subquery_) : subquery(std::move(subquery_)) {} @@ -301,35 +320,40 @@ FutureSetFromStorage::FutureSetFromStorage(SetPtr set_) : set(std::move(set_)) { SetPtr FutureSetFromTuple::buildOrderedSetInplace(const ContextPtr & context) { const auto & settings = context->getSettingsRef(); - auto size_limits = getSizeLimitsForSet(settings, true); - fill(size_limits, settings.transform_null_in, true); + auto limits = getSizeLimitsForSet(settings, true); + + if (!limits.check(set->getTotalRowCount(), set->getTotalByteCount(), "IN-set", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED)) + return nullptr; + + set->initSetElements(); + set->appendSetElements(set_key_columns); return set; } -std::unique_ptr FutureSetFromTuple::build(const ContextPtr & context) +std::unique_ptr FutureSetFromTuple::build(const ContextPtr &) { - const auto & settings = context->getSettingsRef(); - auto size_limits = getSizeLimitsForSet(settings, false); - fill(size_limits, settings.transform_null_in, false); + // const auto & settings = context->getSettingsRef(); + // auto size_limits = getSizeLimitsForSet(settings, false); + // fill(size_limits, settings.transform_null_in, false); return nullptr; } -void FutureSetFromTuple::buildForTuple(SizeLimits size_limits, bool transform_null_in) -{ - fill(size_limits, transform_null_in, false); -} +// void FutureSetFromTuple::buildForTuple(SizeLimits size_limits, bool transform_null_in) +// { +// fill(size_limits, transform_null_in, false); +// } -void FutureSetFromTuple::fill(SizeLimits size_limits, bool transform_null_in, bool create_ordered_set) -{ - //std::cerr << StackTrace().toString() << std::endl; +// void FutureSetFromTuple::fill(SizeLimits size_limits, bool transform_null_in, bool create_ordered_set) +// { +// //std::cerr << 
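// Note (editorial): the constructor above stores the insertion artifacts in
// `set_key_columns` instead of discarding them, so buildOrderedSetInplace can
// later materialize the ordered elements with initSetElements() plus
// appendSetElements() without re-hashing the block, at the cost of keeping
// the key columns alive for the lifetime of the FutureSetFromTuple.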
StackTrace().toString() << std::endl; - if (set) - return; +// if (set) +// return; - set = std::make_shared(size_limits, create_ordered_set, transform_null_in); - set->setHeader(block.cloneEmpty().getColumnsWithTypeAndName()); - set->insertFromBlock(block.getColumnsWithTypeAndName()); - set->finishInsert(); -} +// set = std::make_shared(size_limits, create_ordered_set, transform_null_in); +// set->setHeader(block.cloneEmpty().getColumnsWithTypeAndName()); +// set->insertFromBlock(block.getColumnsWithTypeAndName()); +// set->finishInsert(); +// } }; diff --git a/src/Interpreters/PreparedSets.h b/src/Interpreters/PreparedSets.h index ef7aba38f24..35bbc1e12fb 100644 --- a/src/Interpreters/PreparedSets.h +++ b/src/Interpreters/PreparedSets.h @@ -80,24 +80,23 @@ using FutureSetPtr = std::shared_ptr; class FutureSetFromTuple final : public FutureSet { public: - FutureSetFromTuple(Block block_); + FutureSetFromTuple(Block block, const Settings & settings); - bool isReady() const override { return set != nullptr; } + bool isReady() const override { return true; } bool isFilled() const override { return true; } SetPtr get() const override { return set; } SetPtr buildOrderedSetInplace(const ContextPtr & context) override; - std::unique_ptr build(const ContextPtr & context) override; + std::unique_ptr build(const ContextPtr &) override; - void buildForTuple(SizeLimits size_limits, bool transform_null_in); +/// void buildForTuple(SizeLimits size_limits, bool transform_null_in); private: - Block block; - SetPtr set; + Set::SetKeyColumns set_key_columns; - void fill(SizeLimits size_limits, bool transform_null_in, bool create_ordered_set); + //void fill(SizeLimits size_limits, bool transform_null_in, bool create_ordered_set); }; /// Information on how to build set for the [GLOBAL] IN section. @@ -145,6 +144,8 @@ public: return nullptr; auto plan = buildPlan(context, true); + if (!plan) + return nullptr; auto builder = plan->buildQueryPipeline(QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context)); auto pipeline = QueryPipelineBuilder::getPipeline(std::move(*builder)); @@ -249,7 +250,7 @@ public: // SizeLimits set_size_limit, bool transform_null_in); FutureSetPtr addFromStorage(const PreparedSetKey & key, SetPtr set_); - FutureSetPtr addFromTuple(const PreparedSetKey & key, Block block); + FutureSetPtr addFromTuple(const PreparedSetKey & key, Block block, const Settings & settings); FutureSetPtr addFromSubquery(const PreparedSetKey & key, SubqueryForSet subquery); void addStorageToSubquery(const String & subquery_id, StoragePtr external_storage); diff --git a/src/Interpreters/Set.cpp b/src/Interpreters/Set.cpp index a7bea63bd99..5adf3d07353 100644 --- a/src/Interpreters/Set.cpp +++ b/src/Interpreters/Set.cpp @@ -156,25 +156,46 @@ void Set::setHeader(const ColumnsWithTypeAndName & header) { /// Create empty columns with set values in advance. /// It is needed because set may be empty, so method 'insertFromBlock' will be never called. - set_elements.reserve(keys_size); - for (const auto & type : set_elements_types) - set_elements.emplace_back(type->createColumn()); + initSetElements(); } /// Choose data structure to use for the set. 
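// Note (editorial) on the refactored insertion path below: key insertion is
// now split in two. insertFromColumns(columns) builds a SetKeyColumns holder,
// fills the hash table, and appends the ordered elements only when
// fill_set_elements is set; insertFromColumns(columns, holder) performs just
// the hash-table step, leaving the holder for a later appendSetElements()
// call. A minimal sketch of the two-phase use (names from this patch):
//
//     Set::SetKeyColumns holder;
//     holder.filter = ColumnUInt8::create(rows); // marks distinct rows
//     set.insertFromColumns(columns, holder);    // hash table only
//     set.initSetElements();
//     set.appendSetElements(holder);             // ordered elements on demand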
data.init(data.chooseMethod(key_columns, key_sizes)); } +void Set::initSetElements() +{ + set_elements.reserve(keys_size); + for (const auto & type : set_elements_types) + set_elements.emplace_back(type->createColumn()); +} + bool Set::insertFromBlock(const ColumnsWithTypeAndName & columns) { Columns cols; cols.reserve(columns.size()); for (const auto & column : columns) cols.emplace_back(column.column); - return insertFromBlock(cols); + return insertFromColumns(cols); } -bool Set::insertFromBlock(const Columns & columns) +bool Set::insertFromColumns(const Columns & columns) +{ + size_t rows = columns.at(0)->size(); + + SetKeyColumns holder; + /// Filter to extract distinct values from the block. + if (fill_set_elements) + holder.filter = ColumnUInt8::create(rows); + + bool inserted = insertFromColumns(columns, holder); + if (inserted && fill_set_elements) + appendSetElements(holder); + + return inserted; +} + +bool Set::insertFromColumns(const Columns & columns, SetKeyColumns & holder) { std::lock_guard lock(rwlock); @@ -183,15 +204,13 @@ bool Set::insertFromBlock(const Columns & columns) ColumnRawPtrs key_columns; key_columns.reserve(keys_size); - - /// The constant columns to the right of IN are not supported directly. For this, they first materialize. - Columns materialized_columns; + holder.key_columns.reserve(keys_size); /// Remember the columns we will work with for (size_t i = 0; i < keys_size; ++i) { - materialized_columns.emplace_back(columns.at(i)->convertToFullIfNeeded()); - key_columns.emplace_back(materialized_columns.back().get()); + holder.key_columns.emplace_back(columns.at(i)->convertToFullIfNeeded()); + key_columns.emplace_back(holder.key_columns.back().get()); } size_t rows = columns.at(0)->size(); @@ -202,40 +221,37 @@ bool Set::insertFromBlock(const Columns & columns) if (!transform_null_in) null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map); - /// Filter to extract distinct values from the block. - ColumnUInt8::MutablePtr filter; - if (fill_set_elements) - filter = ColumnUInt8::create(rows); - switch (data.type) { case SetVariants::Type::EMPTY: break; #define M(NAME) \ case SetVariants::Type::NAME: \ - insertFromBlockImpl(*data.NAME, key_columns, rows, data, null_map, filter ? &filter->getData() : nullptr); \ + insertFromBlockImpl(*data.NAME, key_columns, rows, data, null_map, holder.filter ? 
&holder.filter->getData() : nullptr); \ break; APPLY_FOR_SET_VARIANTS(M) #undef M } - if (fill_set_elements) - { - for (size_t i = 0; i < keys_size; ++i) - { - auto filtered_column = key_columns[i]->filter(filter->getData(), rows); - if (set_elements[i]->empty()) - set_elements[i] = filtered_column; - else - set_elements[i]->insertRangeFrom(*filtered_column, 0, filtered_column->size()); - if (transform_null_in && null_map_holder) - set_elements[i]->insert(Null{}); - } - } - return limits.check(data.getTotalRowCount(), data.getTotalByteCount(), "IN-set", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED); } +void Set::appendSetElements(SetKeyColumns & holder) +{ + //std::cerr << "========= " << keys_size << ' ' << holder.key_columns.size() << std::endl; + size_t rows = holder.key_columns.at(0)->size(); + for (size_t i = 0; i < keys_size; ++i) + { + auto filtered_column = holder.key_columns[i]->filter(holder.filter->getData(), rows); + if (set_elements[i]->empty()) + set_elements[i] = filtered_column; + else + set_elements[i]->insertRangeFrom(*filtered_column, 0, filtered_column->size()); + if (transform_null_in && holder.null_map_holder) + set_elements[i]->insert(Null{}); + } +} + void Set::checkIsCreated() const { if (!is_created.load()) diff --git a/src/Interpreters/Set.h b/src/Interpreters/Set.h index fff5fa4e1b1..cb47fde7f7d 100644 --- a/src/Interpreters/Set.h +++ b/src/Interpreters/Set.h @@ -45,9 +45,23 @@ public: void setHeader(const ColumnsWithTypeAndName & header); /// Returns false, if some limit was exceeded and no need to insert more data. - bool insertFromBlock(const Columns & columns); + bool insertFromColumns(const Columns & columns); bool insertFromBlock(const ColumnsWithTypeAndName & columns); + + struct SetKeyColumns + { + //ColumnRawPtrs key_columns; + /// The constant columns to the right of IN are not supported directly. For this, they first materialize. + Columns key_columns; + ColumnPtr null_map_holder; + ColumnUInt8::MutablePtr filter; + }; + + void initSetElements(); + bool insertFromColumns(const Columns & columns, SetKeyColumns & holder); + void appendSetElements(SetKeyColumns & holder); + /// Call after all blocks were inserted. To get the information that set is already created. 
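// Illustrative sketch of the whole lifecycle this interface expects, assuming
// a caller that feeds blocks from some source (constructor arguments as
// declared elsewhere in this header; not part of the diff):
//
//     Set set(size_limits, /*fill_set_elements=*/true, /*transform_null_in=*/false);
//     set.setHeader(header.getColumnsWithTypeAndName());
//     while (Block block = source.read())
//         if (!set.insertFromBlock(block.getColumnsWithTypeAndName()))
//             break; // a size limit was exceeded, no point inserting more
//     set.finishInsert();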
void finishInsert() { is_created = true; } @@ -68,7 +82,7 @@ public: const DataTypes & getDataTypes() const { return data_types; } const DataTypes & getElementsTypes() const { return set_elements_types; } - bool hasExplicitSetElements() const { return fill_set_elements; } + bool hasExplicitSetElements() const { return fill_set_elements || (!set_elements.empty() && set_elements.front()->size() == data.getTotalRowCount()); } Columns getSetElements() const { checkIsCreated(); return { set_elements.begin(), set_elements.end() }; } void checkColumnsNumber(size_t num_key_columns) const; diff --git a/src/Planner/CollectSets.cpp b/src/Planner/CollectSets.cpp index 6c970e0e91b..d66424f89ec 100644 --- a/src/Planner/CollectSets.cpp +++ b/src/Planner/CollectSets.cpp @@ -82,7 +82,7 @@ public: auto set_key = PreparedSetKey::forLiteral(in_second_argument->getTreeHash(), set_element_types); - sets.addFromTuple(set_key, std::move(set)); + sets.addFromTuple(set_key, std::move(set), settings); //planner_context.registerSet(set_key, PlannerSet(FutureSet(std::move(set)))); } diff --git a/src/Processors/Transforms/CreateSetAndFilterOnTheFlyTransform.cpp b/src/Processors/Transforms/CreateSetAndFilterOnTheFlyTransform.cpp index 4278eb8e8b2..59c4b9a6a87 100644 --- a/src/Processors/Transforms/CreateSetAndFilterOnTheFlyTransform.cpp +++ b/src/Processors/Transforms/CreateSetAndFilterOnTheFlyTransform.cpp @@ -106,7 +106,7 @@ void CreatingSetsOnTheFlyTransform::transform(Chunk & chunk) if (chunk.getNumRows()) { Columns key_columns = getColumnsByIndices(chunk, key_column_indices); - bool limit_exceeded = !set->insertFromBlock(key_columns); + bool limit_exceeded = !set->insertFromColumns(key_columns); if (limit_exceeded) { auto prev_state = set->state.exchange(SetWithState::State::Suspended); diff --git a/src/Storages/KVStorageUtils.cpp b/src/Storages/KVStorageUtils.cpp index 281236e631e..f4a4b6e9af1 100644 --- a/src/Storages/KVStorageUtils.cpp +++ b/src/Storages/KVStorageUtils.cpp @@ -76,8 +76,8 @@ bool traverseASTFilter( if (!future_set) return false; - if (!future_set->isReady()) - future_set->buildOrderedSetInplace(context); + //if (!future_set->isReady()) + future_set->buildOrderedSetInplace(context); auto set = future_set->get(); if (!set) diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 4b053a37aa2..7fcc111ced9 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -1212,11 +1212,11 @@ bool KeyCondition::tryPrepareSetIndex( // LOG_TRACE(&Poco::Logger::get("KK"), "Found set for {}", right_arg.getColumnName()); - if (!future_set->isReady()) - { + //if (!future_set->isReady()) + //{ // LOG_TRACE(&Poco::Logger::get("KK"), "Building set inplace for {}", right_arg.getColumnName()); - future_set->buildOrderedSetInplace(right_arg.getTreeContext().getQueryContext()); - } + future_set->buildOrderedSetInplace(right_arg.getTreeContext().getQueryContext()); + //} auto prepared_set = future_set->get(); if (!prepared_set) diff --git a/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp b/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp index 2bd9db12b93..3d0883a1241 100644 --- a/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp @@ -316,7 +316,7 @@ bool MergeTreeIndexConditionBloomFilter::traverseFunction(const RPNBuilderTreeNo //std::cerr << "==== Finding set for MergeTreeBF " << bool(future_set) << std::endl; - if (future_set && 
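// Note (editorial): the same mechanical change is applied across these index
// call sites, and in KVStorageUtils and StorageSystemZooKeeper in this commit:
// the isReady() pre-check is dropped, buildOrderedSetInplace() is called
// unconditionally, and the caller relies on getting a null set back when an
// ordered set cannot be produced, for example when the size limits are
// exceeded in FutureSetFromTuple::buildOrderedSetInplace above.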
!future_set->isReady()) + if (future_set) // && !future_set->isReady()) { //std::cerr << "==== not ready, building " << std::endl; future_set->buildOrderedSetInplace(rhs_argument.getTreeContext().getQueryContext()); diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp index 5e1d23df3c7..d14ff4fa20b 100644 --- a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp @@ -625,7 +625,7 @@ bool MergeTreeConditionFullText::tryPrepareSetBloomFilter( return false; auto future_set = right_argument.tryGetPreparedSet(data_types); - if (future_set && !future_set->isReady()) + if (future_set) // && !future_set->isReady() future_set->buildOrderedSetInplace(right_argument.getTreeContext().getQueryContext()); ConstSetPtr prepared_set; diff --git a/src/Storages/MergeTree/MergeTreeIndexInverted.cpp b/src/Storages/MergeTree/MergeTreeIndexInverted.cpp index a64f81807ae..1d3b923056a 100644 --- a/src/Storages/MergeTree/MergeTreeIndexInverted.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexInverted.cpp @@ -661,7 +661,7 @@ bool MergeTreeConditionInverted::tryPrepareSetGinFilter( //std::cerr << "==== Set for MergeTreeConditionInverted" << bool(future_set) << std::endl; - if (future_set && !future_set->isReady()) + if (future_set) // && !future_set->isReady() future_set->buildOrderedSetInplace(rhs.getTreeContext().getQueryContext()); ConstSetPtr prepared_set; diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index 0f45f6825f6..34b463eadee 100644 --- a/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -317,8 +317,8 @@ static void extractPathImpl(const ActionsDAG::Node & node, Paths & res, ContextP if (!future_set) return; - if (!future_set->isReady()) - future_set->buildOrderedSetInplace(context); + //if (!future_set->isReady()) + future_set->buildOrderedSetInplace(context); auto set = future_set->get(); if (!set) From 5f73681b00fb1a13873c9a8e6b07c7f57c335668 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Thu, 25 May 2023 15:51:20 +0000 Subject: [PATCH 0190/1997] Make config node decryption work --- src/Common/Config/ConfigProcessor.cpp | 44 +++++++++++++++++++++++++-- src/Common/Config/ConfigProcessor.h | 2 +- utils/config-processor/CMakeLists.txt | 3 +- 3 files changed, 44 insertions(+), 5 deletions(-) diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index 3f9535205d8..fdfc6343876 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -26,6 +26,10 @@ #include #include #include +#include +#include +#include +#include #define PREPROCESSED_SUFFIX "-preprocessed" @@ -181,14 +185,47 @@ void ConfigProcessor::decryptRecursive(Poco::XML::Node * config_root) Element & element = dynamic_cast(*node); if (element.hasAttribute("encryption_codec")) { - LOG_DEBUG(log, "Encrypted node {} value '{}'.", node->nodeName(), element.getNodeValue()); + LOG_DEBUG(log, "Encrypted node <{}>", node->nodeName()); // for (Node * child_node = node->firstChild(); child_node;) // { // LOG_DEBUG(log, " Child node {} value '{}'.", child_node->nodeName(), child_node->getNodeValue()); // child_node = child_node->nextSibling(); // } - Node * child_node = node->firstChild(); - child_node->setNodeValue("decrypted_" + child_node->getNodeValue() + "_decrypted"); + + Node * text_node = node->firstChild(); + auto codec_128 =
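// Summary (editorial) of the decryption flow implemented below; a condensed
// sketch with error handling elided, and UInt32 in the cast assumed from the
// ICompressionCodec interface rather than stated by this patch:
//
//     std::string encrypted_value;
//     boost::algorithm::unhex(text_node->getNodeValue(), std::back_inserter(encrypted_value));
//     DB::Memory<> memory;
//     memory.resize(codec_128.readDecompressedBlockSize(encrypted_value.data())
//                   + codec_128.getAdditionalSizeAtTheEndOfBuffer());
//     codec_128.decompress(encrypted_value.data(), static_cast<UInt32>(encrypted_value.size()), memory.data());
//     text_node->setNodeValue(std::string(memory.data(), memory.size()));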
DB::CompressionCodecEncrypted(DB::AES_128_GCM_SIV); + // DB::CompressionCodecEncrypted::Configuration::instance().tryLoad(*config, ""); + + /* + DB::Memory<> memory1; + std::string password="abcd"; + memory1.resize(password.size() + codec_128.getAdditionalSizeAtTheEndOfBuffer() + codec_128.getHeaderSize()+100); + auto bytes_written = codec_128.compress(password.data(), static_cast(password.size()), memory1.data()); + // std::string encrypted_password = std::string(memory1.data(), memory1.size()); + std::string encrypted_password = std::string(memory1.data(), bytes_written); + std::string password_hex; + boost::algorithm::hex(encrypted_password.begin(), encrypted_password.end(), std::back_inserter(password_hex)); + LOG_DEBUG(log, "Encrypted password: '{}'.", password_hex); + */ + + DB::Memory<> memory; + std::string encrypted_value; + + try + { + boost::algorithm::unhex(text_node->getNodeValue(), std::back_inserter(encrypted_value)); + // boost::algorithm::unhex(password_hex, std::back_inserter(encrypted_value)); + } + catch (const std::exception &) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot read encrypted text for {}, check for valid characters [0-9a-fA-F] and length", node->nodeName()); + } + + memory.resize(codec_128.readDecompressedBlockSize(encrypted_value.data()) + codec_128.getAdditionalSizeAtTheEndOfBuffer()); + codec_128.decompress(encrypted_value.data(), static_cast(encrypted_value.size()), memory.data()); + std::string decrypted_value = std::string(memory.data(), memory.size()); + LOG_DEBUG(log, "Decrypted value '{}'", decrypted_value); + text_node->setNodeValue(decrypted_value); } } @@ -729,6 +766,7 @@ ConfigProcessor::LoadedConfig ConfigProcessor::loadConfigWithZooKeeperIncludes( void ConfigProcessor::decryptConfig(LoadedConfig & loaded_config) { + DB::CompressionCodecEncrypted::Configuration::instance().tryLoad(*loaded_config.configuration, "encryption_codecs"); Node * config_root = getRootNode(loaded_config.preprocessed_xml.get()); decryptRecursive(config_root); loaded_config.configuration = new Poco::Util::XMLConfiguration(loaded_config.preprocessed_xml); diff --git a/src/Common/Config/ConfigProcessor.h b/src/Common/Config/ConfigProcessor.h index bc2f923f705..479a0053efa 100644 --- a/src/Common/Config/ConfigProcessor.h +++ b/src/Common/Config/ConfigProcessor.h @@ -92,7 +92,7 @@ public: const zkutil::EventPtr & zk_changed_event, bool fallback_to_preprocessed = false); - /// Decrypt nodes in config with specified encryption attributes + /// crypt nodes in config with specified encryption attributes void decryptConfig(LoadedConfig & loaded_config); /// Save preprocessed config to specified directory. 
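
A note on the round trip this commit relies on: the value stored in the encrypted XML text node is hex(compress(plaintext)) for the configured codec, and decryptRecursive() above reverses exactly that (unhex, then decompress). Below is a minimal sketch of the encrypting side, reconstructed from the commented-out experiment in this patch; the header paths, the UInt32 cast, and the +100 sizing slack are assumptions rather than part of the patch.

// Sketch only: produce the hex ciphertext that decryptRecursive() expects.
// Assumes the keys were already loaded via
// CompressionCodecEncrypted::Configuration::instance().tryLoad(config, "encryption_codecs").
#include <string>
#include <boost/algorithm/hex.hpp>
#include <Compression/CompressionCodecEncrypted.h> // assumed header path
#include <IO/BufferWithOwnMemory.h>                // for DB::Memory<>, assumed header path

std::string encryptConfigValue(const std::string & plaintext)
{
    DB::CompressionCodecEncrypted codec(DB::AES_128_GCM_SIV);

    DB::Memory<> buf;
    // header + payload + codec tail; the +100 slack mirrors the experiment above
    buf.resize(plaintext.size() + codec.getHeaderSize() + codec.getAdditionalSizeAtTheEndOfBuffer() + 100);
    auto bytes_written = codec.compress(plaintext.data(), static_cast<UInt32>(plaintext.size()), buf.data());

    std::string encrypted(buf.data(), bytes_written);
    std::string hex;
    boost::algorithm::hex(encrypted.begin(), encrypted.end(), std::back_inserter(hex));
    return hex; // this is what goes into the text node carrying the encryption_codec attribute
}
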
diff --git a/utils/config-processor/CMakeLists.txt b/utils/config-processor/CMakeLists.txt index 53b6163ba87..00cbfbba659 100644 --- a/utils/config-processor/CMakeLists.txt +++ b/utils/config-processor/CMakeLists.txt @@ -1,2 +1,3 @@ clickhouse_add_executable (config-processor config-processor.cpp) -target_link_libraries(config-processor PRIVATE clickhouse_common_config_no_zookeeper_log) +target_link_libraries(config-processor PRIVATE dbms clickhouse_common_config_no_zookeeper_log) +target_link_libraries(config-processor PUBLIC clickhouse_parsers clickhouse_common_io common ch_contrib::lz4) From 6a8a21e09a49303919d2357f0b7f9ca38c79684b Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 25 May 2023 16:01:40 +0000 Subject: [PATCH 0191/1997] Add test for s3 disk restore --- src/Coordination/Changelog.cpp | 3 +- src/Coordination/KeeperSnapshotManager.cpp | 4 +- .../configs/disk_s3_storage.xml | 26 +++ .../configs/enable_keeper1.xml | 2 - .../configs/enable_keeper2.xml | 2 - .../configs/enable_keeper3.xml | 2 - .../configs/keeper1_snapshot_disk.xml | 5 + .../configs/keeper2_snapshot_disk.xml | 5 + .../configs/keeper3_snapshot_disk.xml | 5 + .../configs/local_storage_path.xml | 6 + .../test_keeper_restore_from_snapshot/test.py | 12 +- .../test_disk_s3.py | 152 ++++++++++++++++++ 12 files changed, 212 insertions(+), 12 deletions(-) create mode 100644 tests/integration/test_keeper_restore_from_snapshot/configs/disk_s3_storage.xml create mode 100644 tests/integration/test_keeper_restore_from_snapshot/configs/keeper1_snapshot_disk.xml create mode 100644 tests/integration/test_keeper_restore_from_snapshot/configs/keeper2_snapshot_disk.xml create mode 100644 tests/integration/test_keeper_restore_from_snapshot/configs/keeper3_snapshot_disk.xml create mode 100644 tests/integration/test_keeper_restore_from_snapshot/configs/local_storage_path.xml create mode 100644 tests/integration/test_keeper_restore_from_snapshot/test_disk_s3.py diff --git a/src/Coordination/Changelog.cpp b/src/Coordination/Changelog.cpp index 875b0758d27..b289e1dc9f2 100644 --- a/src/Coordination/Changelog.cpp +++ b/src/Coordination/Changelog.cpp @@ -616,7 +616,8 @@ Changelog::Changelog(Poco::Logger * log_, LogFileSettings log_file_settings, Kee load_from_disk(disk); auto current_log_disk = getCurrentLogDisk(); - load_from_disk(current_log_disk); + if (disk != current_log_disk) + load_from_disk(current_log_disk); if (existing_changelogs.empty()) LOG_WARNING(log, "No logs exists in {}. 
It's Ok if it's the first run of clickhouse-keeper.", disk->getPath()); diff --git a/src/Coordination/KeeperSnapshotManager.cpp b/src/Coordination/KeeperSnapshotManager.cpp index 77abbfb2054..c3371501976 100644 --- a/src/Coordination/KeeperSnapshotManager.cpp +++ b/src/Coordination/KeeperSnapshotManager.cpp @@ -612,7 +612,7 @@ SnapshotFileInfo KeeperSnapshotManager::serializeSnapshotBufferToDisk(nuraft::bu disk->removeFile(tmp_snapshot_file_name); - existing_snapshots.emplace(up_to_log_idx, snapshot_file_name); + existing_snapshots.emplace(up_to_log_idx, SnapshotFileInfo{snapshot_file_name, disk}); removeOutdatedSnapshotsIfNeeded(); return {snapshot_file_name, disk}; @@ -750,7 +750,7 @@ SnapshotFileInfo KeeperSnapshotManager::serializeSnapshotToDisk(const KeeperStor disk->removeFile(tmp_snapshot_file_name); - existing_snapshots.emplace(up_to_log_idx, snapshot_file_name); + existing_snapshots.emplace(up_to_log_idx, SnapshotFileInfo{snapshot_file_name, disk}); removeOutdatedSnapshotsIfNeeded(); return {snapshot_file_name, disk}; diff --git a/tests/integration/test_keeper_restore_from_snapshot/configs/disk_s3_storage.xml b/tests/integration/test_keeper_restore_from_snapshot/configs/disk_s3_storage.xml new file mode 100644 index 00000000000..2ec4bcd77fb --- /dev/null +++ b/tests/integration/test_keeper_restore_from_snapshot/configs/disk_s3_storage.xml @@ -0,0 +1,26 @@ + + + + + s3_plain + http://minio1:9001/root/data/snapshots1/ + minio + minio123 + + + s3_plain + http://minio1:9001/root/data/snapshots2/ + minio + minio123 + + + s3_plain + http://minio1:9001/root/data/snapshots3/ + minio + minio123 + + + + + + diff --git a/tests/integration/test_keeper_restore_from_snapshot/configs/enable_keeper1.xml b/tests/integration/test_keeper_restore_from_snapshot/configs/enable_keeper1.xml index 4ea543e6f31..510913b5dc4 100644 --- a/tests/integration/test_keeper_restore_from_snapshot/configs/enable_keeper1.xml +++ b/tests/integration/test_keeper_restore_from_snapshot/configs/enable_keeper1.xml @@ -2,8 +2,6 @@ 9181 1 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots 5000 diff --git a/tests/integration/test_keeper_restore_from_snapshot/configs/enable_keeper2.xml b/tests/integration/test_keeper_restore_from_snapshot/configs/enable_keeper2.xml index 4bf3083c1fa..0d50573df01 100644 --- a/tests/integration/test_keeper_restore_from_snapshot/configs/enable_keeper2.xml +++ b/tests/integration/test_keeper_restore_from_snapshot/configs/enable_keeper2.xml @@ -2,8 +2,6 @@ 9181 2 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots 5000 diff --git a/tests/integration/test_keeper_restore_from_snapshot/configs/enable_keeper3.xml b/tests/integration/test_keeper_restore_from_snapshot/configs/enable_keeper3.xml index b9e2a2d0422..8fa322cb9e7 100644 --- a/tests/integration/test_keeper_restore_from_snapshot/configs/enable_keeper3.xml +++ b/tests/integration/test_keeper_restore_from_snapshot/configs/enable_keeper3.xml @@ -2,8 +2,6 @@ 9181 3 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots 5000 diff --git a/tests/integration/test_keeper_restore_from_snapshot/configs/keeper1_snapshot_disk.xml b/tests/integration/test_keeper_restore_from_snapshot/configs/keeper1_snapshot_disk.xml new file mode 100644 index 00000000000..01759e2771a --- /dev/null +++ b/tests/integration/test_keeper_restore_from_snapshot/configs/keeper1_snapshot_disk.xml @@ -0,0 +1,5 @@ + + + snapshot_s3_plain1 + + \ No newline at end of file diff --git 
a/tests/integration/test_keeper_restore_from_snapshot/configs/keeper2_snapshot_disk.xml b/tests/integration/test_keeper_restore_from_snapshot/configs/keeper2_snapshot_disk.xml new file mode 100644 index 00000000000..6c33c5fb300 --- /dev/null +++ b/tests/integration/test_keeper_restore_from_snapshot/configs/keeper2_snapshot_disk.xml @@ -0,0 +1,5 @@ + + + snapshot_s3_plain2 + + \ No newline at end of file diff --git a/tests/integration/test_keeper_restore_from_snapshot/configs/keeper3_snapshot_disk.xml b/tests/integration/test_keeper_restore_from_snapshot/configs/keeper3_snapshot_disk.xml new file mode 100644 index 00000000000..5016ccd581b --- /dev/null +++ b/tests/integration/test_keeper_restore_from_snapshot/configs/keeper3_snapshot_disk.xml @@ -0,0 +1,5 @@ + + + snapshot_s3_plain3 + + \ No newline at end of file diff --git a/tests/integration/test_keeper_restore_from_snapshot/configs/local_storage_path.xml b/tests/integration/test_keeper_restore_from_snapshot/configs/local_storage_path.xml new file mode 100644 index 00000000000..63cb958c88e --- /dev/null +++ b/tests/integration/test_keeper_restore_from_snapshot/configs/local_storage_path.xml @@ -0,0 +1,6 @@ + + + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + \ No newline at end of file diff --git a/tests/integration/test_keeper_restore_from_snapshot/test.py b/tests/integration/test_keeper_restore_from_snapshot/test.py index bc33689dd20..e4d5793bb17 100644 --- a/tests/integration/test_keeper_restore_from_snapshot/test.py +++ b/tests/integration/test_keeper_restore_from_snapshot/test.py @@ -9,13 +9,19 @@ import time cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( - "node1", main_configs=["configs/enable_keeper1.xml"], stay_alive=True + "node1", + main_configs=["configs/enable_keeper1.xml", "configs/local_storage_path.xml"], + stay_alive=True, ) node2 = cluster.add_instance( - "node2", main_configs=["configs/enable_keeper2.xml"], stay_alive=True + "node2", + main_configs=["configs/enable_keeper2.xml", "configs/local_storage_path.xml"], + stay_alive=True, ) node3 = cluster.add_instance( - "node3", main_configs=["configs/enable_keeper3.xml"], stay_alive=True + "node3", + main_configs=["configs/enable_keeper3.xml", "configs/local_storage_path.xml"], + stay_alive=True, ) from kazoo.client import KazooClient, KazooState diff --git a/tests/integration/test_keeper_restore_from_snapshot/test_disk_s3.py b/tests/integration/test_keeper_restore_from_snapshot/test_disk_s3.py new file mode 100644 index 00000000000..1226df75203 --- /dev/null +++ b/tests/integration/test_keeper_restore_from_snapshot/test_disk_s3.py @@ -0,0 +1,152 @@ +#!/usr/bin/env python3 +import pytest +from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils +import random +import string +import os +import time + +cluster = ClickHouseCluster(__file__) +node1 = cluster.add_instance( + "node1", + main_configs=[ + "configs/enable_keeper1.xml", + "configs/disk_s3_storage.xml", + "configs/keeper1_snapshot_disk.xml", + ], + stay_alive=True, + with_minio=True, +) +node2 = cluster.add_instance( + "node2", + main_configs=[ + "configs/enable_keeper2.xml", + "configs/disk_s3_storage.xml", + "configs/keeper2_snapshot_disk.xml", + ], + stay_alive=True, + with_minio=True, +) +node3 = cluster.add_instance( + "node3", + main_configs=[ + "configs/enable_keeper3.xml", + "configs/disk_s3_storage.xml", + "configs/keeper3_snapshot_disk.xml", + ], + stay_alive=True, + with_minio=True, +) + +from kazoo.client import 
KazooClient, KazooState + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + yield cluster + + finally: + cluster.shutdown() + + +def get_fake_zk(nodename, timeout=30.0): + _fake_zk_instance = KazooClient( + hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout + ) + _fake_zk_instance.start() + return _fake_zk_instance + + +def stop_zk(zk): + try: + if zk: + zk.stop() + zk.close() + except: + pass + + +def test_recover_from_snapshot_with_disk_s3(started_cluster): + try: + node1_zk = node2_zk = node3_zk = None + node1_zk = get_fake_zk("node1") + node2_zk = get_fake_zk("node2") + node3_zk = get_fake_zk("node3") + + node1_zk.create("/test_snapshot_multinode_recover", "somedata".encode()) + + node2_zk.sync("/test_snapshot_multinode_recover") + node3_zk.sync("/test_snapshot_multinode_recover") + + assert node1_zk.get("/test_snapshot_multinode_recover")[0] == b"somedata" + assert node2_zk.get("/test_snapshot_multinode_recover")[0] == b"somedata" + assert node3_zk.get("/test_snapshot_multinode_recover")[0] == b"somedata" + + node3.stop_clickhouse(kill=True) + + # at least we will have 2 snapshots + for i in range(435): + node1_zk.create( + "/test_snapshot_multinode_recover" + str(i), + ("somedata" + str(i)).encode(), + ) + + for i in range(435): + if i % 10 == 0: + node1_zk.delete("/test_snapshot_multinode_recover" + str(i)) + + finally: + for zk in [node1_zk, node2_zk, node3_zk]: + stop_zk(zk) + + # stale node should recover from leader's snapshot + # with some sanitizers can start longer than 5 seconds + node3.start_clickhouse(20) + keeper_utils.wait_until_connected(cluster, node3) + print("Restarted") + + try: + node1_zk = node2_zk = node3_zk = None + node1_zk = get_fake_zk("node1") + node2_zk = get_fake_zk("node2") + node3_zk = get_fake_zk("node3") + + node1_zk.sync("/test_snapshot_multinode_recover") + node2_zk.sync("/test_snapshot_multinode_recover") + node3_zk.sync("/test_snapshot_multinode_recover") + + assert node1_zk.get("/test_snapshot_multinode_recover")[0] == b"somedata" + assert node2_zk.get("/test_snapshot_multinode_recover")[0] == b"somedata" + assert node3_zk.get("/test_snapshot_multinode_recover")[0] == b"somedata" + + for i in range(435): + if i % 10 != 0: + assert ( + node1_zk.get("/test_snapshot_multinode_recover" + str(i))[0] + == ("somedata" + str(i)).encode() + ) + assert ( + node2_zk.get("/test_snapshot_multinode_recover" + str(i))[0] + == ("somedata" + str(i)).encode() + ) + assert ( + node3_zk.get("/test_snapshot_multinode_recover" + str(i))[0] + == ("somedata" + str(i)).encode() + ) + else: + assert ( + node1_zk.exists("/test_snapshot_multinode_recover" + str(i)) is None + ) + assert ( + node2_zk.exists("/test_snapshot_multinode_recover" + str(i)) is None + ) + assert ( + node3_zk.exists("/test_snapshot_multinode_recover" + str(i)) is None + ) + finally: + for zk in [node1_zk, node2_zk, node3_zk]: + stop_zk(zk) From 7ebe19f5fb1390bc5f2dac3a5cca7db9c76bdd98 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 25 May 2023 19:18:11 +0000 Subject: [PATCH 0192/1997] Fixing more tests --- src/Interpreters/ActionsVisitor.cpp | 7 ++- src/Interpreters/Context.cpp | 15 +++++ src/Interpreters/Context.h | 1 + src/Interpreters/DatabaseCatalog.h | 3 + src/Interpreters/GlobalSubqueriesVisitor.h | 19 ++++--- src/Interpreters/InterpreterSelectQuery.cpp | 2 +- src/Interpreters/PreparedSets.cpp | 55 +++++++++++++++---- src/Interpreters/PreparedSets.h | 35 ++++-------- src/Planner/CollectSets.cpp | 3 +- src/Planner/Planner.cpp | 2 +- 
src/Planner/PlannerActionsVisitor.cpp | 8 ++- src/Processors/QueryPlan/CreatingSetsStep.cpp | 4 +- .../QueryPlan/DistributedCreateLocalPlan.cpp | 4 +- .../QueryPlan/ReadFromMemoryStorageStep.cpp | 6 +- .../QueryPlan/ReadFromMemoryStorageStep.h | 2 + .../Transforms/CreatingSetsTransform.cpp | 13 +++-- .../Transforms/CreatingSetsTransform.h | 5 +- src/QueryPipeline/QueryPipelineBuilder.cpp | 4 +- src/QueryPipeline/QueryPipelineBuilder.h | 5 +- src/Storages/StorageMemory.cpp | 2 +- src/Storages/StorageMemory.h | 2 + 21 files changed, 132 insertions(+), 65 deletions(-) diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index da8666466fc..133fcbbfe87 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -1405,6 +1405,8 @@ FutureSetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool if (auto set = data.prepared_sets->getFuture(set_key)) return set; + FutureSetPtr external_table_set; + /// A special case is if the name of the table is specified on the right side of the IN statement, /// and the table has the type Set (a previously prepared set). if (identifier) @@ -1417,6 +1419,9 @@ FutureSetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool if (StorageSet * storage_set = dynamic_cast(table.get())) return data.prepared_sets->addFromStorage(set_key, storage_set->getSet()); } + + if (auto tmp_table = data.getContext()->findExternalTable(table_id.getShortName())) + external_table_set = tmp_table->future_set; } /// We get the stream of blocks for the subquery. Create Set and put it in place of the subquery. @@ -1438,7 +1443,7 @@ FutureSetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool subquery_for_set.createSource(*interpreter); } - return data.prepared_sets->addFromSubquery(set_key, std::move(subquery_for_set)); + return data.prepared_sets->addFromSubquery(set_key, std::move(subquery_for_set), std::move(external_table_set)); } else { diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index d9f450191bc..f8fe99fbcc6 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1319,6 +1319,21 @@ void Context::addExternalTable(const String & table_name, TemporaryTableHolder & external_tables_mapping.emplace(table_name, std::make_shared(std::move(temporary_table))); } +std::shared_ptr Context::findExternalTable(const String & table_name) const +{ + if (isGlobalContext()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have external tables"); + + std::shared_ptr holder; + { + auto lock = getLock(); + auto iter = external_tables_mapping.find(table_name); + if (iter == external_tables_mapping.end()) + return {}; + holder = iter->second; + } + return holder; +} std::shared_ptr Context::removeExternalTable(const String & table_name) { diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 1be662e0958..1a0ee891699 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -611,6 +611,7 @@ public: Tables getExternalTables() const; void addExternalTable(const String & table_name, TemporaryTableHolder && temporary_table); + std::shared_ptr findExternalTable(const String & table_name) const; std::shared_ptr removeExternalTable(const String & table_name); const Scalars & getScalars() const; diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index 51e9fbdb936..258ea2dee7c 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ 
b/src/Interpreters/DatabaseCatalog.h @@ -79,6 +79,8 @@ private: using DDLGuardPtr = std::unique_ptr; +class FutureSet; +using FutureSetPtr = std::shared_ptr; /// Creates temporary table in `_temporary_and_external_tables` with randomly generated unique StorageID. /// Such table can be accessed from everywhere by its ID. @@ -111,6 +113,7 @@ struct TemporaryTableHolder : boost::noncopyable, WithContext IDatabase * temporary_tables = nullptr; UUID id = UUIDHelpers::Nil; + FutureSetPtr future_set; }; ///TODO maybe remove shared_ptr from here? diff --git a/src/Interpreters/GlobalSubqueriesVisitor.h b/src/Interpreters/GlobalSubqueriesVisitor.h index a872af529aa..195839da04f 100644 --- a/src/Interpreters/GlobalSubqueriesVisitor.h +++ b/src/Interpreters/GlobalSubqueriesVisitor.h @@ -64,6 +64,7 @@ public: void addExternalStorage(ASTPtr & ast, bool set_alias = false) { + // std::cerr << "=============== addExternalStorage is remote " << is_remote << std::endl; /// With nondistributed queries, creating temporary tables does not make sense. if (!is_remote) return; @@ -163,10 +164,10 @@ public: /// We need to materialize external tables immediately because reading from distributed /// tables might generate local plans which can refer to external tables during index /// analysis. It's too late to populate the external table via CreatingSetsTransform. - if (is_explain) - { - /// Do not materialize external tables if it's explain statement. - } + // if (is_explain) + // { + // /// Do not materialize external tables if it's explain statement. + // } // else if (getContext()->getSettingsRef().use_index_for_in_with_subqueries) // { // auto external_table = external_storage_holder->getTable(); @@ -176,13 +177,15 @@ public: // CompletedPipelineExecutor executor(io.pipeline); // executor.execute(); // } - else + // else { // auto & subquery_for_set = prepared_sets->getSubquery(external_table_name); // subquery_for_set.createSource(*interpreter, external_storage); auto key = subquery_or_table_name->getColumnName(); auto set_key = PreparedSetKey::forSubquery(database_and_table_name->getTreeHash()); + // std::cerr << "====== Adding key " << set_key.toString() << std::endl; + if (!prepared_sets->getFuture(set_key)) { SubqueryForSet subquery_for_set; @@ -191,10 +194,12 @@ public: subquery_for_set.createSource(*interpreter); //std::cerr << reinterpret_cast(prepared_sets.get()) << std::endl; - prepared_sets->addFromSubquery(set_key, std::move(subquery_for_set)); + auto future_set = prepared_sets->addFromSubquery(set_key, std::move(subquery_for_set), nullptr); + external_storage_holder->future_set = std::move(future_set); } else - prepared_sets->addStorageToSubquery(key, std::move(external_storage)); + throw Exception(ErrorCodes::LOGICAL_ERROR, "!!!!!!!!"); + //prepared_sets->addStorageToSubquery(key, std::move(external_storage)); } /** NOTE If it was written IN tmp_table - the existing temporary (but not external) table, diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 8d305c07ce9..28a0e27384a 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -3088,7 +3088,7 @@ void InterpreterSelectQuery::executeExtremes(QueryPlan & query_plan) void InterpreterSelectQuery::executeSubqueriesInSetsAndJoins(QueryPlan & query_plan) { - auto subqueries = prepared_sets->detachSubqueries(context); + auto subqueries = prepared_sets->detachSubqueries(); if (!subqueries.empty()) { diff --git 
a/src/Interpreters/PreparedSets.cpp b/src/Interpreters/PreparedSets.cpp index eebc59e85d5..f2b70462227 100644 --- a/src/Interpreters/PreparedSets.cpp +++ b/src/Interpreters/PreparedSets.cpp @@ -101,14 +101,14 @@ String PreparedSetKey::toString() const /// If the subquery is not associated with any set, create default-constructed SubqueryForSet. /// It's aimed to fill external table passed to SubqueryForSet::createSource. -void PreparedSets::addStorageToSubquery(const String & subquery_id, StoragePtr storage) -{ - auto it = subqueries.find(subquery_id); - if (it == subqueries.end()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot find subquery {}", subquery_id); +// void PreparedSets::addStorageToSubquery(const String & subquery_id, StoragePtr storage) +// { +// auto it = subqueries.find(subquery_id); +// if (it == subqueries.end()) +// throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot find subquery {}", subquery_id); - it->second->addStorage(std::move(storage)); -} +// it->second->addStorage(std::move(storage)); +// } FutureSetPtr PreparedSets::addFromStorage(const PreparedSetKey & key, SetPtr set_) { @@ -132,10 +132,10 @@ FutureSetPtr PreparedSets::addFromTuple(const PreparedSetKey & key, Block block, return it->second; } -FutureSetPtr PreparedSets::addFromSubquery(const PreparedSetKey & key, SubqueryForSet subquery) +FutureSetPtr PreparedSets::addFromSubquery(const PreparedSetKey & key, SubqueryForSet subquery, FutureSetPtr external_table_set) { auto id = subquery.key; - auto from_subquery = std::make_shared(std::move(subquery)); + auto from_subquery = std::make_shared(std::move(subquery), std::move(external_table_set)); auto [it, inserted] = sets.emplace(key, from_subquery); if (!inserted) @@ -145,7 +145,7 @@ FutureSetPtr PreparedSets::addFromSubquery(const PreparedSetKey & key, SubqueryF // std::cerr << "========= PreparedSets::addFromSubquery\n"; // std::cerr << StackTrace().toString() << std::endl; - subqueries.emplace(id, std::move(from_subquery)); + subqueries.emplace_back(SetAndName{.name = id, .set = std::move(from_subquery)}); return it->second; } @@ -176,7 +176,7 @@ FutureSetPtr PreparedSets::getFuture(const PreparedSetKey & key) const // return res; // } -PreparedSets::SubqueriesForSets PreparedSets::detachSubqueries(const ContextPtr &) +PreparedSets::SubqueriesForSets PreparedSets::detachSubqueries() { auto res = std::move(subqueries); subqueries = SubqueriesForSets(); @@ -226,6 +226,36 @@ std::variant, SharedSet> PreparedSetsCache::findOrPromiseTo return promise_to_fill_set; } +SetPtr FutureSetFromSubquery::buildOrderedSetInplace(const ContextPtr & context) +{ + if (!context->getSettingsRef().use_index_for_in_with_subqueries) + return nullptr; + + if (set) + { + if (set->hasExplicitSetElements()) + return set; + + return nullptr; + } + + if (external_table_set) + return set = external_table_set->buildOrderedSetInplace(context); + + auto plan = buildPlan(context, true); + if (!plan) + return nullptr; + + auto builder = plan->buildQueryPipeline(QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context)); + auto pipeline = QueryPipelineBuilder::getPipeline(std::move(*builder)); + pipeline.complete(std::make_shared(Block())); + + CompletedPipelineExecutor executor(pipeline); + executor.execute(); + + return set; +} + std::unique_ptr FutureSetFromSubquery::buildPlan(const ContextPtr & context, bool create_ordered_set) { if (set) @@ -313,7 +343,8 @@ FutureSetFromTuple::FutureSetFromTuple(Block block, const Settings & settings) 
//block(std::move(block_)) } -FutureSetFromSubquery::FutureSetFromSubquery(SubqueryForSet subquery_) : subquery(std::move(subquery_)) {} +FutureSetFromSubquery::FutureSetFromSubquery(SubqueryForSet subquery_, FutureSetPtr external_table_set_) + : subquery(std::move(subquery_)), external_table_set(std::move(external_table_set_)) {} FutureSetFromStorage::FutureSetFromStorage(SetPtr set_) : set(std::move(set_)) {} diff --git a/src/Interpreters/PreparedSets.h b/src/Interpreters/PreparedSets.h index 35bbc1e12fb..c510e690e25 100644 --- a/src/Interpreters/PreparedSets.h +++ b/src/Interpreters/PreparedSets.h @@ -132,30 +132,13 @@ public: class FutureSetFromSubquery : public FutureSet { public: - FutureSetFromSubquery(SubqueryForSet subquery_); + FutureSetFromSubquery(SubqueryForSet subquery_, FutureSetPtr external_table_set_); bool isReady() const override { return set != nullptr; } bool isFilled() const override { return isReady(); } SetPtr get() const override { return set; } - SetPtr buildOrderedSetInplace(const ContextPtr & context) override - { - if (!context->getSettingsRef().use_index_for_in_with_subqueries) - return nullptr; - - auto plan = buildPlan(context, true); - if (!plan) - return nullptr; - - auto builder = plan->buildQueryPipeline(QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context)); - auto pipeline = QueryPipelineBuilder::getPipeline(std::move(*builder)); - pipeline.complete(std::make_shared(Block())); - - CompletedPipelineExecutor executor(pipeline); - executor.execute(); - - return set; - } + SetPtr buildOrderedSetInplace(const ContextPtr & context) override; std::unique_ptr build(const ContextPtr & context) override { @@ -167,6 +150,7 @@ public: private: SetPtr set; SubqueryForSet subquery; + FutureSetPtr external_table_set; std::unique_ptr buildPlan(const ContextPtr & context, bool create_ordered_set); }; @@ -244,16 +228,21 @@ struct PreparedSetKey class PreparedSets { public: - using SubqueriesForSets = std::unordered_map>; + struct SetAndName + { + String name; + std::shared_ptr set; + }; + using SubqueriesForSets = std::vector; // SubqueryForSet & createOrGetSubquery(const String & subquery_id, const PreparedSetKey & key, // SizeLimits set_size_limit, bool transform_null_in); FutureSetPtr addFromStorage(const PreparedSetKey & key, SetPtr set_); FutureSetPtr addFromTuple(const PreparedSetKey & key, Block block, const Settings & settings); - FutureSetPtr addFromSubquery(const PreparedSetKey & key, SubqueryForSet subquery); + FutureSetPtr addFromSubquery(const PreparedSetKey & key, SubqueryForSet subquery, FutureSetPtr external_table_set); - void addStorageToSubquery(const String & subquery_id, StoragePtr external_storage); + //void addStorageToSubquery(const String & subquery_id, StoragePtr external_storage); FutureSetPtr getFuture(const PreparedSetKey & key) const; //SubqueryForSet & getSubquery(const String & subquery_id); @@ -262,7 +251,7 @@ public: /// Get subqueries and clear them. /// We need to build a plan for subqueries just once. That's why we can clear them after accessing them. /// SetPtr would still be available for consumers of PreparedSets. 
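    /// (Editorial sketch, not part of the patch: with the new signature the
    /// caller no longer passes a context, e.g.
    ///     auto subqueries = prepared_sets->detachSubqueries();
    ///     if (!subqueries.empty())
    ///         addCreatingSetsStep(query_plan, std::move(subqueries), context);
    /// matching the call sites updated in InterpreterSelectQuery.cpp and
    /// CreatingSetsStep.cpp elsewhere in this commit.)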
- SubqueriesForSets detachSubqueries(const ContextPtr &); + SubqueriesForSets detachSubqueries(); /// Returns all sets that match the given ast hash not checking types /// Used in KeyCondition and MergeTreeIndexConditionBloomFilter to make non exact match for types in PreparedSetKey diff --git a/src/Planner/CollectSets.cpp b/src/Planner/CollectSets.cpp index d66424f89ec..19dc569ff0c 100644 --- a/src/Planner/CollectSets.cpp +++ b/src/Planner/CollectSets.cpp @@ -107,7 +107,8 @@ public: subquery_for_set.key = planner_context.createSetKey(in_second_argument); subquery_for_set.source = std::make_unique(std::move(subquery_planner).extractQueryPlan()); - sets.addFromSubquery(set_key, std::move(subquery_for_set)); + /// TODO + sets.addFromSubquery(set_key, std::move(subquery_for_set), nullptr); //planner_context.registerSet(set_key, PlannerSet(in_second_argument)); } diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index b1780212e51..5c3fc82a80b 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -1468,7 +1468,7 @@ void Planner::buildPlanForQueryNode() if (!select_query_options.only_analyze) { - auto subqueries = planner_context->getPreparedSets().detachSubqueries(planner_context->getQueryContext()); + auto subqueries = planner_context->getPreparedSets().detachSubqueries(); if (!subqueries.empty()) { diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index e0844a6d2b1..bbac9bf5c4b 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -632,7 +632,13 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::ma DataTypes set_element_types; auto in_second_argument_node_type = in_second_argument->getNodeType(); - if (!(in_second_argument_node_type == QueryTreeNodeType::QUERY || in_second_argument_node_type == QueryTreeNodeType::UNION)) + // std::cerr << "=========== " << in_second_argument->getNodeTypeName() << std::endl; + bool subquery_or_table = + in_second_argument_node_type == QueryTreeNodeType::QUERY || + in_second_argument_node_type == QueryTreeNodeType::UNION || + in_second_argument_node_type == QueryTreeNodeType::TABLE; + + if (!subquery_or_table) { set_element_types = {in_first_argument->getResultType()}; const auto * left_tuple_type = typeid_cast(set_element_types.front().get()); diff --git a/src/Processors/QueryPlan/CreatingSetsStep.cpp b/src/Processors/QueryPlan/CreatingSetsStep.cpp index 34018b2144c..38506412531 100644 --- a/src/Processors/QueryPlan/CreatingSetsStep.cpp +++ b/src/Processors/QueryPlan/CreatingSetsStep.cpp @@ -48,7 +48,7 @@ CreatingSetStep::CreatingSetStep( void CreatingSetStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { - pipeline.addCreatingSetsTransform(getOutputStream().header, std::move(subquery_for_set), network_transfer_limits, context); + pipeline.addCreatingSetsTransform(getOutputStream().header, std::move(subquery_for_set), network_transfer_limits, context->getPreparedSetsCache()); } void CreatingSetStep::updateOutputStream() @@ -189,7 +189,7 @@ void addCreatingSetsStep(QueryPlan & query_plan, PreparedSetsPtr prepared_sets, if (!prepared_sets || prepared_sets->empty()) return; - addCreatingSetsStep(query_plan, prepared_sets->detachSubqueries(context), context); + addCreatingSetsStep(query_plan, prepared_sets->detachSubqueries(), context); } DelayedCreatingSetsStep::DelayedCreatingSetsStep( diff --git a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp 
b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp index 62e369659d1..8e2221d564d 100644 --- a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp +++ b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp @@ -43,7 +43,7 @@ std::unique_ptr createLocalPlan( const Block & header, ContextPtr context, QueryProcessingStage::Enum processed_stage, - PreparedSetsPtr prepared_sets, + [[maybe_unused]] PreparedSetsPtr prepared_sets, size_t shard_num, size_t shard_count, size_t replica_num, @@ -99,7 +99,7 @@ std::unique_ptr createLocalPlan( } else { - auto interpreter = InterpreterSelectQuery(query_ast, new_context, select_query_options, prepared_sets); + auto interpreter = InterpreterSelectQuery(query_ast, new_context, select_query_options); //, prepared_sets); interpreter.buildQueryPlan(*query_plan); } diff --git a/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp b/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp index 2080c31d253..4ad3cc7373a 100644 --- a/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp +++ b/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp @@ -96,11 +96,13 @@ private: }; ReadFromMemoryStorageStep::ReadFromMemoryStorageStep(const Names & columns_to_read_, + StoragePtr storage_, const StorageSnapshotPtr & storage_snapshot_, const size_t num_streams_, const bool delay_read_for_global_sub_queries_) : SourceStepWithFilter(DataStream{.header=storage_snapshot_->getSampleBlockForColumns(columns_to_read_)}), columns_to_read(columns_to_read_), + storage(std::move(storage_)), storage_snapshot(storage_snapshot_), num_streams(num_streams_), delay_read_for_global_sub_queries(delay_read_for_global_sub_queries_) @@ -142,9 +144,9 @@ Pipe ReadFromMemoryStorageStep::makePipe() storage_snapshot, nullptr /* data */, nullptr /* parallel execution index */, - [current_data](std::shared_ptr & data_to_initialize) + [storage = storage](std::shared_ptr & data_to_initialize) { - data_to_initialize = current_data; + data_to_initialize = static_cast(*storage).data.get(); })); } diff --git a/src/Processors/QueryPlan/ReadFromMemoryStorageStep.h b/src/Processors/QueryPlan/ReadFromMemoryStorageStep.h index cec523ed58b..1122bfbb2a5 100644 --- a/src/Processors/QueryPlan/ReadFromMemoryStorageStep.h +++ b/src/Processors/QueryPlan/ReadFromMemoryStorageStep.h @@ -16,6 +16,7 @@ class ReadFromMemoryStorageStep final : public SourceStepWithFilter { public: ReadFromMemoryStorageStep(const Names & columns_to_read_, + StoragePtr storage_, const StorageSnapshotPtr & storage_snapshot_, size_t num_streams_, bool delay_read_for_global_sub_queries_); @@ -35,6 +36,7 @@ private: static constexpr auto name = "ReadFromMemoryStorage"; Names columns_to_read; + StoragePtr storage; StorageSnapshotPtr storage_snapshot; size_t num_streams; bool delay_read_for_global_sub_queries; diff --git a/src/Processors/Transforms/CreatingSetsTransform.cpp b/src/Processors/Transforms/CreatingSetsTransform.cpp index 6626d4b9795..24e17ff210f 100644 --- a/src/Processors/Transforms/CreatingSetsTransform.cpp +++ b/src/Processors/Transforms/CreatingSetsTransform.cpp @@ -27,11 +27,11 @@ CreatingSetsTransform::CreatingSetsTransform( Block out_header_, SubqueryForSet subquery_for_set_, SizeLimits network_transfer_limits_, - ContextPtr context_) + PreparedSetsCachePtr prepared_sets_cache_) : IAccumulatingTransform(std::move(in_header_), std::move(out_header_)) - , WithContext(context_) , subquery(std::move(subquery_for_set_)) , network_transfer_limits(std::move(network_transfer_limits_)) + , 
prepared_sets_cache(std::move(prepared_sets_cache_)) { } @@ -52,14 +52,13 @@ void CreatingSetsTransform::work() void CreatingSetsTransform::startSubquery() { /// Lookup the set in the cache if we don't need to build table. - auto ctx = context.lock(); - if (ctx && ctx->getPreparedSetsCache() && !subquery.table) + if (prepared_sets_cache && !subquery.table) { /// Try to find the set in the cache and wait for it to be built. /// Retry if the set from cache fails to be built. while (true) { - auto from_cache = ctx->getPreparedSetsCache()->findOrPromiseToBuild(subquery.key); + auto from_cache = prepared_sets_cache->findOrPromiseToBuild(subquery.key); if (from_cache.index() == 0) { promise_to_build = std::move(std::get<0>(from_cache)); @@ -89,9 +88,11 @@ void CreatingSetsTransform::startSubquery() if (subquery.table) LOG_TRACE(log, "Filling temporary table."); + // std::cerr << StackTrace().toString() << std::endl; + if (subquery.table) /// TODO: make via port - table_out = QueryPipeline(subquery.table->write({}, subquery.table->getInMemoryMetadataPtr(), getContext())); + table_out = QueryPipeline(subquery.table->write({}, subquery.table->getInMemoryMetadataPtr(), nullptr)); done_with_set = !subquery.set; done_with_table = !subquery.table; diff --git a/src/Processors/Transforms/CreatingSetsTransform.h b/src/Processors/Transforms/CreatingSetsTransform.h index 26bbc45933d..ef586b0655c 100644 --- a/src/Processors/Transforms/CreatingSetsTransform.h +++ b/src/Processors/Transforms/CreatingSetsTransform.h @@ -23,7 +23,7 @@ class PushingPipelineExecutor; /// Don't return any data. Sets are created when Finish status is returned. /// In general, several work() methods need to be called to finish. /// Independent processors is created for each subquery. -class CreatingSetsTransform : public IAccumulatingTransform, WithContext +class CreatingSetsTransform : public IAccumulatingTransform { public: CreatingSetsTransform( @@ -31,7 +31,7 @@ public: Block out_header_, SubqueryForSet subquery_for_set_, SizeLimits network_transfer_limits_, - ContextPtr context_); + PreparedSetsCachePtr prepared_sets_cache_); ~CreatingSetsTransform() override; @@ -55,6 +55,7 @@ private: bool done_with_table = true; SizeLimits network_transfer_limits; + PreparedSetsCachePtr prepared_sets_cache; size_t rows_to_transfer = 0; size_t bytes_to_transfer = 0; diff --git a/src/QueryPipeline/QueryPipelineBuilder.cpp b/src/QueryPipeline/QueryPipelineBuilder.cpp index 764997e7b7e..4898501b980 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.cpp +++ b/src/QueryPipeline/QueryPipelineBuilder.cpp @@ -569,7 +569,7 @@ std::unique_ptr QueryPipelineBuilder::joinPipelinesRightLe return left; } -void QueryPipelineBuilder::addCreatingSetsTransform(const Block & res_header, SubqueryForSet subquery_for_set, const SizeLimits & limits, ContextPtr context) +void QueryPipelineBuilder::addCreatingSetsTransform(const Block & res_header, SubqueryForSet subquery_for_set, const SizeLimits & limits, PreparedSetsCachePtr prepared_sets_cache) { resize(1); @@ -578,7 +578,7 @@ void QueryPipelineBuilder::addCreatingSetsTransform(const Block & res_header, Su res_header, std::move(subquery_for_set), limits, - context); + std::move(prepared_sets_cache)); InputPort * totals_port = nullptr; diff --git a/src/QueryPipeline/QueryPipelineBuilder.h b/src/QueryPipeline/QueryPipelineBuilder.h index 3a5d65d4388..85b6f5c6772 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.h +++ b/src/QueryPipeline/QueryPipelineBuilder.h @@ -33,6 +33,9 @@ class TableJoin; class 
QueryPipelineBuilder; using QueryPipelineBuilderPtr = std::unique_ptr; +class PreparedSetsCache; +using PreparedSetsCachePtr = std::shared_ptr; + class QueryPipelineBuilder { public: @@ -138,7 +141,7 @@ public: /// This is used for CreatingSets. void addPipelineBefore(QueryPipelineBuilder pipeline); - void addCreatingSetsTransform(const Block & res_header, SubqueryForSet subquery_for_set, const SizeLimits & limits, ContextPtr context); + void addCreatingSetsTransform(const Block & res_header, SubqueryForSet subquery_for_set, const SizeLimits & limits, PreparedSetsCachePtr prepared_sets_cache); PipelineExecutorPtr execute(); diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index 31e45db55cb..1392ed5c684 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -155,7 +155,7 @@ void StorageMemory::read( size_t /*max_block_size*/, size_t num_streams) { - query_plan.addStep(std::make_unique(column_names, storage_snapshot, num_streams, delay_read_for_global_subqueries)); + query_plan.addStep(std::make_unique(column_names, shared_from_this(), storage_snapshot, num_streams, delay_read_for_global_subqueries)); } diff --git a/src/Storages/StorageMemory.h b/src/Storages/StorageMemory.h index ce8a59b8bcd..31c8ee31f8b 100644 --- a/src/Storages/StorageMemory.h +++ b/src/Storages/StorageMemory.h @@ -132,6 +132,8 @@ private: std::atomic total_size_rows = 0; bool compress; + + friend class ReadFromMemoryStorageStep; }; } From 0740bfbe4bfb3d186c8803e15aaffd2042c7f1fa Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 26 May 2023 14:31:09 +0000 Subject: [PATCH 0193/1997] Better disks moving --- programs/keeper/CMakeLists.txt | 3 +- src/Coordination/Changelog.cpp | 111 +++++++++++++++++++------- src/Coordination/Changelog.h | 2 +- src/Coordination/KeeperContext.cpp | 20 ++--- src/Coordination/KeeperContext.h | 4 +- src/Coordination/KeeperDispatcher.cpp | 6 +- 6 files changed, 98 insertions(+), 48 deletions(-) diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index 6e97ab324e3..f775e8a5a22 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -131,6 +131,7 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorageCommon.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/StoredObject.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/registerDiskS3.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -144,7 +145,7 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/IOUringReader.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/WriteBufferFromTemporaryFile.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/WriteBufferWithFinalizeCallback.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/AsynchronousBoundedReadBuffer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/getThreadPoolReader.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ThreadPoolRemoteFSReader.cpp diff --git a/src/Coordination/Changelog.cpp b/src/Coordination/Changelog.cpp index b289e1dc9f2..bcce9982e04 100644 --- 
a/src/Coordination/Changelog.cpp
+++ b/src/Coordination/Changelog.cpp
@@ -32,9 +32,21 @@ namespace ErrorCodes
 namespace
 {
+constexpr std::string_view tmp_prefix = "tmp_";
+
 void moveFileBetweenDisks(DiskPtr disk_from, ChangelogFileDescriptionPtr description, DiskPtr disk_to, const std::string & path_to)
 {
-    disk_from->copyFile(description->path, *disk_to, path_to, {});
+    /// we use empty file with prefix tmp_ to detect incomplete copies
+    /// if a copy is complete we don't care from which disk we use the same file
+    /// so it's okay if a failure happens after removing of tmp file but before we remove
+    /// the changelog from the source disk
+    auto from_path = fs::path(description->path);
+    auto tmp_changelog_name = from_path.parent_path() / (std::string{tmp_prefix} + from_path.filename().string());
+    {
+        disk_to->writeFile(tmp_changelog_name);
+    }
+    disk_from->copyFile(from_path, *disk_to, path_to, {});
+    disk_to->removeFile(tmp_changelog_name);
     disk_from->removeFile(description->path);
     description->path = path_to;
     description->disk = disk_to;
@@ -164,9 +176,9 @@ public:
             }
         }
 
-        auto current_log_disk = getCurrentLogDisk();
-        assert(file_description->disk == current_log_disk);
-        file_buf = current_log_disk->writeFile(file_description->path, DBMS_DEFAULT_BUFFER_SIZE, mode);
+        auto latest_log_disk = getLatestLogDisk();
+        assert(file_description->disk == latest_log_disk);
+        file_buf = latest_log_disk->writeFile(file_description->path, DBMS_DEFAULT_BUFFER_SIZE, mode);
         assert(file_buf);
         last_index_written.reset();
         current_file_description = std::move(file_description);
@@ -176,7 +188,7 @@ public:
             std::move(file_buf),
            /* compression level = */ 3,
            /* append_to_existing_file_ = */ mode == WriteMode::Append,
-            [current_log_disk, path = current_file_description->path] { return current_log_disk->readFile(path); });
+            [latest_log_disk, path = current_file_description->path] { return latest_log_disk->readFile(path); });
 
         prealloc_done = false;
     }
@@ -274,7 +286,7 @@ public:
         new_description->from_log_index = new_start_log_index;
         new_description->to_log_index = new_start_log_index + log_file_settings.rotate_interval - 1;
         new_description->extension = "bin";
-        new_description->disk = getCurrentLogDisk();
+        new_description->disk = getLatestLogDisk();
 
         if (log_file_settings.compress_logs)
             new_description->extension += "." 
+ toContentEncodingName(CompressionMethod::Zstd); @@ -413,7 +425,7 @@ private: prealloc_done = true; } - DiskPtr getCurrentLogDisk() const { return keeper_context->getCurrentLogDisk(); } + DiskPtr getLatestLogDisk() const { return keeper_context->getLatestLogDisk(); } DiskPtr getDisk() const { return keeper_context->getLogDisk(); } @@ -574,50 +586,87 @@ Changelog::Changelog(Poco::Logger * log_, LogFileSettings log_file_settings, Kee , append_completion_queue(std::numeric_limits::max()) , keeper_context(std::move(keeper_context_)) { - if (auto current_log_disk = getCurrentLogDisk(); - log_file_settings.force_sync && dynamic_cast(current_log_disk.get()) == nullptr) + if (auto latest_log_disk = getLatestLogDisk(); + log_file_settings.force_sync && dynamic_cast(latest_log_disk.get()) == nullptr) { throw DB::Exception( DB::ErrorCodes::BAD_ARGUMENTS, "force_sync is set to true for logs but disk '{}' cannot satisfy such guarantee because it's not of type DiskLocal.\n" "If you want to use force_sync and same disk for all logs, please set keeper_server.log_storage_disk to a local disk.\n" "If you want to use force_sync and different disk only for old logs, please set 'keeper_server.log_storage_disk' to any " - "supported disk and 'keeper_server.current_log_storage_disk' to a local disk.\n" + "supported disk and 'keeper_server.latest_log_storage_disk' to a local disk.\n" "Otherwise, disable force_sync", - current_log_disk->getName()); + latest_log_disk->getName()); } /// Load all files on changelog disks const auto load_from_disk = [&](const auto & disk) { + LOG_TRACE(log, "Reading from disk {}", disk->getName()); + std::unordered_map incomplete_files; + + const auto clean_incomplete_file = [&](const auto & file_path) + { + if (auto incomplete_it = incomplete_files.find(fs::path(file_path).filename()); incomplete_it != incomplete_files.end()) + { + LOG_TRACE(log, "Removing {} from {}", file_path, disk->getName()); + disk->removeFile(file_path); + disk->removeFile(incomplete_it->second); + incomplete_files.erase(incomplete_it); + return true; + } + + return false; + }; + + std::vector changelog_files; for (auto it = disk->iterateDirectory(""); it->isValid(); it->next()) { if (it->name() == changelogs_detached_dir) continue; - auto file_description = getChangelogFileDescription(it->path()); + if (it->name().starts_with(tmp_prefix)) + { + incomplete_files.emplace(it->name().substr(tmp_prefix.size()), it->path()); + continue; + } + + if (clean_incomplete_file(it->path())) + continue; + + changelog_files.push_back(it->path()); + } + + for (const auto & changelog_file : changelog_files) + { + if (clean_incomplete_file(fs::path(changelog_file).filename())) + continue; + + auto file_description = getChangelogFileDescription(changelog_file); file_description->disk = disk; + LOG_TRACE(log, "Found {} on {}", changelog_file, disk->getName()); auto [changelog_it, inserted] = existing_changelogs.insert_or_assign(file_description->from_log_index, std::move(file_description)); if (!inserted) LOG_WARNING(log, "Found duplicate entries for {}, will use the entry from {}", changelog_it->second->path, disk->getName()); } + + for (const auto & [name, path] : incomplete_files) + disk->removeFile(path); }; /// Load all files from old disks for (const auto & disk : keeper_context->getOldLogDisks()) - { load_from_disk(disk); - } auto disk = getDisk(); load_from_disk(disk); - auto current_log_disk = getCurrentLogDisk(); - if (disk != current_log_disk) - load_from_disk(current_log_disk); + auto latest_log_disk = 
getLatestLogDisk(); + if (disk != latest_log_disk) + load_from_disk(latest_log_disk); if (existing_changelogs.empty()) LOG_WARNING(log, "No logs exists in {}. It's Ok if it's the first run of clickhouse-keeper.", disk->getPath()); @@ -779,12 +828,12 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin else if (last_log_read_result.has_value()) { /// check if we need to move completed log to another disk - auto current_log_disk = getCurrentLogDisk(); + auto latest_log_disk = getLatestLogDisk(); auto disk = getDisk(); auto & description = existing_changelogs.at(last_log_read_result->log_start_index); - if (current_log_disk != disk && current_log_disk == description->disk) - moveFileBetweenDisks(current_log_disk, description, disk, description->path); + if (latest_log_disk != disk && latest_log_disk == description->disk) + moveFileBetweenDisks(latest_log_disk, description, disk, description->path); } /// Start new log if we don't initialize writer from previous log. All logs can be "complete". @@ -793,14 +842,14 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin /// Move files to correct disks auto latest_start_index = current_writer->getStartIndex(); - auto current_log_disk = getCurrentLogDisk(); + auto latest_log_disk = getLatestLogDisk(); auto disk = getDisk(); for (const auto & [start_index, description] : existing_changelogs) { - /// latest log should already be on current_log_disk + /// latest log should already be on latest_log_disk if (start_index == latest_start_index) { - chassert(description->disk == current_log_disk); + chassert(description->disk == latest_log_disk); continue; } @@ -825,9 +874,9 @@ void Changelog::initWriter(ChangelogFileDescriptionPtr description) LOG_TRACE(log, "Continue to write into {}", description->path); auto log_disk = description->disk; - auto current_log_disk = getCurrentLogDisk(); - if (log_disk != current_log_disk) - moveFileBetweenDisks(log_disk, description, current_log_disk, description->path); + auto latest_log_disk = getLatestLogDisk(); + if (log_disk != latest_log_disk) + moveFileBetweenDisks(log_disk, description, latest_log_disk, description->path); current_writer->setFile(std::move(description), WriteMode::Append); } @@ -855,9 +904,9 @@ DiskPtr Changelog::getDisk() const return keeper_context->getLogDisk(); } -DiskPtr Changelog::getCurrentLogDisk() const +DiskPtr Changelog::getLatestLogDisk() const { - return keeper_context->getCurrentLogDisk(); + return keeper_context->getLatestLogDisk(); } void Changelog::removeExistingLogs(ChangelogIter begin, ChangelogIter end) @@ -1044,9 +1093,9 @@ void Changelog::writeAt(uint64_t index, const LogEntryPtr & log_entry) description = std::prev(index_changelog)->second; auto log_disk = description->disk; - auto current_log_disk = getCurrentLogDisk(); - if (log_disk != current_log_disk) - moveFileBetweenDisks(log_disk, description, current_log_disk, description->path); + auto latest_log_disk = getLatestLogDisk(); + if (log_disk != latest_log_disk) + moveFileBetweenDisks(log_disk, description, latest_log_disk, description->path); current_writer->setFile(std::move(description), WriteMode::Append); diff --git a/src/Coordination/Changelog.h b/src/Coordination/Changelog.h index 4054829ef19..d20eaa1666c 100644 --- a/src/Coordination/Changelog.h +++ b/src/Coordination/Changelog.h @@ -155,7 +155,7 @@ private: static ChangelogRecord buildRecord(uint64_t index, const LogEntryPtr & log_entry); DiskPtr getDisk() const; - DiskPtr getCurrentLogDisk() const; + 
DiskPtr getCurrentLogDisk() const;
+    DiskPtr getLatestLogDisk() const;
 
     /// Currently existing changelogs
     std::map existing_changelogs;
diff --git a/src/Coordination/KeeperContext.cpp b/src/Coordination/KeeperContext.cpp
index 1d6f1be9bfb..9d7e62ffae7 100644
--- a/src/Coordination/KeeperContext.cpp
+++ b/src/Coordination/KeeperContext.cpp
@@ -22,10 +22,10 @@ void KeeperContext::initialize(const Poco::Util::AbstractConfiguration & config)
 
     log_storage = getLogsPathFromConfig(config);
 
-    if (config.has("keeper_server.current_log_storage_disk"))
-        current_log_storage = config.getString("keeper_server.current_log_storage_disk");
+    if (config.has("keeper_server.latest_log_storage_disk"))
+        latest_log_storage = config.getString("keeper_server.latest_log_storage_disk");
     else
-        current_log_storage = log_storage;
+        latest_log_storage = log_storage;
 
     const auto collect_old_disk_names = [&](const std::string_view key_prefix, std::vector & disk_names)
     {
@@ -34,7 +34,7 @@ void KeeperContext::initialize(const Poco::Util::AbstractConfiguration & config)
         for (const auto & key : disk_name_keys)
         {
             if (key.starts_with(key_prefix))
-                disk_names.push_back(config.getString(fmt::format("keeper_server.{}", key_prefix)));
+                disk_names.push_back(config.getString(fmt::format("keeper_server.{}", key)));
         }
     };
 
@@ -96,15 +96,15 @@ std::vector KeeperContext::getOldLogDisks() const
     return old_log_disks;
 }
 
-DiskPtr KeeperContext::getCurrentLogDisk() const
+DiskPtr KeeperContext::getLatestLogDisk() const
 {
-    return getDisk(current_log_storage);
+    return getDisk(latest_log_storage);
 }
 
 void KeeperContext::setLogDisk(DiskPtr disk)
 {
     log_storage = disk;
-    current_log_storage = std::move(disk);
+    latest_log_storage = std::move(disk);
 }
 
 DiskPtr KeeperContext::getSnapshotDisk() const
@@ -156,9 +156,9 @@ void KeeperContext::dumpConfiguration(WriteBufferFromOwnString & buf) const
         auto log_disk = getDisk(log_storage);
         dump_disk_info("log_storage", *log_disk);
 
-        auto current_log_disk = getDisk(current_log_storage);
-        if (log_disk != current_log_disk)
-            dump_disk_info("current_log_storage", *current_log_disk);
+        auto latest_log_disk = getDisk(latest_log_storage);
+        if (log_disk != latest_log_disk)
+            dump_disk_info("latest_log_storage", *latest_log_disk);
     }
 
     {
diff --git a/src/Coordination/KeeperContext.h b/src/Coordination/KeeperContext.h
index e41d8e35032..00607fd904e 100644
--- a/src/Coordination/KeeperContext.h
+++ b/src/Coordination/KeeperContext.h
@@ -34,7 +34,7 @@ public:
     bool digestEnabled() const;
     void setDigestEnabled(bool digest_enabled_);
 
-    DiskPtr getCurrentLogDisk() const;
+    DiskPtr getLatestLogDisk() const;
     DiskPtr getLogDisk() const;
     std::vector getOldLogDisks() const;
     void setLogDisk(DiskPtr disk);
@@ -65,7 +65,7 @@ private:
     std::shared_ptr disk_selector;
 
     Storage log_storage;
-    Storage current_log_storage;
+    Storage latest_log_storage;
     Storage snapshot_storage;
     Storage state_file_storage;
diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp
index 6d1239c9210..a5b0f0b2c01 100644
--- a/src/Coordination/KeeperDispatcher.cpp
+++ b/src/Coordination/KeeperDispatcher.cpp
@@ -798,9 +798,9 @@ uint64_t KeeperDispatcher::getLogDirSize() const
     auto log_disk = keeper_context->getLogDisk();
     auto size = getTotalSize(log_disk);
 
-    auto current_log_disk = keeper_context->getCurrentLogDisk();
-    if (log_disk != current_log_disk)
-        size += getTotalSize(current_log_disk);
+    auto latest_log_disk = keeper_context->getLatestLogDisk();
+    if (log_disk != latest_log_disk)
+        size += getTotalSize(latest_log_disk);
 
     return size;
 }
From 
6ed7a3b73feadcf5c3d72ed4a137fccd55e10fbb Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 26 May 2023 19:25:33 +0000 Subject: [PATCH 0194/1997] Fixing more tests. --- src/Interpreters/ActionsVisitor.cpp | 6 +++- src/Interpreters/DatabaseCatalog.cpp | 2 +- src/Interpreters/GlobalSubqueriesVisitor.h | 4 +++ src/Interpreters/PreparedSets.cpp | 7 +++- src/Interpreters/Set.cpp | 17 ++++++---- src/Interpreters/Set.h | 3 +- .../QueryPlan/ReadFromMergeTree.cpp | 27 ++++++++++++--- src/Processors/QueryPlan/ReadFromMergeTree.h | 3 ++ src/Storages/MergeTree/KeyCondition.cpp | 18 +++++++++- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 20 ++++------- .../MergeTree/MergeTreeDataSelectExecutor.h | 3 +- .../MergeTreeIndexConditionBloomFilter.cpp | 11 ++++++ src/Storages/MergeTree/PartitionPruner.cpp | 16 +++++++++ src/Storages/MergeTree/PartitionPruner.h | 1 + .../System/StorageSystemZooKeeper.cpp | 34 +++++++++++-------- 15 files changed, 127 insertions(+), 45 deletions(-) diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 133fcbbfe87..23c6867a868 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -1420,8 +1420,12 @@ FutureSetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool return data.prepared_sets->addFromStorage(set_key, storage_set->getSet()); } - if (auto tmp_table = data.getContext()->findExternalTable(table_id.getShortName())) + // std::cerr << ".... checking for " << identifier->getColumnName() << std::endl; + if (auto tmp_table = data.getContext()->findExternalTable(identifier->getColumnName())) + { external_table_set = tmp_table->future_set; + // std::cerr << "Found " << reinterpret_cast(tmp_table.get()) << " " << reinterpret_cast(external_table_set.get()) << std::endl; + } } /// We get the stream of blocks for the subquery. Create Set and put it in place of the subquery. diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 8d3fa91a7fe..dccf2978f49 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -110,7 +110,7 @@ TemporaryTableHolder::TemporaryTableHolder( } TemporaryTableHolder::TemporaryTableHolder(TemporaryTableHolder && rhs) noexcept - : WithContext(rhs.context), temporary_tables(rhs.temporary_tables), id(rhs.id) + : WithContext(rhs.context), temporary_tables(rhs.temporary_tables), id(rhs.id), future_set(std::move(rhs.future_set)) { rhs.id = UUIDHelpers::Nil; } diff --git a/src/Interpreters/GlobalSubqueriesVisitor.h b/src/Interpreters/GlobalSubqueriesVisitor.h index 195839da04f..8d2f2204e84 100644 --- a/src/Interpreters/GlobalSubqueriesVisitor.h +++ b/src/Interpreters/GlobalSubqueriesVisitor.h @@ -30,6 +30,7 @@ namespace DB namespace ErrorCodes { extern const int WRONG_GLOBAL_SUBQUERY; + extern const int LOGICAL_ERROR; } class GlobalSubqueriesMatcher @@ -159,6 +160,8 @@ public: /*create_for_global_subquery*/ true); StoragePtr external_storage = external_storage_holder->getTable(); + // std::cerr << "......... adding external table " << external_table_name << std::endl; + external_tables.emplace(external_table_name, external_storage_holder); /// We need to materialize external tables immediately because reading from distributed @@ -195,6 +198,7 @@ public: //std::cerr << reinterpret_cast(prepared_sets.get()) << std::endl; auto future_set = prepared_sets->addFromSubquery(set_key, std::move(subquery_for_set), nullptr); + // std::cerr << "... 
Future set " << reinterpret_cast(external_storage_holder.get()) << " " << reinterpret_cast(future_set.get()) << std::endl; external_storage_holder->future_set = std::move(future_set); } else diff --git a/src/Interpreters/PreparedSets.cpp b/src/Interpreters/PreparedSets.cpp index f2b70462227..c71273e0baa 100644 --- a/src/Interpreters/PreparedSets.cpp +++ b/src/Interpreters/PreparedSets.cpp @@ -239,6 +239,8 @@ SetPtr FutureSetFromSubquery::buildOrderedSetInplace(const ContextPtr & context) return nullptr; } + // std::cerr << "... external_table_set " << reinterpret_cast(external_table_set.get()) << std::endl; + if (external_table_set) return set = external_table_set->buildOrderedSetInplace(context); @@ -337,7 +339,7 @@ FutureSetFromTuple::FutureSetFromTuple(Block block, const Settings & settings) set_key_columns.filter = ColumnUInt8::create(block.rows()); - set->initSetElements(); + //set->initSetElements(); set->insertFromColumns(columns, set_key_columns); set->finishInsert(); //block(std::move(block_)) @@ -350,6 +352,9 @@ FutureSetFromStorage::FutureSetFromStorage(SetPtr set_) : set(std::move(set_)) { SetPtr FutureSetFromTuple::buildOrderedSetInplace(const ContextPtr & context) { + if (set->hasExplicitSetElements()) + return set; + const auto & settings = context->getSettingsRef(); auto limits = getSizeLimitsForSet(settings, true); diff --git a/src/Interpreters/Set.cpp b/src/Interpreters/Set.cpp index 5adf3d07353..bd9cafc66eb 100644 --- a/src/Interpreters/Set.cpp +++ b/src/Interpreters/Set.cpp @@ -202,15 +202,14 @@ bool Set::insertFromColumns(const Columns & columns, SetKeyColumns & holder) if (data.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Method Set::setHeader must be called before Set::insertFromBlock"); - ColumnRawPtrs key_columns; - key_columns.reserve(keys_size); holder.key_columns.reserve(keys_size); + holder.materialized_columns.reserve(keys_size); /// Remember the columns we will work with for (size_t i = 0; i < keys_size; ++i) { - holder.key_columns.emplace_back(columns.at(i)->convertToFullIfNeeded()); - key_columns.emplace_back(holder.key_columns.back().get()); + holder.materialized_columns.emplace_back(columns.at(i)->convertToFullIfNeeded()); + holder.key_columns.emplace_back(holder.materialized_columns.back().get()); } size_t rows = columns.at(0)->size(); @@ -219,7 +218,7 @@ bool Set::insertFromColumns(const Columns & columns, SetKeyColumns & holder) ConstNullMapPtr null_map{}; ColumnPtr null_map_holder; if (!transform_null_in) - null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map); + null_map_holder = extractNestedColumnsAndNullMap(holder.key_columns, null_map); switch (data.type) { @@ -227,7 +226,7 @@ bool Set::insertFromColumns(const Columns & columns, SetKeyColumns & holder) break; #define M(NAME) \ case SetVariants::Type::NAME: \ - insertFromBlockImpl(*data.NAME, key_columns, rows, data, null_map, holder.filter ? &holder.filter->getData() : nullptr); \ + insertFromBlockImpl(*data.NAME, holder.key_columns, rows, data, null_map, holder.filter ? 
&holder.filter->getData() : nullptr); \ break; APPLY_FOR_SET_VARIANTS(M) #undef M @@ -445,6 +444,11 @@ void Set::checkTypesEqual(size_t set_type_idx, const DataTypePtr & other_type) c MergeTreeSetIndex::MergeTreeSetIndex(const Columns & set_elements, std::vector && indexes_mapping_) : has_all_keys(set_elements.size() == indexes_mapping_.size()), indexes_mapping(std::move(indexes_mapping_)) { + // std::cerr << "MergeTreeSetIndex::MergeTreeSetIndex " + // << set_elements.size() << ' ' << indexes_mapping.size() << std::endl; + // for (const auto & vv : indexes_mapping) + // std::cerr << vv.key_index << ' ' << vv.tuple_index << std::endl; + ::sort(indexes_mapping.begin(), indexes_mapping.end(), [](const KeyTuplePositionMapping & l, const KeyTuplePositionMapping & r) { @@ -487,6 +491,7 @@ MergeTreeSetIndex::MergeTreeSetIndex(const Columns & set_elements, std::vector & key_ranges, const DataTypes & data_types, bool single_point) const { size_t tuple_size = indexes_mapping.size(); + // std::cerr << "MergeTreeSetIndex::checkInRange " << single_point << ' ' << tuple_size << ' ' << has_all_keys << std::endl; FieldValues left_point; FieldValues right_point; diff --git a/src/Interpreters/Set.h b/src/Interpreters/Set.h index cb47fde7f7d..c2931d79de0 100644 --- a/src/Interpreters/Set.h +++ b/src/Interpreters/Set.h @@ -53,7 +53,8 @@ public: { //ColumnRawPtrs key_columns; /// The constant columns to the right of IN are not supported directly. For this, they first materialize. - Columns key_columns; + ColumnRawPtrs key_columns; + Columns materialized_columns; ColumnPtr null_map_holder; ColumnUInt8::MutablePtr filter; }; diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index acd9147b613..4940cbd032c 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1174,6 +1174,7 @@ static ActionsDAGPtr buildFilterDAG( static void buildIndexes( std::optional & indexes, ActionsDAGPtr filter_actions_dag, + const MergeTreeData & data, const ContextPtr & context, const SelectQueryInfo & query_info, const StorageMetadataPtr & metadata_snapshot) @@ -1196,7 +1197,7 @@ static void buildIndexes( context, primary_key_column_names, primary_key.expression, - array_join_name_set}, {}, false}); + array_join_name_set}, {}, {}, {}, false}); } else { @@ -1204,7 +1205,22 @@ static void buildIndexes( query_info, context, primary_key_column_names, - primary_key.expression}, {}, false}); + primary_key.expression}, {}, {}, {}, false}); + } + + if (metadata_snapshot->hasPartitionKey()) + { + const auto & partition_key = metadata_snapshot->getPartitionKey(); + auto minmax_columns_names = data.getMinMaxColumnsNames(partition_key); + auto minmax_expression_actions = data.getMinMaxExpr(partition_key, ExpressionActionsSettings::fromContext(context)); + // minmax_columns_types = data.getMinMaxColumnsTypes(partition_key); + + // if (context->getSettingsRef().allow_experimental_analyzer) + indexes->minmax_idx_condition.emplace(filter_actions_dag, context, minmax_columns_names, minmax_expression_actions, NameSet()); + // else + // indexes->minmax_idx_condition.emplace(query_info, context, minmax_columns_names, minmax_expression_actions); + + indexes->partition_pruner.emplace(metadata_snapshot, filter_actions_dag, context, false /* strict */); } indexes->use_skip_indexes = settings.use_skip_indexes; @@ -1250,7 +1266,7 @@ void ReadFromMergeTree::onAddFilterFinish() if (!filter_nodes.nodes.empty()) { auto filter_actions_dag = 
buildFilterDAG(context, prewhere_info, filter_nodes, query_info); - buildIndexes(indexes, filter_actions_dag, context, query_info, metadata_for_reading); + buildIndexes(indexes, filter_actions_dag, data, context, query_info, metadata_for_reading); } } @@ -1366,7 +1382,7 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( // } if (!indexes) - buildIndexes(indexes, query_info.filter_actions_dag, context, query_info, metadata_snapshot); + buildIndexes(indexes, query_info.filter_actions_dag, data, context, query_info, metadata_snapshot); if (settings.force_primary_key && indexes->key_condition.alwaysUnknownOrTrue()) { @@ -1386,11 +1402,12 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( try { MergeTreeDataSelectExecutor::filterPartsByPartition( + indexes->partition_pruner, + indexes->minmax_idx_condition, parts, part_values, metadata_snapshot_base, data, - query_info, context, max_block_numbers_to_read.get(), log, diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index 6610b463726..5ff9d2f046c 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB { @@ -164,6 +165,8 @@ public: struct Indexes { KeyCondition key_condition; + std::optional partition_pruner; + std::optional minmax_idx_condition; UsefulSkipIndexes skip_indexes; bool use_skip_indexes; }; diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 7fcc111ced9..fe87198dcf5 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -1157,6 +1157,8 @@ bool KeyCondition::tryPrepareSetIndex( RPNElement & out, size_t & out_key_column_num) { + // std::cerr << "::: tryPrepareSetIndex for " << func.getColumnName() << std::endl; + // std::cerr << StackTrace().toString() << std::endl; const auto & left_arg = func.getArgumentAt(0); out_key_column_num = 0; @@ -1200,7 +1202,10 @@ bool KeyCondition::tryPrepareSetIndex( } if (indexes_mapping.empty()) + { + // std::cerr << ".. index mapping is empty\n"; return false; + } const auto right_arg = func.getArgumentAt(1); @@ -1208,7 +1213,10 @@ bool KeyCondition::tryPrepareSetIndex( auto future_set = right_arg.tryGetPreparedSet(indexes_mapping, data_types); if (!future_set) + { + // std::cerr << ".. no future set\n"; return false; + } // LOG_TRACE(&Poco::Logger::get("KK"), "Found set for {}", right_arg.getColumnName()); @@ -1220,13 +1228,21 @@ bool KeyCondition::tryPrepareSetIndex( auto prepared_set = future_set->get(); if (!prepared_set) + { + + // std::cerr << ".. no prepared set\n"; return false; + } // LOG_TRACE(&Poco::Logger::get("KK"), "Set if ready for {}", right_arg.getColumnName()); /// The index can be prepared if the elements of the set were saved in advance. if (!prepared_set->hasExplicitSetElements()) + { + + // std::cerr << ".. no explicit elements\n"; return false; + } // LOG_TRACE(&Poco::Logger::get("KK"), "Has explicit elements for {}", right_arg.getColumnName()); @@ -1235,7 +1251,7 @@ bool KeyCondition::tryPrepareSetIndex( prepared_set->checkTypesEqual(indexes_mapping[i].tuple_index, data_types[i]); out.set_index = std::make_shared(prepared_set->getSetElements(), std::move(indexes_mapping)); - + // std::cerr << ".. 
can use\n"; return true; } diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index f99e15c0fc1..f42fada4222 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -793,38 +793,28 @@ std::optional> MergeTreeDataSelectExecutor::filterPar } void MergeTreeDataSelectExecutor::filterPartsByPartition( + std::optional & partition_pruner, + std::optional & minmax_idx_condition, MergeTreeData::DataPartsVector & parts, const std::optional> & part_values, const StorageMetadataPtr & metadata_snapshot, const MergeTreeData & data, - const SelectQueryInfo & query_info, const ContextPtr & context, const PartitionIdToMaxBlock * max_block_numbers_to_read, Poco::Logger * log, ReadFromMergeTree::IndexStats & index_stats) { const Settings & settings = context->getSettingsRef(); - - std::optional partition_pruner; - std::optional minmax_idx_condition; DataTypes minmax_columns_types; if (metadata_snapshot->hasPartitionKey()) { const auto & partition_key = metadata_snapshot->getPartitionKey(); - auto minmax_columns_names = data.getMinMaxColumnsNames(partition_key); - auto minmax_expression_actions = data.getMinMaxExpr(partition_key, ExpressionActionsSettings::fromContext(context)); minmax_columns_types = data.getMinMaxColumnsTypes(partition_key); - if (context->getSettingsRef().allow_experimental_analyzer) - minmax_idx_condition.emplace(query_info.filter_actions_dag, context, minmax_columns_names, minmax_expression_actions, NameSet()); - else - minmax_idx_condition.emplace(query_info, context, minmax_columns_names, minmax_expression_actions); - - partition_pruner.emplace(metadata_snapshot, query_info, context, false /* strict */); - if (settings.force_index_by_date && (minmax_idx_condition->alwaysUnknownOrTrue() && partition_pruner->isUseless())) { + auto minmax_columns_names = data.getMinMaxColumnsNames(partition_key); throw Exception(ErrorCodes::INDEX_NOT_USED, "Neither MinMax index by columns ({}) nor partition expr is used and setting 'force_index_by_date' is set", fmt::join(minmax_columns_names, ", ")); @@ -1835,7 +1825,9 @@ void MergeTreeDataSelectExecutor::selectPartsToRead( if (partition_pruner) { - if (partition_pruner->canBePruned(*part)) + auto val = partition_pruner->canBePruned(*part); + // std::cerr << "... part " << part->getNameWithState() << " cbp ? " << val << std::endl; + if (val) continue; } diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index 8c8ce59bebe..f1efbdf0310 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -173,11 +173,12 @@ public: /// Filter parts using minmax index and partition key. 
static void filterPartsByPartition( + std::optional & partition_pruner, + std::optional & minmax_idx_condition, MergeTreeData::DataPartsVector & parts, const std::optional> & part_values, const StorageMetadataPtr & metadata_snapshot, const MergeTreeData & data, - const SelectQueryInfo & query_info, const ContextPtr & context, const PartitionIdToMaxBlock * max_block_numbers_to_read, Poco::Logger * log, diff --git a/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp b/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp index 3d0883a1241..e2bf9bde674 100644 --- a/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp @@ -31,12 +31,21 @@ namespace ColumnWithTypeAndName getPreparedSetInfo(const ConstSetPtr & prepared_set) { + // std::cerr << "====== " << prepared_set->getDataTypes().size() << std::endl; if (prepared_set->getDataTypes().size() == 1) return {prepared_set->getSetElements()[0], prepared_set->getElementsTypes()[0], "dummy"}; Columns set_elements; for (auto & set_element : prepared_set->getSetElements()) + { + // std::cerr << set_element->dumpStructure() << std::endl; set_elements.emplace_back(set_element->convertToFullColumnIfConst()); + } + + // for (auto & set_element : prepared_set->getElementsTypes()) + // { + // // std::cerr << set_element->getName() << std::endl; + // } return {ColumnTuple::create(set_elements), std::make_shared(prepared_set->getElementsTypes()), "dummy"}; } @@ -331,6 +340,7 @@ bool MergeTreeIndexConditionBloomFilter::traverseFunction(const RPNBuilderTreeNo if (prepared_set && prepared_set->hasExplicitSetElements()) { const auto prepared_info = getPreparedSetInfo(prepared_set); + // std::cerr << "...... " << prepared_info.dumpStructure() << std::endl; if (traverseTreeIn(function_name, lhs_argument, prepared_set, prepared_info.type, prepared_info.column, out)) maybe_useful = true; } @@ -377,6 +387,7 @@ bool MergeTreeIndexConditionBloomFilter::traverseTreeIn( size_t row_size = column->size(); size_t position = header.getPositionByName(key_node_column_name); const DataTypePtr & index_type = header.getByPosition(position).type; + // std::cerr << "::::: " << ColumnWithTypeAndName{column, type, ""}.dumpStructure() << " -> " << index_type->getName() << std::endl; const auto & converted_column = castColumn(ColumnWithTypeAndName{column, type, ""}, index_type); out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithColumn(index_type, converted_column, 0, row_size))); diff --git a/src/Storages/MergeTree/PartitionPruner.cpp b/src/Storages/MergeTree/PartitionPruner.cpp index 35b2d5db3b5..a397a1475d1 100644 --- a/src/Storages/MergeTree/PartitionPruner.cpp +++ b/src/Storages/MergeTree/PartitionPruner.cpp @@ -24,6 +24,19 @@ PartitionPruner::PartitionPruner(const StorageMetadataPtr & metadata, const Sele { } +PartitionPruner::PartitionPruner(const StorageMetadataPtr & metadata, ActionsDAGPtr filter_actions_dag, ContextPtr context, bool strict) + : partition_key(MergeTreePartition::adjustPartitionKey(metadata, context)) + , partition_condition(filter_actions_dag, context, partition_key.column_names, partition_key.expression, {}, true /* single_point */, strict) + , useless(strict ? partition_condition.anyUnknownOrAlwaysTrue() : partition_condition.alwaysUnknownOrTrue()) +{ + // auto description = getKeyCondition().getDescription(); + // std::cerr << ".... 
" << description.condition << std::endl; + // std::cerr << filter_actions_dag->dumpDAG() << std::endl; + // for (const auto & name : partition_key.column_names) + // std::cerr << ". " << name << std::endl; + // std::cerr << partition_key.expression->dumpActions() << std::endl; +} + bool PartitionPruner::canBePruned(const IMergeTreeDataPart & part) { if (part.isEmpty()) @@ -39,6 +52,8 @@ bool PartitionPruner::canBePruned(const IMergeTreeDataPart & part) else { const auto & partition_value = part.partition.value; + // for (const auto & val : partition_value) + // std::cerr << val.dump() << std::endl; std::vector index_value(partition_value.begin(), partition_value.end()); for (auto & field : index_value) { @@ -49,6 +64,7 @@ bool PartitionPruner::canBePruned(const IMergeTreeDataPart & part) is_valid = partition_condition.mayBeTrueInRange( partition_value.size(), index_value.data(), index_value.data(), partition_key.data_types); + // std::cerr << "may be true " << is_valid << std::endl; partition_filter_map.emplace(partition_id, is_valid); if (!is_valid) diff --git a/src/Storages/MergeTree/PartitionPruner.h b/src/Storages/MergeTree/PartitionPruner.h index 3a986923321..7f1b74795c4 100644 --- a/src/Storages/MergeTree/PartitionPruner.h +++ b/src/Storages/MergeTree/PartitionPruner.h @@ -14,6 +14,7 @@ class PartitionPruner { public: PartitionPruner(const StorageMetadataPtr & metadata, const SelectQueryInfo & query_info, ContextPtr context, bool strict); + PartitionPruner(const StorageMetadataPtr & metadata, ActionsDAGPtr filter_actions_dag, ContextPtr context, bool strict); bool canBePruned(const IMergeTreeDataPart & part); diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index 34b463eadee..1e89427071c 100644 --- a/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -161,6 +161,17 @@ public: } }; +/// Type of path to be fetched +enum class ZkPathType +{ + Exact, /// Fetch all nodes under this path + Prefix, /// Fetch all nodes starting with this prefix, recursively (multiple paths may match prefix) + Recurse, /// Fatch all nodes under this path, recursively +}; + +/// List of paths to be feched from zookeeper +using Paths = std::deque>; + class ReadFromSystemZooKeeper final : public SourceStepWithFilter { public: @@ -170,11 +181,14 @@ public: void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) override; + void onAddFilterFinish() override; + private: - void fillData(MutableColumns & res_columns) const; + void fillData(MutableColumns & res_columns); std::shared_ptr storage_limits; ContextPtr context; + Paths paths; }; StorageSystemZooKeeper::StorageSystemZooKeeper(const StorageID & table_id_) @@ -246,17 +260,6 @@ NamesAndTypesList StorageSystemZooKeeper::getNamesAndTypes() }; } -/// Type of path to be fetched -enum class ZkPathType -{ - Exact, /// Fetch all nodes under this path - Prefix, /// Fetch all nodes starting with this prefix, recursively (multiple paths may match prefix) - Recurse, /// Fatch all nodes under this path, recursively -}; - -/// List of paths to be feched from zookeeper -using Paths = std::deque>; - static String pathCorrected(const String & path) { String path_corrected; @@ -421,10 +424,13 @@ static Paths extractPath(const ActionsDAG::NodeRawConstPtrs & filter_nodes, Cont } -void ReadFromSystemZooKeeper::fillData(MutableColumns & res_columns) const +void ReadFromSystemZooKeeper::onAddFilterFinish() { - Paths paths = 
extractPath(getFilterNodes().nodes, context, context->getSettingsRef().allow_unrestricted_reads_from_keeper); + paths = extractPath(getFilterNodes().nodes, context, context->getSettingsRef().allow_unrestricted_reads_from_keeper); +} +void ReadFromSystemZooKeeper::fillData(MutableColumns & res_columns) +{ zkutil::ZooKeeperPtr zookeeper = context->getZooKeeper(); if (paths.empty()) From d4cec1f0e0fa114f5e04dead6b90f51b3e292310 Mon Sep 17 00:00:00 2001 From: zvonand Date: Tue, 30 May 2023 01:30:01 +0200 Subject: [PATCH 0195/1997] fix client thread attachment + make better tests --- programs/client/Client.cpp | 1 + src/Client/ClientBase.cpp | 3 --- src/Client/Connection.cpp | 4 --- src/Common/ThreadStatus.h | 3 ++- src/DataTypes/DataTypeDateTime.h | 4 ++- src/Server/TCPHandler.cpp | 2 +- .../02681_timezone_setting.reference | 5 ---- .../0_stateless/02681_timezone_setting.sql | 11 -------- .../02737_timezone_setting.reference | 7 +++++ .../0_stateless/02737_timezone_setting.sql | 27 +++++++++++++++++++ 10 files changed, 41 insertions(+), 26 deletions(-) delete mode 100644 tests/queries/0_stateless/02681_timezone_setting.reference delete mode 100644 tests/queries/0_stateless/02681_timezone_setting.sql create mode 100644 tests/queries/0_stateless/02737_timezone_setting.reference create mode 100644 tests/queries/0_stateless/02737_timezone_setting.sql diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 6b34bdbc5bb..231b7fd6d61 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -326,6 +326,7 @@ try // All that just to set DB::CurrentThread::get().getGlobalContext() // which is required for client timezone (pushed from server) to work. auto thread_group = std::make_shared(); + thread_group->global_context = global_context; thread_status.attachToGroup(thread_group, false); } diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index fad9494ba4b..562c11680a1 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -448,9 +448,7 @@ void ClientBase::onData(Block & block, ASTPtr parsed_query) /// output_format, do not output it. /// Also do not output too much data if we're fuzzing. if (block.rows() == 0 || (query_fuzzer_runs != 0 && processed_rows >= 100)) - { return; - } /// If results are written INTO OUTFILE, we can avoid clearing progress to avoid flicker. 
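[Editor's note] The one-line Client.cpp change at the top of this commit is easy to miss, so here is a hedged sketch of the resulting attachment sequence. The ThreadGroup type name and the check_detached parameter name are my reconstruction (the flattened diff dropped the template argument), so treat them as assumptions:

    // Sketch only: attach the client thread to a group that knows the global context,
    // so that CurrentThread::get().getGlobalContext() can hand the server-pushed
    // timezone to lookups on the client side.
    auto thread_group = std::make_shared<ThreadGroup>();
    thread_group->global_context = global_context; // the new line; without it the weak pointer stays empty
    thread_status.attachToGroup(thread_group, /*check_detached=*/ false);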
if (need_render_progress && tty_buf && (!select_into_file || select_into_file_and_stdout)) @@ -902,7 +900,6 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa if (send_external_tables) sendExternalTables(parsed_query); - receiveResult(parsed_query, signals_before_stop, settings.partial_result_on_first_cancel); break; diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 86585d805d9..457d90c5bd4 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -355,10 +355,6 @@ void Connection::receiveHello() nonce.emplace(read_nonce); } } -// else if (packet_type == Protocol::Server::TimezoneUpdate) -// { -// // skip this packet at hello, will receive and process it later -// } else if (packet_type == Protocol::Server::Exception) receiveException()->rethrow(); else diff --git a/src/Common/ThreadStatus.h b/src/Common/ThreadStatus.h index 600dfc56d2b..400b55c2409 100644 --- a/src/Common/ThreadStatus.h +++ b/src/Common/ThreadStatus.h @@ -72,7 +72,8 @@ public: /// Set up at creation, no race when reading const ContextWeakPtr query_context; - const ContextWeakPtr global_context; + /// Cannot make it const -- we need to modify it in clickhouse-client to process the timezone pushed from the server + ContextWeakPtr global_context; const FatalErrorCallback fatal_error_callback; diff --git a/src/DataTypes/DataTypeDateTime.h b/src/DataTypes/DataTypeDateTime.h index 91a09ff7cb9..a4a05917ba5 100644 --- a/src/DataTypes/DataTypeDateTime.h +++ b/src/DataTypes/DataTypeDateTime.h @@ -21,7 +21,9 @@ namespace DB * all types with different time zones are equivalent and may be used interchangeably. * Time zone only affects parsing and displaying in text formats. * - * If time zone is not specified (example: DateTime without parameter), then default time zone is used. + * If time zone is not specified (example: DateTime without parameter), + * then the value of the `session_timezone` setting is used. + * If `session_timezone` is not set (or is an empty string), the server default time zone is used. 
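[Editor's note] The lookup order this comment describes can be made concrete with a short illustration. The snippet below is not part of the patch; it only restates the fallback chain, using DateLUT as the existing timezone cache and reading session_timezone the same way TCPHandler::sendTimezone does further down:

    // Illustrative only: effective time zone for a DateTime without an explicit parameter.
    const String & tz = settings.session_timezone.value;
    const DateLUTImpl & effective_zone = tz.empty()
        ? DateLUT::instance()    // server default ('timezone' from the config)
        : DateLUT::instance(tz); // session override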
* Default time zone is server time zone, if server is doing transformations * and if client is doing transformations, unless 'use_client_time_zone' setting is passed to client; * Server time zone is the time zone specified in 'timezone' parameter in configuration file, diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index d57a1c93dd7..c41eace68ba 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1068,7 +1068,7 @@ void TCPHandler::sendTimezone() if (client_tcp_protocol_version < DBMS_MIN_PROTOCOL_VERSION_WITH_TIMEZONE_UPDATES) return; - const String & tz = query_context->getSettingsRef().session_timezone.toString(); + const String & tz = query_context->getSettingsRef().session_timezone.value; LOG_DEBUG(log, "TCPHandler::sendTimezone(): {}", tz); writeVarUInt(Protocol::Server::TimezoneUpdate, *out); diff --git a/tests/queries/0_stateless/02681_timezone_setting.reference b/tests/queries/0_stateless/02681_timezone_setting.reference deleted file mode 100644 index 8850d77ab03..00000000000 --- a/tests/queries/0_stateless/02681_timezone_setting.reference +++ /dev/null @@ -1,5 +0,0 @@ -2022-12-12 17:23:23.123 -2022-12-12 23:23:23.123 -2022-12-12 22:23:23.123 -Europe/Zurich Europe/Zurich -Pacific/Pitcairn Pacific/Pitcairn diff --git a/tests/queries/0_stateless/02681_timezone_setting.sql b/tests/queries/0_stateless/02681_timezone_setting.sql deleted file mode 100644 index f66e8d2b646..00000000000 --- a/tests/queries/0_stateless/02681_timezone_setting.sql +++ /dev/null @@ -1,11 +0,0 @@ -SET session_timezone = 'Абырвалг'; -- { serverError BAD_ARGUMENTS} - -SET session_timezone = 'Asia/Novosibirsk'; -SELECT toDateTime64(toDateTime64('2022-12-12 23:23:23.123', 3), 3, 'Europe/Zurich'); -SELECT toDateTime64(toDateTime64('2022-12-12 23:23:23.123', 3), 3, 'Europe/Zurich') SETTINGS session_timezone = 'Europe/Zurich'; - -SET session_timezone = 'Asia/Manila'; -SELECT toDateTime64(toDateTime64('2022-12-12 23:23:23.123', 3), 3, 'Asia/Novosibirsk'); - -SELECT timezone(), timezoneOf(now()) SETTINGS session_timezone = 'Europe/Zurich' FORMAT TSV; -SELECT timezone(), timezoneOf(now()) SETTINGS session_timezone = 'Pacific/Pitcairn' FORMAT TSV; diff --git a/tests/queries/0_stateless/02737_timezone_setting.reference b/tests/queries/0_stateless/02737_timezone_setting.reference new file mode 100644 index 00000000000..578aec4e316 --- /dev/null +++ b/tests/queries/0_stateless/02737_timezone_setting.reference @@ -0,0 +1,7 @@ +Pacific/Pitcairn Pacific/Pitcairn +Asia/Novosibirsk Asia/Novosibirsk +2022-12-12 17:23:23 +2022-12-13 07:23:23.123 +2002-12-12 23:23:23 2002-12-12 23:23:23 +2002-12-12 23:23:23.123 2002-12-12 23:23:23.123 +2000-01-01 01:00:00 diff --git a/tests/queries/0_stateless/02737_timezone_setting.sql b/tests/queries/0_stateless/02737_timezone_setting.sql new file mode 100644 index 00000000000..87eeec0779b --- /dev/null +++ b/tests/queries/0_stateless/02737_timezone_setting.sql @@ -0,0 +1,27 @@ +SET session_timezone = 'Абырвалг'; -- { serverError BAD_ARGUMENTS} + +SELECT timezone(), timezoneOf(now()) SETTINGS session_timezone = 'Pacific/Pitcairn'; + +SET session_timezone = 'Asia/Novosibirsk'; +SELECT timezone(), timezoneOf(now()); + +-- test simple queries +SELECT toDateTime(toDateTime('2022-12-12 23:23:23'), 'Europe/Zurich'); +SELECT toDateTime64(toDateTime64('2022-12-12 23:23:23.123', 3), 3, 'Europe/Zurich') SETTINGS session_timezone = 'America/Denver'; + +-- test proper serialization +SELECT toDateTime('2002-12-12 23:23:23'), toString(toDateTime('2002-12-12 23:23:23')) 
SETTINGS session_timezone = 'Asia/Phnom_Penh'; +SELECT toDateTime64('2002-12-12 23:23:23.123', 3), toString(toDateTime64('2002-12-12 23:23:23.123', 3)) SETTINGS session_timezone = 'Asia/Phnom_Penh'; + +-- Create a table and test that DateTimes are processed correctly on insert +SET session_timezone='Asia/Novosibirsk'; +CREATE TABLE test_tz_setting (d DateTime('UTC')) Engine=Memory AS SELECT toDateTime('2000-01-01 00:00:00'); +INSERT INTO test_tz_setting VALUES ('2000-01-01 01:00:00'); -- this is parsed using timezone from `d` column +INSERT INTO test_tz_setting VALUES (toDateTime('2000-01-02 02:00:00')); -- this is parsed using `session_timezone` + +-- Test parsing in WHERE filter, shall have the same logic as insert +SELECT d FROM test_tz_setting WHERE d == '2000-01-01 01:00:00'; -- 1 row expected +SELECT d FROM test_tz_setting WHERE d == toDateTime('2000-01-01 02:00:00'); -- 0 rows expected + +-- Cleanup table +DROP TABLE test_tz_setting SYNC; \ No newline at end of file From 21aba94909e2ab7fe357c30d694af8674f81dbd0 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 30 May 2023 13:22:40 +0000 Subject: [PATCH 0196/1997] Add support for latest snapshot disk --- src/Coordination/KeeperContext.cpp | 11 ++ src/Coordination/KeeperContext.h | 2 + src/Coordination/KeeperSnapshotManager.cpp | 143 ++++++++++++++------- src/Coordination/KeeperSnapshotManager.h | 4 + src/Coordination/KeeperStateMachine.cpp | 8 +- 5 files changed, 119 insertions(+), 49 deletions(-) diff --git a/src/Coordination/KeeperContext.cpp b/src/Coordination/KeeperContext.cpp index 9d7e62ffae7..5ec86c827b8 100644 --- a/src/Coordination/KeeperContext.cpp +++ b/src/Coordination/KeeperContext.cpp @@ -43,6 +43,11 @@ void KeeperContext::initialize(const Poco::Util::AbstractConfiguration & config) snapshot_storage = getSnapshotsPathFromConfig(config); + if (config.has("keeper_server.latest_snapshot_storage_disk")) + latest_snapshot_storage = config.getString("keeper_server.latest_snapshot_storage_disk"); + else + latest_snapshot_storage = snapshot_storage; + state_file_storage = getStatePathFromConfig(config); } @@ -107,6 +112,11 @@ void KeeperContext::setLogDisk(DiskPtr disk) latest_log_storage = std::move(disk); } +DiskPtr KeeperContext::getLatestSnapshotDisk() const +{ + return getDisk(latest_snapshot_storage); +} + DiskPtr KeeperContext::getSnapshotDisk() const { return getDisk(snapshot_storage); @@ -126,6 +136,7 @@ std::vector KeeperContext::getOldSnapshotDisks() const void KeeperContext::setSnapshotDisk(DiskPtr disk) { snapshot_storage = std::move(disk); + latest_snapshot_storage = snapshot_storage; } DiskPtr KeeperContext::getStateFileDisk() const diff --git a/src/Coordination/KeeperContext.h b/src/Coordination/KeeperContext.h index 00607fd904e..b04afd2a57b 100644 --- a/src/Coordination/KeeperContext.h +++ b/src/Coordination/KeeperContext.h @@ -39,6 +39,7 @@ public: std::vector getOldLogDisks() const; void setLogDisk(DiskPtr disk); + DiskPtr getLatestSnapshotDisk() const; DiskPtr getSnapshotDisk() const; std::vector getOldSnapshotDisks() const; void setSnapshotDisk(DiskPtr disk); @@ -67,6 +68,7 @@ private: Storage log_storage; Storage latest_log_storage; Storage snapshot_storage; + Storage latest_snapshot_storage; Storage state_file_storage; std::vector old_log_disk_names; diff --git a/src/Coordination/KeeperSnapshotManager.cpp b/src/Coordination/KeeperSnapshotManager.cpp index c3371501976..4b9a34c07e9 100644 --- a/src/Coordination/KeeperSnapshotManager.cpp +++ b/src/Coordination/KeeperSnapshotManager.cpp @@ -16,6 +16,7 
@@ #include #include #include +#include "Core/Field.h" #include @@ -31,6 +32,24 @@ namespace ErrorCodes namespace { + constexpr std::string_view tmp_prefix = "tmp_"; + + void moveFileBetweenDisks(DiskPtr disk_from, const std::string & path_from, DiskPtr disk_to, const std::string & path_to) + { + /// we use empty file with prefix tmp_ to detect incomplete copies + /// if a copy is complete we don't care from which disk we use the same file + /// so it's okay if a failure happens after removing of tmp file but before we remove + /// the snapshot from the source disk + auto from_path = fs::path(path_from); + auto tmp_snapshot_name = from_path.parent_path() / (std::string{tmp_prefix} + from_path.filename().string()); + { + disk_to->writeFile(tmp_snapshot_name); + } + disk_from->copyFile(from_path, *disk_to, path_to, {}); + disk_to->removeFile(tmp_snapshot_name); + disk_from->removeFile(path_from); + } + uint64_t getSnapshotPathUpToLogIdx(const String & snapshot_path) { std::filesystem::path path(snapshot_path); @@ -519,43 +538,48 @@ KeeperSnapshotManager::KeeperSnapshotManager( , storage_tick_time(storage_tick_time_) , keeper_context(keeper_context_) { - const auto load_snapshot_from_disk = [&](DiskPtr disk) + const auto load_snapshot_from_disk = [&](const auto & disk) { - std::unordered_set invalid_snapshots; - /// collect invalid snapshots + LOG_TRACE(log, "Reading from disk {}", disk->getName()); + std::unordered_map incomplete_files; + + const auto clean_incomplete_file = [&](const auto & file_path) + { + if (auto incomplete_it = incomplete_files.find(fs::path(file_path).filename()); incomplete_it != incomplete_files.end()) + { + LOG_TRACE(log, "Removing {} from {}", file_path, disk->getName()); + disk->removeFile(file_path); + disk->removeFile(incomplete_it->second); + incomplete_files.erase(incomplete_it); + return true; + } + + return false; + }; + + std::vector snapshot_files; for (auto it = disk->iterateDirectory(""); it->isValid(); it->next()) { - const auto & name = it->name(); - if (name.empty()) - continue; - - if (startsWith(name, "tmp_")) + if (it->name().starts_with(tmp_prefix)) { - disk->removeFile(it->path()); - invalid_snapshots.insert(name.substr(4)); + incomplete_files.emplace(it->name().substr(tmp_prefix.size()), it->path()); continue; } + + if (clean_incomplete_file(it->path())) + continue; + + snapshot_files.push_back(it->path()); } - /// process snapshots - for (auto it = disk->iterateDirectory(""); it->isValid(); it->next()) + for (const auto & snapshot_file : snapshot_files) { - const auto & name = it->name(); - if (name.empty()) + if (clean_incomplete_file(fs::path(snapshot_file).filename())) continue; - /// Not snapshot file - if (!startsWith(name, "snapshot_")) - continue; - - if (invalid_snapshots.contains(name)) - { - disk->removeFile(it->path()); - continue; - } - - size_t snapshot_up_to = getSnapshotPathUpToLogIdx(name); - auto [_, inserted] = existing_snapshots.insert_or_assign(snapshot_up_to, SnapshotFileInfo{it->path(), disk}); + LOG_TRACE(log, "Found {} on {}", snapshot_file, disk->getName()); + size_t snapshot_up_to = getSnapshotPathUpToLogIdx(snapshot_file); + auto [_, inserted] = existing_snapshots.insert_or_assign(snapshot_up_to, SnapshotFileInfo{snapshot_file, disk}); if (!inserted) LOG_WARNING( @@ -564,6 +588,9 @@ KeeperSnapshotManager::KeeperSnapshotManager( snapshot_up_to, disk->getName()); } + + for (const auto & [name, path] : incomplete_files) + disk->removeFile(path); }; for (const auto & disk : keeper_context->getOldSnapshotDisks()) @@ 
-572,25 +599,12 @@ KeeperSnapshotManager::KeeperSnapshotManager( auto disk = getDisk(); load_snapshot_from_disk(disk); + auto latest_snapshot_disk = getLatestSnapshotDisk(); + if (latest_snapshot_disk != disk) + load_snapshot_from_disk(latest_snapshot_disk); + removeOutdatedSnapshotsIfNeeded(); - - /// move snapshots from old disks to new one - for (auto & [_, file_info] : existing_snapshots) - { - if (file_info.disk == disk) - continue; - - auto file_path = fs::path(file_info.path); - auto tmp_snapshot_path = file_path.parent_path() / ("tmp_" + file_path.filename().generic_string()); - - { - disk->writeFile(tmp_snapshot_path); - } - - file_info.disk->copyFile(file_info.path, *disk, file_info.path, {}); - disk->removeFile(tmp_snapshot_path); - file_info.disk = disk; - } + moveSnapshotsIfNeeded(); } SnapshotFileInfo KeeperSnapshotManager::serializeSnapshotBufferToDisk(nuraft::buffer & buffer, uint64_t up_to_log_idx) @@ -600,7 +614,7 @@ SnapshotFileInfo KeeperSnapshotManager::serializeSnapshotBufferToDisk(nuraft::bu auto snapshot_file_name = getSnapshotFileName(up_to_log_idx, compress_snapshots_zstd); auto tmp_snapshot_file_name = "tmp_" + snapshot_file_name; - auto disk = getDisk(); + auto disk = getLatestSnapshotDisk(); { disk->writeFile(tmp_snapshot_file_name); @@ -614,6 +628,7 @@ SnapshotFileInfo KeeperSnapshotManager::serializeSnapshotBufferToDisk(nuraft::bu existing_snapshots.emplace(up_to_log_idx, SnapshotFileInfo{snapshot_file_name, disk}); removeOutdatedSnapshotsIfNeeded(); + moveSnapshotsIfNeeded(); return {snapshot_file_name, disk}; } @@ -710,12 +725,47 @@ DiskPtr KeeperSnapshotManager::getDisk() const return keeper_context->getSnapshotDisk(); } +DiskPtr KeeperSnapshotManager::getLatestSnapshotDisk() const +{ + return keeper_context->getLatestSnapshotDisk(); +} + void KeeperSnapshotManager::removeOutdatedSnapshotsIfNeeded() { while (existing_snapshots.size() > snapshots_to_keep) removeSnapshot(existing_snapshots.begin()->first); } +void KeeperSnapshotManager::moveSnapshotsIfNeeded() +{ + /// move snapshots to correct disks + + auto disk = getDisk(); + auto latest_snapshot_disk = getLatestSnapshotDisk(); + auto latest_snapshot_idx = getLatestSnapshotIndex(); + + for (auto & [idx, file_info] : existing_snapshots) + { + if (idx == latest_snapshot_idx) + { + if (file_info.disk != latest_snapshot_disk) + { + moveFileBetweenDisks(file_info.disk, file_info.path, latest_snapshot_disk, file_info.path); + file_info.disk = latest_snapshot_disk; + } + } + else + { + if (file_info.disk != disk) + { + moveFileBetweenDisks(file_info.disk, file_info.path, disk, file_info.path); + file_info.disk = disk; + } + } + } + +} + void KeeperSnapshotManager::removeSnapshot(uint64_t log_idx) { auto itr = existing_snapshots.find(log_idx); @@ -732,7 +782,7 @@ SnapshotFileInfo KeeperSnapshotManager::serializeSnapshotToDisk(const KeeperStor auto snapshot_file_name = getSnapshotFileName(up_to_log_idx, compress_snapshots_zstd); auto tmp_snapshot_file_name = "tmp_" + snapshot_file_name; - auto disk = getDisk(); + auto disk = getLatestSnapshotDisk(); { disk->writeFile(tmp_snapshot_file_name); } @@ -752,6 +802,7 @@ SnapshotFileInfo KeeperSnapshotManager::serializeSnapshotToDisk(const KeeperStor existing_snapshots.emplace(up_to_log_idx, SnapshotFileInfo{snapshot_file_name, disk}); removeOutdatedSnapshotsIfNeeded(); + moveSnapshotsIfNeeded(); return {snapshot_file_name, disk}; } diff --git a/src/Coordination/KeeperSnapshotManager.h b/src/Coordination/KeeperSnapshotManager.h index 0afe582ef59..9bb287b9276 100644 --- 
a/src/Coordination/KeeperSnapshotManager.h +++ b/src/Coordination/KeeperSnapshotManager.h @@ -165,8 +165,10 @@ public: private: void removeOutdatedSnapshotsIfNeeded(); + void moveSnapshotsIfNeeded(); DiskPtr getDisk() const; + DiskPtr getLatestSnapshotDisk() const; /// Checks first 4 buffer bytes to became sure that snapshot compressed with /// ZSTD codec. @@ -184,6 +186,8 @@ private: size_t storage_tick_time; KeeperContextPtr keeper_context; + + Poco::Logger * log = &Poco::Logger::get("KeeperSnapshotManager"); }; /// Keeper create snapshots in background thread. KeeperStateMachine just create diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index e7cd409f569..a8c0d8d1518 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -468,15 +468,17 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res { latest_snapshot_meta = snapshot->snapshot_meta; /// we rely on the fact that the snapshot disk cannot be changed during runtime - if (isLocalDisk(*keeper_context->getSnapshotDisk())) + if (isLocalDisk(*keeper_context->getLatestSnapshotDisk())) { - latest_snapshot_info = snapshot_manager.serializeSnapshotToDisk(*snapshot); + auto snapshot_info = snapshot_manager.serializeSnapshotToDisk(*snapshot); + latest_snapshot_info = std::move(snapshot_info); latest_snapshot_buf = nullptr; } else { auto snapshot_buf = snapshot_manager.serializeSnapshotToBuffer(*snapshot); - latest_snapshot_info = snapshot_manager.serializeSnapshotBufferToDisk(*snapshot_buf, snapshot->snapshot_meta->get_last_log_idx()); + auto snapshot_info = snapshot_manager.serializeSnapshotBufferToDisk(*snapshot_buf, snapshot->snapshot_meta->get_last_log_idx()); + latest_snapshot_info = std::move(snapshot_info); latest_snapshot_buf = std::move(snapshot_buf); } From e91934bceaea3809feb0e2e52532b9eeb35ad7f2 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 30 May 2023 16:02:14 +0000 Subject: [PATCH 0197/1997] Fixing style. --- src/Interpreters/GlobalSubqueriesVisitor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/GlobalSubqueriesVisitor.h b/src/Interpreters/GlobalSubqueriesVisitor.h index 8d2f2204e84..fa4fc2c82df 100644 --- a/src/Interpreters/GlobalSubqueriesVisitor.h +++ b/src/Interpreters/GlobalSubqueriesVisitor.h @@ -202,7 +202,7 @@ public: external_storage_holder->future_set = std::move(future_set); } else - throw Exception(ErrorCodes::LOGICAL_ERROR, "!!!!!!!!"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Set is already created for GLOBAL IN"); //prepared_sets->addStorageToSubquery(key, std::move(external_storage)); } From c8bb1f64ad21dea5ba63fa8f2ea8434d90f9e823 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 30 May 2023 18:46:49 +0200 Subject: [PATCH 0198/1997] fix --- src/Storages/StorageReplicatedMergeTree.cpp | 3 +++ tests/integration/test_lost_part/test.py | 12 ++++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index e71f5217c2b..35f75880ced 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -5469,6 +5469,7 @@ void StorageReplicatedMergeTree::alter( if (mutation_znode) { LOG_DEBUG(log, "Metadata changes applied. 
Will wait for data changes."); + merge_selecting_task->schedule(); waitMutation(*mutation_znode, query_context->getSettingsRef().alter_sync); LOG_DEBUG(log, "Data changes applied."); } @@ -6620,6 +6621,8 @@ void StorageReplicatedMergeTree::mutate(const MutationCommands & commands, Conte throw Coordination::Exception("Unable to create a mutation znode", rc); } + merge_selecting_task->schedule(); + waitMutation(mutation_entry.znode_name, query_context->getSettingsRef().mutations_sync); } diff --git a/tests/integration/test_lost_part/test.py b/tests/integration/test_lost_part/test.py index 44cd19fd1fb..0bc24268040 100644 --- a/tests/integration/test_lost_part/test.py +++ b/tests/integration/test_lost_part/test.py @@ -42,7 +42,8 @@ def test_lost_part_same_replica(start_cluster): for node in [node1, node2]: node.query( f"CREATE TABLE mt0 (id UInt64, date Date) ENGINE ReplicatedMergeTree('/clickhouse/tables/t', '{node.name}') ORDER BY tuple() PARTITION BY date " - "SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0" + "SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0," + "merge_selecting_sleep_ms=100, max_merge_selecting_sleep_ms=1000" ) node1.query("SYSTEM STOP MERGES mt0") @@ -109,7 +110,8 @@ def test_lost_part_other_replica(start_cluster): for node in [node1, node2]: node.query( f"CREATE TABLE mt1 (id UInt64) ENGINE ReplicatedMergeTree('/clickhouse/tables/t1', '{node.name}') ORDER BY tuple() " - "SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0" + "SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0," + "merge_selecting_sleep_ms=100, max_merge_selecting_sleep_ms=1000" ) node1.query("SYSTEM STOP MERGES mt1") @@ -178,7 +180,8 @@ def test_lost_part_mutation(start_cluster): for node in [node1, node2]: node.query( f"CREATE TABLE mt2 (id UInt64) ENGINE ReplicatedMergeTree('/clickhouse/tables/t2', '{node.name}') ORDER BY tuple() " - "SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0" + "SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0," + "merge_selecting_sleep_ms=100, max_merge_selecting_sleep_ms=1000" ) node1.query("SYSTEM STOP MERGES mt2") @@ -241,7 +244,8 @@ def test_lost_last_part(start_cluster): for node in [node1, node2]: node.query( f"CREATE TABLE mt3 (id UInt64, p String) ENGINE ReplicatedMergeTree('/clickhouse/tables/t3', '{node.name}') " - "ORDER BY tuple() PARTITION BY p SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0" + "ORDER BY tuple() PARTITION BY p SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0," + "merge_selecting_sleep_ms=100, max_merge_selecting_sleep_ms=1000" ) node1.query("SYSTEM STOP MERGES mt3") From c7403284260992c296fbb34782bd1007a18ba28f Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 30 May 2023 18:51:18 +0000 Subject: [PATCH 0199/1997] Fix more tests. 
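[Editor's note] The heart of this commit is the guard added to ActionsVisitor.cpp in the first hunk below. Condensed, with the commented-out tracing stripped, it is:

    // External (GLOBAL IN) temporary tables only exist in a query-local context,
    // so skip the lookup when resolving against the global context.
    if (!data.getContext()->isGlobalContext())
    {
        if (auto tmp_table = data.getContext()->findExternalTable(identifier->getColumnName()))
            external_table_set = tmp_table->future_set;
    }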
--- src/Interpreters/ActionsVisitor.cpp | 11 +++++++---- src/Planner/CollectSets.cpp | 2 ++ src/Processors/QueryPlan/ReadFromMergeTree.cpp | 13 +++++++++++-- 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 23c6867a868..142b6f73b75 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -1420,11 +1420,14 @@ FutureSetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool return data.prepared_sets->addFromStorage(set_key, storage_set->getSet()); } - // std::cerr << ".... checking for " << identifier->getColumnName() << std::endl; - if (auto tmp_table = data.getContext()->findExternalTable(identifier->getColumnName())) + if (!data.getContext()->isGlobalContext()) { - external_table_set = tmp_table->future_set; - // std::cerr << "Found " << reinterpret_cast(tmp_table.get()) << " " << reinterpret_cast(external_table_set.get()) << std::endl; + // std::cerr << ".... checking for " << identifier->getColumnName() << std::endl; + if (auto tmp_table = data.getContext()->findExternalTable(identifier->getColumnName())) + { + external_table_set = tmp_table->future_set; + // std::cerr << "Found " << reinterpret_cast(tmp_table.get()) << " " << reinterpret_cast(external_table_set.get()) << std::endl; + } } } diff --git a/src/Planner/CollectSets.cpp b/src/Planner/CollectSets.cpp index 19dc569ff0c..913cf1c6ee3 100644 --- a/src/Planner/CollectSets.cpp +++ b/src/Planner/CollectSets.cpp @@ -90,6 +90,8 @@ public: in_second_argument_node_type == QueryTreeNodeType::UNION) { auto set_key = PreparedSetKey::forSubquery(in_second_argument->getTreeHash()); + if (sets.getFuture(set_key)) + return; auto subquery_options = select_query_options.subquery(); Planner subquery_planner( diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 367d3dcb525..8483df797ef 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1236,6 +1236,15 @@ static void buildIndexes( if (!indexes->use_skip_indexes) return; + const SelectQueryInfo * info = &query_info; + std::optional info_copy; + if (settings.allow_experimental_analyzer) + { + info_copy.emplace(query_info); + info_copy->filter_actions_dag = filter_actions_dag; + info = &*info_copy; + } + UsefulSkipIndexes skip_indexes; using Key = std::pair; std::map merged; @@ -1249,14 +1258,14 @@ static void buildIndexes( if (inserted) { skip_indexes.merged_indices.emplace_back(); - skip_indexes.merged_indices.back().condition = index_helper->createIndexMergedCondition(query_info, metadata_snapshot); + skip_indexes.merged_indices.back().condition = index_helper->createIndexMergedCondition(*info, metadata_snapshot); } skip_indexes.merged_indices[it->second].addIndex(index_helper); } else { - auto condition = index_helper->createIndexCondition(query_info, context); + auto condition = index_helper->createIndexCondition(*info, context); if (!condition->alwaysUnknownOrTrue()) skip_indexes.useful_indices.emplace_back(index_helper, condition); } From 38634cc5c5221a6ec646fc11dff34deda7c6b7d2 Mon Sep 17 00:00:00 2001 From: tpanetti Date: Wed, 24 May 2023 13:49:18 +0100 Subject: [PATCH 0200/1997] Convert Clickhouse Types to MySQL types in Compatibility mode This changes MySQL compatibility mode to display MySQL compatible types --- src/DataTypes/DataTypeAggregateFunction.h | 1 + src/DataTypes/DataTypeArray.h | 4 + 
src/DataTypes/DataTypeDate.h | 1 + src/DataTypes/DataTypeDate32.h | 1 + src/DataTypes/DataTypeDateTime.h | 1 + src/DataTypes/DataTypeDateTime64.h | 1 + src/DataTypes/DataTypeEnum.cpp | 24 ++++ src/DataTypes/DataTypeEnum.h | 3 + src/DataTypes/DataTypeFixedString.h | 1 + src/DataTypes/DataTypeFunction.h | 1 + src/DataTypes/DataTypeIPv4andIPv6.h | 4 + src/DataTypes/DataTypeInterval.h | 1 + src/DataTypes/DataTypeLowCardinality.h | 2 + src/DataTypes/DataTypeMap.h | 1 + src/DataTypes/DataTypeNothing.h | 2 + src/DataTypes/DataTypeNullable.h | 1 + src/DataTypes/DataTypeNumberBase.cpp | 17 +++ src/DataTypes/DataTypeNumberBase.h | 3 + src/DataTypes/DataTypeObject.h | 1 + src/DataTypes/DataTypeSet.h | 2 + src/DataTypes/DataTypeString.h | 3 + src/DataTypes/DataTypeTuple.h | 1 + src/DataTypes/DataTypeUUID.h | 2 + src/DataTypes/DataTypesDecimal.h | 3 + src/DataTypes/IDataType.h | 10 ++ src/Storages/System/StorageSystemColumns.cpp | 15 ++- .../02740_show_columns_mysql_compatibility.sh | 116 ++++++++++++++++++ 27 files changed, 221 insertions(+), 1 deletion(-) create mode 100755 tests/queries/0_stateless/02740_show_columns_mysql_compatibility.sh diff --git a/src/DataTypes/DataTypeAggregateFunction.h b/src/DataTypes/DataTypeAggregateFunction.h index 2d712d9c686..697be13652c 100644 --- a/src/DataTypes/DataTypeAggregateFunction.h +++ b/src/DataTypes/DataTypeAggregateFunction.h @@ -45,6 +45,7 @@ public: String doGetName() const override; String getNameWithoutVersion() const; const char * getFamilyName() const override { return "AggregateFunction"; } + const char * getMySQLName() const override { return "text"; } TypeIndex getTypeId() const override { return TypeIndex::AggregateFunction; } Array getParameters() const { return parameters; } diff --git a/src/DataTypes/DataTypeArray.h b/src/DataTypes/DataTypeArray.h index 033a657c845..35462df9a4e 100644 --- a/src/DataTypes/DataTypeArray.h +++ b/src/DataTypes/DataTypeArray.h @@ -30,6 +30,10 @@ public: { return "Array"; } + const char * getMySQLName() const override + { + return "string"; + } bool canBeInsideNullable() const override { diff --git a/src/DataTypes/DataTypeDate.h b/src/DataTypes/DataTypeDate.h index 2f17207cc07..33bcb6123ff 100644 --- a/src/DataTypes/DataTypeDate.h +++ b/src/DataTypes/DataTypeDate.h @@ -13,6 +13,7 @@ public: TypeIndex getTypeId() const override { return TypeIndex::Date; } const char * getFamilyName() const override { return family_name; } + const char * getMySQLName() const override { return "date"; } bool canBeUsedAsVersion() const override { return true; } bool canBeInsideNullable() const override { return true; } diff --git a/src/DataTypes/DataTypeDate32.h b/src/DataTypes/DataTypeDate32.h index 9160b62dc15..56315f46e8c 100644 --- a/src/DataTypes/DataTypeDate32.h +++ b/src/DataTypes/DataTypeDate32.h @@ -13,6 +13,7 @@ public: TypeIndex getTypeId() const override { return TypeIndex::Date32; } const char * getFamilyName() const override { return family_name; } + const char * getMySQLName() const override { return "date"; } Field getDefault() const override { diff --git a/src/DataTypes/DataTypeDateTime.h b/src/DataTypes/DataTypeDateTime.h index 91a09ff7cb9..c868f92c311 100644 --- a/src/DataTypes/DataTypeDateTime.h +++ b/src/DataTypes/DataTypeDateTime.h @@ -36,6 +36,7 @@ public: static constexpr auto family_name = "DateTime"; const char * getFamilyName() const override { return family_name; } + const char * getMySQLName() const override { return "datetime"; } String doGetName() const override; TypeIndex getTypeId() const override { 
return TypeIndex::DateTime; } diff --git a/src/DataTypes/DataTypeDateTime64.h b/src/DataTypes/DataTypeDateTime64.h index aaa99485040..8d317bb9430 100644 --- a/src/DataTypes/DataTypeDateTime64.h +++ b/src/DataTypes/DataTypeDateTime64.h @@ -28,6 +28,7 @@ public: DataTypeDateTime64(UInt32 scale_, const TimezoneMixin & time_zone_info); const char * getFamilyName() const override { return family_name; } + const char * getMySQLName() const override { return "datetime"; } std::string doGetName() const override; TypeIndex getTypeId() const override { return type_id; } diff --git a/src/DataTypes/DataTypeEnum.cpp b/src/DataTypes/DataTypeEnum.cpp index 3c3ac2ae4e2..bfed4d4d5a2 100644 --- a/src/DataTypes/DataTypeEnum.cpp +++ b/src/DataTypes/DataTypeEnum.cpp @@ -36,6 +36,29 @@ const char * DataTypeEnum::getFamilyName() const return EnumName::value; } +template +std::string DataTypeEnum::generateMySQLName(const Values & values) +{ + WriteBufferFromOwnString out; + + writeString("enum", out); + writeChar('(', out); + + auto first = true; + for (const auto & name_and_value : values) + { + if (!first) + writeString(", ", out); + + first = false; + + writeQuotedString(name_and_value.first, out); + } + + writeChar(')', out); + + return out.str(); +} template std::string DataTypeEnum::generateName(const Values & values) @@ -67,6 +90,7 @@ template DataTypeEnum::DataTypeEnum(const Values & values_) : EnumValues(values_) , type_name(generateName(this->getValues())) + , my_sql_type_name(generateMySQLName(this->getValues())) { } diff --git a/src/DataTypes/DataTypeEnum.h b/src/DataTypes/DataTypeEnum.h index 2f607fc2aa6..c6e523adf96 100644 --- a/src/DataTypes/DataTypeEnum.h +++ b/src/DataTypes/DataTypeEnum.h @@ -45,13 +45,16 @@ public: private: std::string type_name; + std::string my_sql_type_name; static std::string generateName(const Values & values); + static std::string generateMySQLName(const Values & values); public: explicit DataTypeEnum(const Values & values_); std::string doGetName() const override { return type_name; } const char * getFamilyName() const override; + const char * getMySQLName() const override { return my_sql_type_name.c_str(); } TypeIndex getTypeId() const override { return type_id; } diff --git a/src/DataTypes/DataTypeFixedString.h b/src/DataTypes/DataTypeFixedString.h index 8d114121c1a..eb09914ec9c 100644 --- a/src/DataTypes/DataTypeFixedString.h +++ b/src/DataTypes/DataTypeFixedString.h @@ -42,6 +42,7 @@ public: TypeIndex getTypeId() const override { return type_id; } const char * getFamilyName() const override { return "FixedString"; } + const char * getMySQLName() const override { return "text"; } size_t getN() const { diff --git a/src/DataTypes/DataTypeFunction.h b/src/DataTypes/DataTypeFunction.h index 888bcb6a775..f3423796126 100644 --- a/src/DataTypes/DataTypeFunction.h +++ b/src/DataTypes/DataTypeFunction.h @@ -24,6 +24,7 @@ public: std::string doGetName() const override; const char * getFamilyName() const override { return "Function"; } + const char * getMySQLName() const override { return "text"; } TypeIndex getTypeId() const override { return TypeIndex::Function; } const DataTypes & getArgumentTypes() const diff --git a/src/DataTypes/DataTypeIPv4andIPv6.h b/src/DataTypes/DataTypeIPv4andIPv6.h index ad70bdae933..8f7fe79793b 100644 --- a/src/DataTypes/DataTypeIPv4andIPv6.h +++ b/src/DataTypes/DataTypeIPv4andIPv6.h @@ -19,6 +19,8 @@ public: static constexpr auto type_id = TypeToTypeIndex; const char * getFamilyName() const override { return TypeName.data(); } + const char * 
getMySQLName() const override { return "text"; } + TypeIndex getTypeId() const override { return type_id; } Field getDefault() const override { return IPv4{}; } @@ -59,6 +61,8 @@ public: static constexpr auto type_id = TypeToTypeIndex; const char * getFamilyName() const override { return TypeName.data(); } + const char * getMySQLName() const override { return "text"; } + TypeIndex getTypeId() const override { return type_id; } Field getDefault() const override { return IPv6{}; } diff --git a/src/DataTypes/DataTypeInterval.h b/src/DataTypes/DataTypeInterval.h index 05abe1d9b24..69a56e8aadd 100644 --- a/src/DataTypes/DataTypeInterval.h +++ b/src/DataTypes/DataTypeInterval.h @@ -26,6 +26,7 @@ public: std::string doGetName() const override { return fmt::format("Interval{}", kind.toString()); } const char * getFamilyName() const override { return "Interval"; } + const char * getMySQLName() const override { return "text"; } TypeIndex getTypeId() const override { return TypeIndex::Interval; } bool equals(const IDataType & rhs) const override; diff --git a/src/DataTypes/DataTypeLowCardinality.h b/src/DataTypes/DataTypeLowCardinality.h index d301a0f5443..6fd4344311c 100644 --- a/src/DataTypes/DataTypeLowCardinality.h +++ b/src/DataTypes/DataTypeLowCardinality.h @@ -22,6 +22,8 @@ public: return "LowCardinality(" + dictionary_type->getName() + ")"; } const char * getFamilyName() const override { return "LowCardinality"; } + const char * getMySQLName() const override { return "text"; } + TypeIndex getTypeId() const override { return TypeIndex::LowCardinality; } MutableColumnPtr createColumn() const override; diff --git a/src/DataTypes/DataTypeMap.h b/src/DataTypes/DataTypeMap.h index 4712f6bbdef..526dc321f44 100644 --- a/src/DataTypes/DataTypeMap.h +++ b/src/DataTypes/DataTypeMap.h @@ -30,6 +30,7 @@ public: TypeIndex getTypeId() const override { return TypeIndex::Map; } std::string doGetName() const override; const char * getFamilyName() const override { return "Map"; } + const char * getMySQLName() const override { return "json"; } bool canBeInsideNullable() const override { return false; } diff --git a/src/DataTypes/DataTypeNothing.h b/src/DataTypes/DataTypeNothing.h index c7d12388de9..fdef6026603 100644 --- a/src/DataTypes/DataTypeNothing.h +++ b/src/DataTypes/DataTypeNothing.h @@ -16,6 +16,8 @@ public: static constexpr bool is_parametric = false; const char * getFamilyName() const override { return "Nothing"; } + const char * getMySQLName() const override { return "text"; } + TypeIndex getTypeId() const override { return TypeIndex::Nothing; } MutableColumnPtr createColumn() const override; diff --git a/src/DataTypes/DataTypeNullable.h b/src/DataTypes/DataTypeNullable.h index 06d46fb15ed..64b201d32b2 100644 --- a/src/DataTypes/DataTypeNullable.h +++ b/src/DataTypes/DataTypeNullable.h @@ -16,6 +16,7 @@ public: explicit DataTypeNullable(const DataTypePtr & nested_data_type_); std::string doGetName() const override { return "Nullable(" + nested_data_type->getName() + ")"; } const char * getFamilyName() const override { return "Nullable"; } + const char * getMySQLName() const override { return nested_data_type->getMySQLName(); } TypeIndex getTypeId() const override { return TypeIndex::Nullable; } MutableColumnPtr createColumn() const override; diff --git a/src/DataTypes/DataTypeNumberBase.cpp b/src/DataTypes/DataTypeNumberBase.cpp index f668a4c522e..cd5e73ac4a1 100644 --- a/src/DataTypes/DataTypeNumberBase.cpp +++ b/src/DataTypes/DataTypeNumberBase.cpp @@ -30,6 +30,23 @@ bool 
DataTypeNumberBase::isValueRepresentedByUnsignedInteger() const return is_integer && is_unsigned_v; } +template +const std::map DataTypeNumberBase::mysqlTypeMap = { + {"UInt8", "tinyint unsigned"}, + {"UInt16", "smallint unsigned"}, + {"UInt32", "mediumint unsigned"}, + {"UInt64", "bigint unsigned"}, + {"UInt128", "bigint unsigned"}, + {"UInt256", "bigint unsigned"}, + {"Int8", "tinyint"}, + {"Int16", "smallint"}, + {"Int32", "int"}, + {"Int64", "bigint"}, + {"Int128", "bigint"}, + {"Int256", "bigint"}, + {"Float32", "float"}, + {"Float64", "double"}, +}; /// Explicit template instantiations - to avoid code bloat in headers. template class DataTypeNumberBase; diff --git a/src/DataTypes/DataTypeNumberBase.h b/src/DataTypes/DataTypeNumberBase.h index 3a5b11c5124..b5c963cf245 100644 --- a/src/DataTypes/DataTypeNumberBase.h +++ b/src/DataTypes/DataTypeNumberBase.h @@ -20,11 +20,14 @@ public: static constexpr bool is_parametric = false; static constexpr auto family_name = TypeName; static constexpr auto type_id = TypeToTypeIndex; + // Create a map from the name of the type to the name of the type in MySQL. + static const std::map mysqlTypeMap; using FieldType = T; using ColumnType = ColumnVector; const char * getFamilyName() const override { return TypeName.data(); } + const char * getMySQLName() const override { return mysqlTypeMap.at(TypeName.data()).c_str(); } TypeIndex getTypeId() const override { return TypeToTypeIndex; } Field getDefault() const override; diff --git a/src/DataTypes/DataTypeObject.h b/src/DataTypes/DataTypeObject.h index 937a9091371..8a2c36abcd7 100644 --- a/src/DataTypes/DataTypeObject.h +++ b/src/DataTypes/DataTypeObject.h @@ -23,6 +23,7 @@ public: DataTypeObject(const String & schema_format_, bool is_nullable_); const char * getFamilyName() const override { return "Object"; } + const char * getMySQLName() const override { return "json"; } String doGetName() const override; TypeIndex getTypeId() const override { return TypeIndex::Object; } diff --git a/src/DataTypes/DataTypeSet.h b/src/DataTypes/DataTypeSet.h index 7ddfeb9fe30..bdad638b5d5 100644 --- a/src/DataTypes/DataTypeSet.h +++ b/src/DataTypes/DataTypeSet.h @@ -15,6 +15,8 @@ class DataTypeSet final : public IDataTypeDummy public: static constexpr bool is_parametric = true; const char * getFamilyName() const override { return "Set"; } + const char * getMySQLName() const override { return "text"; } + TypeIndex getTypeId() const override { return TypeIndex::Set; } bool equals(const IDataType & rhs) const override { return typeid(rhs) == typeid(*this); } bool isParametric() const override { return true; } diff --git a/src/DataTypes/DataTypeString.h b/src/DataTypes/DataTypeString.h index 5f3bde43a13..3ac739fe68c 100644 --- a/src/DataTypes/DataTypeString.h +++ b/src/DataTypes/DataTypeString.h @@ -21,6 +21,9 @@ public: return "String"; } + // FIXME: string can contain arbitrary bytes, not only UTF-8 sequences + const char * getMySQLName() const override { return "text"; } + TypeIndex getTypeId() const override { return type_id; } MutableColumnPtr createColumn() const override; diff --git a/src/DataTypes/DataTypeTuple.h b/src/DataTypes/DataTypeTuple.h index 152f21015f5..d264cc97f60 100644 --- a/src/DataTypes/DataTypeTuple.h +++ b/src/DataTypes/DataTypeTuple.h @@ -33,6 +33,7 @@ public: TypeIndex getTypeId() const override { return TypeIndex::Tuple; } std::string doGetName() const override; const char * getFamilyName() const override { return "Tuple"; } + const char * getMySQLName() const override { return "json"; } bool 
canBeInsideNullable() const override { return false; } bool supportsSparseSerialization() const override { return true; } diff --git a/src/DataTypes/DataTypeUUID.h b/src/DataTypes/DataTypeUUID.h index af9f1f35ca5..4d54db42b45 100644 --- a/src/DataTypes/DataTypeUUID.h +++ b/src/DataTypes/DataTypeUUID.h @@ -18,6 +18,8 @@ public: static constexpr auto type_id = TypeIndex::UUID; const char * getFamilyName() const override { return "UUID"; } + const char * getMySQLName() const override { return "char"; } + TypeIndex getTypeId() const override { return type_id; } Field getDefault() const override; diff --git a/src/DataTypes/DataTypesDecimal.h b/src/DataTypes/DataTypesDecimal.h index 583f7ea804a..5c9405cb060 100644 --- a/src/DataTypes/DataTypesDecimal.h +++ b/src/DataTypes/DataTypesDecimal.h @@ -37,8 +37,11 @@ public: using Base::Base; static constexpr auto family_name = "Decimal"; + static constexpr auto mysql_name = "decimal"; const char * getFamilyName() const override { return family_name; } + const char * getMySQLName() const override { return mysql_name; } + std::string doGetName() const override; TypeIndex getTypeId() const override { return TypeToTypeIndex; } bool canBePromoted() const override { return true; } diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index 7cc18fea00c..2bed18897ce 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -71,10 +71,19 @@ public: return doGetName(); } + /// MySQL equivalent Name of data type (examples: UInt64, Array(String)). + String getMySQLTypeName() const + { + if (custom_name) + return custom_name->getName(); + else + return doGetMySQLName(); + } DataTypePtr getPtr() const { return shared_from_this(); } /// Name of data type family (example: FixedString, Array). virtual const char * getFamilyName() const = 0; + virtual const char * getMySQLName() const = 0; /// Data type id. It's used for runtime type checks. 
virtual TypeIndex getTypeId() const = 0; @@ -126,6 +135,7 @@ public: protected: virtual String doGetName() const { return getFamilyName(); } + virtual String doGetMySQLName() const { return getMySQLName(); } virtual SerializationPtr doGetDefaultSerialization() const = 0; public: diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index 18e7d269795..f391a392dbb 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -74,6 +74,7 @@ public: : ISource(header_) , columns_mask(std::move(columns_mask_)), max_block_size(max_block_size_) , databases(std::move(databases_)), tables(std::move(tables_)), storages(std::move(storages_)) + , clientInfo(context->getClientInfo()) , total_tables(tables->size()), access(context->getAccess()) , query_id(context->getCurrentQueryId()), lock_acquire_timeout(context->getSettingsRef().lock_acquire_timeout) { @@ -129,6 +130,17 @@ protected: bool check_access_for_columns = check_access_for_tables && !access->isGranted(AccessType::SHOW_COLUMNS, database_name, table_name); + auto get_type_name = [this](const IDataType& type) -> std::string + { + if (clientInfo.interface == DB::ClientInfo::Interface::MYSQL) + { + return type.getMySQLTypeName(); + } + else + { + return type.getName(); + } + }; size_t position = 0; for (const auto & column : columns) { @@ -146,7 +158,7 @@ protected: if (columns_mask[src_index++]) res_columns[res_index++]->insert(column.name); if (columns_mask[src_index++]) - res_columns[res_index++]->insert(column.type->getName()); + res_columns[res_index++]->insert(get_type_name(*column.type)); if (columns_mask[src_index++]) res_columns[res_index++]->insert(position); @@ -281,6 +293,7 @@ private: ColumnPtr databases; ColumnPtr tables; Storages storages; + ClientInfo clientInfo; size_t db_table_num = 0; size_t total_tables; std::shared_ptr access; diff --git a/tests/queries/0_stateless/02740_show_columns_mysql_compatibility.sh b/tests/queries/0_stateless/02740_show_columns_mysql_compatibility.sh new file mode 100755 index 00000000000..7f828d35679 --- /dev/null +++ b/tests/queries/0_stateless/02740_show_columns_mysql_compatibility.sh @@ -0,0 +1,116 @@ +#!/bin/bash + +# This script tests the MySQL compatibility of the SHOW COLUMNS command in ClickHouse +USER="default" +PASSWORD="" +HOST="127.0.0.1" +PORT=9004 + +# First run the clickhouse test to create the ClickHouse Tables + +echo "Drop tables if they exist" +${CLICKHOUSE_LOCAL} --query "DROP TABLE IF EXISTS tab" +${CLICKHOUSE_LOCAL} --query "DROP TABLE IF EXISTS database_123456789abcde" +${CLICKHOUSE_LOCAL} --query "DROP TABLE IF EXISTS database_123456789abcde.tab" + +echo "Create tab table " +${CLICKHOUSE_LOCAL} --query " + CREATE TABLE tab + ( + uint64 UInt64, + int32 Nullable(Int32), + float32 Float32, + float64 Float64, + decimal_value Decimal(10, 2), + boolean_value UInt8, -- Use 0 for false, 1 for true + string_value String, + fixed_string_value FixedString(10), + date_value Date, + date32_value Date32, + datetime_value DateTime, + datetime64_value DateTime64(3), + json_value String, -- Store JSON as a string + uuid_value UUID, + enum_value Enum8('apple' = 1, 'banana' = 2, 'orange' = 3), + low_cardinality LowCardinality(String), + array_value Array(Int32), + map_value Map(String, Int32), + tuple_value Tuple(Int32, String), + nullable_value Nullable(Int32), + ipv4_value IPv4, + ipv6_value IPv6, + nested Nested + ( + nested_int Int32, + nested_string String + ) + ) ENGINE = MergeTree + ORDER 
BY uint64;
+    "
+
+
+echo "Create pseudo-random database name"
+${CLICKHOUSE_LOCAL} --query "CREATE DATABASE database_123456789abcde;"
+
+echo "Create tab duplicate table"
+${CLICKHOUSE_LOCAL} --query "
+    CREATE TABLE database_123456789abcde.tab
+    (
+        uint64 UInt64,
+        int32 Nullable(Int32),
+        float32 Float32,
+        float64 Float64,
+        decimal_value Decimal(10, 2),
+        boolean_value UInt8, -- Use 0 for false, 1 for true
+        string_value String,
+        fixed_string_value FixedString(10),
+        date_value Date,
+        date32_value Date32,
+        datetime_value DateTime,
+        datetime64_value DateTime64(3),
+        json_value String, -- Store JSON as a string
+        uuid_value UUID,
+        enum_value Enum8('apple' = 1, 'banana' = 2, 'orange' = 3),
+        low_cardinality LowCardinality(String),
+        array_value Array(Int32),
+        map_value Map(String, Int32),
+        tuple_value Tuple(Int32, String),
+        nullable_value Nullable(Int32),
+        ipv4_value IPv4,
+        ipv6_value IPv6,
+        nested Nested
+        (
+            nested_int Int32,
+            nested_string String
+        )
+    ) ENGINE = MergeTree
+    ORDER BY uint64;
+    "
+
+# Write sql to temp file
+TEMP_FILE=$(mktemp)
+
+cat <<EOT > $TEMP_FILE
+SHOW COLUMNS FROM tab;
+SHOW EXTENDED COLUMNS FROM tab;
+SHOW FULL COLUMNS FROM tab;
+SHOW COLUMNS FROM tab LIKE '%int%';
+SHOW COLUMNS FROM tab NOT LIKE '%int%';
+SHOW COLUMNS FROM tab ILIKE '%INT%';
+SHOW COLUMNS FROM tab NOT ILIKE '%INT%';
+SHOW COLUMNS FROM tab WHERE field LIKE '%int%';
+SHOW COLUMNS FROM tab LIMIT 1;
+SHOW COLUMNS FROM tab;
+SHOW COLUMNS FROM tab FROM database_123456789abcde;
+SHOW COLUMNS FROM database_123456789abcde.tab;
+DROP DATABASE database_123456789abcde;
+DROP TABLE tab;
+EOT
+
+# Now run the MySQL test script on the ClickHouse DB
+echo "Run MySQL test"
+mysql --user="$USER" --password="$PASSWORD" --host="$HOST" --port="$PORT" < $TEMP_FILE
+
+# Clean up the temp file
+rm $TEMP_FILE
+

From bd5a1ae2b97b66361e5b958811a6055f8f5cd2ae Mon Sep 17 00:00:00 2001
From: tpanetti
Date: Tue, 30 May 2023 13:32:33 -0700
Subject: [PATCH 0201/1997] Revert "Change SHOW COLUMNS query to display MySQL
 types in MySQL Compatibility mode"

This reverts commit ddbad79c5e67518acebbacaad5be0cad3967ac67.
---
 .../InterpreterShowColumnsQuery.cpp           |  76 +------
 .../InterpreterShowColumnsQuery.h             |   1 -
 ...show_columns_mysql_compatibility.reference | 213 ------------------
 .../02726_show_columns_mysql_compatibility.sh | 115 ----------
 4 files changed, 3 insertions(+), 402 deletions(-)
 delete mode 100644 tests/queries/0_stateless/02726_show_columns_mysql_compatibility.reference
 delete mode 100755 tests/queries/0_stateless/02726_show_columns_mysql_compatibility.sh

diff --git a/src/Interpreters/InterpreterShowColumnsQuery.cpp b/src/Interpreters/InterpreterShowColumnsQuery.cpp
index 0ad93e37b58..c86d3c753c4 100644
--- a/src/Interpreters/InterpreterShowColumnsQuery.cpp
+++ b/src/Interpreters/InterpreterShowColumnsQuery.cpp
@@ -42,11 +42,9 @@ SELECT
         if (default_kind IN ('ALIAS', 'DEFAULT', 'MATERIALIZED'), default_expression, NULL) AS default,
         '' AS extra )";
 
-    rewritten_query += getMySQLQuery();
-    }
-    else {
-    rewritten_query += "SELECT name AS field, type AS type, startsWith(type, 'Nullable') AS null, trim(concatWithSeparator(' ', if(is_in_primary_key, 'PRI', ''), if (is_in_sorting_key, 'SOR', ''))) AS key, if(default_kind IN ('ALIAS', 'DEFAULT', 'MATERIALIZED'), default_expression, NULL) AS default, '' AS extra ";
-    }
+    // TODO Interpret query.extended. It is supposed to show internal/virtual columns. Need to fetch virtual column names, see
+    // IStorage::getVirtuals(). We can't easily do that via SQL.
+ if (query.full) { /// "Full" mode is mostly for MySQL compat @@ -90,74 +88,6 @@ WHERE return rewritten_query; } -String InterpreterShowColumnsQuery::getMySQLQuery() -{ - String mysql_specific_query; - - mysql_specific_query = R"(SELECT name AS field, - CASE - WHEN startsWith(type, 'Nullable') THEN - CASE - WHEN substring(type, 10, length(type) - 10) IN ('UInt8', 'Int8') THEN 'tinyint' - WHEN substring(type, 10, length(type) - 10) IN ('UInt16', 'Int16') THEN 'smallint' - WHEN substring(type, 10, length(type) - 10) IN ('UInt32', 'Int32') THEN 'int' - WHEN substring(type, 10, length(type) - 10) IN ('UInt64', 'Int64', 'UInt128', 'Int128', 'UInt256', 'Int256') THEN 'bigint' - WHEN substring(type, 10, length(type) - 10) = 'Float32' THEN 'float' - WHEN substring(type, 10, length(type) - 10) = 'Float64' THEN 'double' - WHEN substring(type, 10, length(type) - 10) LIKE 'Decimal%' THEN 'decimal' - WHEN substring(type, 10, length(type) - 10) = 'Boolean' THEN 'tinyint' - WHEN substring(type, 10, length(type) - 10) = 'String' THEN 'text' - WHEN substring(type, 10, length(type) - 10) LIKE 'FixedString%' THEN 'text' - WHEN substring(type, 10, length(type) - 10) LIKE 'Date%' THEN 'date' - WHEN substring(type, 10, length(type) - 10) LIKE 'DateTime%' THEN 'datetime' - WHEN substring(type, 10, length(type) - 10) = 'JSON' THEN 'json' - WHEN substring(type, 10, length(type) - 10) = 'UUID' THEN 'binary' - WHEN substring(type, 10, length(type) - 10) LIKE 'Enum%' THEN 'enum' - WHEN substring(type, 10, length(type) - 10) LIKE 'LowCardinality%' THEN 'text' - WHEN substring(type, 10, length(type) - 10) LIKE 'Array%' THEN 'json' - WHEN substring(type, 10, length(type) - 10) LIKE 'Map%' THEN 'json' - WHEN substring(type, 10, length(type) - 10) IN ('SimpleAggregateFunction', 'AggregateFunction') THEN 'text' - WHEN substring(type, 10, length(type) - 10) = 'Nested' THEN 'json' - WHEN substring(type, 10, length(type) - 10) LIKE 'Tuple%' THEN 'json' - WHEN substring(type, 10, length(type) - 10) LIKE 'IPv%' THEN 'text' - WHEN substring(type, 10, length(type) - 10) IN ('Expression', 'Set', 'Nothing', 'Interval') THEN 'text' - ELSE substring(type, 10, length(type) - 10) - END - ELSE - CASE - WHEN type IN ('UInt8', 'Int8') THEN 'tinyint' - WHEN type IN ('UInt16', 'Int16') THEN 'smallint' - WHEN type IN ('UInt32', 'Int32') THEN 'int' - WHEN type IN ('UInt64', 'Int64', 'UInt128', 'Int128', 'UInt256', 'Int256') THEN 'bigint' - WHEN type = 'Float32' THEN 'float' - WHEN type = 'Float64' THEN 'double' - WHEN type LIKE 'Decimal%' THEN 'decimal' - WHEN type = 'Boolean' THEN 'tinyint' - WHEN type = 'String' THEN 'text' - WHEN type LIKE 'FixedString%' THEN 'text' - WHEN type LIKE 'Date%' THEN 'date' - WHEN type LIKE 'DateTime%' THEN 'datetime' - WHEN type = 'JSON' THEN 'json' - WHEN type = 'UUID' THEN 'binary' - WHEN type LIKE 'Enum%' THEN 'enum' - WHEN type LIKE 'LowCardinality%' THEN 'text' - WHEN type LIKE 'Array%' THEN 'json' - WHEN type LIKE 'Map%' THEN 'json' - WHEN type IN ('SimpleAggregateFunction', 'AggregateFunction') THEN 'text' - WHEN type = 'Nested' THEN 'json' - WHEN type LIKE 'Tuple%' THEN 'json' - WHEN type LIKE 'IPv%' THEN 'text' - WHEN type IN ('Expression', 'Set', 'Nothing', 'Interval') THEN 'text' - ELSE type - END - END AS type, - startsWith(type, 'Nullable') AS null, - trim(concatWithSeparator(' ', if(is_in_primary_key, 'PRI', ''), if (is_in_sorting_key, 'SOR', ''))) AS key, - if(default_kind IN ('ALIAS', 'DEFAULT', 'MATERIALIZED'), default_expression, NULL) AS default, - '' AS extra )"; - - return 
mysql_specific_query.str(); -} BlockIO InterpreterShowColumnsQuery::execute() { diff --git a/src/Interpreters/InterpreterShowColumnsQuery.h b/src/Interpreters/InterpreterShowColumnsQuery.h index b843a163978..ee6dcabd97b 100644 --- a/src/Interpreters/InterpreterShowColumnsQuery.h +++ b/src/Interpreters/InterpreterShowColumnsQuery.h @@ -26,7 +26,6 @@ private: ASTPtr query_ptr; String getRewrittenQuery(); - String getMySQLQuery(); }; diff --git a/tests/queries/0_stateless/02726_show_columns_mysql_compatibility.reference b/tests/queries/0_stateless/02726_show_columns_mysql_compatibility.reference deleted file mode 100644 index c9ad94a34c4..00000000000 --- a/tests/queries/0_stateless/02726_show_columns_mysql_compatibility.reference +++ /dev/null @@ -1,213 +0,0 @@ -Drop tables if they exist -Create tab table -Create pseudo-random database name -Create tab duplicate table -Run MySQL test -field type null key default extra -array_value json 0 NULL -boolean_value tinyint 0 NULL -date32_value date 0 NULL -date_value date 0 NULL -datetime64_value date 0 NULL -datetime_value date 0 NULL -decimal_value decimal 0 NULL -enum_value enum 0 NULL -fixed_string_value text 0 NULL -float32 float 0 NULL -float64 double 0 NULL -int32 int 0 NULL -ipv4_value text 0 NULL -ipv6_value text 0 NULL -json_value text 0 NULL -low_cardinality text 0 NULL -map_value json 0 NULL -nested.nested_int json 0 NULL -nested.nested_string json 0 NULL -nullable_value int 0 NULL -string_value text 0 NULL -tuple_value json 0 NULL -uint64 bigint 0 PRI SOR NULL -uuid_value binary 0 NULL -field type null key default extra -array_value json 0 NULL -boolean_value tinyint 0 NULL -date32_value date 0 NULL -date_value date 0 NULL -datetime64_value date 0 NULL -datetime_value date 0 NULL -decimal_value decimal 0 NULL -enum_value enum 0 NULL -fixed_string_value text 0 NULL -float32 float 0 NULL -float64 double 0 NULL -int32 int 0 NULL -ipv4_value text 0 NULL -ipv6_value text 0 NULL -json_value text 0 NULL -low_cardinality text 0 NULL -map_value json 0 NULL -nested.nested_int json 0 NULL -nested.nested_string json 0 NULL -nullable_value int 0 NULL -string_value text 0 NULL -tuple_value json 0 NULL -uint64 bigint 0 PRI SOR NULL -uuid_value binary 0 NULL -field type null key default extra collation comment privileges -array_value json 0 NULL NULL -boolean_value tinyint 0 NULL NULL -date32_value date 0 NULL NULL -date_value date 0 NULL NULL -datetime64_value date 0 NULL NULL -datetime_value date 0 NULL NULL -decimal_value decimal 0 NULL NULL -enum_value enum 0 NULL NULL -fixed_string_value text 0 NULL NULL -float32 float 0 NULL NULL -float64 double 0 NULL NULL -int32 int 0 NULL NULL -ipv4_value text 0 NULL NULL -ipv6_value text 0 NULL NULL -json_value text 0 NULL NULL -low_cardinality text 0 NULL NULL -map_value json 0 NULL NULL -nested.nested_int json 0 NULL NULL -nested.nested_string json 0 NULL NULL -nullable_value int 0 NULL NULL -string_value text 0 NULL NULL -tuple_value json 0 NULL NULL -uint64 bigint 0 PRI SOR NULL NULL -uuid_value binary 0 NULL NULL -field type null key default extra -int32 int 0 NULL -nested.nested_int json 0 NULL -uint64 bigint 0 PRI SOR NULL -field type null key default extra -array_value json 0 NULL -boolean_value tinyint 0 NULL -date32_value date 0 NULL -date_value date 0 NULL -datetime64_value date 0 NULL -datetime_value date 0 NULL -decimal_value decimal 0 NULL -enum_value enum 0 NULL -fixed_string_value text 0 NULL -float32 float 0 NULL -float64 double 0 NULL -ipv4_value text 0 NULL -ipv6_value text 0 NULL 
-json_value text 0 NULL -low_cardinality text 0 NULL -map_value json 0 NULL -nested.nested_string json 0 NULL -nullable_value int 0 NULL -string_value text 0 NULL -tuple_value json 0 NULL -uuid_value binary 0 NULL -field type null key default extra -int32 int 0 NULL -nested.nested_int json 0 NULL -uint64 bigint 0 PRI SOR NULL -field type null key default extra -array_value json 0 NULL -boolean_value tinyint 0 NULL -date32_value date 0 NULL -date_value date 0 NULL -datetime64_value date 0 NULL -datetime_value date 0 NULL -decimal_value decimal 0 NULL -enum_value enum 0 NULL -fixed_string_value text 0 NULL -float32 float 0 NULL -float64 double 0 NULL -ipv4_value text 0 NULL -ipv6_value text 0 NULL -json_value text 0 NULL -low_cardinality text 0 NULL -map_value json 0 NULL -nested.nested_string json 0 NULL -nullable_value int 0 NULL -string_value text 0 NULL -tuple_value json 0 NULL -uuid_value binary 0 NULL -field type null key default extra -int32 int 0 NULL -nested.nested_int json 0 NULL -uint64 bigint 0 PRI SOR NULL -field type null key default extra -array_value json 0 NULL -field type null key default extra -array_value json 0 NULL -boolean_value tinyint 0 NULL -date32_value date 0 NULL -date_value date 0 NULL -datetime64_value date 0 NULL -datetime_value date 0 NULL -decimal_value decimal 0 NULL -enum_value enum 0 NULL -fixed_string_value text 0 NULL -float32 float 0 NULL -float64 double 0 NULL -int32 int 0 NULL -ipv4_value text 0 NULL -ipv6_value text 0 NULL -json_value text 0 NULL -low_cardinality text 0 NULL -map_value json 0 NULL -nested.nested_int json 0 NULL -nested.nested_string json 0 NULL -nullable_value int 0 NULL -string_value text 0 NULL -tuple_value json 0 NULL -uint64 bigint 0 PRI SOR NULL -uuid_value binary 0 NULL -field type null key default extra -array_value json 0 NULL -boolean_value tinyint 0 NULL -date32_value date 0 NULL -date_value date 0 NULL -datetime64_value date 0 NULL -datetime_value date 0 NULL -decimal_value decimal 0 NULL -enum_value enum 0 NULL -fixed_string_value text 0 NULL -float32 float 0 NULL -float64 double 0 NULL -int32 int 0 NULL -ipv4_value text 0 NULL -ipv6_value text 0 NULL -json_value text 0 NULL -low_cardinality text 0 NULL -map_value json 0 NULL -nested.nested_int json 0 NULL -nested.nested_string json 0 NULL -nullable_value int 0 NULL -string_value text 0 NULL -tuple_value json 0 NULL -uint64 bigint 0 PRI SOR NULL -uuid_value binary 0 NULL -field type null key default extra -array_value json 0 NULL -boolean_value tinyint 0 NULL -date32_value date 0 NULL -date_value date 0 NULL -datetime64_value date 0 NULL -datetime_value date 0 NULL -decimal_value decimal 0 NULL -enum_value enum 0 NULL -fixed_string_value text 0 NULL -float32 float 0 NULL -float64 double 0 NULL -int32 int 0 NULL -ipv4_value text 0 NULL -ipv6_value text 0 NULL -json_value text 0 NULL -low_cardinality text 0 NULL -map_value json 0 NULL -nested.nested_int json 0 NULL -nested.nested_string json 0 NULL -nullable_value int 0 NULL -string_value text 0 NULL -tuple_value json 0 NULL -uint64 bigint 0 PRI SOR NULL -uuid_value binary 0 NULL diff --git a/tests/queries/0_stateless/02726_show_columns_mysql_compatibility.sh b/tests/queries/0_stateless/02726_show_columns_mysql_compatibility.sh deleted file mode 100755 index 5324496edd3..00000000000 --- a/tests/queries/0_stateless/02726_show_columns_mysql_compatibility.sh +++ /dev/null @@ -1,115 +0,0 @@ -#!/bin/bash - -# This script tests the MySQL compatibility of the SHOW COLUMNS command in ClickHouse -USER="default" -PASSWORD="" 
-HOST="127.0.0.1" -PORT=9004 - -# First run the clickhouse test to create the ClickHouse Tables - -echo "Drop tables if they exist" -${CLICKHOUSE_LOCAL} --query "DROP TABLE IF EXISTS tab" -${CLICKHOUSE_LOCAL} --query "DROP TABLE IF EXISTS database_123456789abcde" -${CLICKHOUSE_LOCAL} --query "DROP TABLE IF EXISTS database_123456789abcde.tab" - -echo "Create tab table " -${CLICKHOUSE_LOCAL} --query " - CREATE TABLE tab - ( - uint64 UInt64, - int32 Nullable(Int32), - float32 Float32, - float64 Float64, - decimal_value Decimal(10, 2), - boolean_value UInt8, -- Use 0 for false, 1 for true - string_value String, - fixed_string_value FixedString(10), - date_value Date, - date32_value Date32, - datetime_value DateTime, - datetime64_value DateTime64(3), - json_value String, -- Store JSON as a string - uuid_value UUID, - enum_value Enum8('apple' = 1, 'banana' = 2, 'orange' = 3), - low_cardinality LowCardinality(String), - array_value Array(Int32), - map_value Map(String, Int32), - tuple_value Tuple(Int32, String), - nullable_value Nullable(Int32), - ipv4_value IPv4, - ipv6_value IPv6, - nested Nested - ( - nested_int Int32, - nested_string String - ) - ) ENGINE = MergeTree - ORDER BY uint64; - " - - -echo "Create pseudo-random database name" -${CLICKHOUSE_LOCAL} --query "CREATE DATABASE database_123456789abcde;" - -echo "Create tab duplicate table" -${CLICKHOUSE_LOCAL} --query " - CREATE TABLE database_123456789abcde.tab - ( - uint64 UInt64, - int32 Nullable(Int32), - float32 Float32, - float64 Float64, - decimal_value Decimal(10, 2), - boolean_value UInt8, -- Use 0 for false, 1 for true - string_value String, - fixed_string_value FixedString(10), - date_value Date, - date32_value Date32, - datetime_value DateTime, - datetime64_value DateTime64(3), - json_value String, -- Store JSON as a string - uuid_value UUID, - enum_value Enum8('apple' = 1, 'banana' = 2, 'orange' = 3), - low_cardinality LowCardinality(String), - array_value Array(Int32), - map_value Map(String, Int32), - tuple_value Tuple(Int32, String), - nullable_value Nullable(Int32), - ipv4_value IPv4, - ipv6_value IPv6, - nested Nested - ( - nested_int Int32, - nested_string String - ) - ) ENGINE = MergeTree - ORDER BY uint64; - " - -# Write sql to temp file -TEMP_FILE=$(mktemp) - -cat < $TEMP_FILE -SHOW COLUMNS FROM tab; -SHOW EXTENDED COLUMNS FROM tab; -SHOW FULL COLUMNS FROM tab; -SHOW COLUMNS FROM tab LIKE '%int%'; -SHOW COLUMNS FROM tab NOT LIKE '%int%'; -SHOW COLUMNS FROM tab ILIKE '%INT%'; -SHOW COLUMNS FROM tab NOT ILIKE '%INT%'; -SHOW COLUMNS FROM tab WHERE field LIKE '%int%'; -SHOW COLUMNS FROM tab LIMIT 1; -SHOW COLUMNS FROM tab; -SHOW COLUMNS FROM tab FROM database_123456789abcde; -SHOW COLUMNS FROM database_123456789abcde.tab; -DROP DATABASE database_123456789abcde; -DROP TABLE tab; -EOT - -# Now run the MySQL test script on the ClickHouse DB -echo "Run MySQL test" -mysql --user="$USER" --password="$PASSWORD" --host="$HOST" --port="$PORT" < $TEMP_FILE - -# Clean up the temp file -rm $TEMP_FILE From cd8eb44f0c54945f4777ed3e50e08b057ee41f43 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Wed, 31 May 2023 14:03:11 +0000 Subject: [PATCH 0202/1997] Add encryptValue(), decryptValue() and exceptions --- src/Common/Config/ConfigProcessor.cpp | 94 ++++++++++++++++----------- src/Common/Config/ConfigProcessor.h | 8 ++- 2 files changed, 63 insertions(+), 39 deletions(-) diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index fdfc6343876..b6db53018f4 100644 --- 
a/src/Common/Config/ConfigProcessor.cpp
+++ b/src/Common/Config/ConfigProcessor.cpp
@@ -46,6 +46,17 @@ namespace ErrorCodes
     extern const int CANNOT_LOAD_CONFIG;
 }
 
+/// Get method for string name. Throw exception for wrong name
+EncryptionMethod getEncryptionMethod(const std::string & name)
+{
+    if (name == "AES_128_GCM_SIV")
+        return AES_128_GCM_SIV;
+    else if (name == "AES_256_GCM_SIV")
+        return AES_256_GCM_SIV;
+    else
+        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption Method. Got {}", name);
+}
+
 /// For cutting preprocessed path to this base
 static std::string main_config_path;
 
@@ -175,57 +186,64 @@ static void mergeAttributes(Element & config_element, Element & with_element)
     with_element_attributes->release();
 }
 
+std::string ConfigProcessor::encryptValue(const std::string & codec_name, const std::string & value)
+{
+    auto codec = DB::CompressionCodecEncrypted(getEncryptionMethod(codec_name));
+
+    DB::Memory<> memory1;
+    memory1.resize(value.size() + codec.getAdditionalSizeAtTheEndOfBuffer() + codec.getHeaderSize()+100);
+    auto bytes_written = codec.compress(value.data(), static_cast<UInt32>(value.size()), memory1.data());
+    std::string encrypted_value = std::string(memory1.data(), bytes_written);
+    std::string hex_value;
+    boost::algorithm::hex(encrypted_value.begin(), encrypted_value.end(), std::back_inserter(hex_value));
+    LOG_DEBUG(log, "Encrypted value: '{}'.", hex_value);
+    return hex_value;
+}
+
+std::string ConfigProcessor::decryptValue(const std::string & codec_name, const std::string & value)
+{
+    auto codec = DB::CompressionCodecEncrypted(getEncryptionMethod(codec_name));
+
+    DB::Memory<> memory;
+    std::string encrypted_value;
+
+    try
+    {
+        boost::algorithm::unhex(value, std::back_inserter(encrypted_value));
+    }
+    catch (const std::exception &)
+    {
+        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot read encrypted text, check for valid characters [0-9a-fA-F] and length");
+    }
+
+    memory.resize(codec.readDecompressedBlockSize(encrypted_value.data()) + codec.getAdditionalSizeAtTheEndOfBuffer());
+    codec.decompress(encrypted_value.data(), static_cast<UInt32>(encrypted_value.size()), memory.data());
+    std::string decrypted_value = std::string(memory.data(), memory.size());
+    LOG_DEBUG(log, "Decrypted value '{}'", decrypted_value);
+    return decrypted_value;
+}
+
 void ConfigProcessor::decryptRecursive(Poco::XML::Node * config_root)
 {
     for (Node * node = config_root->firstChild(); node;)
     {
         if (node->nodeType() == Node::ELEMENT_NODE)
         {
-            // NamedNodeMapPtr attributes = node->attributes();
             Element & element = dynamic_cast<Element &>(*node);
             if (element.hasAttribute("encryption_codec"))
             {
                 LOG_DEBUG(log, "Encrypted node <{}>", node->nodeName());
-                // for (Node * child_node = node->firstChild(); child_node;)
-                // {
-                //     LOG_DEBUG(log, "    Child node {} value '{}'.", child_node->nodeName(), child_node->getNodeValue());
-                //     child_node = child_node->nextSibling();
-                // }
+
+                const NodeListPtr children = element.childNodes();
+                if (children->length() != 1)
+                    throw Exception(ErrorCodes::BAD_ARGUMENTS, "Encrypted node {} should have only one text node", node->nodeName());
 
                 Node * text_node = node->firstChild();
-                auto codec_128 = DB::CompressionCodecEncrypted(DB::AES_128_GCM_SIV);
-                // DB::CompressionCodecEncrypted::Configuration::instance().tryLoad(*config, "");
+                if (text_node->nodeType() != Node::TEXT_NODE)
+                    throw Exception(ErrorCodes::BAD_ARGUMENTS, "Encrypted node {} should have text node", node->nodeName());
 
-                /*
-                DB::Memory<> memory1;
-                std::string password="abcd";
-                memory1.resize(password.size() + codec_128.getAdditionalSizeAtTheEndOfBuffer() + codec_128.getHeaderSize()+100);
-                auto bytes_written = codec_128.compress(password.data(), static_cast<UInt32>(password.size()), memory1.data());
-                // std::string encrypted_password = std::string(memory1.data(), memory1.size());
-                std::string encrypted_password = std::string(memory1.data(), bytes_written);
-                std::string password_hex;
-                boost::algorithm::hex(encrypted_password.begin(), encrypted_password.end(), std::back_inserter(password_hex));
-                LOG_DEBUG(log, "Encrypted password: '{}'.", password_hex);
-                */
-
-                DB::Memory<> memory;
-                std::string encrypted_value;
-
-                try
-                {
-                    boost::algorithm::unhex(text_node->getNodeValue(), std::back_inserter(encrypted_value));
-                    // boost::algorithm::unhex(password_hex, std::back_inserter(encrypted_value));
-                }
-                catch (const std::exception &)
-                {
-                    throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot read encrypted text for {}, check for valid characters [0-9a-fA-F] and length", node->nodeName());
-                }
-
-                memory.resize(codec_128.readDecompressedBlockSize(encrypted_value.data()) + codec_128.getAdditionalSizeAtTheEndOfBuffer());
-                codec_128.decompress(encrypted_value.data(), static_cast<UInt32>(encrypted_value.size()), memory.data());
-                std::string decrypted_value = std::string(memory.data(), memory.size());
-                LOG_DEBUG(log, "Decrypted value '{}'", decrypted_value);
-                text_node->setNodeValue(decrypted_value);
+                auto encryption_codec = element.getAttribute("encryption_codec");
+                text_node->setNodeValue(decryptValue(encryption_codec, text_node->getNodeValue()));
             }
         }
diff --git a/src/Common/Config/ConfigProcessor.h b/src/Common/Config/ConfigProcessor.h
index 479a0053efa..c9b227863f0 100644
--- a/src/Common/Config/ConfigProcessor.h
+++ b/src/Common/Config/ConfigProcessor.h
@@ -92,7 +92,13 @@ public:
         const zkutil::EventPtr & zk_changed_event,
         bool fallback_to_preprocessed = false);
 
-    /// crypt nodes in config with specified encryption attributes
+    /// Encrypt text value
+    std::string encryptValue(const std::string & codec_name, const std::string & value);
+
+    /// Decrypt value
+    std::string decryptValue(const std::string & codec_name, const std::string & value);
+
+    /// Decrypt nodes in config with specified encryption attributes
    void decryptConfig(LoadedConfig & loaded_config);
 
     /// Save preprocessed config to specified directory.
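For reference, the two helpers introduced above are symmetric: encryptValue() returns the ciphertext as a hex string, and decryptValue() accepts that same hex string back. A minimal sketch of a round trip through the new API (assuming a server config whose encryption_codecs section already defines key material for AES_128_GCM_SIV; the config path used here is illustrative, not part of the patch):

    #include <Common/Config/ConfigProcessor.h>
    #include <Compression/CompressionCodecEncrypted.h>
    #include <cassert>
    #include <string>

    int main()
    {
        // Load a config first so the codec keys become available,
        // exactly as the example tool in the next commit does.
        DB::ConfigProcessor processor("/etc/clickhouse-server/config.xml", false, true);
        auto loaded_config = processor.loadConfig();
        DB::CompressionCodecEncrypted::Configuration::instance().tryLoad(
            *loaded_config.configuration, "encryption_codecs");

        // Encrypt a plaintext value, then feed the hex ciphertext back in.
        std::string hex = processor.encryptValue("AES_128_GCM_SIV", "secret");
        assert(processor.decryptValue("AES_128_GCM_SIV", hex) == "secret");
        return 0;
    }

The next commit adds a standalone example binary built around exactly this flow.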
From fd8c5992889728c76d231a4f96c577bc6578017d Mon Sep 17 00:00:00 2001
From: Roman Vasin
Date: Wed, 31 May 2023 15:16:18 +0000
Subject: [PATCH 0203/1997] Add encrypt_decrypt example

---
 src/Common/examples/CMakeLists.txt      |  3 ++
 src/Common/examples/encrypt_decrypt.cpp | 50 +++++++++++++++++++++++++
 utils/config-processor/CMakeLists.txt   |  3 +-
 3 files changed, 54 insertions(+), 2 deletions(-)
 create mode 100644 src/Common/examples/encrypt_decrypt.cpp

diff --git a/src/Common/examples/CMakeLists.txt b/src/Common/examples/CMakeLists.txt
index d095ab3a1be..12a2b59ff77 100644
--- a/src/Common/examples/CMakeLists.txt
+++ b/src/Common/examples/CMakeLists.txt
@@ -82,3 +82,6 @@ endif()
 
 clickhouse_add_executable (interval_tree interval_tree.cpp)
 target_link_libraries (interval_tree PRIVATE dbms)
+
+clickhouse_add_executable (encrypt_decrypt encrypt_decrypt.cpp)
+target_link_libraries (encrypt_decrypt PRIVATE dbms)
diff --git a/src/Common/examples/encrypt_decrypt.cpp b/src/Common/examples/encrypt_decrypt.cpp
new file mode 100644
index 00000000000..cd48963c47a
--- /dev/null
+++ b/src/Common/examples/encrypt_decrypt.cpp
@@ -0,0 +1,50 @@
+#include <Common/Config/ConfigProcessor.h>
+#include <Compression/ICompressionCodec.h>
+#include <Compression/CompressionCodecEncrypted.h>
+#include <iostream>
+
+
+int main(int argc, char ** argv)
+{
+    try
+    {
+        if (argc != 5)
+        {
+            std::cerr << "usage: " << argv[0] << " path action codec value" << std::endl;
+            return 3;
+        }
+
+        std::string action = argv[2];
+        std::string codec_name = argv[3];
+        std::string value = argv[4];
+        DB::ConfigProcessor processor(argv[1], false, true);
+
+        auto loaded_config = processor.loadConfig();
+
+        DB::CompressionCodecEncrypted::Configuration::instance().tryLoad(*loaded_config.configuration, "encryption_codecs");
+
+        if (action == "-e")
+            std::cout << processor.encryptValue(codec_name, value) << std::endl;
+        else if (action == "-d")
+            std::cout << processor.decryptValue(codec_name, value) << std::endl;
+        else
+            std::cerr << "Unknown action: " << action << std::endl;
+    }
+    catch (Poco::Exception & e)
+    {
+        std::cerr << "Exception: " << e.displayText() << std::endl;
+        return 1;
+    }
+    catch (std::exception & e)
+    {
+        std::cerr << "std::exception: " << e.what() << std::endl;
+        return 3;
+    }
+    catch (...)
+    {
+        std::cerr << "Some exception" << std::endl;
+        return 2;
+    }
+
+    return 0;
+}
diff --git a/utils/config-processor/CMakeLists.txt b/utils/config-processor/CMakeLists.txt
index 00cbfbba659..80c3535ef4e 100644
--- a/utils/config-processor/CMakeLists.txt
+++ b/utils/config-processor/CMakeLists.txt
@@ -1,3 +1,2 @@
 clickhouse_add_executable (config-processor config-processor.cpp)
-target_link_libraries(config-processor PRIVATE dbms clickhouse_common_config_no_zookeeper_log)
-target_link_libraries(config-processor PUBLIC clickhouse_parsers clickhouse_common_io common ch_contrib::lz4)
+target_link_libraries(config-processor PRIVATE dbms)

From 0d4ed32baca8eb2d897bcfd66eed0d04781af166 Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com>
Date: Wed, 31 May 2023 11:25:33 -0400
Subject: [PATCH 0204/1997] better exception message

---
 src/Functions/FunctionsCodingIP.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/Functions/FunctionsCodingIP.h b/src/Functions/FunctionsCodingIP.h
index bd53fa7e043..9d090abb736 100644
--- a/src/Functions/FunctionsCodingIP.h
+++ b/src/Functions/FunctionsCodingIP.h
@@ -341,7 +341,11 @@ ColumnPtr convertIPv6ToIPv4(ColumnPtr column, const PaddedPODArray<UInt8> * null
     {
         if constexpr (exception_mode == IPStringToNumExceptionMode::Throw)
         {
-            throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "IPv6 in column {} is not in IPv4 mapping block", column->getName());
+            char addr[IPV6_MAX_TEXT_LENGTH + 1] {};
+            char * paddr = addr;
+            formatIPv6(src, paddr);
+
+            throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "IPv6 {} in column {} is not in IPv4 mapping block", addr, column->getName());
         }
         else if constexpr (exception_mode == IPStringToNumExceptionMode::Default)
         {

From 3af7e0a6fa21d570f78fcf9366c299e3199d2b77 Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com>
Date: Wed, 31 May 2023 11:26:58 -0400
Subject: [PATCH 0205/1997] better exception message

---
 src/Functions/FunctionsConversion.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h
index 4d4efc84df1..6d22fb661c3 100644
--- a/src/Functions/FunctionsConversion.h
+++ b/src/Functions/FunctionsConversion.h
@@ -310,7 +310,13 @@ struct ConvertImpl
                     const uint8_t ip4_cidr[] {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00};
                     const uint8_t * src = reinterpret_cast<const uint8_t *>(&vec_from[i].toUnderType());
                     if (!matchIPv6Subnet(src, ip4_cidr, 96))
-                        throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "IPv6 in column {} is not in IPv4 mapping block", named_from.column->getName());
+                    {
+                        char addr[IPV6_MAX_TEXT_LENGTH + 1] {};
+                        char * paddr = addr;
+                        formatIPv6(src, paddr);
+
+                        throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "IPv6 {} in column {} is not in IPv4 mapping block", addr, named_from.column->getName());
+                    }
 
                     uint8_t * dst = reinterpret_cast<uint8_t *>(&vec_to[i].toUnderType());
                     if constexpr (std::endian::native == std::endian::little)

From 4ba08a5cbc960cace3dfcf32b3497855b3ffe6fd Mon Sep 17 00:00:00 2001
From: zvonand
Date: Wed, 31 May 2023 23:21:39 +0200
Subject: [PATCH 0206/1997] remove unused import

---
 src/DataTypes/Serializations/SerializationDate.h   | 2 +-
 src/DataTypes/Serializations/SerializationDate32.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/DataTypes/Serializations/SerializationDate.h b/src/DataTypes/Serializations/SerializationDate.h
index 4d6a6fa36ec..f751b06fba6 100644
--- a/src/DataTypes/Serializations/SerializationDate.h
+++ b/src/DataTypes/Serializations/SerializationDate.h
@@ -1,7 +1,7 @@
 #pragma once
 
 #include
-#include
+#include
 
 namespace DB
 {
diff --git a/src/DataTypes/Serializations/SerializationDate32.h b/src/DataTypes/Serializations/SerializationDate32.h
index 6b6e5442240..49560fb6c7d 100644
--- a/src/DataTypes/Serializations/SerializationDate32.h
+++ b/src/DataTypes/Serializations/SerializationDate32.h
@@ -1,7 +1,7 @@
 #pragma once
 
 #include
-#include
+#include
 
 namespace DB
 {

From 4c92bc7aadf354b713d6b8f3c24728d6172f1867 Mon Sep 17 00:00:00 2001
From: tpanetti
Date: Wed, 31 May 2023 15:30:26 -0700
Subject: [PATCH 0207/1997] Fix incompatible ClickHouse -> MySQL types for
 compatibility mode

This adjusts specific incompatible ClickHouse types to a format that can be
read and interpreted by MySQL (Ex: Int128 -> text)
---
 src/DataTypes/DataTypeArray.h                 |   2 +-
 src/DataTypes/DataTypeLowCardinality.cpp      |   3 +-
 src/DataTypes/DataTypeLowCardinality.h        |   4 +-
 src/DataTypes/DataTypeNumberBase.cpp          |   8 +-
 src/DataTypes/DataTypeString.h                |   2 +-
 ...show_columns_mysql_compatibility.reference | 229 ++++++++++++++++++
 ...02775_show_columns_mysql_compatibility.sh} |  23 +-
 7 files changed, 256 insertions(+), 15 deletions(-)
 create mode 100644 tests/queries/0_stateless/02775_show_columns_mysql_compatibility.reference
 rename tests/queries/0_stateless/{02740_show_columns_mysql_compatibility.sh => 02775_show_columns_mysql_compatibility.sh} (80%)

diff --git a/src/DataTypes/DataTypeArray.h b/src/DataTypes/DataTypeArray.h
index 35462df9a4e..b031f411975 100644
--- a/src/DataTypes/DataTypeArray.h
+++ b/src/DataTypes/DataTypeArray.h
@@ -32,7 +32,7 @@ public:
     }
     const char * getMySQLName() const override
     {
-        return "string";
+        return "text";
     }
 
     bool canBeInsideNullable() const override
diff --git a/src/DataTypes/DataTypeLowCardinality.cpp b/src/DataTypes/DataTypeLowCardinality.cpp
index 8293455cabc..b1c32317015 100644
--- a/src/DataTypes/DataTypeLowCardinality.cpp
+++ b/src/DataTypes/DataTypeLowCardinality.cpp
@@ -28,7 +28,8 @@ namespace ErrorCodes
 }
 
 DataTypeLowCardinality::DataTypeLowCardinality(DataTypePtr dictionary_type_)
-    : dictionary_type(std::move(dictionary_type_))
+    : dictionary_type(std::move(dictionary_type_)),
+    mysql_name(dictionary_type->getMySQLName())
 {
     auto inner_type = dictionary_type;
     if (dictionary_type->isNullable())
diff --git a/src/DataTypes/DataTypeLowCardinality.h b/src/DataTypes/DataTypeLowCardinality.h
index 6fd4344311c..bcc39f58ff7 100644
--- a/src/DataTypes/DataTypeLowCardinality.h
+++ b/src/DataTypes/DataTypeLowCardinality.h
@@ -11,6 +11,8 @@ class DataTypeLowCardinality : public IDataType
 {
 private:
     DataTypePtr dictionary_type;
+    std::string mysql_name;
+
 public:
     explicit DataTypeLowCardinality(DataTypePtr dictionary_type_);
 
@@ -22,7 +24,7 @@ public:
         return "LowCardinality(" + dictionary_type->getName() + ")";
     }
     const char * getFamilyName() const override { return "LowCardinality"; }
-    const char * getMySQLName() const override { return "text"; }
+    const char * getMySQLName() const override { return mysql_name.c_str(); }
 
     TypeIndex getTypeId() const override { return TypeIndex::LowCardinality; }
 
diff --git a/src/DataTypes/DataTypeNumberBase.cpp b/src/DataTypes/DataTypeNumberBase.cpp
index cd5e73ac4a1..7d200de7996 100644
--- a/src/DataTypes/DataTypeNumberBase.cpp
+++ b/src/DataTypes/DataTypeNumberBase.cpp
@@ -36,14 +36,14 @@ const std::map DataTypeNumberBase::mysqlTypeMap = {
     {"UInt16", "smallint unsigned"},
    {"UInt32",
"mediumint unsigned"}, {"UInt64", "bigint unsigned"}, - {"UInt128", "bigint unsigned"}, - {"UInt256", "bigint unsigned"}, + {"UInt128", "text"}, + {"UInt256", "text"}, {"Int8", "tinyint"}, {"Int16", "smallint"}, {"Int32", "int"}, {"Int64", "bigint"}, - {"Int128", "bigint"}, - {"Int256", "bigint"}, + {"Int128", "text"}, + {"Int256", "text"}, {"Float32", "float"}, {"Float64", "double"}, }; diff --git a/src/DataTypes/DataTypeString.h b/src/DataTypes/DataTypeString.h index 3ac739fe68c..bddfb4ae287 100644 --- a/src/DataTypes/DataTypeString.h +++ b/src/DataTypes/DataTypeString.h @@ -22,7 +22,7 @@ public: } // FIXME: string can contain arbitrary bytes, not only UTF-8 sequences - const char * getMySQLName() const override { return "text"; } + const char * getMySQLName() const override { return "blob"; } TypeIndex getTypeId() const override { return type_id; } diff --git a/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.reference b/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.reference new file mode 100644 index 00000000000..96e542611c6 --- /dev/null +++ b/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.reference @@ -0,0 +1,229 @@ +Drop tables if they exist +Create tab table +Create pseudo-random database name +Create tab duplicate table +Run MySQL test +field type null key default extra +aggregate_function text 0 NULL +array_value text 0 NULL +boolean_value tinyint unsigned 0 NULL +date32_value date 0 NULL +date_value date 0 NULL +datetime64_value datetime 0 NULL +datetime_value datetime 0 NULL +decimal_value decimal 0 NULL +enum_value enum('apple', 'banana', 'orange') 0 NULL +fixed_string_value text 0 NULL +float32 float 0 NULL +float64 double 0 NULL +int32 int 0 NULL +ipv4_value text 0 NULL +ipv6_value text 0 NULL +json_value json 0 NULL +low_cardinality blob 0 NULL +low_cardinality_date datetime 0 NULL +map_value json 0 NULL +nested.nested_int text 0 NULL +nested.nested_string text 0 NULL +nullable_value int 0 NULL +string_value blob 0 NULL +tuple_value json 0 NULL +uint64 bigint unsigned 0 PRI SOR NULL +uuid_value char 0 NULL +field type null key default extra +aggregate_function text 0 NULL +array_value text 0 NULL +boolean_value tinyint unsigned 0 NULL +date32_value date 0 NULL +date_value date 0 NULL +datetime64_value datetime 0 NULL +datetime_value datetime 0 NULL +decimal_value decimal 0 NULL +enum_value enum('apple', 'banana', 'orange') 0 NULL +fixed_string_value text 0 NULL +float32 float 0 NULL +float64 double 0 NULL +int32 int 0 NULL +ipv4_value text 0 NULL +ipv6_value text 0 NULL +json_value json 0 NULL +low_cardinality blob 0 NULL +low_cardinality_date datetime 0 NULL +map_value json 0 NULL +nested.nested_int text 0 NULL +nested.nested_string text 0 NULL +nullable_value int 0 NULL +string_value blob 0 NULL +tuple_value json 0 NULL +uint64 bigint unsigned 0 PRI SOR NULL +uuid_value char 0 NULL +field type null key default extra collation comment privileges +aggregate_function text 0 NULL NULL +array_value text 0 NULL NULL +boolean_value tinyint unsigned 0 NULL NULL +date32_value date 0 NULL NULL +date_value date 0 NULL NULL +datetime64_value datetime 0 NULL NULL +datetime_value datetime 0 NULL NULL +decimal_value decimal 0 NULL NULL +enum_value enum('apple', 'banana', 'orange') 0 NULL NULL +fixed_string_value text 0 NULL NULL +float32 float 0 NULL NULL +float64 double 0 NULL NULL +int32 int 0 NULL NULL +ipv4_value text 0 NULL NULL +ipv6_value text 0 NULL NULL +json_value json 0 NULL NULL +low_cardinality blob 0 NULL NULL 
+low_cardinality_date datetime 0 NULL NULL +map_value json 0 NULL NULL +nested.nested_int text 0 NULL NULL +nested.nested_string text 0 NULL NULL +nullable_value int 0 NULL NULL +string_value blob 0 NULL NULL +tuple_value json 0 NULL NULL +uint64 bigint unsigned 0 PRI SOR NULL NULL +uuid_value char 0 NULL NULL +field type null key default extra +int32 int 0 NULL +nested.nested_int text 0 NULL +uint64 bigint unsigned 0 PRI SOR NULL +field type null key default extra +aggregate_function text 0 NULL +array_value text 0 NULL +boolean_value tinyint unsigned 0 NULL +date32_value date 0 NULL +date_value date 0 NULL +datetime64_value datetime 0 NULL +datetime_value datetime 0 NULL +decimal_value decimal 0 NULL +enum_value enum('apple', 'banana', 'orange') 0 NULL +fixed_string_value text 0 NULL +float32 float 0 NULL +float64 double 0 NULL +ipv4_value text 0 NULL +ipv6_value text 0 NULL +json_value json 0 NULL +low_cardinality blob 0 NULL +low_cardinality_date datetime 0 NULL +map_value json 0 NULL +nested.nested_string text 0 NULL +nullable_value int 0 NULL +string_value blob 0 NULL +tuple_value json 0 NULL +uuid_value char 0 NULL +field type null key default extra +int32 int 0 NULL +nested.nested_int text 0 NULL +uint64 bigint unsigned 0 PRI SOR NULL +field type null key default extra +aggregate_function text 0 NULL +array_value text 0 NULL +boolean_value tinyint unsigned 0 NULL +date32_value date 0 NULL +date_value date 0 NULL +datetime64_value datetime 0 NULL +datetime_value datetime 0 NULL +decimal_value decimal 0 NULL +enum_value enum('apple', 'banana', 'orange') 0 NULL +fixed_string_value text 0 NULL +float32 float 0 NULL +float64 double 0 NULL +ipv4_value text 0 NULL +ipv6_value text 0 NULL +json_value json 0 NULL +low_cardinality blob 0 NULL +low_cardinality_date datetime 0 NULL +map_value json 0 NULL +nested.nested_string text 0 NULL +nullable_value int 0 NULL +string_value blob 0 NULL +tuple_value json 0 NULL +uuid_value char 0 NULL +field type null key default extra +int32 int 0 NULL +nested.nested_int text 0 NULL +uint64 bigint unsigned 0 PRI SOR NULL +field type null key default extra +aggregate_function text 0 NULL +field type null key default extra +aggregate_function text 0 NULL +array_value text 0 NULL +boolean_value tinyint unsigned 0 NULL +date32_value date 0 NULL +date_value date 0 NULL +datetime64_value datetime 0 NULL +datetime_value datetime 0 NULL +decimal_value decimal 0 NULL +enum_value enum('apple', 'banana', 'orange') 0 NULL +fixed_string_value text 0 NULL +float32 float 0 NULL +float64 double 0 NULL +int32 int 0 NULL +ipv4_value text 0 NULL +ipv6_value text 0 NULL +json_value json 0 NULL +low_cardinality blob 0 NULL +low_cardinality_date datetime 0 NULL +map_value json 0 NULL +nested.nested_int text 0 NULL +nested.nested_string text 0 NULL +nullable_value int 0 NULL +string_value blob 0 NULL +tuple_value json 0 NULL +uint64 bigint unsigned 0 PRI SOR NULL +uuid_value char 0 NULL +field type null key default extra +aggregate_function text 0 NULL +array_value text 0 NULL +boolean_value tinyint unsigned 0 NULL +date32_value date 0 NULL +date_value date 0 NULL +datetime64_value datetime 0 NULL +datetime_value datetime 0 NULL +decimal_value decimal 0 NULL +enum_value enum('apple', 'banana', 'orange') 0 NULL +fixed_string_value text 0 NULL +float32 float 0 NULL +float64 double 0 NULL +int32 int 0 NULL +ipv4_value text 0 NULL +ipv6_value text 0 NULL +json_value json 0 NULL +low_cardinality blob 0 NULL +low_cardinality_date datetime 0 NULL +map_value json 0 NULL 
+nested.nested_int text 0 NULL +nested.nested_string text 0 NULL +nullable_value int 0 NULL +string_value blob 0 NULL +tuple_value json 0 NULL +uint64 bigint unsigned 0 PRI SOR NULL +uuid_value char 0 NULL +field type null key default extra +aggregate_function text 0 NULL +array_value text 0 NULL +boolean_value tinyint unsigned 0 NULL +date32_value date 0 NULL +date_value date 0 NULL +datetime64_value datetime 0 NULL +datetime_value datetime 0 NULL +decimal_value decimal 0 NULL +enum_value enum('apple', 'banana', 'orange') 0 NULL +fixed_string_value text 0 NULL +float32 float 0 NULL +float64 double 0 NULL +int32 int 0 NULL +ipv4_value text 0 NULL +ipv6_value text 0 NULL +json_value json 0 NULL +low_cardinality blob 0 NULL +low_cardinality_date datetime 0 NULL +map_value json 0 NULL +nested.nested_int text 0 NULL +nested.nested_string text 0 NULL +nullable_value int 0 NULL +string_value blob 0 NULL +tuple_value json 0 NULL +uint64 bigint unsigned 0 PRI SOR NULL +uuid_value char 0 NULL diff --git a/tests/queries/0_stateless/02740_show_columns_mysql_compatibility.sh b/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.sh similarity index 80% rename from tests/queries/0_stateless/02740_show_columns_mysql_compatibility.sh rename to tests/queries/0_stateless/02775_show_columns_mysql_compatibility.sh index 7f828d35679..a446c6e817e 100755 --- a/tests/queries/0_stateless/02740_show_columns_mysql_compatibility.sh +++ b/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.sh @@ -13,8 +13,11 @@ ${CLICKHOUSE_LOCAL} --query "DROP TABLE IF EXISTS tab" ${CLICKHOUSE_LOCAL} --query "DROP TABLE IF EXISTS database_123456789abcde" ${CLICKHOUSE_LOCAL} --query "DROP TABLE IF EXISTS database_123456789abcde.tab" +#${CLICKHOUSE_LOCAL} --query "SET allow_suspicious_low_cardinality_types = 1;" echo "Create tab table " -${CLICKHOUSE_LOCAL} --query " +${CLICKHOUSE_LOCAL} -n -q " + SET allow_suspicious_low_cardinality_types=1; + SET allow_experimental_object_type =1; CREATE TABLE tab ( uint64 UInt64, @@ -22,17 +25,19 @@ ${CLICKHOUSE_LOCAL} --query " float32 Float32, float64 Float64, decimal_value Decimal(10, 2), - boolean_value UInt8, -- Use 0 for false, 1 for true + boolean_value UInt8, string_value String, fixed_string_value FixedString(10), date_value Date, date32_value Date32, datetime_value DateTime, datetime64_value DateTime64(3), - json_value String, -- Store JSON as a string + json_value JSON, uuid_value UUID, enum_value Enum8('apple' = 1, 'banana' = 2, 'orange' = 3), low_cardinality LowCardinality(String), + low_cardinality_date LowCardinality(DateTime), + aggregate_function AggregateFunction(sum, Int32), array_value Array(Int32), map_value Map(String, Int32), tuple_value Tuple(Int32, String), @@ -53,7 +58,9 @@ echo "Create pseudo-random database name" ${CLICKHOUSE_LOCAL} --query "CREATE DATABASE database_123456789abcde;" echo "Create tab duplicate table" -${CLICKHOUSE_LOCAL} --query " +${CLICKHOUSE_LOCAL} -n -q " + SET allow_suspicious_low_cardinality_types=1; + SET allow_experimental_object_type =1; CREATE TABLE database_123456789abcde.tab ( uint64 UInt64, @@ -61,17 +68,19 @@ ${CLICKHOUSE_LOCAL} --query " float32 Float32, float64 Float64, decimal_value Decimal(10, 2), - boolean_value UInt8, -- Use 0 for false, 1 for true + boolean_value UInt8, string_value String, fixed_string_value FixedString(10), date_value Date, date32_value Date32, datetime_value DateTime, datetime64_value DateTime64(3), - json_value String, -- Store JSON as a string + json_value JSON, uuid_value UUID, 
         enum_value Enum8('apple' = 1, 'banana' = 2, 'orange' = 3),
         low_cardinality LowCardinality(String),
+        low_cardinality_date LowCardinality(DateTime),
+        aggregate_function AggregateFunction(sum, Int32),
         array_value Array(Int32),
         map_value Map(String, Int32),
         tuple_value Tuple(Int32, String),
@@ -109,7 +118,7 @@ EOT
 
 # Now run the MySQL test script on the ClickHouse DB
 echo "Run MySQL test"
-mysql --user="$USER" --password="$PASSWORD" --host="$HOST" --port="$PORT" < $TEMP_FILE
+${MYSQL_CLIENT} --user="$USER" --password="$PASSWORD" --host="$HOST" --port="$PORT" < $TEMP_FILE
 
 # Clean up the temp file
 rm $TEMP_FILE

From 0708caeb770e88a4805e084eeb01465c85fa45e2 Mon Sep 17 00:00:00 2001
From: Roman Vasin
Date: Thu, 1 Jun 2023 09:01:01 +0000
Subject: [PATCH 0208/1997] Fix style

---
 src/Common/Config/ConfigProcessor.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp
index b6db53018f4..055a497fb38 100644
--- a/src/Common/Config/ConfigProcessor.cpp
+++ b/src/Common/Config/ConfigProcessor.cpp
@@ -44,6 +44,7 @@ namespace ErrorCodes
 {
     extern const int FILE_DOESNT_EXIST;
     extern const int CANNOT_LOAD_CONFIG;
+    extern const int BAD_ARGUMENTS;
 }
 
 /// Get method for string name. Throw exception for wrong name

From 2ccec017717e57b0eb1bdfb573f6f09e5201446d Mon Sep 17 00:00:00 2001
From: Roman Vasin
Date: Thu, 1 Jun 2023 09:53:31 +0000
Subject: [PATCH 0209/1997] Set correct memory size for encrypt/decrypt

---
 src/Common/Config/ConfigProcessor.cpp | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp
index 055a497fb38..99bea019c3b 100644
--- a/src/Common/Config/ConfigProcessor.cpp
+++ b/src/Common/Config/ConfigProcessor.cpp
@@ -191,13 +191,12 @@ std::string ConfigProcessor::encryptValue(const std::string & codec_name, const
 {
     auto codec = DB::CompressionCodecEncrypted(getEncryptionMethod(codec_name));
 
-    DB::Memory<> memory1;
-    memory1.resize(value.size() + codec.getAdditionalSizeAtTheEndOfBuffer() + codec.getHeaderSize()+100);
-    auto bytes_written = codec.compress(value.data(), static_cast<UInt32>(value.size()), memory1.data());
-    std::string encrypted_value = std::string(memory1.data(), bytes_written);
+    DB::Memory<> memory;
+    memory.resize(codec.getCompressedReserveSize(static_cast<UInt32>(value.size())));
+    auto bytes_written = codec.compress(value.data(), static_cast<UInt32>(value.size()), memory.data());
+    std::string encrypted_value = std::string(memory.data(), bytes_written);
     std::string hex_value;
     boost::algorithm::hex(encrypted_value.begin(), encrypted_value.end(), std::back_inserter(hex_value));
-    LOG_DEBUG(log, "Encrypted value: '{}'.", hex_value);
     return hex_value;
 }
 
@@ -217,10 +216,9 @@ std::string ConfigProcessor::decryptValue(const std::string & codec_name, const
     {
         throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot read encrypted text, check for valid characters [0-9a-fA-F] and length");
     }
 
-    memory.resize(codec.readDecompressedBlockSize(encrypted_value.data()) + codec.getAdditionalSizeAtTheEndOfBuffer());
+    memory.resize(codec.readDecompressedBlockSize(encrypted_value.data()));
     codec.decompress(encrypted_value.data(), static_cast<UInt32>(encrypted_value.size()), memory.data());
     std::string decrypted_value = std::string(memory.data(), memory.size());
-    LOG_DEBUG(log, "Decrypted value '{}'", decrypted_value);
     return decrypted_value;
 }

From 0b4d29ff98916d3ab1f2ea24f4254897b18351cf Mon Sep 17 00:00:00 2001
From: Antonio Andelic
Date: Thu, 1 Jun 2023 12:19:00
+0000 Subject: [PATCH 0210/1997] Add tests for disks --- .../integration/test_keeper_disks/__init__.py | 0 .../configs/enable_keeper.xml | 53 ++++ tests/integration/test_keeper_disks/test.py | 260 ++++++++++++++++++ 3 files changed, 313 insertions(+) create mode 100644 tests/integration/test_keeper_disks/__init__.py create mode 100644 tests/integration/test_keeper_disks/configs/enable_keeper.xml create mode 100644 tests/integration/test_keeper_disks/test.py diff --git a/tests/integration/test_keeper_disks/__init__.py b/tests/integration/test_keeper_disks/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_keeper_disks/configs/enable_keeper.xml b/tests/integration/test_keeper_disks/configs/enable_keeper.xml new file mode 100644 index 00000000000..5814979229c --- /dev/null +++ b/tests/integration/test_keeper_disks/configs/enable_keeper.xml @@ -0,0 +1,53 @@ + + + + + local + /var/lib/clickhouse/coordination/logs/ + + + s3_plain + http://minio1:9001/root/logs/ + minio + minio123 + + + local + /var/lib/clickhouse/coordination/snapshots/ + + + s3_plain + http://minio1:9001/root/snapshots/ + minio + minio123 + + + + + + false + 9181 + 1 + false + + + 5000 + 10000 + trace + 10 + 10 + 1 + 3 + + + + + + + 1 + node + 9234 + + + + \ No newline at end of file diff --git a/tests/integration/test_keeper_disks/test.py b/tests/integration/test_keeper_disks/test.py new file mode 100644 index 00000000000..e978df18bab --- /dev/null +++ b/tests/integration/test_keeper_disks/test.py @@ -0,0 +1,260 @@ +#!/usr/bin/env python3 +import pytest +from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils +from minio.deleteobjects import DeleteObject + +import os + +CURRENT_TEST_DIR = os.path.dirname(os.path.abspath(__file__)) +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance( + "node", main_configs=["configs/enable_keeper.xml"], stay_alive=True, with_minio=True +) + +from kazoo.client import KazooClient, KazooState + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def get_fake_zk(nodename, timeout=30.0): + _fake_zk_instance = KazooClient( + hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout + ) + _fake_zk_instance.start() + return _fake_zk_instance + + +def stop_zk(zk): + try: + if zk: + zk.stop() + zk.close() + except: + pass + + +def stop_clickhouse(cluster, cleanup_disks): + node.stop_clickhouse() + + if not cleanup_disks: + return + + node.exec_in_container(["rm", "-rf", "/var/lib/clickhouse/coordination/logs"]) + node.exec_in_container( + ["rm", "-rf", "/var/lib/clickhouse/coordination/snapshots"] + ) + + s3_objects = list_s3_objects(cluster, prefix="") + if len(s3_objects) == 0: + return + + assert ( + len( + list( + cluster.minio_client.remove_objects( + cluster.minio_bucket, + [DeleteObject(obj) for obj in s3_objects], + ) + ) + ) + == 0 + ) + + +def setup_storage(cluster, storage_config, cleanup_disks): + stop_clickhouse(cluster, cleanup_disks) + node.copy_file_to_container( + os.path.join(CURRENT_TEST_DIR, "configs/enable_keeper.xml"), + "/etc/clickhouse-server/config.d/enable_keeper.xml", + ) + node.replace_in_config( + "/etc/clickhouse-server/config.d/enable_keeper.xml", + "", + storage_config, + ) + node.start_clickhouse() + keeper_utils.wait_until_connected(cluster, node) + + +def setup_local_storage(cluster): + setup_storage( + cluster, + "log_local<\\/log_storage_disk>" + 
"snapshot_local<\\/snapshot_storage_disk>", + cleanup_disks=True, + ) + + +def list_s3_objects(cluster, prefix=""): + minio = cluster.minio_client + prefix_len = len(prefix) + return [ + obj.object_name[prefix_len:] + for obj in minio.list_objects( + cluster.minio_bucket, prefix=prefix, recursive=True + ) + ] + + +def get_local_files(path): + files = node.exec_in_container(["ls", path]).strip().split("\n") + files.sort() + return files + + +def get_local_logs(): + return get_local_files("/var/lib/clickhouse/coordination/logs") + + +def get_local_snapshots(): + return get_local_files("/var/lib/clickhouse/coordination/snapshots") + + +def test_logs_with_disks(started_cluster): + setup_local_storage(started_cluster) + + node_zk = get_fake_zk("node") + try: + node_zk.create("/test") + for _ in range(30): + node_zk.create("/test/somenode", b"somedata", sequence=True) + + stop_zk(node_zk) + + previous_log_files = get_local_logs() + + setup_storage( + started_cluster, + "log_s3_plain<\\/log_storage_disk>" + "log_local<\\/latest_log_storage_disk>" + "snapshot_local<\\/snapshot_storage_disk>", + cleanup_disks=False, + ) + + # all but the latest log should be on S3 + s3_log_files = list_s3_objects(started_cluster, "logs/") + assert set(s3_log_files) == set(previous_log_files[:-1]) + local_log_files = get_local_logs() + assert len(local_log_files) == 1 + assert local_log_files[0] == previous_log_files[-1] + + previous_log_files = s3_log_files + local_log_files + + node_zk = get_fake_zk("node") + + for _ in range(30): + node_zk.create("/test/somenode", b"somedata", sequence=True) + + stop_zk(node_zk) + + log_files = list_s3_objects(started_cluster, "logs/") + local_log_files = get_local_logs() + assert len(local_log_files) == 1 + + log_files.extend(local_log_files) + assert set(log_files) != previous_log_files + + previous_log_files = log_files + + setup_storage( + started_cluster, + "log_s3_plain<\\/old_log_storage_disk>" + "log_local<\\/log_storage_disk>" + "snapshot_local<\\/snapshot_storage_disk>", + cleanup_disks=False, + ) + + local_log_files = get_local_logs() + assert set(local_log_files) == set(previous_log_files) + + node_zk = get_fake_zk("node") + + for child in node_zk.get_children("/test"): + assert node_zk.get(f"/test/{child}")[0] == b"somedata" + + finally: + stop_zk(node_zk) + + +def test_snapshots_with_disks(started_cluster): + setup_local_storage(started_cluster) + + node_zk = get_fake_zk("node") + try: + node_zk.create("/test2") + for _ in range(30): + node_zk.create("/test2/somenode", b"somedata", sequence=True) + + stop_zk(node_zk) + + snapshot_idx = keeper_utils.send_4lw_cmd(cluster, node, "csnp") + node.wait_for_log_line( + f"Created persistent snapshot {snapshot_idx}", look_behind_lines=1000 + ) + + previous_snapshot_files = get_local_snapshots() + + setup_storage( + started_cluster, + "snapshot_s3_plain<\\/snapshot_storage_disk>" + "snapshot_local<\\/latest_snapshot_storage_disk>" + "log_local<\\/log_storage_disk>", + cleanup_disks=False, + ) + + ## all but the latest log should be on S3 + s3_snapshot_files = list_s3_objects(started_cluster, "snapshots/") + assert set(s3_snapshot_files) == set(previous_snapshot_files[:-1]) + local_snapshot_files = get_local_snapshots() + assert len(local_snapshot_files) == 1 + assert local_snapshot_files[0] == previous_snapshot_files[-1] + + previous_snapshot_files = s3_snapshot_files + local_snapshot_files + + node_zk = get_fake_zk("node") + + for _ in range(30): + node_zk.create("/test2/somenode", b"somedata", sequence=True) + + 
stop_zk(node_zk)
+
+        snapshot_idx = keeper_utils.send_4lw_cmd(cluster, node, "csnp")
+        node.wait_for_log_line(
+            f"Created persistent snapshot {snapshot_idx}", look_behind_lines=1000
+        )
+
+        snapshot_files = list_s3_objects(started_cluster, "snapshots/")
+        local_snapshot_files = get_local_snapshots()
+        assert len(local_snapshot_files) == 1
+
+        snapshot_files.extend(local_snapshot_files)
+
+        previous_snapshot_files = snapshot_files
+
+        setup_storage(
+            started_cluster,
+            "<old_snapshot_storage_disk>snapshot_s3_plain<\\/old_snapshot_storage_disk>"
+            "<snapshot_storage_disk>snapshot_local<\\/snapshot_storage_disk>"
+            "<log_storage_disk>log_local<\\/log_storage_disk>",
+            cleanup_disks=False,
+        )
+
+        local_snapshot_files = get_local_snapshots()
+        assert set(local_snapshot_files) == set(previous_snapshot_files)
+
+        node_zk = get_fake_zk("node")
+
+        for child in node_zk.get_children("/test2"):
+            assert node_zk.get(f"/test2/{child}")[0] == b"somedata"
+
+    finally:
+        stop_zk(node_zk)

From a9eb2c3c4eb8354753e611c6eed71fa9b672077c Mon Sep 17 00:00:00 2001
From: Antonio Andelic
Date: Thu, 1 Jun 2023 12:31:06 +0000
Subject: [PATCH 0211/1997] use multiple disks for jepsen

---
 .../resources/keeper_config.xml            | 22 +++++++++++++++++++
 .../src/jepsen/clickhouse/constants.clj    |  2 ++
 .../src/jepsen/clickhouse/keeper/utils.clj |  6 +++--
 3 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/tests/jepsen.clickhouse/resources/keeper_config.xml b/tests/jepsen.clickhouse/resources/keeper_config.xml
index 2ab747fbd71..52f2a0dbdc2 100644
--- a/tests/jepsen.clickhouse/resources/keeper_config.xml
+++ b/tests/jepsen.clickhouse/resources/keeper_config.xml
@@ -8,6 +8,28 @@
         never
+
+    <storage_configuration>
+        <disks>
+            <log_local>
+                <type>local</type>
+                <path>/var/lib/clickhouse/coordination/logs/</path>
+            </log_local>
+            <latest_log_local>
+                <type>local</type>
+                <path>/var/lib/clickhouse/coordination/latest_log/</path>
+            </latest_log_local>
+            <snapshot_local>
+                <type>local</type>
+                <path>/var/lib/clickhouse/coordination/snapshots/</path>
+            </snapshot_local>
+            <latest_snapshot_local>
+                <type>local</type>
+                <path>/var/lib/clickhouse/coordination/latest_snapshot/</path>
+            </latest_snapshot_local>
+        </disks>
+    </storage_configuration>
+
         9181
         {id}

diff --git a/tests/jepsen.clickhouse/src/jepsen/clickhouse/constants.clj b/tests/jepsen.clickhouse/src/jepsen/clickhouse/constants.clj
index 887fc04265c..2a9a7ef6d27 100644
--- a/tests/jepsen.clickhouse/src/jepsen/clickhouse/constants.clj
+++ b/tests/jepsen.clickhouse/src/jepsen/clickhouse/constants.clj
@@ -14,7 +14,9 @@
 (def coordination-data-dir (str data-dir "/coordination"))
 (def coordination-snapshots-dir (str coordination-data-dir "/snapshots"))
+(def coordination-latest-snapshot-dir (str coordination-data-dir "/latest_snapshot"))
 (def coordination-logs-dir (str coordination-data-dir "/logs"))
+(def coordination-latest-log-dir (str coordination-data-dir "/latest_log"))

 (def stderr-file (str logs-dir "/stderr.log"))

diff --git a/tests/jepsen.clickhouse/src/jepsen/clickhouse/keeper/utils.clj b/tests/jepsen.clickhouse/src/jepsen/clickhouse/keeper/utils.clj
index b882af77758..869313079d0 100644
--- a/tests/jepsen.clickhouse/src/jepsen/clickhouse/keeper/utils.clj
+++ b/tests/jepsen.clickhouse/src/jepsen/clickhouse/keeper/utils.clj
@@ -146,6 +146,8 @@
     :--
     :--logger.log (str logs-dir "/clickhouse-keeper.log")
     :--logger.errorlog (str logs-dir "/clickhouse-keeper.err.log")
-    :--keeper_server.snapshot_storage_path coordination-snapshots-dir
-    :--keeper_server.log_storage_path coordination-logs-dir
+    :--keeper_server.snapshot_storage_disk "snapshot_local"
+    :--keeper_server.latest_snapshot_storage_disk "latest_snapshot_local"
+    :--keeper_server.log_storage_disk "log_local"
+    :--keeper_server.latest_log_storage_disk "latest_log_local"
     :--path coordination-data-dir))

From d5add614daa2e6f7f0a18eaada22f5c43a057934 Mon Sep 17 00:00:00 2001
From: Roman Vasin
Date:
Thu, 1 Jun 2023 12:48:45 +0000
Subject: [PATCH 0212/1997] Add text memo for encrypt_decrypt

---
 src/Common/examples/encrypt_decrypt.cpp | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/src/Common/examples/encrypt_decrypt.cpp b/src/Common/examples/encrypt_decrypt.cpp
index cd48963c47a..542e173deb9 100644
--- a/src/Common/examples/encrypt_decrypt.cpp
+++ b/src/Common/examples/encrypt_decrypt.cpp
@@ -3,6 +3,12 @@
 #include
 #include

+/** This test program encrypts or decrypts text values using AES_128_GCM_SIV or AES_256_GCM_SIV codecs.
+  * Keys for codecs are loaded from the <encryption_codecs> section of the configuration file.
+  *
+  * How to use:
+  *   ./encrypt_decrypt /etc/clickhouse-server/config.xml -e AES_128_GCM_SIV test
+  */
 int main(int argc, char ** argv)
 {
@@ -10,17 +16,22 @@ int main(int argc, char ** argv)
 {
     if (argc != 5)
     {
-        std::cerr << "usage: " << argv[0] << " path action codec value" << std::endl;
+        std::cerr << "Usage:" << std::endl
+            << " " << argv[0] << " path action codec value" << std::endl
+            << "path: path to configuration file." << std::endl
+            << "action: -e for encryption and -d for decryption." << std::endl
+            << "codec: AES_128_GCM_SIV or AES_256_GCM_SIV." << std::endl << std::endl
+            << "Example:" << std::endl
+            << " ./encrypt_decrypt /etc/clickhouse-server/config.xml -e AES_128_GCM_SIV test";
         return 3;
     }

     std::string action = argv[2];
     std::string codec_name = argv[3];
     std::string value = argv[4];
+
     DB::ConfigProcessor processor(argv[1], false, true);
-
     auto loaded_config = processor.loadConfig();
-
     DB::CompressionCodecEncrypted::Configuration::instance().tryLoad(*loaded_config.configuration, "encryption_codecs");

     if (action == "-e")

From e8442b7a88467aa68a87c0abfa3983131c7b8c75 Mon Sep 17 00:00:00 2001
From: Antonio Andelic
Date: Thu, 1 Jun 2023 12:54:00 +0000
Subject: [PATCH 0213/1997] Add finalize calls

---
 src/Coordination/Changelog.cpp             | 9 ++++++++-
 src/Coordination/KeeperSnapshotManager.cpp | 6 ++++--
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/Coordination/Changelog.cpp b/src/Coordination/Changelog.cpp
index bcce9982e04..97f2b437377 100644
--- a/src/Coordination/Changelog.cpp
+++ b/src/Coordination/Changelog.cpp
@@ -43,7 +43,8 @@ void moveFileBetweenDisks(DiskPtr disk_from, ChangelogFileDescriptionPtr descrip
     auto from_path = fs::path(description->path);
     auto tmp_changelog_name = from_path.parent_path() / (std::string{tmp_prefix} + from_path.filename().string());
     {
-        disk_to->writeFile(tmp_changelog_name);
+        auto buf = disk_to->writeFile(tmp_changelog_name);
+        buf->finalize();
     }
     disk_from->copyFile(from_path, *disk_to, path_to, {});
     disk_to->removeFile(tmp_changelog_name);
@@ -342,9 +343,15 @@ private:
     }

     if (log_file_settings.compress_logs)
+    {
         compressed_buffer.reset();
+    }
     else
+    {
+        chassert(file_buf);
+        file_buf->finalize();
         file_buf.reset();
+    }
 }

 WriteBuffer & getBuffer()

diff --git a/src/Coordination/KeeperSnapshotManager.cpp b/src/Coordination/KeeperSnapshotManager.cpp
index 4b9a34c07e9..8dd6d27c379 100644
--- a/src/Coordination/KeeperSnapshotManager.cpp
+++ b/src/Coordination/KeeperSnapshotManager.cpp
@@ -43,7 +43,8 @@ namespace
     auto from_path = fs::path(path_from);
     auto tmp_snapshot_name = from_path.parent_path() / (std::string{tmp_prefix} + from_path.filename().string());
     {
-        disk_to->writeFile(tmp_snapshot_name);
+        auto buf = disk_to->writeFile(tmp_snapshot_name);
+        buf->finalize();
     }
     disk_from->copyFile(from_path, *disk_to, path_to, {});
     disk_to->removeFile(tmp_snapshot_name);
@@ -784,7 +785,8 @@ SnapshotFileInfo
KeeperSnapshotManager::serializeSnapshotToDisk(const KeeperStor auto disk = getLatestSnapshotDisk(); { - disk->writeFile(tmp_snapshot_file_name); + auto buf = disk->writeFile(tmp_snapshot_file_name); + buf->finalize(); } auto writer = disk->writeFile(snapshot_file_name); From e269235dbcf32f7e507370e1bff74a202a33446c Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Thu, 1 Jun 2023 13:09:21 +0000 Subject: [PATCH 0214/1997] Make decryptRecursive() go through element nodes only --- src/Common/Config/ConfigProcessor.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index 99bea019c3b..df25a9a3825 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -231,8 +231,6 @@ void ConfigProcessor::decryptRecursive(Poco::XML::Node * config_root) Element & element = dynamic_cast(*node); if (element.hasAttribute("encryption_codec")) { - LOG_DEBUG(log, "Encrypted node <{}>", node->nodeName()); - const NodeListPtr children = element.childNodes(); if (children->length() != 1) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Encrypted node {} should have only one text node", node->nodeName()); @@ -244,10 +242,8 @@ void ConfigProcessor::decryptRecursive(Poco::XML::Node * config_root) auto encryption_codec = element.getAttribute("encryption_codec"); text_node->setNodeValue(decryptValue(encryption_codec, text_node->getNodeValue())); } + decryptRecursive(node); } - - decryptRecursive(node); - node = node->nextSibling(); } } From 92ee24acd33dc6f62c369dcbd10c90f126b72613 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 1 Jun 2023 13:10:50 +0000 Subject: [PATCH 0215/1997] Automatic style fix --- tests/integration/test_keeper_disks/test.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/integration/test_keeper_disks/test.py b/tests/integration/test_keeper_disks/test.py index e978df18bab..11bb215be54 100644 --- a/tests/integration/test_keeper_disks/test.py +++ b/tests/integration/test_keeper_disks/test.py @@ -49,9 +49,7 @@ def stop_clickhouse(cluster, cleanup_disks): return node.exec_in_container(["rm", "-rf", "/var/lib/clickhouse/coordination/logs"]) - node.exec_in_container( - ["rm", "-rf", "/var/lib/clickhouse/coordination/snapshots"] - ) + node.exec_in_container(["rm", "-rf", "/var/lib/clickhouse/coordination/snapshots"]) s3_objects = list_s3_objects(cluster, prefix="") if len(s3_objects) == 0: From ffd4f7f196db8f827ea3f8cc5edeb6b0a1082d72 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 1 Jun 2023 14:39:01 +0000 Subject: [PATCH 0216/1997] Add better support for state disks --- src/Coordination/KeeperDispatcher.cpp | 2 -- src/Coordination/KeeperStateManager.cpp | 33 ++++++++++++++++++++----- 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index a5b0f0b2c01..9d9df5c7f30 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -38,8 +38,6 @@ namespace ProfileEvents extern const Event MemoryAllocatorPurgeTimeMicroseconds; } -namespace fs = std::filesystem; - namespace DB { diff --git a/src/Coordination/KeeperStateManager.cpp b/src/Coordination/KeeperStateManager.cpp index 33e62684d46..450fd04b61d 100644 --- a/src/Coordination/KeeperStateManager.cpp +++ b/src/Coordination/KeeperStateManager.cpp @@ -23,6 +23,8 @@ namespace ErrorCodes namespace { +const std::string copy_lock_file = "STATE_COPY_LOCK"; + bool 
isLocalhost(const std::string & hostname) { try @@ -324,7 +326,13 @@ void KeeperStateManager::save_state(const nuraft::srv_state & state) auto disk = getStateFileDisk(); if (disk->exists(server_state_file_name)) - disk->moveFile(server_state_file_name, old_path); + { + auto buf = disk->writeFile(copy_lock_file); + buf->finalize(); + disk->copyFile(server_state_file_name, *disk, old_path); + disk->removeFile(copy_lock_file); + disk->removeFile(old_path); + } auto server_state_file = disk->writeFile(server_state_file_name); auto buf = state.serialize(); @@ -339,6 +347,7 @@ void KeeperStateManager::save_state(const nuraft::srv_state & state) server_state_file->write(reinterpret_cast(buf->data_begin()), buf->size()); server_state_file->sync(); + server_state_file->finalize(); disk->removeFileIfExists(old_path); } @@ -417,13 +426,25 @@ nuraft::ptr KeeperStateManager::read_state() if (disk->exists(old_path)) { - auto state = try_read_file(old_path); - if (state) + if (disk->exists(copy_lock_file)) { - disk->moveFile(old_path, server_state_file_name); - return state; + disk->removeFile(old_path); + disk->removeFile(copy_lock_file); } - disk->removeFile(old_path); + else + { + auto state = try_read_file(old_path); + if (state) + { + disk->moveFile(old_path, server_state_file_name); + return state; + } + disk->removeFile(old_path); + } + } + else if (disk->exists(copy_lock_file)) + { + disk->removeFile(copy_lock_file); } LOG_WARNING(logger, "No state was read"); From 92859ebb3baad37e6538118f5e2c5c8016754b7f Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 1 Jun 2023 21:15:15 +0000 Subject: [PATCH 0217/1997] Fixing more tests. --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 2 +- src/Functions/in.cpp | 12 +- src/Interpreters/ActionsVisitor.cpp | 4 +- src/Interpreters/GlobalSubqueriesVisitor.h | 2 +- src/Interpreters/PreparedSets.cpp | 119 ++++++++++-------- src/Interpreters/PreparedSets.h | 25 ++-- src/Interpreters/Set.cpp | 30 ++++- src/Interpreters/Set.h | 7 +- src/Planner/CollectSets.cpp | 2 +- .../CreateSetAndFilterOnTheFlyStep.cpp | 2 +- src/Processors/QueryPlan/CreatingSetsStep.cpp | 8 +- src/Processors/QueryPlan/CreatingSetsStep.h | 6 +- .../Transforms/CreatingSetsTransform.cpp | 11 +- .../Transforms/CreatingSetsTransform.h | 6 +- src/QueryPipeline/QueryPipelineBuilder.cpp | 10 +- src/QueryPipeline/QueryPipelineBuilder.h | 10 +- src/Storages/MergeTree/RPNBuilder.cpp | 53 ++++---- src/Storages/StorageSet.cpp | 4 +- .../01786_explain_merge_tree.reference | 4 +- 19 files changed, 195 insertions(+), 122 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 05b3a545dca..b39aff86d32 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -5141,7 +5141,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi SizeLimits size_limits_for_set = {settings.max_rows_in_set, settings.max_bytes_in_set, settings.set_overflow_mode}; - auto set = std::make_shared(size_limits_for_set, true /*fill_set_elements*/, settings.transform_null_in); + auto set = std::make_shared(size_limits_for_set, false /*fill_set_elements*/, 0, settings.transform_null_in); set->setHeader(result_block.cloneEmpty().getColumnsWithTypeAndName()); set->insertFromBlock(result_block.getColumnsWithTypeAndName()); diff --git a/src/Functions/in.cpp b/src/Functions/in.cpp index 6a88a413c63..0e576b92aad 100644 --- a/src/Functions/in.cpp +++ b/src/Functions/in.cpp @@ -55,13 +55,9 @@ public: /// It 
is needed to perform type analysis without creation of set. static constexpr auto name = FunctionInName::name; - FunctionIn(SizeLimits size_limits_, bool transform_null_in_) - : size_limits(std::move(size_limits_)), transform_null_in(transform_null_in_) {} - - static FunctionPtr create(ContextPtr context) + static FunctionPtr create(ContextPtr) { - const auto & settings = context->getSettingsRef(); - return std::make_shared(FutureSet::getSizeLimitsForSet(settings, false), settings.transform_null_in); + return std::make_shared(); } String getName() const override @@ -182,10 +178,6 @@ public: return res; } - -private: - SizeLimits size_limits; - bool transform_null_in; }; template diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 142b6f73b75..59bbc74ca3a 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -1435,7 +1435,7 @@ FutureSetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool // String set_id = right_in_operand->getColumnName(); //bool transform_null_in = data.getContext()->getSettingsRef().transform_null_in; SubqueryForSet subquery_for_set; // = data.prepared_sets->createOrGetSubquery(set_id, set_key, data.set_size_limit, transform_null_in); - subquery_for_set.key = right_in_operand->getColumnName(); + subquery_for_set.key = set_key.toString(); //right_in_operand->getColumnName(); /** The following happens for GLOBAL INs or INs: * - in the addExternalStorage function, the IN (SELECT ...) subquery is replaced with IN _data1, @@ -1450,7 +1450,7 @@ FutureSetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool subquery_for_set.createSource(*interpreter); } - return data.prepared_sets->addFromSubquery(set_key, std::move(subquery_for_set), std::move(external_table_set)); + return data.prepared_sets->addFromSubquery(set_key, std::move(subquery_for_set), data.getContext()->getSettingsRef(), std::move(external_table_set)); } else { diff --git a/src/Interpreters/GlobalSubqueriesVisitor.h b/src/Interpreters/GlobalSubqueriesVisitor.h index 624b33ea66b..cbdfb826f85 100644 --- a/src/Interpreters/GlobalSubqueriesVisitor.h +++ b/src/Interpreters/GlobalSubqueriesVisitor.h @@ -197,7 +197,7 @@ public: subquery_for_set.createSource(*interpreter); //std::cerr << reinterpret_cast(prepared_sets.get()) << std::endl; - auto future_set = prepared_sets->addFromSubquery(set_key, std::move(subquery_for_set), nullptr); + auto future_set = prepared_sets->addFromSubquery(set_key, std::move(subquery_for_set), getContext()->getSettingsRef(), nullptr); // std::cerr << "... 
Future set " << reinterpret_cast(external_storage_holder.get()) << " " << reinterpret_cast(future_set.get()) << std::endl; external_storage_holder->future_set = std::move(future_set); } diff --git a/src/Interpreters/PreparedSets.cpp b/src/Interpreters/PreparedSets.cpp index c71273e0baa..1e475e8403d 100644 --- a/src/Interpreters/PreparedSets.cpp +++ b/src/Interpreters/PreparedSets.cpp @@ -132,10 +132,10 @@ FutureSetPtr PreparedSets::addFromTuple(const PreparedSetKey & key, Block block, return it->second; } -FutureSetPtr PreparedSets::addFromSubquery(const PreparedSetKey & key, SubqueryForSet subquery, FutureSetPtr external_table_set) +FutureSetPtr PreparedSets::addFromSubquery(const PreparedSetKey & key, SubqueryForSet subquery, const Settings & settings, FutureSetPtr external_table_set) { auto id = subquery.key; - auto from_subquery = std::make_shared(std::move(subquery), std::move(external_table_set)); + auto from_subquery = std::make_shared(std::move(subquery), std::move(external_table_set), settings.transform_null_in); auto [it, inserted] = sets.emplace(key, from_subquery); if (!inserted) @@ -210,6 +210,8 @@ std::variant, SharedSet> PreparedSetsCache::findOrPromiseTo { std::lock_guard lock(cache_mutex); + // std::cerr << "PreparedSetsCache::findOrPromiseToBuild " << key << "\n" << StackTrace().toString() << std::endl; + auto it = cache.find(key); if (it != cache.end()) { @@ -231,10 +233,10 @@ SetPtr FutureSetFromSubquery::buildOrderedSetInplace(const ContextPtr & context) if (!context->getSettingsRef().use_index_for_in_with_subqueries) return nullptr; - if (set) + if (subquery.set) { - if (set->hasExplicitSetElements()) - return set; + if (subquery.set->hasExplicitSetElements()) + return subquery.set; return nullptr; } @@ -242,7 +244,7 @@ SetPtr FutureSetFromSubquery::buildOrderedSetInplace(const ContextPtr & context) // std::cerr << "... 
external_table_set " << reinterpret_cast(external_table_set.get()) << std::endl; if (external_table_set) - return set = external_table_set->buildOrderedSetInplace(context); + return subquery.set = external_table_set->buildOrderedSetInplace(context); auto plan = buildPlan(context, true); if (!plan) @@ -255,37 +257,44 @@ SetPtr FutureSetFromSubquery::buildOrderedSetInplace(const ContextPtr & context) CompletedPipelineExecutor executor(pipeline); executor.execute(); - return set; + subquery.set->checkIsCreated(); + + return subquery.set; +} + +static SizeLimits getSizeLimitsForSet(const Settings & settings) +{ + return SizeLimits(settings.max_rows_in_set, settings.max_bytes_in_set, settings.set_overflow_mode); } std::unique_ptr FutureSetFromSubquery::buildPlan(const ContextPtr & context, bool create_ordered_set) { - if (set) + if (subquery.set) return nullptr; // std::cerr << StackTrace().toString() << std::endl; - auto set_cache = context->getPreparedSetsCache(); - if (set_cache) - { - auto from_cache = set_cache->findOrPromiseToBuild(subquery.key); - if (from_cache.index() == 0) - { - subquery.promise_to_fill_set = std::move(std::get<0>(from_cache)); - } - else - { - LOG_TRACE(&Poco::Logger::get("FutureSetFromSubquery"), "Waiting for set, key: {}", subquery.key); - set = std::get<1>(from_cache).get(); - return nullptr; - } - } + // auto set_cache = context->getPreparedSetsCache(); + // if (set_cache) + // { + // auto from_cache = set_cache->findOrPromiseToBuild(subquery.key); + // if (from_cache.index() == 0) + // { + // subquery.promise_to_fill_set = std::move(std::get<0>(from_cache)); + // } + // else + // { + // LOG_TRACE(&Poco::Logger::get("FutureSetFromSubquery"), "Waiting for set, key: {}", subquery.key); + // set = std::get<1>(from_cache).get(); + // return nullptr; + // } + // } const auto & settings = context->getSettingsRef(); - auto size_limits = getSizeLimitsForSet(settings, create_ordered_set); + auto size_limits = getSizeLimitsForSet(settings); - subquery.set = set = std::make_shared(size_limits, create_ordered_set, settings.transform_null_in); + subquery.set = std::make_shared(size_limits, create_ordered_set, settings.use_index_for_in_with_subqueries_max_values, settings.transform_null_in); auto plan = subquery.detachSource(); auto description = subquery.key; @@ -297,7 +306,8 @@ std::unique_ptr FutureSetFromSubquery::buildPlan(const ContextPtr & c auto creating_set = std::make_unique( plan->getCurrentDataStream(), description, - std::move(subquery), + subquery, + shared_from_this(), SizeLimits(settings.max_rows_to_transfer, settings.max_bytes_to_transfer, settings.transfer_overflow_mode), context); creating_set->setStepDescription("Create set for subquery"); @@ -305,31 +315,25 @@ std::unique_ptr FutureSetFromSubquery::buildPlan(const ContextPtr & c return plan; } +// static SizeLimits getSizeLimitsForOrderedSet(const Settings & settings) +// { +// if (settings.use_index_for_in_with_subqueries_max_values && +// settings.use_index_for_in_with_subqueries_max_values < settings.max_rows_in_set) +// return getSizeLimitsForUnorderedSet(settings); -static SizeLimits getSizeLimitsForUnorderedSet(const Settings & settings) -{ - return SizeLimits(settings.max_rows_in_set, settings.max_bytes_in_set, settings.set_overflow_mode); -} +// return SizeLimits(settings.use_index_for_in_with_subqueries_max_values, settings.max_bytes_in_set, OverflowMode::BREAK); +// } -static SizeLimits getSizeLimitsForOrderedSet(const Settings & settings) -{ - if 
(settings.use_index_for_in_with_subqueries_max_values && - settings.use_index_for_in_with_subqueries_max_values < settings.max_rows_in_set) - return getSizeLimitsForUnorderedSet(settings); - - return SizeLimits(settings.use_index_for_in_with_subqueries_max_values, settings.max_bytes_in_set, OverflowMode::BREAK); -} - -SizeLimits FutureSet::getSizeLimitsForSet(const Settings & settings, bool ordered_set) -{ - return ordered_set ? getSizeLimitsForOrderedSet(settings) : getSizeLimitsForUnorderedSet(settings); -} +// SizeLimits FutureSet::getSizeLimitsForSet(const Settings & settings, bool ordered_set) +// { +// return ordered_set ? getSizeLimitsForOrderedSet(settings) : getSizeLimitsForUnorderedSet(settings); +// } FutureSetFromTuple::FutureSetFromTuple(Block block, const Settings & settings) { bool create_ordered_set = false; - auto size_limits = getSizeLimitsForSet(settings, create_ordered_set); - set = std::make_shared(size_limits, create_ordered_set, settings.transform_null_in); + auto size_limits = getSizeLimitsForSet(settings); + set = std::make_shared(size_limits, create_ordered_set, settings.use_index_for_in_with_subqueries_max_values, settings.transform_null_in); set->setHeader(block.cloneEmpty().getColumnsWithTypeAndName()); Columns columns; @@ -345,8 +349,16 @@ FutureSetFromTuple::FutureSetFromTuple(Block block, const Settings & settings) //block(std::move(block_)) } -FutureSetFromSubquery::FutureSetFromSubquery(SubqueryForSet subquery_, FutureSetPtr external_table_set_) - : subquery(std::move(subquery_)), external_table_set(std::move(external_table_set_)) {} +FutureSetFromSubquery::FutureSetFromSubquery(SubqueryForSet subquery_, FutureSetPtr external_table_set_, bool transform_null_in_) + : subquery(std::move(subquery_)), external_table_set(std::move(external_table_set_)), transform_null_in(transform_null_in_) {} + +DataTypes FutureSetFromSubquery::getTypes() const +{ + if (subquery.set) + return subquery.set->getElementsTypes(); + + return Set::getElementTypes(subquery.source->getCurrentDataStream().header.getColumnsWithTypeAndName(), transform_null_in); +} FutureSetFromStorage::FutureSetFromStorage(SetPtr set_) : set(std::move(set_)) {} @@ -356,13 +368,14 @@ SetPtr FutureSetFromTuple::buildOrderedSetInplace(const ContextPtr & context) return set; const auto & settings = context->getSettingsRef(); - auto limits = getSizeLimitsForSet(settings, true); + size_t max_values = settings.use_index_for_in_with_subqueries_max_values; + bool too_many_values = max_values && max_values < set->getTotalRowCount(); + if (!too_many_values) + { + set->initSetElements(); + set->appendSetElements(set_key_columns); + } - if (!limits.check(set->getTotalRowCount(), set->getTotalByteCount(), "IN-set", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED)) - return nullptr; - - set->initSetElements(); - set->appendSetElements(set_key_columns); return set; } diff --git a/src/Interpreters/PreparedSets.h b/src/Interpreters/PreparedSets.h index c510e690e25..fa7f7069994 100644 --- a/src/Interpreters/PreparedSets.h +++ b/src/Interpreters/PreparedSets.h @@ -72,7 +72,9 @@ public: virtual SetPtr buildOrderedSetInplace(const ContextPtr & context) = 0; virtual std::unique_ptr build(const ContextPtr & context) = 0; - static SizeLimits getSizeLimitsForSet(const Settings & settings, bool ordered_set); + virtual DataTypes getTypes() const = 0; + + // static SizeLimits getSizeLimitsForSet(const Settings & settings, bool ordered_set); }; using FutureSetPtr = std::shared_ptr; @@ -90,6 +92,8 @@ public: std::unique_ptr build(const 
ContextPtr &) override;

+    DataTypes getTypes() const override { return set->getElementsTypes(); }
+
     /// void buildForTuple(SizeLimits size_limits, bool transform_null_in);

 private:
@@ -129,14 +133,14 @@ public:
     std::unique_ptr<QueryPlan> source;
 };

-class FutureSetFromSubquery : public FutureSet
+class FutureSetFromSubquery : public FutureSet, public std::enable_shared_from_this<FutureSetFromSubquery>
 {
 public:
-    FutureSetFromSubquery(SubqueryForSet subquery_, FutureSetPtr external_table_set_);
+    FutureSetFromSubquery(SubqueryForSet subquery_, FutureSetPtr external_table_set_, bool transform_null_in_);

-    bool isReady() const override { return set != nullptr; }
+    bool isReady() const override { return subquery.set != nullptr && subquery.set->isCreated(); }
     bool isFilled() const override { return isReady(); }
-    SetPtr get() const override { return set; }
+    SetPtr get() const override { return subquery.set; }

     SetPtr buildOrderedSetInplace(const ContextPtr & context) override;

@@ -145,12 +149,15 @@ public:
         return buildPlan(context, false);
     }

-    void addStorage(StoragePtr storage) { subquery.table = std::move(storage); }
+    DataTypes getTypes() const override;
+
+    // void addStorage(StoragePtr storage) { subquery.table = std::move(storage); }

 private:
-    SetPtr set;
+    //SetPtr set;
     SubqueryForSet subquery;
     FutureSetPtr external_table_set;
+    bool transform_null_in;

     std::unique_ptr<QueryPlan> buildPlan(const ContextPtr & context, bool create_ordered_set);
 };

@@ -169,6 +176,8 @@ public:
         return set->hasExplicitSetElements() ? set : nullptr;
     }

+    DataTypes getTypes() const override { return set->getElementsTypes(); }
+
     std::unique_ptr<QueryPlan> build(const ContextPtr &) override { return nullptr; }

 private:
@@ -240,7 +249,7 @@ public:
     FutureSetPtr addFromStorage(const PreparedSetKey & key, SetPtr set_);
     FutureSetPtr addFromTuple(const PreparedSetKey & key, Block block, const Settings & settings);

-    FutureSetPtr addFromSubquery(const PreparedSetKey & key, SubqueryForSet subquery, FutureSetPtr external_table_set);
+    FutureSetPtr addFromSubquery(const PreparedSetKey & key, SubqueryForSet subquery, const Settings & settings, FutureSetPtr external_table_set);

     //void addStorageToSubquery(const String & subquery_id, StoragePtr external_storage);

diff --git a/src/Interpreters/Set.cpp b/src/Interpreters/Set.cpp
index bd9cafc66eb..52e9a3a9bcb 100644
--- a/src/Interpreters/Set.cpp
+++ b/src/Interpreters/Set.cpp
@@ -103,6 +103,25 @@ void NO_INLINE Set::insertFromBlockImplCase(
 }

+DataTypes Set::getElementTypes(const ColumnsWithTypeAndName & header, bool transform_null_in)
+{
+    DataTypes data_types;
+    data_types.reserve(header.size());
+
+    for (const auto & column : header)
+    {
+        data_types.push_back(column.type);
+        if (const auto * low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(data_types.back().get()))
+            data_types.back() = low_cardinality_type->getDictionaryType();
+
+        if (!transform_null_in)
+            data_types.back() = removeNullable(data_types.back());
+    }
+
+    return data_types;
+}
+
+
 void Set::setHeader(const ColumnsWithTypeAndName & header)
 {
     std::lock_guard lock(rwlock);
@@ -190,7 +209,16 @@ bool Set::insertFromColumns(const Columns & columns)
     bool inserted = insertFromColumns(columns, holder);

     if (inserted && fill_set_elements)
-        appendSetElements(holder);
+    {
+        if (max_elements_to_fill && max_elements_to_fill < data.getTotalRowCount())
+        {
+            /// Drop the filled elements
+            fill_set_elements = false;
+            set_elements.clear();
+        }
+        else
+            appendSetElements(holder);
+    }

     return inserted;
 }

diff --git a/src/Interpreters/Set.h b/src/Interpreters/Set.h
index c2931d79de0..9b76ab30a1b
100644 --- a/src/Interpreters/Set.h +++ b/src/Interpreters/Set.h @@ -30,9 +30,9 @@ public: /// (that is useful only for checking that some value is in the set and may not store the original values), /// store all set elements in explicit form. /// This is needed for subsequent use for index. - Set(const SizeLimits & limits_, bool fill_set_elements_, bool transform_null_in_) + Set(const SizeLimits & limits_, bool fill_set_elements_, size_t max_elements_to_fill_, bool transform_null_in_) : log(&Poco::Logger::get("Set")), - limits(limits_), fill_set_elements(fill_set_elements_), transform_null_in(transform_null_in_) + limits(limits_), fill_set_elements(fill_set_elements_), max_elements_to_fill(max_elements_to_fill_), transform_null_in(transform_null_in_) { } @@ -90,6 +90,8 @@ public: bool areTypesEqual(size_t set_type_idx, const DataTypePtr & other_type) const; void checkTypesEqual(size_t set_type_idx, const DataTypePtr & other_type) const; + static DataTypes getElementTypes(const ColumnsWithTypeAndName & header, bool transform_null_in); + private: size_t keys_size = 0; Sizes key_sizes; @@ -126,6 +128,7 @@ private: /// Do we need to additionally store all elements of the set in explicit form for subsequent use for index. bool fill_set_elements; + size_t max_elements_to_fill; /// If true, insert NULL values to set. bool transform_null_in; diff --git a/src/Planner/CollectSets.cpp b/src/Planner/CollectSets.cpp index 913cf1c6ee3..d264810645a 100644 --- a/src/Planner/CollectSets.cpp +++ b/src/Planner/CollectSets.cpp @@ -110,7 +110,7 @@ public: subquery_for_set.source = std::make_unique(std::move(subquery_planner).extractQueryPlan()); /// TODO - sets.addFromSubquery(set_key, std::move(subquery_for_set), nullptr); + sets.addFromSubquery(set_key, std::move(subquery_for_set), settings, nullptr); //planner_context.registerSet(set_key, PlannerSet(in_second_argument)); } diff --git a/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.cpp b/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.cpp index fe362f64b96..ebd58c3dc95 100644 --- a/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.cpp +++ b/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.cpp @@ -105,7 +105,7 @@ CreateSetAndFilterOnTheFlyStep::CreateSetAndFilterOnTheFlyStep( : ITransformingStep(input_stream_, input_stream_.header, getTraits()) , column_names(column_names_) , max_rows_in_set(max_rows_in_set_) - , own_set(std::make_shared(SizeLimits(max_rows_in_set, 0, OverflowMode::BREAK), false, true)) + , own_set(std::make_shared(SizeLimits(max_rows_in_set, 0, OverflowMode::BREAK), false, 0, true)) , filtering_set(nullptr) , crosswise_connection(crosswise_connection_) , position(position_) diff --git a/src/Processors/QueryPlan/CreatingSetsStep.cpp b/src/Processors/QueryPlan/CreatingSetsStep.cpp index 38506412531..0909ee9f1eb 100644 --- a/src/Processors/QueryPlan/CreatingSetsStep.cpp +++ b/src/Processors/QueryPlan/CreatingSetsStep.cpp @@ -35,12 +35,14 @@ static ITransformingStep::Traits getTraits() CreatingSetStep::CreatingSetStep( const DataStream & input_stream_, String description_, - SubqueryForSet subquery_for_set_, + SubqueryForSet & subquery_for_set_, + FutureSetPtr set_, SizeLimits network_transfer_limits_, ContextPtr context_) : ITransformingStep(input_stream_, Block{}, getTraits()) , description(std::move(description_)) - , subquery_for_set(std::move(subquery_for_set_)) + , subquery_for_set(subquery_for_set_) + , set(std::move(set_)) , network_transfer_limits(std::move(network_transfer_limits_)) , 
context(std::move(context_)) { @@ -48,7 +50,7 @@ CreatingSetStep::CreatingSetStep( void CreatingSetStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { - pipeline.addCreatingSetsTransform(getOutputStream().header, std::move(subquery_for_set), network_transfer_limits, context->getPreparedSetsCache()); + pipeline.addCreatingSetsTransform(getOutputStream().header, subquery_for_set, std::move(set), network_transfer_limits, context->getPreparedSetsCache()); } void CreatingSetStep::updateOutputStream() diff --git a/src/Processors/QueryPlan/CreatingSetsStep.h b/src/Processors/QueryPlan/CreatingSetsStep.h index 244bb27ba78..2a6cb43c45a 100644 --- a/src/Processors/QueryPlan/CreatingSetsStep.h +++ b/src/Processors/QueryPlan/CreatingSetsStep.h @@ -15,7 +15,8 @@ public: CreatingSetStep( const DataStream & input_stream_, String description_, - SubqueryForSet subquery_for_set_, + SubqueryForSet & subquery_for_set_, + FutureSetPtr set_, SizeLimits network_transfer_limits_, ContextPtr context_); @@ -30,7 +31,8 @@ private: void updateOutputStream() override; String description; - SubqueryForSet subquery_for_set; + SubqueryForSet & subquery_for_set; + FutureSetPtr set; SizeLimits network_transfer_limits; ContextPtr context; }; diff --git a/src/Processors/Transforms/CreatingSetsTransform.cpp b/src/Processors/Transforms/CreatingSetsTransform.cpp index 24e17ff210f..f6e6f7a191e 100644 --- a/src/Processors/Transforms/CreatingSetsTransform.cpp +++ b/src/Processors/Transforms/CreatingSetsTransform.cpp @@ -25,11 +25,13 @@ CreatingSetsTransform::~CreatingSetsTransform() = default; CreatingSetsTransform::CreatingSetsTransform( Block in_header_, Block out_header_, - SubqueryForSet subquery_for_set_, + SubqueryForSet & subquery_for_set_, + FutureSetPtr set_, SizeLimits network_transfer_limits_, PreparedSetsCachePtr prepared_sets_cache_) : IAccumulatingTransform(std::move(in_header_), std::move(out_header_)) - , subquery(std::move(subquery_for_set_)) + , subquery(subquery_for_set_) + , set(std::move(set_)) , network_transfer_limits(std::move(network_transfer_limits_)) , prepared_sets_cache(std::move(prepared_sets_cache_)) { @@ -61,6 +63,7 @@ void CreatingSetsTransform::startSubquery() auto from_cache = prepared_sets_cache->findOrPromiseToBuild(subquery.key); if (from_cache.index() == 0) { + LOG_TRACE(log, "Building set, key: {}", subquery.key); promise_to_build = std::move(std::get<0>(from_cache)); } else @@ -74,8 +77,8 @@ void CreatingSetsTransform::startSubquery() continue; } - subquery.promise_to_fill_set.set_value(ready_set); - subquery.set.reset(); + //subquery.promise_to_fill_set.set_value(ready_set); + subquery.set = ready_set; //.reset(); done_with_set = true; set_from_cache = true; } diff --git a/src/Processors/Transforms/CreatingSetsTransform.h b/src/Processors/Transforms/CreatingSetsTransform.h index ef586b0655c..27c330bdbc3 100644 --- a/src/Processors/Transforms/CreatingSetsTransform.h +++ b/src/Processors/Transforms/CreatingSetsTransform.h @@ -29,7 +29,8 @@ public: CreatingSetsTransform( Block in_header_, Block out_header_, - SubqueryForSet subquery_for_set_, + SubqueryForSet & subquery_for_set_, + FutureSetPtr set_, SizeLimits network_transfer_limits_, PreparedSetsCachePtr prepared_sets_cache_); @@ -42,7 +43,8 @@ public: Chunk generate() override; private: - SubqueryForSet subquery; + SubqueryForSet & subquery; + FutureSetPtr set; std::optional> promise_to_build; QueryPipeline table_out; diff --git a/src/QueryPipeline/QueryPipelineBuilder.cpp 
b/src/QueryPipeline/QueryPipelineBuilder.cpp index 4898501b980..08d90899f4a 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.cpp +++ b/src/QueryPipeline/QueryPipelineBuilder.cpp @@ -569,14 +569,20 @@ std::unique_ptr QueryPipelineBuilder::joinPipelinesRightLe return left; } -void QueryPipelineBuilder::addCreatingSetsTransform(const Block & res_header, SubqueryForSet subquery_for_set, const SizeLimits & limits, PreparedSetsCachePtr prepared_sets_cache) +void QueryPipelineBuilder::addCreatingSetsTransform( + const Block & res_header, + SubqueryForSet & subquery_for_set, + FutureSetPtr set, + const SizeLimits & limits, + PreparedSetsCachePtr prepared_sets_cache) { resize(1); auto transform = std::make_shared( getHeader(), res_header, - std::move(subquery_for_set), + subquery_for_set, + std::move(set), limits, std::move(prepared_sets_cache)); diff --git a/src/QueryPipeline/QueryPipelineBuilder.h b/src/QueryPipeline/QueryPipelineBuilder.h index 85b6f5c6772..da8443a7e33 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.h +++ b/src/QueryPipeline/QueryPipelineBuilder.h @@ -33,6 +33,9 @@ class TableJoin; class QueryPipelineBuilder; using QueryPipelineBuilderPtr = std::unique_ptr; +class FutureSet; +using FutureSetPtr = std::shared_ptr; + class PreparedSetsCache; using PreparedSetsCachePtr = std::shared_ptr; @@ -141,7 +144,12 @@ public: /// This is used for CreatingSets. void addPipelineBefore(QueryPipelineBuilder pipeline); - void addCreatingSetsTransform(const Block & res_header, SubqueryForSet subquery_for_set, const SizeLimits & limits, PreparedSetsCachePtr prepared_sets_cache); + void addCreatingSetsTransform( + const Block & res_header, + SubqueryForSet & subquery_for_set, + FutureSetPtr set, + const SizeLimits & limits, + PreparedSetsCachePtr prepared_sets_cache); PipelineExecutorPtr execute(); diff --git a/src/Storages/MergeTree/RPNBuilder.cpp b/src/Storages/MergeTree/RPNBuilder.cpp index cc7ec45be6a..c9feb533ea3 100644 --- a/src/Storages/MergeTree/RPNBuilder.cpp +++ b/src/Storages/MergeTree/RPNBuilder.cpp @@ -353,34 +353,34 @@ FutureSetPtr RPNBuilderTreeNode::tryGetPreparedSet( { const auto & prepared_sets = getTreeContext().getPreparedSets(); + /// We have `PreparedSetKey::forLiteral` but it is useless here as we don't have enough information + /// about types in left argument of the IN operator. Instead, we manually iterate through all the sets + /// and find the one for the right arg based on the AST structure (getTreeHash), after that we check + /// that the types it was prepared with are compatible with the types of the primary key. + auto types_match = [&indexes_mapping, &data_types](const DataTypes & set_types) + { + assert(indexes_mapping.size() == data_types.size()); + + for (size_t i = 0; i < indexes_mapping.size(); ++i) + { + if (indexes_mapping[i].tuple_index >= set_types.size()) + return false; + + auto lhs = recursiveRemoveLowCardinality(data_types[i]); + auto rhs = recursiveRemoveLowCardinality(set_types[indexes_mapping[i].tuple_index]); + + if (!lhs->equals(*rhs)) + return false; + } + + return true; + }; + if (prepared_sets && ast_node) { if (ast_node->as() || ast_node->as()) return prepared_sets->getFuture(PreparedSetKey::forSubquery(ast_node->getTreeHash())); - /// We have `PreparedSetKey::forLiteral` but it is useless here as we don't have enough information - /// about types in left argument of the IN operator. 
Instead, we manually iterate through all the sets - /// and find the one for the right arg based on the AST structure (getTreeHash), after that we check - /// that the types it was prepared with are compatible with the types of the primary key. - auto types_match = [&indexes_mapping, &data_types](const DataTypes & set_types) - { - assert(indexes_mapping.size() == data_types.size()); - - for (size_t i = 0; i < indexes_mapping.size(); ++i) - { - if (indexes_mapping[i].tuple_index >= set_types.size()) - return false; - - auto lhs = recursiveRemoveLowCardinality(data_types[i]); - auto rhs = recursiveRemoveLowCardinality(set_types[indexes_mapping[i].tuple_index]); - - if (!lhs->equals(*rhs)) - return false; - } - - return true; - }; - auto tree_hash = ast_node->getTreeHash(); for (const auto & [key, future_set] : prepared_sets->getSets()) { @@ -392,7 +392,12 @@ FutureSetPtr RPNBuilderTreeNode::tryGetPreparedSet( { const auto * node_without_alias = getNodeWithoutAlias(dag_node); if (node_without_alias->column) - return tryGetSetFromDAGNode(node_without_alias); + { + auto future_set = tryGetSetFromDAGNode(node_without_alias); + auto set_types = future_set->getTypes(); + if (types_match(future_set->getTypes())) + return future_set; + } } return nullptr; diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index 00b5dbfc5e3..da94c87b69e 100644 --- a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -147,7 +147,7 @@ StorageSet::StorageSet( const String & comment, bool persistent_) : StorageSetOrJoinBase{disk_, relative_path_, table_id_, columns_, constraints_, comment, persistent_} - , set(std::make_shared(SizeLimits(), false, true)) + , set(std::make_shared(SizeLimits(), false, 0, true)) { Block header = getInMemoryMetadataPtr()->getSampleBlock(); set->setHeader(header.getColumnsWithTypeAndName()); @@ -176,7 +176,7 @@ void StorageSet::truncate(const ASTPtr &, const StorageMetadataPtr & metadata_sn Block header = metadata_snapshot->getSampleBlock(); increment = 0; - set = std::make_shared(SizeLimits(), false, true); + set = std::make_shared(SizeLimits(), false, 0, true); set->setHeader(header.getColumnsWithTypeAndName()); } diff --git a/tests/queries/0_stateless/01786_explain_merge_tree.reference b/tests/queries/0_stateless/01786_explain_merge_tree.reference index e6628813dbd..8d3954484dd 100644 --- a/tests/queries/0_stateless/01786_explain_merge_tree.reference +++ b/tests/queries/0_stateless/01786_explain_merge_tree.reference @@ -10,7 +10,7 @@ Keys: y bitAnd(z, 3) - Condition: and((bitAnd(z, 3) not in [1, 1]), and((y in [1, +Inf)), (bitAnd(z, 3) not in [1, 1]))) + Condition: and((y in [1, +Inf)), (bitAnd(z, 3) not in [1, 1])) Parts: 3/4 Granules: 10/11 PrimaryKey @@ -46,7 +46,7 @@ { "Type": "Partition", "Keys": ["y", "bitAnd(z, 3)"], - "Condition": "and((bitAnd(z, 3) not in [1, 1]), and((y in [1, +Inf)), (bitAnd(z, 3) not in [1, 1])))", + "Condition": "and((y in [1, +Inf)), (bitAnd(z, 3) not in [1, 1]))", "Initial Parts": 4, "Selected Parts": 3, "Initial Granules": 11, From 88bf4e49d4767b6a3a3ccfcc383a42ca90ae12f1 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Thu, 1 Jun 2023 17:40:40 -0400 Subject: [PATCH 0218/1997] update for min_chunk_bytes_for_parallel_parsing --- docs/en/sql-reference/transactions.md | 53 +++++++++++++++++++-------- 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/docs/en/sql-reference/transactions.md b/docs/en/sql-reference/transactions.md index 68fbfe0b22a..1ca2db44b13 100644 --- a/docs/en/sql-reference/transactions.md +++ 
b/docs/en/sql-reference/transactions.md
@@ -3,23 +3,44 @@ slug: /en/guides/developer/transactional
 ---
 # Transactional (ACID) support

-INSERT into one partition* in one table* of MergeTree* family up to max_insert_block_size rows* is transactional (ACID):
-- Atomic: INSERT is succeeded or rejected as a whole: if confirmation is sent to the client, all rows INSERTed; if error is sent to the client, no rows INSERTed.
+## Case 1: INSERT into one partition, of one table, of the MergeTree* family
+
+This is transactional (ACID) if the number of rows inserted is less than or equal to `max_insert_block_size` rows, and in the case of data in TSV, TSKV, CSV, or JSONEachRow format if the number of bytes is less than `min_chunk_bytes_for_parallel_parsing`:
+- Atomic: an INSERT succeeds or is rejected as a whole: if a confirmation is sent to the client, then all rows were inserted; if an error is sent to the client, then no rows were inserted.
 - Consistent: if there are no table constraints violated, then all rows in an INSERT are inserted and the INSERT succeeds; if constraints are violated, then no rows are inserted.
-- Isolated: concurrent clients observe a consistent snapshot of the table–the state of the table either as if before INSERT or after successful INSERT; no partial state is seen;
-- Durable: successful INSERT is written to the filesystem before answering to the client, on single replica or multiple replicas (controlled by the `insert_quorum` setting), and ClickHouse can ask the OS to sync the filesystem data on the storage media (controlled by the `fsync_after_insert` setting).
-* If table has many partitions and INSERT covers many partitions–then insertion into every partition is transactional on its own;
-* INSERT into multiple tables with one statement is possible if materialized views are involved;
-* INSERT into Distributed table is not transactional as a whole, while insertion into every shard is transactional;
-* another example: insert into Buffer tables is neither atomic nor isolated or consistent or durable;
-* atomicity is ensured even if `async_insert` is enabled, but it can be turned off by the wait_for_async_insert setting;
-* max_insert_block_size is 1 000 000 by default and can be adjusted as needed;
-* if client did not receive the answer from the server, the client does not know if transaction succeeded, and it can repeat the transaction, using exactly-once insertion properties;
-* ClickHouse is using MVCC with snapshot isolation internally;
-* all ACID properties are valid even in case of server kill / crash;
-* either insert_quorum into different AZ or fsync should be enabled to ensure durable inserts in typical setup;
-* "consistency" in ACID terms does not cover the semantics of distributed systems, see https://jepsen.io/consistency which is controlled by different settings (select_sequential_consistency)
-* this explanation does not cover a new transactions feature that allow to have full-featured transactions over multiple tables, materialized views, for multiple SELECTs, etc.
+- Isolated: concurrent clients observe a consistent snapshot of the table: the state of the table either as it was before the INSERT attempt, or after the successful INSERT; no partial state is seen
+- Durable: a successful INSERT is written to the filesystem before answering to the client, on a single replica or multiple replicas (controlled by the `insert_quorum` setting), and ClickHouse can ask the OS to sync the filesystem data on the storage media (controlled by the `fsync_after_insert` setting).
+- INSERT into multiple tables with one statement is possible if materialized views are involved (the INSERT from the client is to a table which has associated materialized views).
+
+## Case 2: INSERT into multiple partitions, of one table, of the MergeTree* family
+
+Same as Case 1 above, with this detail:
+- If the table has many partitions and the INSERT covers many partitions, then insertion into every partition is transactional on its own
+
+## Case 3: INSERT into one Distributed table of the MergeTree* family
+
+Same as Case 1 above, with this detail:
+- INSERT into a Distributed table is not transactional as a whole, while insertion into every shard is transactional
+
+## Case 4: Using a Buffer table
+
+- INSERT into a Buffer table is neither atomic nor isolated nor consistent nor durable
+
+## Case 5: Using async_insert
+
+Same as Case 1 above, with this detail:
+- atomicity is ensured even if `async_insert` is enabled and `wait_for_async_insert` is set to 1 (the default), but if `wait_for_async_insert` is set to 0, then atomicity is not ensured.
+
+## Notes
+- `max_insert_block_size` is 1 000 000 by default and can be adjusted as needed
+- `min_chunk_bytes_for_parallel_parsing` is 1 000 000 by default and can be adjusted as needed
+- if the client did not receive an answer from the server, the client does not know if the transaction succeeded, and it can repeat the transaction, using exactly-once insertion properties
+- ClickHouse is using MVCC with snapshot isolation internally
+- all ACID properties are valid even in the case of server kill/crash
+- either insert_quorum into different AZ or fsync should be enabled to ensure durable inserts in the typical setup
+- "consistency" in ACID terms does not cover the semantics of distributed systems, see https://jepsen.io/consistency which is controlled by different settings (select_sequential_consistency)
+- this explanation does not cover a new transactions feature that allows having full-featured transactions over multiple tables, materialized views, for multiple SELECTs, etc. (see the next section on Transactions, Commit, and Rollback).
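+
+To make Case 1 concrete, here is a minimal sketch (the table name, partition key, and literal values are illustrative only, not taken from a shipped example):
+
+```sql
+CREATE TABLE t (`d` Date, `n` UInt32) ENGINE = MergeTree PARTITION BY d ORDER BY n;
+
+-- All three rows target the same partition ('2023-06-01') and easily fit into
+-- one block, so this INSERT is atomic: after an acknowledged success the table
+-- contains all three rows, and after an error it contains none of them.
+INSERT INTO t VALUES ('2023-06-01', 1), ('2023-06-01', 2), ('2023-06-01', 3);
+```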
## Transactions, Commit, and Rollback

From d868e35863c3a80c9924b347ac017e9e93c33ba2 Mon Sep 17 00:00:00 2001
From: DanRoscigno
Date: Thu, 1 Jun 2023 19:08:44 -0400
Subject: [PATCH 0219/1997] update spelling list

---
 utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt
index ded7a4643a9..0787ead76cf 100644
--- a/utils/check-style/aspell-ignore/en/aspell-dict.txt
+++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt
@@ -169,6 +169,7 @@ SelfManaged
 Stateful
 Submodules
 Subqueries
+TSKV
 TSVRaw
 TSan
 TabItem

From 2f08b6738f307c7e04886a879632e1183b40b725 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?=
Date: Thu, 1 Jun 2023 18:34:00 +0200
Subject: [PATCH 0220/1997] Support parallel replicas with the analyzer

---
 src/Storages/StorageReplicatedMergeTree.cpp   | 15 ++++--
 ...02771_parallel_replicas_analyzer.reference | 12 +++++
 .../02771_parallel_replicas_analyzer.sql      | 52 +++++++++++++++++++
 3 files changed, 75 insertions(+), 4 deletions(-)
 create mode 100644 tests/queries/0_stateless/02771_parallel_replicas_analyzer.reference
 create mode 100644 tests/queries/0_stateless/02771_parallel_replicas_analyzer.sql

diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index 56896f88423..893e976d432 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -77,16 +77,17 @@
 #include
 #include
-#include
 #include
+#include
 #include
 #include
 #include
 #include
+#include
 #include
+#include
 #include
 #include
-#include
 #include

@@ -4707,8 +4708,14 @@ void StorageReplicatedMergeTree::read(
     auto cluster = local_context->getCluster(local_context->getSettingsRef().cluster_for_parallel_replicas);

-    Block header =
-        InterpreterSelectQuery(modified_query_ast, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock();
+    Block header;
+
+    if (local_context->getSettingsRef().allow_experimental_analyzer)
+        header = InterpreterSelectQueryAnalyzer::getSampleBlock(
+            modified_query_ast, local_context, SelectQueryOptions(processed_stage).analyze());
+    else
+        header
+            = InterpreterSelectQuery(modified_query_ast, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock();

     ClusterProxy::SelectStreamFactory select_stream_factory = ClusterProxy::SelectStreamFactory(

diff --git a/tests/queries/0_stateless/02771_parallel_replicas_analyzer.reference b/tests/queries/0_stateless/02771_parallel_replicas_analyzer.reference
new file mode 100644
index 00000000000..4e93c530f7b
--- /dev/null
+++ b/tests/queries/0_stateless/02771_parallel_replicas_analyzer.reference
@@ -0,0 +1,12 @@
+-8888150036649430454
+-2788931093724180887
+-75175454385331084
+368066018677693974
+821735343441964030
+2804162938822577320
+4357435422797280898
+5935810273536892891
+7885388429666205427
+8124171311239967992
+1 1 -- Simple query with analyzer and pure parallel replicas\nSELECT number\nFROM join_inner_table__fuzz_146_replicated\n    SETTINGS\n    allow_experimental_analyzer = 1,\n    max_parallel_replicas = 2,\n    cluster_for_parallel_replicas = \'test_cluster_one_shard_three_replicas_localhost\',\n    allow_experimental_parallel_reading_from_replicas = 1,\n    use_hedged_requests = 0;
+0 2 SELECT `default`.`join_inner_table__fuzz_146_replicated`.`number` AS `number` FROM `default`.`join_inner_table__fuzz_146_replicated`

diff --git
a/tests/queries/0_stateless/02771_parallel_replicas_analyzer.sql b/tests/queries/0_stateless/02771_parallel_replicas_analyzer.sql new file mode 100644 index 00000000000..35089c0cedb --- /dev/null +++ b/tests/queries/0_stateless/02771_parallel_replicas_analyzer.sql @@ -0,0 +1,52 @@ +-- Tags: zookeeper + +CREATE TABLE join_inner_table__fuzz_146_replicated +( + `id` UUID, + `key` String, + `number` Int64, + `value1` String, + `value2` String, + `time` Nullable(Int64) +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/join_inner_table__fuzz_146_replicated', '{replica}') +ORDER BY (id, number, key) +SETTINGS index_granularity = 8192; + +INSERT INTO join_inner_table__fuzz_146_replicated + SELECT CAST('833c9e22-c245-4eb5-8745-117a9a1f26b1', 'UUID') AS id, CAST(rowNumberInAllBlocks(), 'String') AS key, * + FROM generateRandom('number Int64, value1 String, value2 String, time Int64', 1, 10, 2) LIMIT 10; + +-- Simple query with analyzer and pure parallel replicas +SELECT number +FROM join_inner_table__fuzz_146_replicated + SETTINGS + allow_experimental_analyzer = 1, + max_parallel_replicas = 2, + cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', + allow_experimental_parallel_reading_from_replicas = 1, + use_hedged_requests = 0; + +SYSTEM FLUSH LOGS; +-- There should be 2 different queries +-- The initial query +-- The query sent to each replica (which should appear 2 times as we are setting max_parallel_replicas to 2) +SELECT + is_initial_query, + count() as c, query, +FROM system.query_log +WHERE + event_date >= yesterday() + AND type = 'QueryFinish' + AND initial_query_id = + ( + SELECT query_id + FROM system.query_log + WHERE + current_database = currentDatabase() + AND event_date >= yesterday() + AND type = 'QueryFinish' + AND query LIKE '-- Simple query with analyzer and pure parallel replicas%' + ) +GROUP BY is_initial_query, query +ORDER BY is_initial_query DESC, c, query; From c1958c8bed68529c04c37a4b81d139088da3f2f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 2 Jun 2023 14:24:32 +0200 Subject: [PATCH 0221/1997] Remove 02764_parallel_replicas_plain_merge_tree from list of broken tests --- tests/broken_tests.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/broken_tests.txt b/tests/broken_tests.txt index 02935712325..96219323700 100644 --- a/tests/broken_tests.txt +++ b/tests/broken_tests.txt @@ -135,6 +135,5 @@ 02703_row_policy_for_database 02721_url_cluster 02534_s3_cluster_insert_select_schema_inference -02764_parallel_replicas_plain_merge_tree 02765_parallel_replicas_final_modifier From 36129c3d20bef74c4502126b2c37001e2c80b8d2 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 2 Jun 2023 13:36:14 +0000 Subject: [PATCH 0222/1997] Try to fix last tests. 
--- src/Storages/MergeTree/RPNBuilder.cpp | 6 ++++-- .../0_stateless/00981_in_subquery_with_tuple.reference | 3 --- tests/queries/0_stateless/01651_bugs_from_15889.reference | 1 + tests/queries/0_stateless/01651_bugs_from_15889.sql | 5 ++--- 4 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/Storages/MergeTree/RPNBuilder.cpp b/src/Storages/MergeTree/RPNBuilder.cpp index c9feb533ea3..b82c350fa1a 100644 --- a/src/Storages/MergeTree/RPNBuilder.cpp +++ b/src/Storages/MergeTree/RPNBuilder.cpp @@ -366,8 +366,10 @@ FutureSetPtr RPNBuilderTreeNode::tryGetPreparedSet( if (indexes_mapping[i].tuple_index >= set_types.size()) return false; - auto lhs = recursiveRemoveLowCardinality(data_types[i]); - auto rhs = recursiveRemoveLowCardinality(set_types[indexes_mapping[i].tuple_index]); + auto lhs = removeNullable(recursiveRemoveLowCardinality(data_types[i])); + auto rhs = removeNullable(recursiveRemoveLowCardinality(set_types[indexes_mapping[i].tuple_index])); + + // std::cerr << "============ " << lhs->getName() << ' ' << rhs->getName() << std::endl; if (!lhs->equals(*rhs)) return false; diff --git a/tests/queries/0_stateless/00981_in_subquery_with_tuple.reference b/tests/queries/0_stateless/00981_in_subquery_with_tuple.reference index 673d035ede6..833a8c93d24 100644 --- a/tests/queries/0_stateless/00981_in_subquery_with_tuple.reference +++ b/tests/queries/0_stateless/00981_in_subquery_with_tuple.reference @@ -1,7 +1,4 @@ -OK1 OK2 OK3 -OK4 -OK5 2019-08-11 world 2019-08-12 hello diff --git a/tests/queries/0_stateless/01651_bugs_from_15889.reference b/tests/queries/0_stateless/01651_bugs_from_15889.reference index 8b137891791..77ac542d4fb 100644 --- a/tests/queries/0_stateless/01651_bugs_from_15889.reference +++ b/tests/queries/0_stateless/01651_bugs_from_15889.reference @@ -1 +1,2 @@ +0 diff --git a/tests/queries/0_stateless/01651_bugs_from_15889.sql b/tests/queries/0_stateless/01651_bugs_from_15889.sql index 4717a8dcc0d..6a8c6d35911 100644 --- a/tests/queries/0_stateless/01651_bugs_from_15889.sql +++ b/tests/queries/0_stateless/01651_bugs_from_15889.sql @@ -8,10 +8,9 @@ INSERT INTO xp SELECT '2020-01-01', number, '' FROM numbers(100000); CREATE TABLE xp_d AS xp ENGINE = Distributed(test_shard_localhost, currentDatabase(), xp); --- FIXME: this query spontaneously returns either 8 or 20 error code. Looks like it's potentially flaky. 
--- SELECT count(7 = (SELECT number FROM numbers(0) ORDER BY number ASC NULLS FIRST LIMIT 7)) FROM xp_d PREWHERE toYYYYMM(A) GLOBAL IN (SELECT NULL = (SELECT number FROM numbers(1) ORDER BY number DESC NULLS LAST LIMIT 1), toYYYYMM(min(A)) FROM xp_d) WHERE B > NULL; -- { serverError 8 } +SELECT count(7 = (SELECT number FROM numbers(0) ORDER BY number ASC NULLS FIRST LIMIT 7)) FROM xp_d PREWHERE toYYYYMM(A) GLOBAL IN (SELECT NULL = (SELECT number FROM numbers(1) ORDER BY number DESC NULLS LAST LIMIT 1), toYYYYMM(min(A)) FROM xp_d) WHERE B > NULL; -- { serverError 8 } -SELECT count() FROM xp_d WHERE A GLOBAL IN (SELECT NULL); -- { serverError 53 } +SELECT count() FROM xp_d WHERE A GLOBAL IN (SELECT NULL); DROP TABLE IF EXISTS xp; DROP TABLE IF EXISTS xp_d; From 47966c4e8129c3777679e42966cfec9c7a2aa383 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 2 Jun 2023 14:10:52 +0000 Subject: [PATCH 0223/1997] Finalize buffers --- src/Coordination/KeeperSnapshotManager.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Coordination/KeeperSnapshotManager.cpp b/src/Coordination/KeeperSnapshotManager.cpp index 8dd6d27c379..44e990c7b95 100644 --- a/src/Coordination/KeeperSnapshotManager.cpp +++ b/src/Coordination/KeeperSnapshotManager.cpp @@ -618,12 +618,14 @@ SnapshotFileInfo KeeperSnapshotManager::serializeSnapshotBufferToDisk(nuraft::bu auto disk = getLatestSnapshotDisk(); { - disk->writeFile(tmp_snapshot_file_name); + auto buf = disk->writeFile(tmp_snapshot_file_name); + buf->finalize(); } auto plain_buf = disk->writeFile(snapshot_file_name); copyData(reader, *plain_buf); plain_buf->sync(); + plain_buf->finalize(); disk->removeFile(tmp_snapshot_file_name); From 975e58c56d5cf7038052e0be9699c094c7203161 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 2 Jun 2023 15:15:44 +0000 Subject: [PATCH 0224/1997] Fix style.
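Besides dropping the unused error code, CollectSets gains an early return so the same literal set is not registered twice. A rough, hypothetical illustration of a query where two predicates would resolve to one prepared set:

    -- Both IN clauses reference the same literal tuple (1, 2, 3), so a
    -- single prepared set should be collected and reused for both.
    SELECT count()
    FROM numbers(10)
    WHERE number IN (1, 2, 3) AND number * 1 IN (1, 2, 3);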
--- src/Interpreters/PreparedSets.cpp | 1 - src/Planner/CollectSets.cpp | 2 ++ src/Processors/QueryPlan/CreatingSetsStep.cpp | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/PreparedSets.cpp b/src/Interpreters/PreparedSets.cpp index 1e475e8403d..6df7c748e60 100644 --- a/src/Interpreters/PreparedSets.cpp +++ b/src/Interpreters/PreparedSets.cpp @@ -14,7 +14,6 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; - extern const int SET_SIZE_LIMIT_EXCEEDED; } PreparedSetKey PreparedSetKey::forLiteral(Hash hash, DataTypes types_) diff --git a/src/Planner/CollectSets.cpp b/src/Planner/CollectSets.cpp index d264810645a..5f44994c14b 100644 --- a/src/Planner/CollectSets.cpp +++ b/src/Planner/CollectSets.cpp @@ -81,6 +81,8 @@ public: element_type = low_cardinality_type->getDictionaryType(); auto set_key = PreparedSetKey::forLiteral(in_second_argument->getTreeHash(), set_element_types); + if (sets.getFuture(set_key)) + return; sets.addFromTuple(set_key, std::move(set), settings); diff --git a/src/Processors/QueryPlan/CreatingSetsStep.cpp b/src/Processors/QueryPlan/CreatingSetsStep.cpp index 0909ee9f1eb..264c4b9ef47 100644 --- a/src/Processors/QueryPlan/CreatingSetsStep.cpp +++ b/src/Processors/QueryPlan/CreatingSetsStep.cpp @@ -46,6 +46,7 @@ CreatingSetStep::CreatingSetStep( , network_transfer_limits(std::move(network_transfer_limits_)) , context(std::move(context_)) { + std::cerr << StackTrace().toString() << std::endl; } void CreatingSetStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) From c19866f72d3b356f2c4fed334c2beeecd441ef0e Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 2 Jun 2023 15:16:36 +0000 Subject: [PATCH 0225/1997] Fix style. --- src/Processors/QueryPlan/CreatingSetsStep.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Processors/QueryPlan/CreatingSetsStep.cpp b/src/Processors/QueryPlan/CreatingSetsStep.cpp index 264c4b9ef47..0909ee9f1eb 100644 --- a/src/Processors/QueryPlan/CreatingSetsStep.cpp +++ b/src/Processors/QueryPlan/CreatingSetsStep.cpp @@ -46,7 +46,6 @@ CreatingSetStep::CreatingSetStep( , network_transfer_limits(std::move(network_transfer_limits_)) , context(std::move(context_)) { - std::cerr << StackTrace().toString() << std::endl; } void CreatingSetStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) From 423afec70542c266187d49cf571d5f6bb4324977 Mon Sep 17 00:00:00 2001 From: tpanetti Date: Fri, 2 Jun 2023 10:05:38 -0700 Subject: [PATCH 0226/1997] Change case and function name for MySQL Compatible types This changes the function name for MySQL Compatible types from getMySQLName to getSQLCompatibleName and changes the casing of the types to upper --- src/DataTypes/DataTypeAggregateFunction.h | 2 +- src/DataTypes/DataTypeArray.h | 4 +- src/DataTypes/DataTypeDate.h | 2 +- src/DataTypes/DataTypeDate32.h | 2 +- src/DataTypes/DataTypeDateTime.h | 2 +- src/DataTypes/DataTypeDateTime64.h | 2 +- src/DataTypes/DataTypeEnum.cpp | 2 +- src/DataTypes/DataTypeEnum.h | 2 +- src/DataTypes/DataTypeFixedString.h | 2 +- src/DataTypes/DataTypeFunction.h | 2 +- src/DataTypes/DataTypeIPv4andIPv6.h | 4 +- src/DataTypes/DataTypeInterval.h | 2 +- src/DataTypes/DataTypeLowCardinality.cpp | 2 +- src/DataTypes/DataTypeLowCardinality.h | 2 +- src/DataTypes/DataTypeMap.h | 2 +- src/DataTypes/DataTypeNothing.h | 2 +- src/DataTypes/DataTypeNullable.h | 2 +- src/DataTypes/DataTypeNumberBase.cpp | 28 +- src/DataTypes/DataTypeNumberBase.h | 2 +- 
src/DataTypes/DataTypeObject.h | 2 +- src/DataTypes/DataTypeSet.h | 2 +- src/DataTypes/DataTypeString.h | 2 +- src/DataTypes/DataTypeTuple.h | 2 +- src/DataTypes/DataTypeUUID.h | 2 +- src/DataTypes/DataTypesDecimal.h | 4 +- src/DataTypes/IDataType.h | 4 +- ...show_columns_mysql_compatibility.reference | 424 +++++++++--------- .../02775_show_columns_mysql_compatibility.sh | 6 +- 28 files changed, 260 insertions(+), 256 deletions(-) diff --git a/src/DataTypes/DataTypeAggregateFunction.h b/src/DataTypes/DataTypeAggregateFunction.h index 697be13652c..13ca3508580 100644 --- a/src/DataTypes/DataTypeAggregateFunction.h +++ b/src/DataTypes/DataTypeAggregateFunction.h @@ -45,7 +45,7 @@ public: String doGetName() const override; String getNameWithoutVersion() const; const char * getFamilyName() const override { return "AggregateFunction"; } - const char * getMySQLName() const override { return "text"; } + const char * getSQLCompatibleName() const override { return "TEXT"; } TypeIndex getTypeId() const override { return TypeIndex::AggregateFunction; } Array getParameters() const { return parameters; } diff --git a/src/DataTypes/DataTypeArray.h b/src/DataTypes/DataTypeArray.h index b031f411975..528062b60be 100644 --- a/src/DataTypes/DataTypeArray.h +++ b/src/DataTypes/DataTypeArray.h @@ -30,9 +30,9 @@ public: { return "Array"; } - const char * getMySQLName() const override + const char * getSQLCompatibleName() const override { - return "text"; + return "TEXT"; } bool canBeInsideNullable() const override diff --git a/src/DataTypes/DataTypeDate.h b/src/DataTypes/DataTypeDate.h index 33bcb6123ff..7b622ae04a3 100644 --- a/src/DataTypes/DataTypeDate.h +++ b/src/DataTypes/DataTypeDate.h @@ -13,7 +13,7 @@ public: TypeIndex getTypeId() const override { return TypeIndex::Date; } const char * getFamilyName() const override { return family_name; } - const char * getMySQLName() const override { return "date"; } + const char * getSQLCompatibleName() const override { return "DATE"; } bool canBeUsedAsVersion() const override { return true; } bool canBeInsideNullable() const override { return true; } diff --git a/src/DataTypes/DataTypeDate32.h b/src/DataTypes/DataTypeDate32.h index 56315f46e8c..65b0ec7407e 100644 --- a/src/DataTypes/DataTypeDate32.h +++ b/src/DataTypes/DataTypeDate32.h @@ -13,7 +13,7 @@ public: TypeIndex getTypeId() const override { return TypeIndex::Date32; } const char * getFamilyName() const override { return family_name; } - const char * getMySQLName() const override { return "date"; } + const char * getSQLCompatibleName() const override { return "DATE"; } Field getDefault() const override { diff --git a/src/DataTypes/DataTypeDateTime.h b/src/DataTypes/DataTypeDateTime.h index c868f92c311..2facc758f90 100644 --- a/src/DataTypes/DataTypeDateTime.h +++ b/src/DataTypes/DataTypeDateTime.h @@ -36,7 +36,7 @@ public: static constexpr auto family_name = "DateTime"; const char * getFamilyName() const override { return family_name; } - const char * getMySQLName() const override { return "datetime"; } + const char * getSQLCompatibleName() const override { return "DATETIME"; } String doGetName() const override; TypeIndex getTypeId() const override { return TypeIndex::DateTime; } diff --git a/src/DataTypes/DataTypeDateTime64.h b/src/DataTypes/DataTypeDateTime64.h index 8d317bb9430..b836b84918f 100644 --- a/src/DataTypes/DataTypeDateTime64.h +++ b/src/DataTypes/DataTypeDateTime64.h @@ -28,7 +28,7 @@ public: DataTypeDateTime64(UInt32 scale_, const TimezoneMixin & time_zone_info); const char * getFamilyName() 
const override { return family_name; } - const char * getMySQLName() const override { return "datetime"; } + const char * getSQLCompatibleName() const override { return "DATETIME"; } std::string doGetName() const override; TypeIndex getTypeId() const override { return type_id; } diff --git a/src/DataTypes/DataTypeEnum.cpp b/src/DataTypes/DataTypeEnum.cpp index bfed4d4d5a2..24a3976179d 100644 --- a/src/DataTypes/DataTypeEnum.cpp +++ b/src/DataTypes/DataTypeEnum.cpp @@ -41,7 +41,7 @@ std::string DataTypeEnum::generateMySQLName(const Values & values) { WriteBufferFromOwnString out; - writeString("enum", out); + writeString("ENUM", out); writeChar('(', out); auto first = true; diff --git a/src/DataTypes/DataTypeEnum.h b/src/DataTypes/DataTypeEnum.h index c6e523adf96..2cdaa2db06c 100644 --- a/src/DataTypes/DataTypeEnum.h +++ b/src/DataTypes/DataTypeEnum.h @@ -54,7 +54,7 @@ public: std::string doGetName() const override { return type_name; } const char * getFamilyName() const override; - const char * getMySQLName() const override { return my_sql_type_name.c_str(); } + const char * getSQLCompatibleName() const override { return my_sql_type_name.c_str(); } TypeIndex getTypeId() const override { return type_id; } diff --git a/src/DataTypes/DataTypeFixedString.h b/src/DataTypes/DataTypeFixedString.h index eb09914ec9c..2900efd5a34 100644 --- a/src/DataTypes/DataTypeFixedString.h +++ b/src/DataTypes/DataTypeFixedString.h @@ -42,7 +42,7 @@ public: TypeIndex getTypeId() const override { return type_id; } const char * getFamilyName() const override { return "FixedString"; } - const char * getMySQLName() const override { return "text"; } + const char * getSQLCompatibleName() const override { return "TEXT"; } size_t getN() const { diff --git a/src/DataTypes/DataTypeFunction.h b/src/DataTypes/DataTypeFunction.h index f3423796126..df59f7738b2 100644 --- a/src/DataTypes/DataTypeFunction.h +++ b/src/DataTypes/DataTypeFunction.h @@ -24,7 +24,7 @@ public: std::string doGetName() const override; const char * getFamilyName() const override { return "Function"; } - const char * getMySQLName() const override { return "text"; } + const char * getSQLCompatibleName() const override { return "TEXT"; } TypeIndex getTypeId() const override { return TypeIndex::Function; } const DataTypes & getArgumentTypes() const diff --git a/src/DataTypes/DataTypeIPv4andIPv6.h b/src/DataTypes/DataTypeIPv4andIPv6.h index 8f7fe79793b..be0ebb90f3c 100644 --- a/src/DataTypes/DataTypeIPv4andIPv6.h +++ b/src/DataTypes/DataTypeIPv4andIPv6.h @@ -19,7 +19,7 @@ public: static constexpr auto type_id = TypeToTypeIndex; const char * getFamilyName() const override { return TypeName.data(); } - const char * getMySQLName() const override { return "text"; } + const char * getSQLCompatibleName() const override { return "TEXT"; } TypeIndex getTypeId() const override { return type_id; } @@ -61,7 +61,7 @@ public: static constexpr auto type_id = TypeToTypeIndex; const char * getFamilyName() const override { return TypeName.data(); } - const char * getMySQLName() const override { return "text"; } + const char * getSQLCompatibleName() const override { return "TEXT"; } TypeIndex getTypeId() const override { return type_id; } diff --git a/src/DataTypes/DataTypeInterval.h b/src/DataTypes/DataTypeInterval.h index 69a56e8aadd..ee2157431dd 100644 --- a/src/DataTypes/DataTypeInterval.h +++ b/src/DataTypes/DataTypeInterval.h @@ -26,7 +26,7 @@ public: std::string doGetName() const override { return fmt::format("Interval{}", kind.toString()); } const char * 
getFamilyName() const override { return "Interval"; } - const char * getMySQLName() const override { return "text"; } + const char * getSQLCompatibleName() const override { return "TEXT"; } TypeIndex getTypeId() const override { return TypeIndex::Interval; } bool equals(const IDataType & rhs) const override; diff --git a/src/DataTypes/DataTypeLowCardinality.cpp b/src/DataTypes/DataTypeLowCardinality.cpp index b1c32317015..e59613e6974 100644 --- a/src/DataTypes/DataTypeLowCardinality.cpp +++ b/src/DataTypes/DataTypeLowCardinality.cpp @@ -29,7 +29,7 @@ namespace ErrorCodes DataTypeLowCardinality::DataTypeLowCardinality(DataTypePtr dictionary_type_) : dictionary_type(std::move(dictionary_type_)), - mysql_name(dictionary_type->getMySQLName()) + mysql_name(dictionary_type->getSQLCompatibleName()) { auto inner_type = dictionary_type; if (dictionary_type->isNullable()) diff --git a/src/DataTypes/DataTypeLowCardinality.h b/src/DataTypes/DataTypeLowCardinality.h index bcc39f58ff7..4dee8565568 100644 --- a/src/DataTypes/DataTypeLowCardinality.h +++ b/src/DataTypes/DataTypeLowCardinality.h @@ -24,7 +24,7 @@ public: return "LowCardinality(" + dictionary_type->getName() + ")"; } const char * getFamilyName() const override { return "LowCardinality"; } - const char * getMySQLName() const override { return mysql_name.c_str(); } + const char * getSQLCompatibleName() const override { return mysql_name.c_str(); } TypeIndex getTypeId() const override { return TypeIndex::LowCardinality; } diff --git a/src/DataTypes/DataTypeMap.h b/src/DataTypes/DataTypeMap.h index 526dc321f44..299119f1759 100644 --- a/src/DataTypes/DataTypeMap.h +++ b/src/DataTypes/DataTypeMap.h @@ -30,7 +30,7 @@ public: TypeIndex getTypeId() const override { return TypeIndex::Map; } std::string doGetName() const override; const char * getFamilyName() const override { return "Map"; } - const char * getMySQLName() const override { return "json"; } + const char * getSQLCompatibleName() const override { return "JSON"; } bool canBeInsideNullable() const override { return false; } diff --git a/src/DataTypes/DataTypeNothing.h b/src/DataTypes/DataTypeNothing.h index fdef6026603..b35ced5dcb3 100644 --- a/src/DataTypes/DataTypeNothing.h +++ b/src/DataTypes/DataTypeNothing.h @@ -16,7 +16,7 @@ public: static constexpr bool is_parametric = false; const char * getFamilyName() const override { return "Nothing"; } - const char * getMySQLName() const override { return "text"; } + const char * getSQLCompatibleName() const override { return "TEXT"; } TypeIndex getTypeId() const override { return TypeIndex::Nothing; } diff --git a/src/DataTypes/DataTypeNullable.h b/src/DataTypes/DataTypeNullable.h index 64b201d32b2..b5fe1bb2dd9 100644 --- a/src/DataTypes/DataTypeNullable.h +++ b/src/DataTypes/DataTypeNullable.h @@ -16,7 +16,7 @@ public: explicit DataTypeNullable(const DataTypePtr & nested_data_type_); std::string doGetName() const override { return "Nullable(" + nested_data_type->getName() + ")"; } const char * getFamilyName() const override { return "Nullable"; } - const char * getMySQLName() const override { return nested_data_type->getMySQLName(); } + const char * getSQLCompatibleName() const override { return nested_data_type->getSQLCompatibleName(); } TypeIndex getTypeId() const override { return TypeIndex::Nullable; } MutableColumnPtr createColumn() const override; diff --git a/src/DataTypes/DataTypeNumberBase.cpp b/src/DataTypes/DataTypeNumberBase.cpp index 7d200de7996..db654448e83 100644 --- a/src/DataTypes/DataTypeNumberBase.cpp +++ 
b/src/DataTypes/DataTypeNumberBase.cpp @@ -32,20 +32,20 @@ bool DataTypeNumberBase::isValueRepresentedByUnsignedInteger() const template const std::map DataTypeNumberBase::mysqlTypeMap = { - {"UInt8", "tinyint unsigned"}, - {"UInt16", "smallint unsigned"}, - {"UInt32", "mediumint unsigned"}, - {"UInt64", "bigint unsigned"}, - {"UInt128", "text"}, - {"UInt256", "text"}, - {"Int8", "tinyint"}, - {"Int16", "smallint"}, - {"Int32", "int"}, - {"Int64", "bigint"}, - {"Int128", "text"}, - {"Int256", "text"}, - {"Float32", "float"}, - {"Float64", "double"}, + {"UInt8", "TINYINT UNSIGNED"}, + {"UInt16", "SMALLINT UNSIGNED"}, + {"UInt32", "MEDIUMINT UNSIGNED"}, + {"UInt64", "BIGINT UNSIGNED"}, + {"UInt128", "TEXT"}, + {"UInt256", "TEXT"}, + {"Int8", "TINYINT"}, + {"Int16", "SMALLINT"}, + {"Int32", "INT"}, + {"Int64", "BIGINT"}, + {"Int128", "TEXT"}, + {"Int256", "TEXT"}, + {"Float32", "FLOAT"}, + {"Float64", "DOUBLE"}, }; /// Explicit template instantiations - to avoid code bloat in headers. diff --git a/src/DataTypes/DataTypeNumberBase.h b/src/DataTypes/DataTypeNumberBase.h index b5c963cf245..1a855a974f0 100644 --- a/src/DataTypes/DataTypeNumberBase.h +++ b/src/DataTypes/DataTypeNumberBase.h @@ -27,7 +27,7 @@ public: using ColumnType = ColumnVector; const char * getFamilyName() const override { return TypeName.data(); } - const char * getMySQLName() const override { return mysqlTypeMap.at(TypeName.data()).c_str(); } + const char * getSQLCompatibleName() const override { return mysqlTypeMap.at(TypeName.data()).c_str(); } TypeIndex getTypeId() const override { return TypeToTypeIndex; } Field getDefault() const override; diff --git a/src/DataTypes/DataTypeObject.h b/src/DataTypes/DataTypeObject.h index 8a2c36abcd7..618c7389758 100644 --- a/src/DataTypes/DataTypeObject.h +++ b/src/DataTypes/DataTypeObject.h @@ -23,7 +23,7 @@ public: DataTypeObject(const String & schema_format_, bool is_nullable_); const char * getFamilyName() const override { return "Object"; } - const char * getMySQLName() const override { return "json"; } + const char * getSQLCompatibleName() const override { return "JSON"; } String doGetName() const override; TypeIndex getTypeId() const override { return TypeIndex::Object; } diff --git a/src/DataTypes/DataTypeSet.h b/src/DataTypes/DataTypeSet.h index bdad638b5d5..916b4f071a5 100644 --- a/src/DataTypes/DataTypeSet.h +++ b/src/DataTypes/DataTypeSet.h @@ -15,7 +15,7 @@ class DataTypeSet final : public IDataTypeDummy public: static constexpr bool is_parametric = true; const char * getFamilyName() const override { return "Set"; } - const char * getMySQLName() const override { return "text"; } + const char * getSQLCompatibleName() const override { return "TEXT"; } TypeIndex getTypeId() const override { return TypeIndex::Set; } bool equals(const IDataType & rhs) const override { return typeid(rhs) == typeid(*this); } diff --git a/src/DataTypes/DataTypeString.h b/src/DataTypes/DataTypeString.h index bddfb4ae287..338b3846266 100644 --- a/src/DataTypes/DataTypeString.h +++ b/src/DataTypes/DataTypeString.h @@ -22,7 +22,7 @@ public: } // FIXME: string can contain arbitrary bytes, not only UTF-8 sequences - const char * getMySQLName() const override { return "blob"; } + const char * getSQLCompatibleName() const override { return "BLOB"; } TypeIndex getTypeId() const override { return type_id; } diff --git a/src/DataTypes/DataTypeTuple.h b/src/DataTypes/DataTypeTuple.h index d264cc97f60..93fa87b1332 100644 --- a/src/DataTypes/DataTypeTuple.h +++ b/src/DataTypes/DataTypeTuple.h @@ -33,7 +33,7 @@
public: TypeIndex getTypeId() const override { return TypeIndex::Tuple; } std::string doGetName() const override; const char * getFamilyName() const override { return "Tuple"; } - const char * getMySQLName() const override { return "json"; } + const char * getSQLCompatibleName() const override { return "JSON"; } bool canBeInsideNullable() const override { return false; } bool supportsSparseSerialization() const override { return true; } diff --git a/src/DataTypes/DataTypeUUID.h b/src/DataTypes/DataTypeUUID.h index 4d54db42b45..bbf35074df3 100644 --- a/src/DataTypes/DataTypeUUID.h +++ b/src/DataTypes/DataTypeUUID.h @@ -18,7 +18,7 @@ public: static constexpr auto type_id = TypeIndex::UUID; const char * getFamilyName() const override { return "UUID"; } - const char * getMySQLName() const override { return "char"; } + const char * getSQLCompatibleName() const override { return "CHAR"; } TypeIndex getTypeId() const override { return type_id; } diff --git a/src/DataTypes/DataTypesDecimal.h b/src/DataTypes/DataTypesDecimal.h index 5c9405cb060..6f3bf582aeb 100644 --- a/src/DataTypes/DataTypesDecimal.h +++ b/src/DataTypes/DataTypesDecimal.h @@ -37,10 +37,10 @@ public: using Base::Base; static constexpr auto family_name = "Decimal"; - static constexpr auto mysql_name = "decimal"; + static constexpr auto mysql_name = "DECIMAL"; const char * getFamilyName() const override { return family_name; } - const char * getMySQLName() const override { return mysql_name; } + const char * getSQLCompatibleName() const override { return mysql_name; } std::string doGetName() const override; TypeIndex getTypeId() const override { return TypeToTypeIndex; } diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index 2bed18897ce..93fdbab05ef 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -83,7 +83,7 @@ public: /// Name of data type family (example: FixedString, Array). virtual const char * getFamilyName() const = 0; - virtual const char * getMySQLName() const = 0; + virtual const char * getSQLCompatibleName() const = 0; /// Data type id. It's used for runtime type checks. 
virtual TypeIndex getTypeId() const = 0; @@ -135,7 +135,7 @@ public: protected: virtual String doGetName() const { return getFamilyName(); } - virtual String doGetMySQLName() const { return getMySQLName(); } + virtual String doGetMySQLName() const { return getSQLCompatibleName(); } virtual SerializationPtr doGetDefaultSerialization() const = 0; public: diff --git a/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.reference b/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.reference index 96e542611c6..1742cd9c90c 100644 --- a/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.reference +++ b/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.reference @@ -4,226 +4,226 @@ Create pseudo-random database name Create tab duplicate table Run MySQL test field type null key default extra -aggregate_function text 0 NULL -array_value text 0 NULL -boolean_value tinyint unsigned 0 NULL -date32_value date 0 NULL -date_value date 0 NULL -datetime64_value datetime 0 NULL -datetime_value datetime 0 NULL -decimal_value decimal 0 NULL -enum_value enum('apple', 'banana', 'orange') 0 NULL -fixed_string_value text 0 NULL -float32 float 0 NULL -float64 double 0 NULL -int32 int 0 NULL -ipv4_value text 0 NULL -ipv6_value text 0 NULL -json_value json 0 NULL -low_cardinality blob 0 NULL -low_cardinality_date datetime 0 NULL -map_value json 0 NULL -nested.nested_int text 0 NULL -nested.nested_string text 0 NULL -nullable_value int 0 NULL -string_value blob 0 NULL -tuple_value json 0 NULL -uint64 bigint unsigned 0 PRI SOR NULL -uuid_value char 0 NULL +aggregate_function TEXT 0 NULL +array_value TEXT 0 NULL +boolean_value TINYINT UNSIGNED 0 NULL +date32_value DATE 0 NULL +date_value DATE 0 NULL +datetime64_value DATETIME 0 NULL +datetime_value DATETIME 0 NULL +decimal_value DECIMAL 0 NULL +enum_value ENUM('apple', 'banana', 'orange') 0 NULL +fixed_string_value TEXT 0 NULL +float32 FLOAT 0 NULL +float64 DOUBLE 0 NULL +int32 INT 0 NULL +ipv4_value TEXT 0 NULL +ipv6_value TEXT 0 NULL +json_value JSON 0 NULL +low_cardinality BLOB 0 NULL +low_cardinality_date DATETIME 0 NULL +map_value JSON 0 NULL +nested.nested_int TEXT 0 NULL +nested.nested_string TEXT 0 NULL +nullable_value INT 0 NULL +string_value BLOB 0 NULL +tuple_value JSON 0 NULL +uint64 BIGINT UNSIGNED 0 PRI SOR NULL +uuid_value CHAR 0 NULL field type null key default extra -aggregate_function text 0 NULL -array_value text 0 NULL -boolean_value tinyint unsigned 0 NULL -date32_value date 0 NULL -date_value date 0 NULL -datetime64_value datetime 0 NULL -datetime_value datetime 0 NULL -decimal_value decimal 0 NULL -enum_value enum('apple', 'banana', 'orange') 0 NULL -fixed_string_value text 0 NULL -float32 float 0 NULL -float64 double 0 NULL -int32 int 0 NULL -ipv4_value text 0 NULL -ipv6_value text 0 NULL -json_value json 0 NULL -low_cardinality blob 0 NULL -low_cardinality_date datetime 0 NULL -map_value json 0 NULL -nested.nested_int text 0 NULL -nested.nested_string text 0 NULL -nullable_value int 0 NULL -string_value blob 0 NULL -tuple_value json 0 NULL -uint64 bigint unsigned 0 PRI SOR NULL -uuid_value char 0 NULL +aggregate_function TEXT 0 NULL +array_value TEXT 0 NULL +boolean_value TINYINT UNSIGNED 0 NULL +date32_value DATE 0 NULL +date_value DATE 0 NULL +datetime64_value DATETIME 0 NULL +datetime_value DATETIME 0 NULL +decimal_value DECIMAL 0 NULL +enum_value ENUM('apple', 'banana', 'orange') 0 NULL +fixed_string_value TEXT 0 NULL +float32 FLOAT 0 NULL +float64 DOUBLE 0 NULL +int32 INT 0 NULL 
+ipv4_value TEXT 0 NULL +ipv6_value TEXT 0 NULL +json_value JSON 0 NULL +low_cardinality BLOB 0 NULL +low_cardinality_date DATETIME 0 NULL +map_value JSON 0 NULL +nested.nested_int TEXT 0 NULL +nested.nested_string TEXT 0 NULL +nullable_value INT 0 NULL +string_value BLOB 0 NULL +tuple_value JSON 0 NULL +uint64 BIGINT UNSIGNED 0 PRI SOR NULL +uuid_value CHAR 0 NULL field type null key default extra collation comment privileges -aggregate_function text 0 NULL NULL -array_value text 0 NULL NULL -boolean_value tinyint unsigned 0 NULL NULL -date32_value date 0 NULL NULL -date_value date 0 NULL NULL -datetime64_value datetime 0 NULL NULL -datetime_value datetime 0 NULL NULL -decimal_value decimal 0 NULL NULL -enum_value enum('apple', 'banana', 'orange') 0 NULL NULL -fixed_string_value text 0 NULL NULL -float32 float 0 NULL NULL -float64 double 0 NULL NULL -int32 int 0 NULL NULL -ipv4_value text 0 NULL NULL -ipv6_value text 0 NULL NULL -json_value json 0 NULL NULL -low_cardinality blob 0 NULL NULL -low_cardinality_date datetime 0 NULL NULL -map_value json 0 NULL NULL -nested.nested_int text 0 NULL NULL -nested.nested_string text 0 NULL NULL -nullable_value int 0 NULL NULL -string_value blob 0 NULL NULL -tuple_value json 0 NULL NULL -uint64 bigint unsigned 0 PRI SOR NULL NULL -uuid_value char 0 NULL NULL +aggregate_function TEXT 0 NULL NULL +array_value TEXT 0 NULL NULL +boolean_value TINYINT UNSIGNED 0 NULL NULL +date32_value DATE 0 NULL NULL +date_value DATE 0 NULL NULL +datetime64_value DATETIME 0 NULL NULL +datetime_value DATETIME 0 NULL NULL +decimal_value DECIMAL 0 NULL NULL +enum_value ENUM('apple', 'banana', 'orange') 0 NULL NULL +fixed_string_value TEXT 0 NULL NULL +float32 FLOAT 0 NULL NULL +float64 DOUBLE 0 NULL NULL +int32 INT 0 NULL NULL +ipv4_value TEXT 0 NULL NULL +ipv6_value TEXT 0 NULL NULL +json_value JSON 0 NULL NULL +low_cardinality BLOB 0 NULL NULL +low_cardinality_date DATETIME 0 NULL NULL +map_value JSON 0 NULL NULL +nested.nested_int TEXT 0 NULL NULL +nested.nested_string TEXT 0 NULL NULL +nullable_value INT 0 NULL NULL +string_value BLOB 0 NULL NULL +tuple_value JSON 0 NULL NULL +uint64 BIGINT UNSIGNED 0 PRI SOR NULL NULL +uuid_value CHAR 0 NULL NULL field type null key default extra -int32 int 0 NULL -nested.nested_int text 0 NULL -uint64 bigint unsigned 0 PRI SOR NULL +int32 INT 0 NULL +nested.nested_int TEXT 0 NULL +uint64 BIGINT UNSIGNED 0 PRI SOR NULL field type null key default extra -aggregate_function text 0 NULL -array_value text 0 NULL -boolean_value tinyint unsigned 0 NULL -date32_value date 0 NULL -date_value date 0 NULL -datetime64_value datetime 0 NULL -datetime_value datetime 0 NULL -decimal_value decimal 0 NULL -enum_value enum('apple', 'banana', 'orange') 0 NULL -fixed_string_value text 0 NULL -float32 float 0 NULL -float64 double 0 NULL -ipv4_value text 0 NULL -ipv6_value text 0 NULL -json_value json 0 NULL -low_cardinality blob 0 NULL -low_cardinality_date datetime 0 NULL -map_value json 0 NULL -nested.nested_string text 0 NULL -nullable_value int 0 NULL -string_value blob 0 NULL -tuple_value json 0 NULL -uuid_value char 0 NULL +aggregate_function TEXT 0 NULL +array_value TEXT 0 NULL +boolean_value TINYINT UNSIGNED 0 NULL +date32_value DATE 0 NULL +date_value DATE 0 NULL +datetime64_value DATETIME 0 NULL +datetime_value DATETIME 0 NULL +decimal_value DECIMAL 0 NULL +enum_value ENUM('apple', 'banana', 'orange') 0 NULL +fixed_string_value TEXT 0 NULL +float32 FLOAT 0 NULL +float64 DOUBLE 0 NULL +ipv4_value TEXT 0 NULL +ipv6_value TEXT 0 NULL +json_value 
JSON 0 NULL +low_cardinality BLOB 0 NULL +low_cardinality_date DATETIME 0 NULL +map_value JSON 0 NULL +nested.nested_string TEXT 0 NULL +nullable_value INT 0 NULL +string_value BLOB 0 NULL +tuple_value JSON 0 NULL +uuid_value CHAR 0 NULL field type null key default extra -int32 int 0 NULL -nested.nested_int text 0 NULL -uint64 bigint unsigned 0 PRI SOR NULL +int32 INT 0 NULL +nested.nested_int TEXT 0 NULL +uint64 BIGINT UNSIGNED 0 PRI SOR NULL field type null key default extra -aggregate_function text 0 NULL -array_value text 0 NULL -boolean_value tinyint unsigned 0 NULL -date32_value date 0 NULL -date_value date 0 NULL -datetime64_value datetime 0 NULL -datetime_value datetime 0 NULL -decimal_value decimal 0 NULL -enum_value enum('apple', 'banana', 'orange') 0 NULL -fixed_string_value text 0 NULL -float32 float 0 NULL -float64 double 0 NULL -ipv4_value text 0 NULL -ipv6_value text 0 NULL -json_value json 0 NULL -low_cardinality blob 0 NULL -low_cardinality_date datetime 0 NULL -map_value json 0 NULL -nested.nested_string text 0 NULL -nullable_value int 0 NULL -string_value blob 0 NULL -tuple_value json 0 NULL -uuid_value char 0 NULL +aggregate_function TEXT 0 NULL +array_value TEXT 0 NULL +boolean_value TINYINT UNSIGNED 0 NULL +date32_value DATE 0 NULL +date_value DATE 0 NULL +datetime64_value DATETIME 0 NULL +datetime_value DATETIME 0 NULL +decimal_value DECIMAL 0 NULL +enum_value ENUM('apple', 'banana', 'orange') 0 NULL +fixed_string_value TEXT 0 NULL +float32 FLOAT 0 NULL +float64 DOUBLE 0 NULL +ipv4_value TEXT 0 NULL +ipv6_value TEXT 0 NULL +json_value JSON 0 NULL +low_cardinality BLOB 0 NULL +low_cardinality_date DATETIME 0 NULL +map_value JSON 0 NULL +nested.nested_string TEXT 0 NULL +nullable_value INT 0 NULL +string_value BLOB 0 NULL +tuple_value JSON 0 NULL +uuid_value CHAR 0 NULL field type null key default extra -int32 int 0 NULL -nested.nested_int text 0 NULL -uint64 bigint unsigned 0 PRI SOR NULL +int32 INT 0 NULL +nested.nested_int TEXT 0 NULL +uint64 BIGINT UNSIGNED 0 PRI SOR NULL field type null key default extra -aggregate_function text 0 NULL +aggregate_function TEXT 0 NULL field type null key default extra -aggregate_function text 0 NULL -array_value text 0 NULL -boolean_value tinyint unsigned 0 NULL -date32_value date 0 NULL -date_value date 0 NULL -datetime64_value datetime 0 NULL -datetime_value datetime 0 NULL -decimal_value decimal 0 NULL -enum_value enum('apple', 'banana', 'orange') 0 NULL -fixed_string_value text 0 NULL -float32 float 0 NULL -float64 double 0 NULL -int32 int 0 NULL -ipv4_value text 0 NULL -ipv6_value text 0 NULL -json_value json 0 NULL -low_cardinality blob 0 NULL -low_cardinality_date datetime 0 NULL -map_value json 0 NULL -nested.nested_int text 0 NULL -nested.nested_string text 0 NULL -nullable_value int 0 NULL -string_value blob 0 NULL -tuple_value json 0 NULL -uint64 bigint unsigned 0 PRI SOR NULL -uuid_value char 0 NULL +aggregate_function TEXT 0 NULL +array_value TEXT 0 NULL +boolean_value TINYINT UNSIGNED 0 NULL +date32_value DATE 0 NULL +date_value DATE 0 NULL +datetime64_value DATETIME 0 NULL +datetime_value DATETIME 0 NULL +decimal_value DECIMAL 0 NULL +enum_value ENUM('apple', 'banana', 'orange') 0 NULL +fixed_string_value TEXT 0 NULL +float32 FLOAT 0 NULL +float64 DOUBLE 0 NULL +int32 INT 0 NULL +ipv4_value TEXT 0 NULL +ipv6_value TEXT 0 NULL +json_value JSON 0 NULL +low_cardinality BLOB 0 NULL +low_cardinality_date DATETIME 0 NULL +map_value JSON 0 NULL +nested.nested_int TEXT 0 NULL +nested.nested_string TEXT 0 NULL +nullable_value 
INT 0 NULL +string_value BLOB 0 NULL +tuple_value JSON 0 NULL +uint64 BIGINT UNSIGNED 0 PRI SOR NULL +uuid_value CHAR 0 NULL field type null key default extra -aggregate_function text 0 NULL -array_value text 0 NULL -boolean_value tinyint unsigned 0 NULL -date32_value date 0 NULL -date_value date 0 NULL -datetime64_value datetime 0 NULL -datetime_value datetime 0 NULL -decimal_value decimal 0 NULL -enum_value enum('apple', 'banana', 'orange') 0 NULL -fixed_string_value text 0 NULL -float32 float 0 NULL -float64 double 0 NULL -int32 int 0 NULL -ipv4_value text 0 NULL -ipv6_value text 0 NULL -json_value json 0 NULL -low_cardinality blob 0 NULL -low_cardinality_date datetime 0 NULL -map_value json 0 NULL -nested.nested_int text 0 NULL -nested.nested_string text 0 NULL -nullable_value int 0 NULL -string_value blob 0 NULL -tuple_value json 0 NULL -uint64 bigint unsigned 0 PRI SOR NULL -uuid_value char 0 NULL +aggregate_function TEXT 0 NULL +array_value TEXT 0 NULL +boolean_value TINYINT UNSIGNED 0 NULL +date32_value DATE 0 NULL +date_value DATE 0 NULL +datetime64_value DATETIME 0 NULL +datetime_value DATETIME 0 NULL +decimal_value DECIMAL 0 NULL +enum_value ENUM('apple', 'banana', 'orange') 0 NULL +fixed_string_value TEXT 0 NULL +float32 FLOAT 0 NULL +float64 DOUBLE 0 NULL +int32 INT 0 NULL +ipv4_value TEXT 0 NULL +ipv6_value TEXT 0 NULL +json_value JSON 0 NULL +low_cardinality BLOB 0 NULL +low_cardinality_date DATETIME 0 NULL +map_value JSON 0 NULL +nested.nested_int TEXT 0 NULL +nested.nested_string TEXT 0 NULL +nullable_value INT 0 NULL +string_value BLOB 0 NULL +tuple_value JSON 0 NULL +uint64 BIGINT UNSIGNED 0 PRI SOR NULL +uuid_value CHAR 0 NULL field type null key default extra -aggregate_function text 0 NULL -array_value text 0 NULL -boolean_value tinyint unsigned 0 NULL -date32_value date 0 NULL -date_value date 0 NULL -datetime64_value datetime 0 NULL -datetime_value datetime 0 NULL -decimal_value decimal 0 NULL -enum_value enum('apple', 'banana', 'orange') 0 NULL -fixed_string_value text 0 NULL -float32 float 0 NULL -float64 double 0 NULL -int32 int 0 NULL -ipv4_value text 0 NULL -ipv6_value text 0 NULL -json_value json 0 NULL -low_cardinality blob 0 NULL -low_cardinality_date datetime 0 NULL -map_value json 0 NULL -nested.nested_int text 0 NULL -nested.nested_string text 0 NULL -nullable_value int 0 NULL -string_value blob 0 NULL -tuple_value json 0 NULL -uint64 bigint unsigned 0 PRI SOR NULL -uuid_value char 0 NULL +aggregate_function TEXT 0 NULL +array_value TEXT 0 NULL +boolean_value TINYINT UNSIGNED 0 NULL +date32_value DATE 0 NULL +date_value DATE 0 NULL +datetime64_value DATETIME 0 NULL +datetime_value DATETIME 0 NULL +decimal_value DECIMAL 0 NULL +enum_value ENUM('apple', 'banana', 'orange') 0 NULL +fixed_string_value TEXT 0 NULL +float32 FLOAT 0 NULL +float64 DOUBLE 0 NULL +int32 INT 0 NULL +ipv4_value TEXT 0 NULL +ipv6_value TEXT 0 NULL +json_value JSON 0 NULL +low_cardinality BLOB 0 NULL +low_cardinality_date DATETIME 0 NULL +map_value JSON 0 NULL +nested.nested_int TEXT 0 NULL +nested.nested_string TEXT 0 NULL +nullable_value INT 0 NULL +string_value BLOB 0 NULL +tuple_value JSON 0 NULL +uint64 BIGINT UNSIGNED 0 PRI SOR NULL +uuid_value CHAR 0 NULL diff --git a/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.sh b/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.sh index a446c6e817e..fd1ad92f060 100755 --- a/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.sh +++ 
b/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.sh @@ -1,4 +1,8 @@ -#!/bin/bash +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh # This script tests the MySQL compatibility of the SHOW COLUMNS command in ClickHouse USER="default" From 34c4b89b161df4427912d402fa6aee4a6821dde1 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sun, 4 Jun 2023 16:23:34 +0000 Subject: [PATCH 0227/1997] fix backward compatibility for IP types hashing in aggregate functions --- base/base/IPv4andIPv6.h | 24 +++++++++++++------ .../AggregateFunctionMap.cpp | 3 +++ src/AggregateFunctions/AggregateFunctionMap.h | 3 +++ .../AggregateFunctionUniq.cpp | 9 +++++++ .../AggregateFunctionUniq.h | 12 +++++----- src/Core/Types_fwd.h | 2 +- src/DataTypes/DataTypeMap.cpp | 3 ++- 7 files changed, 41 insertions(+), 15 deletions(-) diff --git a/base/base/IPv4andIPv6.h b/base/base/IPv4andIPv6.h index 7b745ec7b84..4aee2329572 100644 --- a/base/base/IPv4andIPv6.h +++ b/base/base/IPv4andIPv6.h @@ -7,16 +7,17 @@ namespace DB { - using IPv4 = StrongTypedef; + struct IPv4 : StrongTypedef + { + using StrongTypedef::StrongTypedef; + using StrongTypedef::operator=; + constexpr explicit IPv4(UInt64 value): StrongTypedef(static_cast(value)) {} + }; struct IPv6 : StrongTypedef { - constexpr IPv6() = default; - constexpr explicit IPv6(const UInt128 & x) : StrongTypedef(x) {} - constexpr explicit IPv6(UInt128 && x) : StrongTypedef(std::move(x)) {} - - IPv6 & operator=(const UInt128 & rhs) { StrongTypedef::operator=(rhs); return *this; } - IPv6 & operator=(UInt128 && rhs) { StrongTypedef::operator=(std::move(rhs)); return *this; } + using StrongTypedef::StrongTypedef; + using StrongTypedef::operator=; bool operator<(const IPv6 & rhs) const { @@ -62,4 +63,13 @@ namespace std return std::hash()(x.toUnderType()); } }; + + template <> + struct hash + { + size_t operator()(const DB::IPv4 & x) const + { + return std::hash()(x.toUnderType()); + } + }; } diff --git a/src/AggregateFunctions/AggregateFunctionMap.cpp b/src/AggregateFunctions/AggregateFunctionMap.cpp index f6100602f3f..38e4f49d9a2 100644 --- a/src/AggregateFunctions/AggregateFunctionMap.cpp +++ b/src/AggregateFunctions/AggregateFunctionMap.cpp @@ -100,6 +100,9 @@ public: return std::make_shared>(nested_function, arguments); case TypeIndex::UUID: return std::make_shared>(nested_function, arguments); + case TypeIndex::IPv4: + return std::make_shared>(nested_function, arguments); + case TypeIndex::IPv6: case TypeIndex::FixedString: case TypeIndex::String: return std::make_shared>(nested_function, arguments); diff --git a/src/AggregateFunctions/AggregateFunctionMap.h b/src/AggregateFunctions/AggregateFunctionMap.h index 55f6611974e..4a4ae92735b 100644 --- a/src/AggregateFunctions/AggregateFunctionMap.h +++ b/src/AggregateFunctions/AggregateFunctionMap.h @@ -19,6 +19,7 @@ #include #include #include "DataTypes/Serializations/ISerialization.h" +#include #include "base/types.h" #include #include "AggregateFunctions/AggregateFunctionFactory.h" @@ -147,6 +148,8 @@ public: StringRef key_ref; if (key_type->getTypeId() == TypeIndex::FixedString) key_ref = assert_cast(key_column).getDataAt(offset + i); + else if (key_type->getTypeId() == TypeIndex::IPv6) + key_ref = assert_cast(key_column).getDataAt(offset + i); else key_ref = assert_cast(key_column).getDataAt(offset + i); diff --git a/src/AggregateFunctions/AggregateFunctionUniq.cpp b/src/AggregateFunctions/AggregateFunctionUniq.cpp 
index bf998c5ee9e..f5147daa97b 100644 --- a/src/AggregateFunctions/AggregateFunctionUniq.cpp +++ b/src/AggregateFunctions/AggregateFunctionUniq.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include @@ -60,6 +61,10 @@ createAggregateFunctionUniq(const std::string & name, const DataTypes & argument return std::make_shared>(argument_types); else if (which.isUUID()) return std::make_shared>(argument_types); + else if (which.isIPv4()) + return std::make_shared>(argument_types); + else if (which.isIPv6()) + return std::make_shared>(argument_types); else if (which.isTuple()) { if (use_exact_hash_function) @@ -109,6 +114,10 @@ createAggregateFunctionUniq(const std::string & name, const DataTypes & argument return std::make_shared>>(argument_types); else if (which.isUUID()) return std::make_shared>>(argument_types); + else if (which.isIPv4()) + return std::make_shared>>(argument_types); + else if (which.isIPv6()) + return std::make_shared>>(argument_types); else if (which.isTuple()) { if (use_exact_hash_function) diff --git a/src/AggregateFunctions/AggregateFunctionUniq.h b/src/AggregateFunctions/AggregateFunctionUniq.h index c782b9314fd..0524dd53ec0 100644 --- a/src/AggregateFunctions/AggregateFunctionUniq.h +++ b/src/AggregateFunctions/AggregateFunctionUniq.h @@ -248,17 +248,17 @@ struct Adder AggregateFunctionUniqUniquesHashSetData> || std::is_same_v>) { const auto & column = *columns[0]; - if constexpr (!std::is_same_v) + if constexpr (std::is_same_v || std::is_same_v) + { + StringRef value = column.getDataAt(row_num); + data.set.insert(CityHash_v1_0_2::CityHash64(value.data, value.size)); + } + else { using ValueType = typename decltype(data.set)::value_type; const auto & value = assert_cast &>(column).getElement(row_num); data.set.insert(static_cast(AggregateFunctionUniqTraits::hash(value))); } - else - { - StringRef value = column.getDataAt(row_num); - data.set.insert(CityHash_v1_0_2::CityHash64(value.data, value.size)); - } } else if constexpr (std::is_same_v>) { diff --git a/src/Core/Types_fwd.h b/src/Core/Types_fwd.h index 3db1127fe5a..a59e4b6eab8 100644 --- a/src/Core/Types_fwd.h +++ b/src/Core/Types_fwd.h @@ -27,7 +27,7 @@ namespace DB using UUID = StrongTypedef; -using IPv4 = StrongTypedef; +struct IPv4; struct IPv6; diff --git a/src/DataTypes/DataTypeMap.cpp b/src/DataTypes/DataTypeMap.cpp index 0f5d97e6761..90561857fad 100644 --- a/src/DataTypes/DataTypeMap.cpp +++ b/src/DataTypes/DataTypeMap.cpp @@ -69,7 +69,7 @@ void DataTypeMap::assertKeyType() const if (!checkKeyType(key_type)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Type of Map key must be a type, that can be represented by integer " - "or String or FixedString (possibly LowCardinality) or UUID," + "or String or FixedString (possibly LowCardinality) or UUID or IPv6," " but {} given", key_type->getName()); } @@ -120,6 +120,7 @@ bool DataTypeMap::checkKeyType(DataTypePtr key_type) else if (!key_type->isValueRepresentedByInteger() && !isStringOrFixedString(*key_type) && !WhichDataType(key_type).isNothing() + && !WhichDataType(key_type).isIPv6() && !WhichDataType(key_type).isUUID()) { return false; From 2cc457141ed83a50c7a6e4dc395325c6fd4a898d Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Sun, 4 Jun 2023 15:32:54 -0300 Subject: [PATCH 0228/1997] clean documentation of ip4 ip6 from domains --- docs/en/interfaces/formats.md | 34 +++++++++---------- docs/en/operations/system-tables/query_log.md | 4 +-- .../system-tables/query_thread_log.md | 4 +-- .../operations/system-tables/session_log.md | 2 +- 
.../operations/system-tables/zookeeper_log.md | 2 +- docs/en/sql-reference/data-types/index.md | 2 +- .../data-types/{domains => }/ipv4.md | 27 +++------------ .../data-types/{domains => }/ipv6.md | 29 +++------------- .../functions/ip-address-functions.md | 6 ++-- docs/redirects.txt | 10 +++--- 10 files changed, 41 insertions(+), 79 deletions(-) rename docs/en/sql-reference/data-types/{domains => }/ipv4.md (60%) rename docs/en/sql-reference/data-types/{domains => }/ipv6.md (61%) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 2ab9e8caec4..d75fb32b571 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -1292,8 +1292,8 @@ For output it uses the following correspondence between ClickHouse types and BSO | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `\x04` array | | [Named Tuple](/docs/en/sql-reference/data-types/tuple.md) | `\x03` document | | [Map](/docs/en/sql-reference/data-types/map.md) | `\x03` document | -| [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `\x10` int32 | -| [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `\x05` binary, `\x00` binary subtype | +| [IPv4](/docs/en/sql-reference/data-types/ipv4.md) | `\x10` int32 | +| [IPv6](/docs/en/sql-reference/data-types/ipv6.md) | `\x05` binary, `\x00` binary subtype | For input it uses the following correspondence between BSON types and ClickHouse types: @@ -1303,7 +1303,7 @@ For input it uses the following correspondence between BSON types and ClickHouse | `\x02` string | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | | `\x03` document | [Map](/docs/en/sql-reference/data-types/map.md)/[Named Tuple](/docs/en/sql-reference/data-types/tuple.md) | | `\x04` array | [Array](/docs/en/sql-reference/data-types/array.md)/[Tuple](/docs/en/sql-reference/data-types/tuple.md) | -| `\x05` binary, `\x00` binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md)/[IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | +| `\x05` binary, `\x00` binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md)/[IPv6](/docs/en/sql-reference/data-types/ipv6.md) | | `\x05` binary, `\x02` old binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | | `\x05` binary, `\x03` old uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) | | `\x05` binary, `\x04` uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) | @@ -1313,7 +1313,7 @@ For input it uses the following correspondence between BSON types and ClickHouse | `\x0A` null value | [NULL](/docs/en/sql-reference/data-types/nullable.md) | | `\x0D` JavaScript code | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | | `\x0E` symbol | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | -| `\x10` int32 | [Int32/UInt32](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal32](/docs/en/sql-reference/data-types/decimal.md)/[IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md)/[Enum8/Enum16](/docs/en/sql-reference/data-types/enum.md) | +| `\x10` int32 | 
[Int32/UInt32](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal32](/docs/en/sql-reference/data-types/decimal.md)/[IPv4](/docs/en/sql-reference/data-types/ipv4.md)/[Enum8/Enum16](/docs/en/sql-reference/data-types/enum.md) | | `\x12` int64 | [Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal64](/docs/en/sql-reference/data-types/decimal.md)/[DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | Other BSON types are not supported. Also, it performs conversion between different integer types (for example, you can insert BSON int32 value into ClickHouse UInt8). @@ -1663,8 +1663,8 @@ The table below shows supported data types and how they match ClickHouse [data t | `ENUM` | [Enum(8/16)](/docs/en/sql-reference/data-types/enum.md) | `ENUM` | | `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` | | `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` | -| `UINT32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `UINT32` | -| `DATA` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `DATA` | +| `UINT32` | [IPv4](/docs/en/sql-reference/data-types/ipv4.md) | `UINT32` | +| `DATA` | [IPv6](/docs/en/sql-reference/data-types/ipv6.md) | `DATA` | | `DATA` | [Int128/UInt128/Int256/UInt256](/docs/en/sql-reference/data-types/int-uint.md) | `DATA` | | `DATA` | [Decimal128/Decimal256](/docs/en/sql-reference/data-types/decimal.md) | `DATA` | | `STRUCT(entries LIST(STRUCT(key Key, value Value)))` | [Map](/docs/en/sql-reference/data-types/map.md) | `STRUCT(entries LIST(STRUCT(key Key, value Value)))` | @@ -1866,8 +1866,8 @@ The table below shows supported data types and how they match ClickHouse [data t | `long (timestamp-millis)` \** | [DateTime64(3)](/docs/en/sql-reference/data-types/datetime.md) | `long (timestamp-millis)` \** | | `long (timestamp-micros)` \** | [DateTime64(6)](/docs/en/sql-reference/data-types/datetime.md) | `long (timestamp-micros)` \** | | `bytes (decimal)` \** | [DateTime64(N)](/docs/en/sql-reference/data-types/datetime.md) | `bytes (decimal)` \** | -| `int` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `int` | -| `fixed(16)` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `fixed(16)` | +| `int` | [IPv4](/docs/en/sql-reference/data-types/ipv4.md) | `int` | +| `fixed(16)` | [IPv6](/docs/en/sql-reference/data-types/ipv6.md) | `fixed(16)` | | `bytes (decimal)` \** | [Decimal(P, S)](/docs/en/sql-reference/data-types/decimal.md) | `bytes (decimal)` \** | | `string (uuid)` \** | [UUID](/docs/en/sql-reference/data-types/uuid.md) | `string (uuid)` \** | | `fixed(16)` | [Int128/UInt128](/docs/en/sql-reference/data-types/int-uint.md) | `fixed(16)` | @@ -2001,9 +2001,9 @@ The table below shows supported data types and how they match ClickHouse [data t | `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` | | `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` | | `MAP` | [Map](/docs/en/sql-reference/data-types/map.md) | `MAP` | -| `UINT32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `UINT32` | -| `FIXED_LENGTH_BYTE_ARRAY`, `BINARY` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `FIXED_LENGTH_BYTE_ARRAY` | -| `FIXED_LENGTH_BYTE_ARRAY`, `BINARY` | [Int128/UInt128/Int256/UInt256](/docs/en/sql-reference/data-types/int-uint.md) | `FIXED_LENGTH_BYTE_ARRAY` | +| `UINT32` | [IPv4](/docs/en/sql-reference/data-types/ipv4.md) | `UINT32` | +| `FIXED_LENGTH_BYTE_ARRAY`, `BINARY` | 
[IPv6](/docs/en/sql-reference/data-types/ipv6.md) | `FIXED_LENGTH_BYTE_ARRAY` | +| `FIXED_LENGTH_BYTE_ARRAY`, `BINARY` | [Int128/UInt128/Int256/UInt256](/docs/en/sql-reference/data-types/int-uint.md) | `FIXED_LENGTH_BYTE_ARRAY` | Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types also can be nested. @@ -2057,7 +2057,7 @@ Special format for reading Parquet file metadata (https://parquet.apache.org/doc - logical_type - column logical type - compression - compression used for this column - total_uncompressed_size - total uncompressed bytes size of the column, calculated as the sum of total_uncompressed_size of the column from all row groups - - total_compressed_size - total compressed bytes size of the column, calculated as the sum of total_compressed_size of the column from all row groups + - total_compressed_size - total compressed bytes size of the column, calculated as the sum of total_compressed_size of the column from all row groups - space_saved - percent of space saved by compression, calculated as (1 - total_compressed_size/total_uncompressed_size). - encodings - the list of encodings used for this column - row_groups - the list of row groups metadata with the next structure: @@ -2204,9 +2204,9 @@ The table below shows supported data types and how they match ClickHouse [data t | `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` | | `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` | | `MAP` | [Map](/docs/en/sql-reference/data-types/map.md) | `MAP` | -| `UINT32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `UINT32` | -| `FIXED_SIZE_BINARY`, `BINARY` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `FIXED_SIZE_BINARY` | -| `FIXED_SIZE_BINARY`, `BINARY` | [Int128/UInt128/Int256/UInt256](/docs/en/sql-reference/data-types/int-uint.md) | `FIXED_SIZE_BINARY` | +| `UINT32` | [IPv4](/docs/en/sql-reference/data-types/ipv4.md) | `UINT32` | +| `FIXED_SIZE_BINARY`, `BINARY` | [IPv6](/docs/en/sql-reference/data-types/ipv6.md) | `FIXED_SIZE_BINARY` | +| `FIXED_SIZE_BINARY`, `BINARY` | [Int128/UInt128/Int256/UInt256](/docs/en/sql-reference/data-types/int-uint.md) | `FIXED_SIZE_BINARY` | Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types also can be nested. 
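The IPv4/IPv6 rows in the tables above are easiest to verify end to end with the `file` table function; a small sketch, assuming write access to the server's user files directory:

    -- IPv4 is carried as UINT32, IPv6 as a 16-byte binary value.
    INSERT INTO FUNCTION file('ips.parquet', 'Parquet', 'v4 IPv4, v6 IPv6')
        VALUES ('127.0.0.1', '::1');
    SELECT v4, v6 FROM file('ips.parquet', 'Parquet', 'v4 IPv4, v6 IPv6');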
@@ -2272,7 +2272,7 @@ The table below shows supported data types and how they match ClickHouse [data t | `Struct` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `Struct` | | `Map` | [Map](/docs/en/sql-reference/data-types/map.md) | `Map` | | `Int` | [IPv4](/docs/en/sql-reference/data-types/int-uint.md) | `Int` | -| `Binary` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `Binary` | +| `Binary` | [IPv6](/docs/en/sql-reference/data-types/ipv6.md) | `Binary` | | `Binary` | [Int128/UInt128/Int256/UInt256](/docs/en/sql-reference/data-types/int-uint.md) | `Binary` | | `Binary` | [Decimal256](/docs/en/sql-reference/data-types/decimal.md) | `Binary` | @@ -2485,7 +2485,7 @@ ClickHouse supports reading and writing [MessagePack](https://msgpack.org/) data | `uint 64` | [DateTime64](/docs/en/sql-reference/data-types/datetime.md) | `uint 64` | | `fixarray`, `array 16`, `array 32` | [Array](/docs/en/sql-reference/data-types/array.md)/[Tuple](/docs/en/sql-reference/data-types/tuple.md) | `fixarray`, `array 16`, `array 32` | | `fixmap`, `map 16`, `map 32` | [Map](/docs/en/sql-reference/data-types/map.md) | `fixmap`, `map 16`, `map 32` | -| `uint 32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `uint 32` | +| `uint 32` | [IPv4](/docs/en/sql-reference/data-types/ipv4.md) | `uint 32` | | `bin 8` | [String](/docs/en/sql-reference/data-types/string.md) | `bin 8` | | `int 8` | [Enum8](/docs/en/sql-reference/data-types/enum.md) | `int 8` | | `bin 8` | [(U)Int128/(U)Int256](/docs/en/sql-reference/data-types/int-uint.md) | `bin 8` | diff --git a/docs/en/operations/system-tables/query_log.md b/docs/en/operations/system-tables/query_log.md index 71e1452cef1..b9fdd19c643 100644 --- a/docs/en/operations/system-tables/query_log.md +++ b/docs/en/operations/system-tables/query_log.md @@ -71,11 +71,11 @@ Columns: - 0 — Query was initiated by another query as part of distributed query execution. - `user` ([String](../../sql-reference/data-types/string.md)) — Name of the user who initiated the current query. - `query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the query. -- `address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address that was used to make the query. +- `address` ([IPv6](../../sql-reference/data-types/ipv6.md)) — IP address that was used to make the query. - `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — The client port that was used to make the query. - `initial_user` ([String](../../sql-reference/data-types/string.md)) — Name of the user who ran the initial query (for distributed query execution). - `initial_query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the initial query (for distributed query execution). -- `initial_address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address that the parent query was launched from. +- `initial_address` ([IPv6](../../sql-reference/data-types/ipv6.md)) — IP address that the parent query was launched from. - `initial_port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — The client port that was used to make the parent query. - `initial_query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Initial query starting time (for distributed query execution). - `initial_query_start_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Initial query starting time with microseconds precision (for distributed query execution). 
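Because `address` and `initial_address` are typed IPv6, typed literals and IP functions apply to them directly; an illustrative query (an IPv4 client is reported as an IPv4-mapped address such as `::ffff:127.0.0.1`):

    SELECT user, address, query_id
    FROM system.query_log
    WHERE event_date >= yesterday()
      AND address = toIPv6('::ffff:127.0.0.1')
    LIMIT 5;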
diff --git a/docs/en/operations/system-tables/query_thread_log.md b/docs/en/operations/system-tables/query_thread_log.md index cdd23bb15db..a6d5632ade9 100644 --- a/docs/en/operations/system-tables/query_thread_log.md +++ b/docs/en/operations/system-tables/query_thread_log.md @@ -40,11 +40,11 @@ Columns: - 0 — Query was initiated by another query for distributed query execution. - `user` ([String](../../sql-reference/data-types/string.md)) — Name of the user who initiated the current query. - `query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the query. -- `address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address that was used to make the query. +- `address` ([IPv6](../../sql-reference/data-types/ipv6.md)) — IP address that was used to make the query. - `port` ([UInt16](../../sql-reference/data-types/int-uint.md#uint-ranges)) — The client port that was used to make the query. - `initial_user` ([String](../../sql-reference/data-types/string.md)) — Name of the user who ran the initial query (for distributed query execution). - `initial_query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the initial query (for distributed query execution). -- `initial_address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address that the parent query was launched from. +- `initial_address` ([IPv6](../../sql-reference/data-types/ipv6.md)) — IP address that the parent query was launched from. - `initial_port` ([UInt16](../../sql-reference/data-types/int-uint.md#uint-ranges)) — The client port that was used to make the parent query. - `interface` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Interface that the query was initiated from. Possible values: - 1 — TCP. diff --git a/docs/en/operations/system-tables/session_log.md b/docs/en/operations/system-tables/session_log.md index 661d34677e4..5b1a2b2a489 100644 --- a/docs/en/operations/system-tables/session_log.md +++ b/docs/en/operations/system-tables/session_log.md @@ -28,7 +28,7 @@ Columns: - `profiles` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — The list of profiles set for all roles and/or users. - `roles` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — The list of roles to which the profile is applied. - `settings` ([Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md), [String](../../sql-reference/data-types/string.md)))) — Settings that were changed when the client logged in/out. -- `client_address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — The IP address that was used to log in/out. +- `client_address` ([IPv6](../../sql-reference/data-types/ipv6.md)) — The IP address that was used to log in/out. - `client_port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — The client port that was used to log in/out. - `interface` ([Enum8](../../sql-reference/data-types/enum.md)) — The interface from which the login was initiated. 
Possible values: - `TCP` diff --git a/docs/en/operations/system-tables/zookeeper_log.md b/docs/en/operations/system-tables/zookeeper_log.md index b7cc4e22cd6..dce5be29f62 100644 --- a/docs/en/operations/system-tables/zookeeper_log.md +++ b/docs/en/operations/system-tables/zookeeper_log.md @@ -15,7 +15,7 @@ Columns with request parameters: - `Finalize` — The connection is lost, no response was received. - `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the event happened. - `event_time` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — The date and time when the event happened. -- `address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address of ZooKeeper server that was used to make the request. +- `address` ([IPv6](../../sql-reference/data-types/ipv6.md)) — IP address of ZooKeeper server that was used to make the request. - `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — The port of ZooKeeper server that was used to make the request. - `session_id` ([Int64](../../sql-reference/data-types/int-uint.md)) — The session ID that the ZooKeeper server sets for each connection. - `xid` ([Int32](../../sql-reference/data-types/int-uint.md)) — The ID of the request within the session. This is usually a sequential request number. It is the same for the request row and the paired `response`/`finalize` row. diff --git a/docs/en/sql-reference/data-types/index.md b/docs/en/sql-reference/data-types/index.md index 508307a0543..ffd063590fa 100644 --- a/docs/en/sql-reference/data-types/index.md +++ b/docs/en/sql-reference/data-types/index.md @@ -28,6 +28,6 @@ ClickHouse data types include: - **Nested data structures**: A [`Nested` data structure](./nested-data-structures/index.md) is like a table inside a cell - **Tuples**: A [`Tuple` of elements](./tuple.md), each having an individual type. - **Nullable**: [`Nullable`](./nullable.md) allows you to store a value as `NULL` when a value is "missing" (instead of the column settings its default value for the data type) -- **IP addresses**: use [`IPv4`](./domains/ipv4.md) and [`IPv6`](./domains/ipv6.md) to efficiently store IP addresses +- **IP addresses**: use [`IPv4`](./ipv4.md) and [`IPv6`](./ipv6.md) to efficiently store IP addresses - **Geo types**: for [geographical data](./geo.md), including `Point`, `Ring`, `Polygon` and `MultiPolygon` - **Special data types**: including [`Expression`](./special-data-types/expression.md), [`Set`](./special-data-types/set.md), [`Nothing`](./special-data-types/nothing.md) and [`Interval`](./special-data-types/interval.md) diff --git a/docs/en/sql-reference/data-types/domains/ipv4.md b/docs/en/sql-reference/data-types/ipv4.md similarity index 60% rename from docs/en/sql-reference/data-types/domains/ipv4.md rename to docs/en/sql-reference/data-types/ipv4.md index b34814211fc..288806f47b3 100644 --- a/docs/en/sql-reference/data-types/domains/ipv4.md +++ b/docs/en/sql-reference/data-types/ipv4.md @@ -1,12 +1,12 @@ --- -slug: /en/sql-reference/data-types/domains/ipv4 +slug: /en/sql-reference/data-types/ipv4 sidebar_position: 59 sidebar_label: IPv4 --- ## IPv4 -`IPv4` is a domain based on `UInt32` type and serves as a typed replacement for storing IPv4 values. It provides compact storage with the human-friendly input-output format and column type information on inspection. +IPv4 addresses. Stored in 4 bytes as UInt32. 
### Basic Usage @@ -57,25 +57,6 @@ SELECT toTypeName(from), hex(from) FROM hits LIMIT 1; └──────────────────┴───────────┘ ``` -Domain values are not implicitly convertible to types other than `UInt32`. -If you want to convert `IPv4` value to a string, you have to do that explicitly with `IPv4NumToString()` function: +**See Also** -``` sql -SELECT toTypeName(s), IPv4NumToString(from) as s FROM hits LIMIT 1; -``` - - ┌─toTypeName(IPv4NumToString(from))─┬─s──────────────┐ - │ String │ 183.247.232.58 │ - └───────────────────────────────────┴────────────────┘ - -Or cast to a `UInt32` value: - -``` sql -SELECT toTypeName(i), CAST(from as UInt32) as i FROM hits LIMIT 1; -``` - -``` text -┌─toTypeName(CAST(from, 'UInt32'))─┬──────────i─┐ -│ UInt32 │ 3086477370 │ -└──────────────────────────────────┴────────────┘ -``` +- [Functions for Working with IPv4 and IPv6 Addresses](../functions/ip-address-functions.md) diff --git a/docs/en/sql-reference/data-types/domains/ipv6.md b/docs/en/sql-reference/data-types/ipv6.md similarity index 61% rename from docs/en/sql-reference/data-types/domains/ipv6.md rename to docs/en/sql-reference/data-types/ipv6.md index dcb22e3cb6d..284a1f80854 100644 --- a/docs/en/sql-reference/data-types/domains/ipv6.md +++ b/docs/en/sql-reference/data-types/ipv6.md @@ -1,12 +1,12 @@ --- -slug: /en/sql-reference/data-types/domains/ipv6 +slug: /en/sql-reference/data-types/ipv6 sidebar_position: 60 sidebar_label: IPv6 --- ## IPv6 -`IPv6` is a domain based on `FixedString(16)` type and serves as a typed replacement for storing IPv6 values. It provides compact storage with the human-friendly input-output format and column type information on inspection. +IPv6 addresses. Stored in 16 bytes as UInt128. ### Basic Usage @@ -57,27 +57,6 @@ SELECT toTypeName(from), hex(from) FROM hits LIMIT 1; └──────────────────┴──────────────────────────────────┘ ``` -Domain values are not implicitly convertible to types other than `FixedString(16)`. -If you want to convert `IPv6` value to a string, you have to do that explicitly with `IPv6NumToString()` function: +**See Also** -``` sql -SELECT toTypeName(s), IPv6NumToString(from) as s FROM hits LIMIT 1; -``` - -``` text -┌─toTypeName(IPv6NumToString(from))─┬─s─────────────────────────────┐ -│ String │ 2001:44c8:129:2632:33:0:252:2 │ -└───────────────────────────────────┴───────────────────────────────┘ -``` - -Or cast to a `FixedString(16)` value: - -``` sql -SELECT toTypeName(i), CAST(from as FixedString(16)) as i FROM hits LIMIT 1; -``` - -``` text -┌─toTypeName(CAST(from, 'FixedString(16)'))─┬─i───────┐ -│ FixedString(16) │ ��� │ -└───────────────────────────────────────────┴─────────┘ -``` +- [Functions for Working with IPv4 and IPv6 Addresses](../functions/ip-address-functions.md) diff --git a/docs/en/sql-reference/functions/ip-address-functions.md b/docs/en/sql-reference/functions/ip-address-functions.md index 0dc1db1161b..33c788a632e 100644 --- a/docs/en/sql-reference/functions/ip-address-functions.md +++ b/docs/en/sql-reference/functions/ip-address-functions.md @@ -248,7 +248,7 @@ SELECT IPv6CIDRToRange(toIPv6('2001:0db8:0000:85a3:0000:0000:ac1f:8001'), 32); ## toIPv4(string) -An alias to `IPv4StringToNum()` that takes a string form of IPv4 address and returns value of [IPv4](../../sql-reference/data-types/domains/ipv4.md) type, which is binary equal to value returned by `IPv4StringToNum()`. 
+An alias to `IPv4StringToNum()` that takes a string form of IPv4 address and returns value of [IPv4](../../sql-reference/data-types/ipv4.md) type, which is binary equal to value returned by `IPv4StringToNum()`. ``` sql WITH @@ -296,7 +296,7 @@ Same as `toIPv6`, but if the IPv6 address has an invalid format, it returns null ## toIPv6 -Converts a string form of IPv6 address to [IPv6](../../sql-reference/data-types/domains/ipv6.md) type. If the IPv6 address has an invalid format, returns an empty value. +Converts a string form of IPv6 address to [IPv6](../../sql-reference/data-types/ipv6.md) type. If the IPv6 address has an invalid format, returns an empty value. Similar to [IPv6StringToNum](#ipv6stringtonums) function, which converts IPv6 address to binary format. If the input string contains a valid IPv4 address, then the IPv6 equivalent of the IPv4 address is returned. @@ -315,7 +315,7 @@ toIPv6(string) - IP address. -Type: [IPv6](../../sql-reference/data-types/domains/ipv6.md). +Type: [IPv6](../../sql-reference/data-types/ipv6.md). **Examples** diff --git a/docs/redirects.txt b/docs/redirects.txt index cea138f7237..3abc8df2b7f 100644 --- a/docs/redirects.txt +++ b/docs/redirects.txt @@ -14,7 +14,7 @@ data_types/datetime.md sql-reference/data-types/datetime.md data_types/datetime64.md sql-reference/data-types/datetime64.md data_types/decimal.md sql-reference/data-types/decimal.md data_types/domains/ipv4.md sql-reference/data-types/domains/ipv4.md -data_types/domains/ipv6.md sql-reference/data-types/domains/ipv6.md +data_types/domains/ipv6.md sql-reference/data-types/ipv6.md data_types/domains/overview.md sql-reference/data-types/domains/overview.md data_types/enum.md sql-reference/data-types/enum.md data_types/fixedstring.md sql-reference/data-types/fixedstring.md @@ -162,7 +162,7 @@ interfaces/third-party/client_libraries.md interfaces/third-party/client-librari interfaces/third-party_client_libraries.md interfaces/third-party/client-libraries.md interfaces/third-party_gui.md interfaces/third-party/gui.md interfaces/third_party/index.md interfaces/third-party/index.md -introduction/index.md +introduction/index.md introduction/distinctive_features.md introduction/distinctive-features.md introduction/features_considered_disadvantages.md introduction/distinctive-features.md introduction/possible_silly_questions.md faq/general.md @@ -305,8 +305,10 @@ sql_reference/data_types/datetime.md sql-reference/data-types/datetime.md sql_reference/data_types/datetime64.md sql-reference/data-types/datetime64.md sql_reference/data_types/decimal.md sql-reference/data-types/decimal.md sql_reference/data_types/domains/index.md sql-reference/data-types/domains/index.md -sql_reference/data_types/domains/ipv4.md sql-reference/data-types/domains/ipv4.md -sql_reference/data_types/domains/ipv6.md sql-reference/data-types/domains/ipv6.md +sql_reference/data_types/domains/ipv4.md sql-reference/data-types/ipv4.md +sql_reference/data-types/domains/ipv4.md sql-reference/data-types/ipv4.md +sql_reference/data_types/domains/ipv6.md sql-reference/data-types/ipv6.md +sql_reference/data-types/domains/ipv6.md sql-reference/data-types/ipv6.md sql_reference/data_types/domains/overview.md sql-reference/data-types/domains/overview.md sql_reference/data_types/enum.md sql-reference/data-types/enum.md sql_reference/data_types/fixedstring.md sql-reference/data-types/fixedstring.md From 2923d57757cd28dde82d8edd762c1912129a68e4 Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Sun, 4 Jun 2023 15:37:32 -0300 Subject: [PATCH 0229/1997] 
Update redirects.txt --- docs/redirects.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/redirects.txt b/docs/redirects.txt index 3abc8df2b7f..98d6f6b8f7c 100644 --- a/docs/redirects.txt +++ b/docs/redirects.txt @@ -13,7 +13,7 @@ data_types/date.md sql-reference/data-types/date.md data_types/datetime.md sql-reference/data-types/datetime.md data_types/datetime64.md sql-reference/data-types/datetime64.md data_types/decimal.md sql-reference/data-types/decimal.md -data_types/domains/ipv4.md sql-reference/data-types/domains/ipv4.md +data_types/domains/ipv4.md sql-reference/data-types/ipv4.md data_types/domains/ipv6.md sql-reference/data-types/ipv6.md data_types/domains/overview.md sql-reference/data-types/domains/overview.md data_types/enum.md sql-reference/data-types/enum.md From aa35689cb10dbdbab0c8475a7f92b8978e6eb6b8 Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Sun, 4 Jun 2023 19:39:30 -0300 Subject: [PATCH 0230/1997] fix links in other lang-s --- docs/ru/operations/system-tables/query_log.md | 4 ++-- docs/ru/operations/system-tables/query_thread_log.md | 4 ++-- docs/ru/operations/system-tables/session_log.md | 2 +- docs/ru/operations/system-tables/zookeeper_log.md | 2 +- docs/ru/sql-reference/data-types/{domains => }/ipv4.md | 2 +- docs/ru/sql-reference/data-types/{domains => }/ipv6.md | 2 +- docs/ru/sql-reference/functions/ip-address-functions.md | 4 ++-- docs/zh/operations/system-tables/query_log.md | 4 ++-- docs/zh/operations/system-tables/query_thread_log.md | 4 ++-- docs/zh/operations/system-tables/zookeeper_log.md | 2 +- docs/zh/sql-reference/data-types/{domains => }/ipv4.md | 2 +- docs/zh/sql-reference/data-types/{domains => }/ipv6.md | 2 +- 12 files changed, 17 insertions(+), 17 deletions(-) rename docs/ru/sql-reference/data-types/{domains => }/ipv4.md (98%) rename docs/ru/sql-reference/data-types/{domains => }/ipv6.md (98%) rename docs/zh/sql-reference/data-types/{domains => }/ipv4.md (98%) rename docs/zh/sql-reference/data-types/{domains => }/ipv6.md (98%) diff --git a/docs/ru/operations/system-tables/query_log.md b/docs/ru/operations/system-tables/query_log.md index a55528bd829..8f858c14fb1 100644 --- a/docs/ru/operations/system-tables/query_log.md +++ b/docs/ru/operations/system-tables/query_log.md @@ -69,11 +69,11 @@ ClickHouse не удаляет данные из таблица автомати - 0 — запрос был инициирован другим запросом при выполнении распределенного запроса. - `user` ([String](../../sql-reference/data-types/string.md)) — пользователь, запустивший текущий запрос. - `query_id` ([String](../../sql-reference/data-types/string.md)) — ID запроса. -- `address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP адрес, с которого пришел запрос. +- `address` ([IPv6](../../sql-reference/data-types/ipv6.md)) — IP адрес, с которого пришел запрос. - `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — порт, с которого клиент сделал запрос - `initial_user` ([String](../../sql-reference/data-types/string.md)) — пользователь, запустивший первоначальный запрос (для распределенных запросов). - `initial_query_id` ([String](../../sql-reference/data-types/string.md)) — ID родительского запроса. -- `initial_address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP адрес, с которого пришел родительский запрос. +- `initial_address` ([IPv6](../../sql-reference/data-types/ipv6.md)) — IP адрес, с которого пришел родительский запрос. 
- `initial_port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — порт, с которого клиент сделал родительский запрос. - `initial_query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — время начала обработки запроса (для распределенных запросов). - `initial_query_start_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — время начала обработки запроса с точностью до микросекунд (для распределенных запросов). diff --git a/docs/ru/operations/system-tables/query_thread_log.md b/docs/ru/operations/system-tables/query_thread_log.md index c9aabb02cad..1a256e1657a 100644 --- a/docs/ru/operations/system-tables/query_thread_log.md +++ b/docs/ru/operations/system-tables/query_thread_log.md @@ -39,11 +39,11 @@ ClickHouse не удаляет данные из таблицы автомати - 0 — запрос был инициирован другим запросом при распределенном запросе. - `user` ([String](../../sql-reference/data-types/string.md)) — пользователь, запустивший текущий запрос. - `query_id` ([String](../../sql-reference/data-types/string.md)) — ID запроса. -- `address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP адрес, с которого пришел запрос. +- `address` ([IPv6](../../sql-reference/data-types/ipv6.md)) — IP адрес, с которого пришел запрос. - `port` ([UInt16](../../sql-reference/data-types/int-uint.md#uint-ranges)) — порт, с которого пришел запрос. - `initial_user` ([String](../../sql-reference/data-types/string.md)) — пользователь, запустивший первоначальный запрос (для распределенных запросов). - `initial_query_id` ([String](../../sql-reference/data-types/string.md)) — ID родительского запроса. -- `initial_address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP адрес, с которого пришел родительский запрос. +- `initial_address` ([IPv6](../../sql-reference/data-types/ipv6.md)) — IP адрес, с которого пришел родительский запрос. - `initial_port` ([UInt16](../../sql-reference/data-types/int-uint.md#uint-ranges)) — порт, пришел родительский запрос. - `interface` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — интерфейс, с которого ушёл запрос. Возможные значения: - 1 — TCP. diff --git a/docs/ru/operations/system-tables/session_log.md b/docs/ru/operations/system-tables/session_log.md index 1f313e7815a..5849cb51ab4 100644 --- a/docs/ru/operations/system-tables/session_log.md +++ b/docs/ru/operations/system-tables/session_log.md @@ -27,7 +27,7 @@ slug: /ru/operations/system-tables/session_log - `profiles` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — список профилей, установленных для всех ролей и (или) пользователей. - `roles` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — список ролей, к которым применяется данный профиль. - `settings` ([Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md), [String](../../sql-reference/data-types/string.md)))) — настройки, которые были изменены при входе или выходе клиента из системы. -- `client_address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP-адрес, который использовался для входа или выхода из системы. +- `client_address` ([IPv6](../../sql-reference/data-types/ipv6.md)) — IP-адрес, который использовался для входа или выхода из системы. 
- `client_port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — порт клиента, который использовался для входа или выхода из системы. - `interface` ([Enum8](../../sql-reference/data-types/enum.md)) — интерфейс, с которого был инициирован вход в систему. Возможные значения: - `TCP` diff --git a/docs/ru/operations/system-tables/zookeeper_log.md b/docs/ru/operations/system-tables/zookeeper_log.md index ccbdd5110ad..9874cb3a269 100644 --- a/docs/ru/operations/system-tables/zookeeper_log.md +++ b/docs/ru/operations/system-tables/zookeeper_log.md @@ -15,7 +15,7 @@ slug: /ru/operations/system-tables/zookeeper_log - `Finalize` — соединение разорвано, ответ не получен. - `event_date` ([Date](../../sql-reference/data-types/date.md)) — дата, когда произошло событие. - `event_time` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — дата и время, когда произошло событие. -- `address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP адрес сервера ZooKeeper, с которого был сделан запрос. +- `address` ([IPv6](../../sql-reference/data-types/ipv6.md)) — IP адрес сервера ZooKeeper, с которого был сделан запрос. - `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — порт сервера ZooKeeper, с которого был сделан запрос. - `session_id` ([Int64](../../sql-reference/data-types/int-uint.md)) — идентификатор сессии, который сервер ZooKeeper создает для каждого соединения. - `xid` ([Int32](../../sql-reference/data-types/int-uint.md)) — идентификатор запроса внутри сессии. Обычно это последовательный номер запроса, одинаковый у строки запроса и у парной строки `response`/`finalize`. diff --git a/docs/ru/sql-reference/data-types/domains/ipv4.md b/docs/ru/sql-reference/data-types/ipv4.md similarity index 98% rename from docs/ru/sql-reference/data-types/domains/ipv4.md rename to docs/ru/sql-reference/data-types/ipv4.md index 57a19e282ae..8d308785eea 100644 --- a/docs/ru/sql-reference/data-types/domains/ipv4.md +++ b/docs/ru/sql-reference/data-types/ipv4.md @@ -1,5 +1,5 @@ --- -slug: /ru/sql-reference/data-types/domains/ipv4 +slug: /ru/sql-reference/data-types/ipv4 sidebar_position: 59 sidebar_label: IPv4 --- diff --git a/docs/ru/sql-reference/data-types/domains/ipv6.md b/docs/ru/sql-reference/data-types/ipv6.md similarity index 98% rename from docs/ru/sql-reference/data-types/domains/ipv6.md rename to docs/ru/sql-reference/data-types/ipv6.md index fdfb26f68c1..808068ce90a 100644 --- a/docs/ru/sql-reference/data-types/domains/ipv6.md +++ b/docs/ru/sql-reference/data-types/ipv6.md @@ -1,5 +1,5 @@ --- -slug: /ru/sql-reference/data-types/domains/ipv6 +slug: /ru/sql-reference/data-types/ipv6 sidebar_position: 60 sidebar_label: IPv6 --- diff --git a/docs/ru/sql-reference/functions/ip-address-functions.md b/docs/ru/sql-reference/functions/ip-address-functions.md index 96d4b737c88..d1a72b82b67 100644 --- a/docs/ru/sql-reference/functions/ip-address-functions.md +++ b/docs/ru/sql-reference/functions/ip-address-functions.md @@ -265,7 +265,7 @@ SELECT ## toIPv6 {#toipv6string} -Приводит строку с адресом в формате IPv6 к типу [IPv6](../../sql-reference/data-types/domains/ipv6.md). Возвращает пустое значение, если входящая строка не является корректным IP адресом. +Приводит строку с адресом в формате IPv6 к типу [IPv6](../../sql-reference/data-types/ipv6.md). Возвращает пустое значение, если входящая строка не является корректным IP адресом. Похоже на функцию [IPv6StringToNum](#ipv6stringtonums), которая представляет адрес IPv6 в двоичном виде. 
Если входящая строка содержит корректный IPv4 адрес, функция возвращает его IPv6 эквивалент. @@ -284,7 +284,7 @@ toIPv6(string) - IP адрес. -Тип: [IPv6](../../sql-reference/data-types/domains/ipv6.md). +Тип: [IPv6](../../sql-reference/data-types/ipv6.md). **Примеры** diff --git a/docs/zh/operations/system-tables/query_log.md b/docs/zh/operations/system-tables/query_log.md index 7149282dfcc..0ba669906cb 100644 --- a/docs/zh/operations/system-tables/query_log.md +++ b/docs/zh/operations/system-tables/query_log.md @@ -60,11 +60,11 @@ ClickHouse不会自动从表中删除数据。更多详情请看 [introduction]( - 0 — 由另一个查询发起的,作为分布式查询的一部分. - `user` ([String](../../sql-reference/data-types/string.md)) — 发起查询的用户. - `query_id` ([String](../../sql-reference/data-types/string.md)) — 查询ID. -- `address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — 发起查询的客户端IP地址. +- `address` ([IPv6](../../sql-reference/data-types/ipv6.md)) — 发起查询的客户端IP地址. - `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — 发起查询的客户端端口. - `initial_user` ([String](../../sql-reference/data-types/string.md)) — 初始查询的用户名(用于分布式查询执行). - `initial_query_id` ([String](../../sql-reference/data-types/string.md)) — 运行初始查询的ID(用于分布式查询执行). -- `initial_address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — 运行父查询的IP地址. +- `initial_address` ([IPv6](../../sql-reference/data-types/ipv6.md)) — 运行父查询的IP地址. - `initial_port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — 发起父查询的客户端端口. - `interface` ([UInt8](../../sql-reference/data-types/int-uint.md)) — 发起查询的接口. 可能的值: - 1 — TCP. diff --git a/docs/zh/operations/system-tables/query_thread_log.md b/docs/zh/operations/system-tables/query_thread_log.md index 8a41c1501a6..c4b7e2f1043 100644 --- a/docs/zh/operations/system-tables/query_thread_log.md +++ b/docs/zh/operations/system-tables/query_thread_log.md @@ -36,11 +36,11 @@ ClickHouse不会自动从表中删除数据。 欲了解更多详情,请参照 - 0 — 由其他查询发起的分布式查询。 - `user` ([字符串](../../sql-reference/data-types/string.md)) — 发起查询的用户名。 - `query_id` ([字符串](../../sql-reference/data-types/string.md)) — 查询的ID。 -- `address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — 发起查询的IP地址。 +- `address` ([IPv6](../../sql-reference/data-types/ipv6.md)) — 发起查询的IP地址。 - `port` ([UInt16](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 发起查询的端口。 - `initial_user` ([字符串](../../sql-reference/data-types/string.md)) — 首次发起查询的用户名(对于分布式查询)。 - `initial_query_id` ([字符串](../../sql-reference/data-types/string.md)) — 首次发起查询的ID(对于分布式查询)。 -- `initial_address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — 发起该查询的父查询IP地址。 +- `initial_address` ([IPv6](../../sql-reference/data-types/ipv6.md)) — 发起该查询的父查询IP地址。 - `initial_port` ([UInt16](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 发起该查询的父查询端口。 - `interface` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 发起查询的界面,可能的值: - 1 — TCP. diff --git a/docs/zh/operations/system-tables/zookeeper_log.md b/docs/zh/operations/system-tables/zookeeper_log.md index 59dcdaecdc1..ebc51a2e79d 100644 --- a/docs/zh/operations/system-tables/zookeeper_log.md +++ b/docs/zh/operations/system-tables/zookeeper_log.md @@ -15,7 +15,7 @@ slug: /zh/operations/system-tables/zookeeper_log - `Finalize` — 连接丢失, 未收到响应. - `event_date` ([Date](../../sql-reference/data-types/date.md)) — 事件发生的日期. - `event_time` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — 事件发生的日期和时间. -- `address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — 用于发出请求的 ZooKeeper 服务器的 IP 地址. 
+- `address` ([IPv6](../../sql-reference/data-types/ipv6.md)) — 用于发出请求的 ZooKeeper 服务器的 IP 地址. - `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — 用于发出请求的 ZooKeeper 服务器的端口. - `session_id` ([Int64](../../sql-reference/data-types/int-uint.md)) — ZooKeeper 服务器为每个连接设置的会话 ID. - `xid` ([Int32](../../sql-reference/data-types/int-uint.md)) — 会话中请求的 ID. 这通常是一个连续的请求编号. 请求行和配对的 `response`/`finalize` 行相同. diff --git a/docs/zh/sql-reference/data-types/domains/ipv4.md b/docs/zh/sql-reference/data-types/ipv4.md similarity index 98% rename from docs/zh/sql-reference/data-types/domains/ipv4.md rename to docs/zh/sql-reference/data-types/ipv4.md index 69e17b2f617..b89af974b87 100644 --- a/docs/zh/sql-reference/data-types/domains/ipv4.md +++ b/docs/zh/sql-reference/data-types/ipv4.md @@ -1,5 +1,5 @@ --- -slug: /zh/sql-reference/data-types/domains/ipv4 +slug: /zh/sql-reference/data-types/ipv4 --- ## IPv4 {#ipv4} diff --git a/docs/zh/sql-reference/data-types/domains/ipv6.md b/docs/zh/sql-reference/data-types/ipv6.md similarity index 98% rename from docs/zh/sql-reference/data-types/domains/ipv6.md rename to docs/zh/sql-reference/data-types/ipv6.md index 9dd88692c37..3896bb873d8 100644 --- a/docs/zh/sql-reference/data-types/domains/ipv6.md +++ b/docs/zh/sql-reference/data-types/ipv6.md @@ -1,5 +1,5 @@ --- -slug: /zh/sql-reference/data-types/domains/ipv6 +slug: /zh/sql-reference/data-types/ipv6 --- ## IPv6 {#ipv6} From 2e187e0a0eae7f0109c6af30bd6baad0e75c9b71 Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Sun, 4 Jun 2023 20:12:35 -0300 Subject: [PATCH 0231/1997] try to fix redirect --- docs/redirects.txt | 2 -- docs/ru/sql-reference/data-types/ipv4.md | 27 +++--------------------- 2 files changed, 3 insertions(+), 26 deletions(-) diff --git a/docs/redirects.txt b/docs/redirects.txt index 98d6f6b8f7c..ddfc66aa48b 100644 --- a/docs/redirects.txt +++ b/docs/redirects.txt @@ -306,9 +306,7 @@ sql_reference/data_types/datetime64.md sql-reference/data-types/datetime64.md sql_reference/data_types/decimal.md sql-reference/data-types/decimal.md sql_reference/data_types/domains/index.md sql-reference/data-types/domains/index.md sql_reference/data_types/domains/ipv4.md sql-reference/data-types/ipv4.md -sql_reference/data-types/domains/ipv4.md sql-reference/data-types/ipv4.md sql_reference/data_types/domains/ipv6.md sql-reference/data-types/ipv6.md -sql_reference/data-types/domains/ipv6.md sql-reference/data-types/ipv6.md sql_reference/data_types/domains/overview.md sql-reference/data-types/domains/overview.md sql_reference/data_types/enum.md sql-reference/data-types/enum.md sql_reference/data_types/fixedstring.md sql-reference/data-types/fixedstring.md diff --git a/docs/ru/sql-reference/data-types/ipv4.md b/docs/ru/sql-reference/data-types/ipv4.md index 8d308785eea..5cb977c64c9 100644 --- a/docs/ru/sql-reference/data-types/ipv4.md +++ b/docs/ru/sql-reference/data-types/ipv4.md @@ -6,7 +6,7 @@ sidebar_label: IPv4 ## IPv4 {#ipv4} -`IPv4` — это домен, базирующийся на типе данных `UInt32` предназначенный для хранения адресов IPv4. Он обеспечивает компактное хранение данных с удобным для человека форматом ввода-вывода, и явно отображаемым типом данных в структуре таблицы. +IPv4-адреса. Хранится в 4 байтах как UInt32. ### Применение {#primenenie} @@ -57,27 +57,6 @@ SELECT toTypeName(from), hex(from) FROM hits LIMIT 1; └──────────────────┴───────────┘ ``` -Значения с доменным типом данных не преобразуются неявно в другие типы данных, кроме `UInt32`. 
-Если необходимо преобразовать значение типа `IPv4` в строку, то это необходимо делать явно с помощью функции `IPv4NumToString()`: +**См. также** -``` sql -SELECT toTypeName(s), IPv4NumToString(from) AS s FROM hits LIMIT 1; -``` - -``` text -┌─toTypeName(IPv4NumToString(from))─┬─s──────────────┐ -│ String │ 183.247.232.58 │ -└───────────────────────────────────┴────────────────┘ -``` - -Или приводить к типу данных `UInt32`: - -``` sql -SELECT toTypeName(i), CAST(from AS UInt32) AS i FROM hits LIMIT 1; -``` - -``` text -┌─toTypeName(CAST(from, 'UInt32'))─┬──────────i─┐ -│ UInt32 │ 3086477370 │ -└──────────────────────────────────┴────────────┘ -``` +- [Functions for Working with IPv4 and IPv6 Addresses](../functions/ip-address-functions.md) From 1a361ef3060a4d271e6e27bb816df1d18cffaa02 Mon Sep 17 00:00:00 2001 From: zvonand Date: Mon, 5 Jun 2023 03:21:43 +0200 Subject: [PATCH 0232/1997] works for file --- src/Storages/StorageFile.cpp | 103 +++++++++++++++++++++++++++++++---- 1 file changed, 93 insertions(+), 10 deletions(-) diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 647f9511052..53da509d383 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -90,6 +90,57 @@ namespace ErrorCodes namespace { +/// Forward-declare to use in listFilesWithFoldedRegexpMatchingImpl() +void listFilesWithRegexpMatchingImpl( + const std::string & path_for_ls, + const std::string & for_match, + size_t & total_bytes_to_read, + std::vector & result, + bool recursive = false); + +/// When `{...}` has any `/`s, it must be processed in a different way +void listFilesWithFoldedRegexpMatchingImpl(const std::string & start_dir, const std::string & processed_suffix, + const std::string & suffix_with_globs, + const std::string & glob, re2::RE2 & matcher, size_t & total_bytes_to_read, + const size_t max_depth, const size_t next_slash_after_glob_pos, + std::vector & result) + { + /// We don't need to go all the way in every directory if max_depth is reached + /// as it is the upper limit of depth, obtained by simply counting `/`s in curly braces + if (!max_depth) + return; + + const fs::directory_iterator end; + for (fs::directory_iterator it(start_dir + processed_suffix); it != end; ++it) + { + const std::string full_path = it->path().string(); + const size_t last_slash = full_path.rfind('/'); + const String dir_or_file_name = full_path.substr(last_slash); + + if (re2::RE2::FullMatch(processed_suffix + dir_or_file_name, matcher)) + { + if (next_slash_after_glob_pos == std::string::npos) + { + total_bytes_to_read += it->file_size(); + result.push_back(it->path().string()); + } + else + { + listFilesWithRegexpMatchingImpl(fs::path(full_path).append(processed_suffix).append(it->path().string()) / "" , + suffix_with_globs.substr(next_slash_after_glob_pos), + total_bytes_to_read, result); + } + } + else if (it->is_directory()) + { + listFilesWithFoldedRegexpMatchingImpl(start_dir, processed_suffix + dir_or_file_name, suffix_with_globs, + glob, matcher, total_bytes_to_read, max_depth - 1, + next_slash_after_glob_pos, result); + } + + } +} + /* Recursive directory listing with matched paths as a result. * Have the same method in StorageHDFS.
*/ @@ -98,15 +149,40 @@ void listFilesWithRegexpMatchingImpl( const std::string & path_for_ls, const std::string & for_match, size_t & total_bytes_to_read, std::vector & result, - bool recursive = false) + bool recursive) { - const size_t first_glob = for_match.find_first_of("*?{"); + const size_t first_glob_pos = for_match.find_first_of("*?{"); + const bool has_glob = first_glob_pos != std::string::npos; - const size_t end_of_path_without_globs = for_match.substr(0, first_glob).rfind('/'); + const size_t end_of_path_without_globs = for_match.substr(0, first_glob_pos).rfind('/'); const std::string suffix_with_globs = for_match.substr(end_of_path_without_globs); /// begin with '/' - const size_t next_slash = suffix_with_globs.find('/', 1); - const std::string current_glob = suffix_with_globs.substr(0, next_slash); + /// slashes_in_glob counter is an upper-bound estimate of recursion depth + /// needed to process complex cases when `/` is included into glob, e.g. /pa{th1/a,th2/b}.csv + size_t slashes_in_glob = 0; + const size_t next_slash_after_glob_pos = [&](){ + if (!has_glob) + return suffix_with_globs.find('/', 1); + + size_t in_curly = 0; + for (std::string::const_iterator it = ++suffix_with_globs.begin(); it != suffix_with_globs.end(); it++) { + if (*it == '{') + ++in_curly; + else if (*it == '/') + { + if (in_curly) + ++slashes_in_glob; + else + return size_t(std::distance(suffix_with_globs.begin(), it)); + } + else if (*it == '}') + --in_curly; + } + return std::string::npos; + }(); + + const std::string current_glob = suffix_with_globs.substr(0, next_slash_after_glob_pos); + auto regexp = makeRegexpPatternFromGlobs(current_glob); re2::RE2 matcher(regexp); @@ -123,13 +199,22 @@ void listFilesWithRegexpMatchingImpl( if (!fs::exists(prefix_without_globs)) return; + const bool looking_for_directory = next_slash_after_glob_pos != std::string::npos; + + if (slashes_in_glob) + { + listFilesWithFoldedRegexpMatchingImpl(prefix_without_globs, "", suffix_with_globs, + current_glob, matcher, total_bytes_to_read, slashes_in_glob, + next_slash_after_glob_pos, result); + return; + } + const fs::directory_iterator end; for (fs::directory_iterator it(prefix_without_globs); it != end; ++it) { const std::string full_path = it->path().string(); const size_t last_slash = full_path.rfind('/'); const String file_name = full_path.substr(last_slash); - const bool looking_for_directory = next_slash != std::string::npos; /// Condition is_directory means what kind of path is it in current iteration of ls if (!it->is_directory() && !looking_for_directory) @@ -145,14 +230,12 @@ void listFilesWithRegexpMatchingImpl( if (recursive) { listFilesWithRegexpMatchingImpl(fs::path(full_path).append(it->path().string()) / "" , - looking_for_directory ? suffix_with_globs.substr(next_slash) : current_glob , + looking_for_directory ? suffix_with_globs.substr(next_slash_after_glob_pos) : current_glob , total_bytes_to_read, result, recursive); } else if (looking_for_directory && re2::RE2::FullMatch(file_name, matcher)) - { /// Recursion depth is limited by pattern. '*' works only for depth = 1, for depth = 2 pattern path is '*/*'. So we do not need additional check.
- listFilesWithRegexpMatchingImpl(fs::path(full_path) / "", suffix_with_globs.substr(next_slash), total_bytes_to_read, result); - } + listFilesWithRegexpMatchingImpl(fs::path(full_path) / "", suffix_with_globs.substr(next_slash_after_glob_pos), total_bytes_to_read, result); } } } From ece96f54e96b526693e58b859c9c835f17eff5f4 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 5 Jun 2023 09:47:58 +0000 Subject: [PATCH 0233/1997] Fix tests --- src/Coordination/KeeperSnapshotManager.cpp | 8 +++----- src/Coordination/tests/gtest_coordination.cpp | 2 ++ 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Coordination/KeeperSnapshotManager.cpp b/src/Coordination/KeeperSnapshotManager.cpp index 44e990c7b95..d10df0fd785 100644 --- a/src/Coordination/KeeperSnapshotManager.cpp +++ b/src/Coordination/KeeperSnapshotManager.cpp @@ -62,7 +62,7 @@ namespace std::string getSnapshotFileName(uint64_t up_to_log_idx, bool compress_zstd) { - auto base = std::string{"snapshot_"} + std::to_string(up_to_log_idx) + ".bin"; + auto base = fmt::format("snapshot_{}.bin", up_to_log_idx); if (compress_zstd) base += ".zstd"; return base; @@ -567,10 +567,8 @@ KeeperSnapshotManager::KeeperSnapshotManager( continue; } - if (clean_incomplete_file(it->path())) - continue; - - snapshot_files.push_back(it->path()); + if (it->name().starts_with("snapshot_") && !clean_incomplete_file(it->path())) + snapshot_files.push_back(it->path()); } for (const auto & snapshot_file : snapshot_files) diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index ff7d545ecdd..50e81eca8ca 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -1944,6 +1944,8 @@ TEST_P(CoordinationTest, TestCompressedLogsMultipleRewrite) changelog1.end_of_append_batch(0, 0); } + waitDurableLogs(changelog1); + DB::KeeperLogStore changelog2( DB::LogFileSettings{.force_sync = true, .compress_logs = test_params.enable_compression, .rotate_interval = 100}, keeper_context); changelog2.init(0, 3); From 495482cdb2b6a6a2d272c50bb3995b0409f7fb91 Mon Sep 17 00:00:00 2001 From: tpanetti Date: Mon, 5 Jun 2023 15:22:29 -0700 Subject: [PATCH 0234/1997] Refactor ClickHouse->MySQL Type conversion and add configuration setting to trigger type conversion --- src/Core/Settings.h | 1 + src/DataTypes/DataTypeAggregateFunction.h | 2 +- src/DataTypes/DataTypeArray.h | 2 +- src/DataTypes/DataTypeDate.h | 2 +- src/DataTypes/DataTypeDate32.h | 2 +- src/DataTypes/DataTypeDateTime.h | 2 +- src/DataTypes/DataTypeDateTime64.h | 2 +- src/DataTypes/DataTypeEnum.cpp | 1 - src/DataTypes/DataTypeEnum.h | 3 +- src/DataTypes/DataTypeFixedString.h | 3 +- src/DataTypes/DataTypeFunction.h | 2 +- src/DataTypes/DataTypeIPv4andIPv6.h | 4 +- src/DataTypes/DataTypeInterval.h | 2 +- src/DataTypes/DataTypeLowCardinality.cpp | 3 +- src/DataTypes/DataTypeLowCardinality.h | 3 +- src/DataTypes/DataTypeMap.h | 2 +- src/DataTypes/DataTypeNothing.h | 2 +- src/DataTypes/DataTypeNullable.h | 2 +- src/DataTypes/DataTypeNumberBase.cpp | 67 +++++-- src/DataTypes/DataTypeNumberBase.h | 4 +- src/DataTypes/DataTypeObject.h | 2 +- src/DataTypes/DataTypeSet.h | 2 +- src/DataTypes/DataTypeString.h | 3 +- src/DataTypes/DataTypeTuple.h | 2 +- src/DataTypes/DataTypeUUID.h | 2 +- src/DataTypes/DataTypesDecimal.cpp | 5 + src/DataTypes/DataTypesDecimal.h | 3 +- src/DataTypes/IDataType.h | 12 +- src/Storages/System/StorageSystemColumns.cpp | 11 +- ...show_columns_mysql_compatibility.reference | 187 
+++++++++++++++--- .../02775_show_columns_mysql_compatibility.sh | 31 ++- 31 files changed, 278 insertions(+), 93 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 67c92a0be8b..1ce30ff121f 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -190,6 +190,7 @@ class IColumn; M(Bool, allow_experimental_inverted_index, false, "If it is set to true, allow to use experimental inverted index.", 0) \ \ M(UInt64, mysql_max_rows_to_insert, 65536, "The maximum number of rows in MySQL batch insertion of the MySQL storage engine", 0) \ + M(Bool, output_format_mysql_types, false, "Use MySQL converted types when connected via MySQL compatibility", 0) \ \ M(UInt64, optimize_min_equality_disjunction_chain_length, 3, "The minimum length of the expression `expr = x1 OR ... expr = xN` for optimization ", 0) \ \ diff --git a/src/DataTypes/DataTypeAggregateFunction.h b/src/DataTypes/DataTypeAggregateFunction.h index 13ca3508580..83c9f10f407 100644 --- a/src/DataTypes/DataTypeAggregateFunction.h +++ b/src/DataTypes/DataTypeAggregateFunction.h @@ -45,7 +45,7 @@ public: String doGetName() const override; String getNameWithoutVersion() const; const char * getFamilyName() const override { return "AggregateFunction"; } - const char * getSQLCompatibleName() const override { return "TEXT"; } + String getSQLCompatibleName() const override { return "TEXT"; } TypeIndex getTypeId() const override { return TypeIndex::AggregateFunction; } Array getParameters() const { return parameters; } diff --git a/src/DataTypes/DataTypeArray.h b/src/DataTypes/DataTypeArray.h index 528062b60be..2714ca1d023 100644 --- a/src/DataTypes/DataTypeArray.h +++ b/src/DataTypes/DataTypeArray.h @@ -30,7 +30,7 @@ public: { return "Array"; } - const char * getSQLCompatibleName() const override + String getSQLCompatibleName() const override { return "TEXT"; } diff --git a/src/DataTypes/DataTypeDate.h b/src/DataTypes/DataTypeDate.h index 7b622ae04a3..0d557cad5f0 100644 --- a/src/DataTypes/DataTypeDate.h +++ b/src/DataTypes/DataTypeDate.h @@ -13,7 +13,7 @@ public: TypeIndex getTypeId() const override { return TypeIndex::Date; } const char * getFamilyName() const override { return family_name; } - const char * getSQLCompatibleName() const override { return "DATE"; } + String getSQLCompatibleName() const override { return "DATE"; } bool canBeUsedAsVersion() const override { return true; } bool canBeInsideNullable() const override { return true; } diff --git a/src/DataTypes/DataTypeDate32.h b/src/DataTypes/DataTypeDate32.h index 65b0ec7407e..0879a404179 100644 --- a/src/DataTypes/DataTypeDate32.h +++ b/src/DataTypes/DataTypeDate32.h @@ -13,7 +13,7 @@ public: TypeIndex getTypeId() const override { return TypeIndex::Date32; } const char * getFamilyName() const override { return family_name; } - const char * getSQLCompatibleName() const override { return "DATE"; } + String getSQLCompatibleName() const override { return "DATE"; } Field getDefault() const override { diff --git a/src/DataTypes/DataTypeDateTime.h b/src/DataTypes/DataTypeDateTime.h index 2facc758f90..edc8b016490 100644 --- a/src/DataTypes/DataTypeDateTime.h +++ b/src/DataTypes/DataTypeDateTime.h @@ -36,7 +36,7 @@ public: static constexpr auto family_name = "DateTime"; const char * getFamilyName() const override { return family_name; } - const char * getSQLCompatibleName() const override { return "DATETIME"; } + String getSQLCompatibleName() const override { return "DATETIME"; } String doGetName() const override; TypeIndex getTypeId() const override { return 
TypeIndex::DateTime; } diff --git a/src/DataTypes/DataTypeDateTime64.h b/src/DataTypes/DataTypeDateTime64.h index b836b84918f..e786cc09f28 100644 --- a/src/DataTypes/DataTypeDateTime64.h +++ b/src/DataTypes/DataTypeDateTime64.h @@ -28,7 +28,7 @@ public: DataTypeDateTime64(UInt32 scale_, const TimezoneMixin & time_zone_info); const char * getFamilyName() const override { return family_name; } - const char * getSQLCompatibleName() const override { return "DATETIME"; } + String getSQLCompatibleName() const override { return "DATETIME"; } std::string doGetName() const override; TypeIndex getTypeId() const override { return type_id; } diff --git a/src/DataTypes/DataTypeEnum.cpp b/src/DataTypes/DataTypeEnum.cpp index 24a3976179d..1750ae785bf 100644 --- a/src/DataTypes/DataTypeEnum.cpp +++ b/src/DataTypes/DataTypeEnum.cpp @@ -90,7 +90,6 @@ template DataTypeEnum::DataTypeEnum(const Values & values_) : EnumValues(values_) , type_name(generateName(this->getValues())) - , my_sql_type_name(generateMySQLName(this->getValues())) { } diff --git a/src/DataTypes/DataTypeEnum.h b/src/DataTypes/DataTypeEnum.h index 2cdaa2db06c..d148f753c82 100644 --- a/src/DataTypes/DataTypeEnum.h +++ b/src/DataTypes/DataTypeEnum.h @@ -45,7 +45,6 @@ public: private: std::string type_name; - std::string my_sql_type_name; static std::string generateName(const Values & values); static std::string generateMySQLName(const Values & values); @@ -54,7 +53,7 @@ public: std::string doGetName() const override { return type_name; } const char * getFamilyName() const override; - const char * getSQLCompatibleName() const override { return my_sql_type_name.c_str(); } + String getSQLCompatibleName() const override { return generateMySQLName(this->getValues()); } TypeIndex getTypeId() const override { return type_id; } diff --git a/src/DataTypes/DataTypeFixedString.h b/src/DataTypes/DataTypeFixedString.h index 2900efd5a34..22ec793208d 100644 --- a/src/DataTypes/DataTypeFixedString.h +++ b/src/DataTypes/DataTypeFixedString.h @@ -42,7 +42,8 @@ public: TypeIndex getTypeId() const override { return type_id; } const char * getFamilyName() const override { return "FixedString"; } - const char * getSQLCompatibleName() const override { return "TEXT"; } + /// Use TEXT for compatibility with MySQL to allow arbitrary bytes. 
+ String getSQLCompatibleName() const override { return "TEXT"; } size_t getN() const { diff --git a/src/DataTypes/DataTypeFunction.h b/src/DataTypes/DataTypeFunction.h index df59f7738b2..b57c0587dde 100644 --- a/src/DataTypes/DataTypeFunction.h +++ b/src/DataTypes/DataTypeFunction.h @@ -24,7 +24,7 @@ public: std::string doGetName() const override; const char * getFamilyName() const override { return "Function"; } - const char * getSQLCompatibleName() const override { return "TEXT"; } + String getSQLCompatibleName() const override { return "TEXT"; } TypeIndex getTypeId() const override { return TypeIndex::Function; } const DataTypes & getArgumentTypes() const diff --git a/src/DataTypes/DataTypeIPv4andIPv6.h b/src/DataTypes/DataTypeIPv4andIPv6.h index be0ebb90f3c..487ce04f67c 100644 --- a/src/DataTypes/DataTypeIPv4andIPv6.h +++ b/src/DataTypes/DataTypeIPv4andIPv6.h @@ -19,7 +19,7 @@ public: static constexpr auto type_id = TypeToTypeIndex; const char * getFamilyName() const override { return TypeName.data(); } - const char * getSQLCompatibleName() const override { return "TEXT"; } + String getSQLCompatibleName() const override { return "TEXT"; } TypeIndex getTypeId() const override { return type_id; } @@ -61,7 +61,7 @@ public: static constexpr auto type_id = TypeToTypeIndex; const char * getFamilyName() const override { return TypeName.data(); } - const char * getSQLCompatibleName() const override { return "TEXT"; } + String getSQLCompatibleName() const override { return "TEXT"; } TypeIndex getTypeId() const override { return type_id; } diff --git a/src/DataTypes/DataTypeInterval.h b/src/DataTypes/DataTypeInterval.h index ee2157431dd..7de56c13b56 100644 --- a/src/DataTypes/DataTypeInterval.h +++ b/src/DataTypes/DataTypeInterval.h @@ -26,7 +26,7 @@ public: std::string doGetName() const override { return fmt::format("Interval{}", kind.toString()); } const char * getFamilyName() const override { return "Interval"; } - const char * getSQLCompatibleName() const override { return "TEXT"; } + String getSQLCompatibleName() const override { return "TEXT"; } TypeIndex getTypeId() const override { return TypeIndex::Interval; } bool equals(const IDataType & rhs) const override; diff --git a/src/DataTypes/DataTypeLowCardinality.cpp b/src/DataTypes/DataTypeLowCardinality.cpp index e59613e6974..8293455cabc 100644 --- a/src/DataTypes/DataTypeLowCardinality.cpp +++ b/src/DataTypes/DataTypeLowCardinality.cpp @@ -28,8 +28,7 @@ namespace ErrorCodes } DataTypeLowCardinality::DataTypeLowCardinality(DataTypePtr dictionary_type_) - : dictionary_type(std::move(dictionary_type_)), - mysql_name(dictionary_type->getSQLCompatibleName()) + : dictionary_type(std::move(dictionary_type_)) { auto inner_type = dictionary_type; if (dictionary_type->isNullable()) diff --git a/src/DataTypes/DataTypeLowCardinality.h b/src/DataTypes/DataTypeLowCardinality.h index 4dee8565568..f6d8d07a312 100644 --- a/src/DataTypes/DataTypeLowCardinality.h +++ b/src/DataTypes/DataTypeLowCardinality.h @@ -11,7 +11,6 @@ class DataTypeLowCardinality : public IDataType { private: DataTypePtr dictionary_type; - std::string mysql_name; public: @@ -24,7 +23,7 @@ public: return "LowCardinality(" + dictionary_type->getName() + ")"; } const char * getFamilyName() const override { return "LowCardinality"; } - const char * getSQLCompatibleName() const override { return mysql_name.c_str(); } + String getSQLCompatibleName() const override { return dictionary_type->getSQLCompatibleName(); } TypeIndex getTypeId() const override { return TypeIndex::LowCardinality; 
} diff --git a/src/DataTypes/DataTypeMap.h b/src/DataTypes/DataTypeMap.h index 299119f1759..294c5d7ac77 100644 --- a/src/DataTypes/DataTypeMap.h +++ b/src/DataTypes/DataTypeMap.h @@ -30,7 +30,7 @@ public: TypeIndex getTypeId() const override { return TypeIndex::Map; } std::string doGetName() const override; const char * getFamilyName() const override { return "Map"; } - const char * getSQLCompatibleName() const override { return "JSON"; } + String getSQLCompatibleName() const override { return "JSON"; } bool canBeInsideNullable() const override { return false; } diff --git a/src/DataTypes/DataTypeNothing.h b/src/DataTypes/DataTypeNothing.h index b35ced5dcb3..c3a7e2d09f0 100644 --- a/src/DataTypes/DataTypeNothing.h +++ b/src/DataTypes/DataTypeNothing.h @@ -16,7 +16,7 @@ public: static constexpr bool is_parametric = false; const char * getFamilyName() const override { return "Nothing"; } - const char * getSQLCompatibleName() const override { return "TEXT"; } + String getSQLCompatibleName() const override { return "TEXT"; } TypeIndex getTypeId() const override { return TypeIndex::Nothing; } diff --git a/src/DataTypes/DataTypeNullable.h b/src/DataTypes/DataTypeNullable.h index b5fe1bb2dd9..e3165414c07 100644 --- a/src/DataTypes/DataTypeNullable.h +++ b/src/DataTypes/DataTypeNullable.h @@ -16,7 +16,7 @@ public: explicit DataTypeNullable(const DataTypePtr & nested_data_type_); std::string doGetName() const override { return "Nullable(" + nested_data_type->getName() + ")"; } const char * getFamilyName() const override { return "Nullable"; } - const char * getSQLCompatibleName() const override { return nested_data_type->getSQLCompatibleName(); } + String getSQLCompatibleName() const override { return nested_data_type->getSQLCompatibleName(); } TypeIndex getTypeId() const override { return TypeIndex::Nullable; } MutableColumnPtr createColumn() const override; diff --git a/src/DataTypes/DataTypeNumberBase.cpp b/src/DataTypes/DataTypeNumberBase.cpp index db654448e83..e4c0fb96483 100644 --- a/src/DataTypes/DataTypeNumberBase.cpp +++ b/src/DataTypes/DataTypeNumberBase.cpp @@ -11,6 +11,55 @@ Field DataTypeNumberBase::getDefault() const { return NearestFieldType(); } +template +String DataTypeNumberBase::getSQLCompatibleName() const +{ + if constexpr (std::is_same_v) + { + return "TINYINT"; + } + else if constexpr (std::is_same_v) + { + return "SMALLINT"; + } + else if constexpr (std::is_same_v) + { + return "INTEGER"; + } + else if constexpr (std::is_same_v) + { + return "BIGINT"; + } + else if constexpr (std::is_same_v) + { + return "TINYINT UNSIGNED"; + } + else if constexpr (std::is_same_v) + { + return "SMALLINT UNSIGNED"; + } + else if constexpr (std::is_same_v) + { + return "INTEGER UNSIGNED"; + } + else if constexpr (std::is_same_v) + { + return "BIGINT UNSIGNED"; + } + else if constexpr (std::is_same_v) + { + return "FLOAT"; + } + else if constexpr (std::is_same_v) + { + return "DOUBLE"; + } + /// Unsupported types are converted to TEXT + else + { + return "TEXT"; + } +} template MutableColumnPtr DataTypeNumberBase::createColumn() const @@ -30,24 +79,6 @@ bool DataTypeNumberBase::isValueRepresentedByUnsignedInteger() const return is_integer && is_unsigned_v; } -template -const std::map DataTypeNumberBase::mysqlTypeMap = { - {"UInt8", "TINYINT UNSIGNED"}, - {"UInt16", "SMALLINT UNSIGNED"}, - {"UInt32", "MEDIUMINT UNSIGNEd"}, - {"UInt64", "BIGINT UNSIGNED"}, - {"UInt128", "TEXT"}, - {"UInt256", "TEXT"}, - {"Int8", "TINYINT"}, - {"Int16", "SMALLINT"}, - {"Int32", "INT"}, - {"Int64", "BIGINT"}, - 
{"Int128", "TEXT"}, - {"Int256", "TEXT"}, - {"Float32", "FLOAT"}, - {"Float64", "DOUBLE"}, -}; - /// Explicit template instantiations - to avoid code bloat in headers. template class DataTypeNumberBase; template class DataTypeNumberBase; diff --git a/src/DataTypes/DataTypeNumberBase.h b/src/DataTypes/DataTypeNumberBase.h index 1a855a974f0..d902c62505e 100644 --- a/src/DataTypes/DataTypeNumberBase.h +++ b/src/DataTypes/DataTypeNumberBase.h @@ -20,14 +20,12 @@ public: static constexpr bool is_parametric = false; static constexpr auto family_name = TypeName; static constexpr auto type_id = TypeToTypeIndex; - // Create a map from the name of the type to the name of the type in MySQL. - static const std::map mysqlTypeMap; using FieldType = T; using ColumnType = ColumnVector; const char * getFamilyName() const override { return TypeName.data(); } - const char * getSQLCompatibleName() const override { return mysqlTypeMap.at(TypeName.data()).c_str(); } + String getSQLCompatibleName() const override; TypeIndex getTypeId() const override { return TypeToTypeIndex; } Field getDefault() const override; diff --git a/src/DataTypes/DataTypeObject.h b/src/DataTypes/DataTypeObject.h index 618c7389758..2e1e5398f7e 100644 --- a/src/DataTypes/DataTypeObject.h +++ b/src/DataTypes/DataTypeObject.h @@ -23,7 +23,7 @@ public: DataTypeObject(const String & schema_format_, bool is_nullable_); const char * getFamilyName() const override { return "Object"; } - const char * getSQLCompatibleName() const override { return "JSON"; } + String getSQLCompatibleName() const override { return "JSON"; } String doGetName() const override; TypeIndex getTypeId() const override { return TypeIndex::Object; } diff --git a/src/DataTypes/DataTypeSet.h b/src/DataTypes/DataTypeSet.h index 916b4f071a5..d88d76b31be 100644 --- a/src/DataTypes/DataTypeSet.h +++ b/src/DataTypes/DataTypeSet.h @@ -15,7 +15,7 @@ class DataTypeSet final : public IDataTypeDummy public: static constexpr bool is_parametric = true; const char * getFamilyName() const override { return "Set"; } - const char * getSQLCompatibleName() const override { return "TEXT"; } + String getSQLCompatibleName() const override { return "TEXT"; } TypeIndex getTypeId() const override { return TypeIndex::Set; } bool equals(const IDataType & rhs) const override { return typeid(rhs) == typeid(*this); } diff --git a/src/DataTypes/DataTypeString.h b/src/DataTypes/DataTypeString.h index 338b3846266..c39fa90f6e7 100644 --- a/src/DataTypes/DataTypeString.h +++ b/src/DataTypes/DataTypeString.h @@ -21,8 +21,7 @@ public: return "String"; } - // FIXME: string can contain arbitrary bytes, not only UTF-8 sequences - const char * getSQLCompatibleName() const override { return "BLOB"; } + String getSQLCompatibleName() const override { return "BLOB"; } TypeIndex getTypeId() const override { return type_id; } diff --git a/src/DataTypes/DataTypeTuple.h b/src/DataTypes/DataTypeTuple.h index 93fa87b1332..ea05e6ae59b 100644 --- a/src/DataTypes/DataTypeTuple.h +++ b/src/DataTypes/DataTypeTuple.h @@ -33,7 +33,7 @@ public: TypeIndex getTypeId() const override { return TypeIndex::Tuple; } std::string doGetName() const override; const char * getFamilyName() const override { return "Tuple"; } - const char * getSQLCompatibleName() const override { return "JSON"; } + String getSQLCompatibleName() const override { return "JSON"; } bool canBeInsideNullable() const override { return false; } bool supportsSparseSerialization() const override { return true; } diff --git a/src/DataTypes/DataTypeUUID.h 
b/src/DataTypes/DataTypeUUID.h index bbf35074df3..8664c3bcfd1 100644 --- a/src/DataTypes/DataTypeUUID.h +++ b/src/DataTypes/DataTypeUUID.h @@ -18,7 +18,7 @@ public: static constexpr auto type_id = TypeIndex::UUID; const char * getFamilyName() const override { return "UUID"; } - const char * getSQLCompatibleName() const override { return "CHAR"; } + String getSQLCompatibleName() const override { return "CHAR"; } TypeIndex getTypeId() const override { return type_id; } diff --git a/src/DataTypes/DataTypesDecimal.cpp b/src/DataTypes/DataTypesDecimal.cpp index 1c2a63371ee..fa044d4ac9c 100644 --- a/src/DataTypes/DataTypesDecimal.cpp +++ b/src/DataTypes/DataTypesDecimal.cpp @@ -28,6 +28,11 @@ std::string DataTypeDecimal<T>::doGetName() const return fmt::format("Decimal({}, {})", this->precision, this->scale); } +template <is_decimal T> +std::string DataTypeDecimal<T>::getSQLCompatibleName() const +{ + return fmt::format("DECIMAL({}, {})", this->precision, this->scale); +} template <is_decimal T> bool DataTypeDecimal<T>::equals(const IDataType & rhs) const diff --git a/src/DataTypes/DataTypesDecimal.h b/src/DataTypes/DataTypesDecimal.h index 6f3bf582aeb..5e4cfab7928 100644 --- a/src/DataTypes/DataTypesDecimal.h +++ b/src/DataTypes/DataTypesDecimal.h @@ -37,10 +37,9 @@ public: using Base::Base; static constexpr auto family_name = "Decimal"; - static constexpr auto mysql_name = "DECIMAL"; const char * getFamilyName() const override { return family_name; } - const char * getSQLCompatibleName() const override { return mysql_name; } + String getSQLCompatibleName() const override; std::string doGetName() const override; TypeIndex getTypeId() const override { return TypeToTypeIndex<T>; } diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index 93fdbab05ef..51a9ecef0cc 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -71,19 +71,12 @@ public: return doGetName(); } - /// MySQL equivalent Name of data type (examples: UInt64, Array(String)). - String getMySQLTypeName() const - { - if (custom_name) - return custom_name->getName(); - else - return doGetMySQLName(); - } DataTypePtr getPtr() const { return shared_from_this(); } /// Name of data type family (example: FixedString, Array). virtual const char * getFamilyName() const = 0; - virtual const char * getSQLCompatibleName() const = 0; + /// Name of corresponding data type in MySQL (example: Bigint, Blob, etc) + virtual String getSQLCompatibleName() const = 0; /// Data type id. It's used for runtime type checks. 
virtual TypeIndex getTypeId() const = 0; @@ -135,7 +128,6 @@ public: protected: virtual String doGetName() const { return getFamilyName(); } - virtual String doGetMySQLName() const { return getSQLCompatibleName(); } virtual SerializationPtr doGetDefaultSerialization() const = 0; public: diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index f391a392dbb..684c35709a4 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -74,7 +74,8 @@ public: : ISource(header_) , columns_mask(std::move(columns_mask_)), max_block_size(max_block_size_) , databases(std::move(databases_)), tables(std::move(tables_)), storages(std::move(storages_)) - , clientInfo(context->getClientInfo()) + , client_info_interface(context->getClientInfo().interface) + , use_mysql_types(context->getSettingsRef().output_format_mysql_types) , total_tables(tables->size()), access(context->getAccess()) , query_id(context->getCurrentQueryId()), lock_acquire_timeout(context->getSettingsRef().lock_acquire_timeout) { @@ -132,9 +133,10 @@ protected: auto get_type_name = [this](const IDataType& type) -> std::string { - if (clientInfo.interface == DB::ClientInfo::Interface::MYSQL) + // Check if the output_format_mysql_types setting is enabled and client is connected via MySQL protocol + if (use_mysql_types && client_info_interface == DB::ClientInfo::Interface::MYSQL) { - return type.getMySQLTypeName(); + return type.getSQLCompatibleName(); } else { @@ -293,7 +295,8 @@ private: ColumnPtr databases; ColumnPtr tables; Storages storages; - ClientInfo clientInfo; + ClientInfo::Interface client_info_interface; + bool use_mysql_types; size_t db_table_num = 0; size_t total_tables; std::shared_ptr access; diff --git a/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.reference b/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.reference index 1742cd9c90c..68e7be9ae6f 100644 --- a/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.reference +++ b/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.reference @@ -4,6 +4,44 @@ Create pseudo-random database name Create tab duplicate table Run MySQL test field type null key default extra +aggregate_function AggregateFunction(sum, Int32) 0 NULL +array_value Array(Int32) 0 NULL +boolean_value UInt8 0 NULL +date32_value Date32 0 NULL +date_value Date 0 NULL +datetime64_value DateTime64(3) 0 NULL +datetime_value DateTime 0 NULL +decimal_value Decimal(10, 2) 0 NULL +enum_value Enum8('apple' = 1, 'banana' = 2, 'orange' = 3) 0 NULL +fixed_string_value FixedString(10) 0 NULL +float32 Float32 0 NULL +float64 Float64 0 NULL +int128 Int128 0 NULL +int16 Int16 0 NULL +int256 Int256 0 NULL +int32 Int32 0 NULL +int64 Int64 0 NULL +int8 Int8 0 NULL +ipv4_value IPv4 0 NULL +ipv6_value IPv6 0 NULL +json_value Object('json') 0 NULL +low_cardinality LowCardinality(String) 0 NULL +low_cardinality_date LowCardinality(DateTime) 0 NULL +map_value Map(String, Int32) 0 NULL +nested.nested_int Array(Int32) 0 NULL +nested.nested_string Array(String) 0 NULL +nint32 Nullable(Int32) 1 NULL +nullable_value Nullable(Int32) 1 NULL +string_value String 0 NULL +tuple_value Tuple(Int32, String) 0 NULL +uint128 UInt128 0 NULL +uint16 UInt16 0 NULL +uint256 UInt256 0 NULL +uint32 UInt32 0 NULL +uint64 UInt64 0 PRI SOR NULL +uint8 UInt8 0 NULL +uuid_value UUID 0 NULL +field type null key default extra aggregate_function TEXT 0 NULL array_value TEXT 0 NULL boolean_value 
TINYINT UNSIGNED 0 NULL @@ -11,12 +49,17 @@ date32_value DATE 0 NULL date_value DATE 0 NULL datetime64_value DATETIME 0 NULL datetime_value DATETIME 0 NULL -decimal_value DECIMAL 0 NULL +decimal_value DECIMAL(10, 2) 0 NULL enum_value ENUM('apple', 'banana', 'orange') 0 NULL fixed_string_value TEXT 0 NULL float32 FLOAT 0 NULL float64 DOUBLE 0 NULL -int32 INT 0 NULL +int128 TEXT 0 NULL +int16 SMALLINT 0 NULL +int256 TEXT 0 NULL +int32 INTEGER 0 NULL +int64 BIGINT 0 NULL +int8 TINYINT 0 NULL ipv4_value TEXT 0 NULL ipv6_value TEXT 0 NULL json_value JSON 0 NULL @@ -25,10 +68,16 @@ low_cardinality_date DATETIME 0 NULL map_value JSON 0 NULL nested.nested_int TEXT 0 NULL nested.nested_string TEXT 0 NULL -nullable_value INT 0 NULL +nint32 INTEGER 0 NULL +nullable_value INTEGER 0 NULL string_value BLOB 0 NULL tuple_value JSON 0 NULL +uint128 TEXT 0 NULL +uint16 SMALLINT UNSIGNED 0 NULL +uint256 TEXT 0 NULL +uint32 INTEGER UNSIGNED 0 NULL uint64 BIGINT UNSIGNED 0 PRI SOR NULL +uint8 TINYINT UNSIGNED 0 NULL uuid_value CHAR 0 NULL field type null key default extra aggregate_function TEXT 0 NULL @@ -38,12 +87,17 @@ date32_value DATE 0 NULL date_value DATE 0 NULL datetime64_value DATETIME 0 NULL datetime_value DATETIME 0 NULL -decimal_value DECIMAL 0 NULL +decimal_value DECIMAL(10, 2) 0 NULL enum_value ENUM('apple', 'banana', 'orange') 0 NULL fixed_string_value TEXT 0 NULL float32 FLOAT 0 NULL float64 DOUBLE 0 NULL -int32 INT 0 NULL +int128 TEXT 0 NULL +int16 SMALLINT 0 NULL +int256 TEXT 0 NULL +int32 INTEGER 0 NULL +int64 BIGINT 0 NULL +int8 TINYINT 0 NULL ipv4_value TEXT 0 NULL ipv6_value TEXT 0 NULL json_value JSON 0 NULL @@ -52,10 +106,16 @@ low_cardinality_date DATETIME 0 NULL map_value JSON 0 NULL nested.nested_int TEXT 0 NULL nested.nested_string TEXT 0 NULL -nullable_value INT 0 NULL +nint32 INTEGER 0 NULL +nullable_value INTEGER 0 NULL string_value BLOB 0 NULL tuple_value JSON 0 NULL +uint128 TEXT 0 NULL +uint16 SMALLINT UNSIGNED 0 NULL +uint256 TEXT 0 NULL +uint32 INTEGER UNSIGNED 0 NULL uint64 BIGINT UNSIGNED 0 PRI SOR NULL +uint8 TINYINT UNSIGNED 0 NULL uuid_value CHAR 0 NULL field type null key default extra collation comment privileges aggregate_function TEXT 0 NULL NULL @@ -65,12 +125,17 @@ date32_value DATE 0 NULL NULL date_value DATE 0 NULL NULL datetime64_value DATETIME 0 NULL NULL datetime_value DATETIME 0 NULL NULL -decimal_value DECIMAL 0 NULL NULL +decimal_value DECIMAL(10, 2) 0 NULL NULL enum_value ENUM('apple', 'banana', 'orange') 0 NULL NULL fixed_string_value TEXT 0 NULL NULL float32 FLOAT 0 NULL NULL float64 DOUBLE 0 NULL NULL -int32 INT 0 NULL NULL +int128 TEXT 0 NULL NULL +int16 SMALLINT 0 NULL NULL +int256 TEXT 0 NULL NULL +int32 INTEGER 0 NULL NULL +int64 BIGINT 0 NULL NULL +int8 TINYINT 0 NULL NULL ipv4_value TEXT 0 NULL NULL ipv6_value TEXT 0 NULL NULL json_value JSON 0 NULL NULL @@ -79,15 +144,32 @@ low_cardinality_date DATETIME 0 NULL NULL map_value JSON 0 NULL NULL nested.nested_int TEXT 0 NULL NULL nested.nested_string TEXT 0 NULL NULL -nullable_value INT 0 NULL NULL +nint32 INTEGER 0 NULL NULL +nullable_value INTEGER 0 NULL NULL string_value BLOB 0 NULL NULL tuple_value JSON 0 NULL NULL +uint128 TEXT 0 NULL NULL +uint16 SMALLINT UNSIGNED 0 NULL NULL +uint256 TEXT 0 NULL NULL +uint32 INTEGER UNSIGNED 0 NULL NULL uint64 BIGINT UNSIGNED 0 PRI SOR NULL NULL +uint8 TINYINT UNSIGNED 0 NULL NULL uuid_value CHAR 0 NULL NULL field type null key default extra -int32 INT 0 NULL +int128 TEXT 0 NULL +int16 SMALLINT 0 NULL +int256 TEXT 0 NULL +int32 INTEGER 0 NULL +int64 BIGINT 0 
NULL +int8 TINYINT 0 NULL nested.nested_int TEXT 0 NULL +nint32 INTEGER 0 NULL +uint128 TEXT 0 NULL +uint16 SMALLINT UNSIGNED 0 NULL +uint256 TEXT 0 NULL +uint32 INTEGER UNSIGNED 0 NULL uint64 BIGINT UNSIGNED 0 PRI SOR NULL +uint8 TINYINT UNSIGNED 0 NULL field type null key default extra aggregate_function TEXT 0 NULL array_value TEXT 0 NULL @@ -96,7 +178,7 @@ date32_value DATE 0 NULL date_value DATE 0 NULL datetime64_value DATETIME 0 NULL datetime_value DATETIME 0 NULL -decimal_value DECIMAL 0 NULL +decimal_value DECIMAL(10, 2) 0 NULL enum_value ENUM('apple', 'banana', 'orange') 0 NULL fixed_string_value TEXT 0 NULL float32 FLOAT 0 NULL @@ -108,14 +190,25 @@ low_cardinality BLOB 0 NULL low_cardinality_date DATETIME 0 NULL map_value JSON 0 NULL nested.nested_string TEXT 0 NULL -nullable_value INT 0 NULL +nullable_value INTEGER 0 NULL string_value BLOB 0 NULL tuple_value JSON 0 NULL uuid_value CHAR 0 NULL field type null key default extra -int32 INT 0 NULL +int128 TEXT 0 NULL +int16 SMALLINT 0 NULL +int256 TEXT 0 NULL +int32 INTEGER 0 NULL +int64 BIGINT 0 NULL +int8 TINYINT 0 NULL nested.nested_int TEXT 0 NULL +nint32 INTEGER 0 NULL +uint128 TEXT 0 NULL +uint16 SMALLINT UNSIGNED 0 NULL +uint256 TEXT 0 NULL +uint32 INTEGER UNSIGNED 0 NULL uint64 BIGINT UNSIGNED 0 PRI SOR NULL +uint8 TINYINT UNSIGNED 0 NULL field type null key default extra aggregate_function TEXT 0 NULL array_value TEXT 0 NULL @@ -124,7 +217,7 @@ date32_value DATE 0 NULL date_value DATE 0 NULL datetime64_value DATETIME 0 NULL datetime_value DATETIME 0 NULL -decimal_value DECIMAL 0 NULL +decimal_value DECIMAL(10, 2) 0 NULL enum_value ENUM('apple', 'banana', 'orange') 0 NULL fixed_string_value TEXT 0 NULL float32 FLOAT 0 NULL @@ -136,14 +229,25 @@ low_cardinality BLOB 0 NULL low_cardinality_date DATETIME 0 NULL map_value JSON 0 NULL nested.nested_string TEXT 0 NULL -nullable_value INT 0 NULL +nullable_value INTEGER 0 NULL string_value BLOB 0 NULL tuple_value JSON 0 NULL uuid_value CHAR 0 NULL field type null key default extra -int32 INT 0 NULL +int128 TEXT 0 NULL +int16 SMALLINT 0 NULL +int256 TEXT 0 NULL +int32 INTEGER 0 NULL +int64 BIGINT 0 NULL +int8 TINYINT 0 NULL nested.nested_int TEXT 0 NULL +nint32 INTEGER 0 NULL +uint128 TEXT 0 NULL +uint16 SMALLINT UNSIGNED 0 NULL +uint256 TEXT 0 NULL +uint32 INTEGER UNSIGNED 0 NULL uint64 BIGINT UNSIGNED 0 PRI SOR NULL +uint8 TINYINT UNSIGNED 0 NULL field type null key default extra aggregate_function TEXT 0 NULL field type null key default extra @@ -154,12 +258,17 @@ date32_value DATE 0 NULL date_value DATE 0 NULL datetime64_value DATETIME 0 NULL datetime_value DATETIME 0 NULL -decimal_value DECIMAL 0 NULL +decimal_value DECIMAL(10, 2) 0 NULL enum_value ENUM('apple', 'banana', 'orange') 0 NULL fixed_string_value TEXT 0 NULL float32 FLOAT 0 NULL float64 DOUBLE 0 NULL -int32 INT 0 NULL +int128 TEXT 0 NULL +int16 SMALLINT 0 NULL +int256 TEXT 0 NULL +int32 INTEGER 0 NULL +int64 BIGINT 0 NULL +int8 TINYINT 0 NULL ipv4_value TEXT 0 NULL ipv6_value TEXT 0 NULL json_value JSON 0 NULL @@ -168,10 +277,16 @@ low_cardinality_date DATETIME 0 NULL map_value JSON 0 NULL nested.nested_int TEXT 0 NULL nested.nested_string TEXT 0 NULL -nullable_value INT 0 NULL +nint32 INTEGER 0 NULL +nullable_value INTEGER 0 NULL string_value BLOB 0 NULL tuple_value JSON 0 NULL +uint128 TEXT 0 NULL +uint16 SMALLINT UNSIGNED 0 NULL +uint256 TEXT 0 NULL +uint32 INTEGER UNSIGNED 0 NULL uint64 BIGINT UNSIGNED 0 PRI SOR NULL +uint8 TINYINT UNSIGNED 0 NULL uuid_value CHAR 0 NULL field type null key default extra 
aggregate_function TEXT 0 NULL @@ -181,12 +296,17 @@ date32_value DATE 0 NULL date_value DATE 0 NULL datetime64_value DATETIME 0 NULL datetime_value DATETIME 0 NULL -decimal_value DECIMAL 0 NULL +decimal_value DECIMAL(10, 2) 0 NULL enum_value ENUM('apple', 'banana', 'orange') 0 NULL fixed_string_value TEXT 0 NULL float32 FLOAT 0 NULL float64 DOUBLE 0 NULL -int32 INT 0 NULL +int128 TEXT 0 NULL +int16 SMALLINT 0 NULL +int256 TEXT 0 NULL +int32 INTEGER 0 NULL +int64 BIGINT 0 NULL +int8 TINYINT 0 NULL ipv4_value TEXT 0 NULL ipv6_value TEXT 0 NULL json_value JSON 0 NULL @@ -195,10 +315,16 @@ low_cardinality_date DATETIME 0 NULL map_value JSON 0 NULL nested.nested_int TEXT 0 NULL nested.nested_string TEXT 0 NULL -nullable_value INT 0 NULL +nint32 INTEGER 0 NULL +nullable_value INTEGER 0 NULL string_value BLOB 0 NULL tuple_value JSON 0 NULL +uint128 TEXT 0 NULL +uint16 SMALLINT UNSIGNED 0 NULL +uint256 TEXT 0 NULL +uint32 INTEGER UNSIGNED 0 NULL uint64 BIGINT UNSIGNED 0 PRI SOR NULL +uint8 TINYINT UNSIGNED 0 NULL uuid_value CHAR 0 NULL field type null key default extra aggregate_function TEXT 0 NULL @@ -208,12 +334,17 @@ date32_value DATE 0 NULL date_value DATE 0 NULL datetime64_value DATETIME 0 NULL datetime_value DATETIME 0 NULL -decimal_value DECIMAL 0 NULL +decimal_value DECIMAL(10, 2) 0 NULL enum_value ENUM('apple', 'banana', 'orange') 0 NULL fixed_string_value TEXT 0 NULL float32 FLOAT 0 NULL float64 DOUBLE 0 NULL -int32 INT 0 NULL +int128 TEXT 0 NULL +int16 SMALLINT 0 NULL +int256 TEXT 0 NULL +int32 INTEGER 0 NULL +int64 BIGINT 0 NULL +int8 TINYINT 0 NULL ipv4_value TEXT 0 NULL ipv6_value TEXT 0 NULL json_value JSON 0 NULL @@ -222,8 +353,14 @@ low_cardinality_date DATETIME 0 NULL map_value JSON 0 NULL nested.nested_int TEXT 0 NULL nested.nested_string TEXT 0 NULL -nullable_value INT 0 NULL +nint32 INTEGER 0 NULL +nullable_value INTEGER 0 NULL string_value BLOB 0 NULL tuple_value JSON 0 NULL +uint128 TEXT 0 NULL +uint16 SMALLINT UNSIGNED 0 NULL +uint256 TEXT 0 NULL +uint32 INTEGER UNSIGNED 0 NULL uint64 BIGINT UNSIGNED 0 PRI SOR NULL +uint8 TINYINT UNSIGNED 0 NULL uuid_value CHAR 0 NULL diff --git a/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.sh b/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.sh index fd1ad92f060..938102cb5fc 100755 --- a/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.sh +++ b/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.sh @@ -17,15 +17,25 @@ ${CLICKHOUSE_LOCAL} --query "DROP TABLE IF EXISTS tab" ${CLICKHOUSE_LOCAL} --query "DROP TABLE IF EXISTS database_123456789abcde" ${CLICKHOUSE_LOCAL} --query "DROP TABLE IF EXISTS database_123456789abcde.tab" -#${CLICKHOUSE_LOCAL} --query "SET allow_suspicious_low_cardinality_types = 1;" echo "Create tab table " ${CLICKHOUSE_LOCAL} -n -q " SET allow_suspicious_low_cardinality_types=1; - SET allow_experimental_object_type =1; + SET allow_experimental_object_type=1; CREATE TABLE tab ( + uint8 UInt8, + uint16 UInt16, + uint32 UInt32, uint64 UInt64, - int32 Nullable(Int32), + uint128 UInt128, + uint256 UInt256, + int8 Int8, + int16 Int16, + int32 Int32, + int64 Int64, + int128 Int128, + int256 Int256, + nint32 Nullable(Int32), float32 Float32, float64 Float64, decimal_value Decimal(10, 2), @@ -67,8 +77,19 @@ ${CLICKHOUSE_LOCAL} -n -q " SET allow_experimental_object_type =1; CREATE TABLE database_123456789abcde.tab ( + uint8 UInt8, + uint16 UInt16, + uint32 UInt32, uint64 UInt64, - int32 Nullable(Int32), + uint128 UInt128, + uint256 UInt256, + int8 Int8, + int16 
Int16, + int32 Int32, + int64 Int64, + int128 Int128, + int256 Int256, + nint32 Nullable(Int32), float32 Float32, float64 Float64, decimal_value Decimal(10, 2), @@ -105,6 +126,8 @@ TEMP_FILE=$(mktemp) cat < $TEMP_FILE SHOW COLUMNS FROM tab; +SET output_format_mysql_types=1; +SHOW COLUMNS FROM tab; SHOW EXTENDED COLUMNS FROM tab; SHOW FULL COLUMNS FROM tab; SHOW COLUMNS FROM tab LIKE '%int%'; From 760483d8d3a012b4b6456b9bdf14afd2d052a514 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Tue, 6 Jun 2023 04:18:51 +0000 Subject: [PATCH 0235/1997] multiple fixes --- base/base/IPv4andIPv6.h | 4 ++- .../AggregateFunctionMap.cpp | 1 + src/AggregateFunctions/AggregateFunctionMap.h | 26 +++++++++++++++++++ .../AggregateFunctionUniq.cpp | 2 +- .../AggregateFunctionUniq.h | 12 +++++++++ src/IO/ReadHelpers.h | 14 ++++++++++ src/IO/WriteHelpers.h | 8 ++++++ 7 files changed, 65 insertions(+), 2 deletions(-) diff --git a/base/base/IPv4andIPv6.h b/base/base/IPv4andIPv6.h index 4aee2329572..e2f93b54124 100644 --- a/base/base/IPv4andIPv6.h +++ b/base/base/IPv4andIPv6.h @@ -2,6 +2,7 @@ #include #include +#include #include namespace DB @@ -55,12 +56,13 @@ namespace DB namespace std { + /// For historical reasons we hash IPv6 as a FixedString(16) template <> struct hash<DB::IPv6> { size_t operator()(const DB::IPv6 & x) const { - return std::hash<DB::IPv6::UnderlyingType>()(x.toUnderType()); + return std::hash<std::string_view>{}(std::string_view(reinterpret_cast<const char *>(&x.toUnderType()), IPV6_BINARY_LENGTH)); } }; diff --git a/src/AggregateFunctions/AggregateFunctionMap.cpp b/src/AggregateFunctions/AggregateFunctionMap.cpp index 38e4f49d9a2..b957b541fe1 100644 --- a/src/AggregateFunctions/AggregateFunctionMap.cpp +++ b/src/AggregateFunctions/AggregateFunctionMap.cpp @@ -103,6 +103,7 @@ public: case TypeIndex::IPv4: return std::make_shared<AggregateFunctionMap<IPv4>>(nested_function, arguments); case TypeIndex::IPv6: + return std::make_shared<AggregateFunctionMap<IPv6>>(nested_function, arguments); case TypeIndex::FixedString: case TypeIndex::String: return std::make_shared<AggregateFunctionMap<String>>(nested_function, arguments); diff --git a/src/AggregateFunctions/AggregateFunctionMap.h b/src/AggregateFunctions/AggregateFunctionMap.h index 4a4ae92735b..7b9bb088d8f 100644 --- a/src/AggregateFunctions/AggregateFunctionMap.h +++ b/src/AggregateFunctions/AggregateFunctionMap.h @@ -21,6 +21,7 @@ #include "DataTypes/Serializations/ISerialization.h" #include #include "base/types.h" +#include #include #include "AggregateFunctions/AggregateFunctionFactory.h" @@ -70,6 +71,31 @@ struct AggregateFunctionMapCombinatorData } }; +/// Specialization for IPv6 - for historical reasons it should be stored as FixedString(16) +template <> +struct AggregateFunctionMapCombinatorData<IPv6> +{ + struct IPv6Hash + { + using hash_type = std::hash<IPv6>; + using is_transparent = void; + + size_t operator()(const IPv6 & ip) const { return hash_type{}(ip); } + }; + + using SearchType = IPv6; + std::unordered_map<IPv6, AggregateDataPtr, IPv6Hash, std::equal_to<>> merged_maps; + + static void writeKey(const IPv6 & key, WriteBuffer & buf) + { + writeIPv6Binary(key, buf); + } + static void readKey(IPv6 & key, ReadBuffer & buf) + { + readIPv6Binary(key, buf); + } +}; + template <typename KeyType> class AggregateFunctionMap final : public IAggregateFunctionDataHelper<AggregateFunctionMapCombinatorData<KeyType>, AggregateFunctionMap<KeyType>> diff --git a/src/AggregateFunctions/AggregateFunctionUniq.cpp b/src/AggregateFunctions/AggregateFunctionUniq.cpp index f5147daa97b..748a232641e 100644 --- a/src/AggregateFunctions/AggregateFunctionUniq.cpp +++ b/src/AggregateFunctions/AggregateFunctionUniq.cpp @@ -117,7 +117,7 @@ createAggregateFunctionUniq(const std::string & name, const DataTypes & argument else if 
(which.isIPv4()) return std::make_shared>>(argument_types); else if (which.isIPv6()) - return std::make_shared>>(argument_types); + return std::make_shared>>(argument_types); else if (which.isTuple()) { if (use_exact_hash_function) diff --git a/src/AggregateFunctions/AggregateFunctionUniq.h b/src/AggregateFunctions/AggregateFunctionUniq.h index 0524dd53ec0..03d999b47e2 100644 --- a/src/AggregateFunctions/AggregateFunctionUniq.h +++ b/src/AggregateFunctions/AggregateFunctionUniq.h @@ -101,6 +101,18 @@ struct AggregateFunctionUniqHLL12Data static String getName() { return "uniqHLL12"; } }; +template <> +struct AggregateFunctionUniqHLL12Data<IPv6> +{ + using Set = HyperLogLogWithSmallSetOptimization; + Set set; + + constexpr static bool is_able_to_parallelize_merge = false; + constexpr static bool is_variadic = false; + + static String getName() { return "uniqHLL12"; } +}; + template struct AggregateFunctionUniqHLL12DataForVariadic { diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index 32338552b66..1aa294f76bf 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -61,6 +61,7 @@ namespace ErrorCodes extern const int INCORRECT_DATA; extern const int TOO_LARGE_STRING_SIZE; extern const int TOO_LARGE_ARRAY_SIZE; + extern const int SIZE_OF_FIXED_STRING_DOESNT_MATCH; } /// Helper functions for formatted input. @@ -136,6 +137,19 @@ inline void readStringBinary(std::string & s, ReadBuffer & buf, size_t max_strin buf.readStrict(s.data(), size); } +/// For historical reasons we store IPv6 as a String +inline void readIPv6Binary(IPv6 & ip, ReadBuffer & buf) +{ + size_t size = 0; + readVarUInt(size, buf); + + if (size != IPV6_BINARY_LENGTH) + throw Exception(ErrorCodes::SIZE_OF_FIXED_STRING_DOESNT_MATCH, + "Size of the string {} doesn't match size of binary IPv6 {}", size, IPV6_BINARY_LENGTH); + + buf.readStrict(reinterpret_cast<char *>(&ip.toUnderType()), size); +} + template <typename T> void readVectorBinary(std::vector<T> & v, ReadBuffer & buf) { diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index cdbc952690c..505a2f988f0 100644 --- a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -10,6 +10,7 @@ #include +#include "Common/formatIPv6.h" #include #include #include @@ -104,6 +105,13 @@ inline void writeStringBinary(const std::string & s, WriteBuffer & buf) buf.write(s.data(), s.size()); } +/// For historical reasons we store IPv6 as a String +inline void writeIPv6Binary(const IPv6 & ip, WriteBuffer & buf) +{ + writeVarUInt(IPV6_BINARY_LENGTH, buf); + buf.write(reinterpret_cast<const char *>(&ip.toUnderType()), IPV6_BINARY_LENGTH); +} + inline void writeStringBinary(StringRef s, WriteBuffer & buf) { writeVarUInt(s.size, buf); From adfedb4df01bd0dcd2870df5f6b28b82017650a0 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 6 Jun 2023 14:46:34 +0200 Subject: [PATCH 0236/1997] Add USE NAMED COLLECTION access --- src/Access/Common/AccessRightsElement.cpp | 2 +- src/Access/Common/AccessType.h | 1 + .../ClickHouseDictionarySource.cpp | 2 +- src/Dictionaries/MySQLDictionarySource.cpp | 2 +- src/Storages/NamedCollectionsHelpers.cpp | 29 +++++++++++------ src/Storages/NamedCollectionsHelpers.h | 2 +- .../helpers/0_common_instance_users.xml | 6 +++- .../test_storage_s3/configs/access.xml | 19 +++++++++++ tests/integration/test_storage_s3/test.py | 32 +++++++++++++++---- 9 files changed, 74 insertions(+), 21 deletions(-) create mode 100644 tests/integration/test_storage_s3/configs/access.xml diff --git a/src/Access/Common/AccessRightsElement.cpp b/src/Access/Common/AccessRightsElement.cpp index e11d43634ec..835f414df37 
100644 --- a/src/Access/Common/AccessRightsElement.cpp +++ b/src/Access/Common/AccessRightsElement.cpp @@ -155,7 +155,7 @@ namespace AccessRightsElement::AccessRightsElement(AccessFlags access_flags_, std::string_view database_) - : access_flags(access_flags_), database(database_), any_database(false) + : access_flags(access_flags_), database(database_), parameter(database_), any_database(false), any_parameter(false) { } diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index 6394c0279a7..6625ccb652b 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -142,6 +142,7 @@ enum class AccessType M(ACCESS_MANAGEMENT, "", GROUP, ALL) \ M(SHOW_NAMED_COLLECTIONS, "SHOW NAMED COLLECTIONS", NAMED_COLLECTION, NAMED_COLLECTION_CONTROL) \ M(SHOW_NAMED_COLLECTIONS_SECRETS, "SHOW NAMED COLLECTIONS SECRETS", NAMED_COLLECTION, NAMED_COLLECTION_CONTROL) \ + M(USE_NAMED_COLLECTION, "USE NAMED COLLECTION", NAMED_COLLECTION, NAMED_COLLECTION_CONTROL) \ M(NAMED_COLLECTION_CONTROL, "", NAMED_COLLECTION, ALL) \ \ M(SYSTEM_SHUTDOWN, "SYSTEM KILL, SHUTDOWN", GLOBAL, SYSTEM) \ diff --git a/src/Dictionaries/ClickHouseDictionarySource.cpp b/src/Dictionaries/ClickHouseDictionarySource.cpp index 65147ee664e..2dc7f6145b3 100644 --- a/src/Dictionaries/ClickHouseDictionarySource.cpp +++ b/src/Dictionaries/ClickHouseDictionarySource.cpp @@ -217,7 +217,7 @@ void registerDictionarySourceClickHouse(DictionarySourceFactory & factory) std::optional configuration; std::string settings_config_prefix = config_prefix + ".clickhouse"; - auto named_collection = created_from_ddl ? tryGetNamedCollectionWithOverrides(config, settings_config_prefix) : nullptr; + auto named_collection = created_from_ddl ? tryGetNamedCollectionWithOverrides(config, settings_config_prefix, global_context) : nullptr; if (named_collection) { diff --git a/src/Dictionaries/MySQLDictionarySource.cpp b/src/Dictionaries/MySQLDictionarySource.cpp index 730217f96b7..e61409e2b54 100644 --- a/src/Dictionaries/MySQLDictionarySource.cpp +++ b/src/Dictionaries/MySQLDictionarySource.cpp @@ -71,7 +71,7 @@ void registerDictionarySourceMysql(DictionarySourceFactory & factory) MySQLSettings mysql_settings; std::optional dictionary_configuration; - auto named_collection = created_from_ddl ? tryGetNamedCollectionWithOverrides(config, settings_config_prefix) : nullptr; + auto named_collection = created_from_ddl ? 
tryGetNamedCollectionWithOverrides(config, settings_config_prefix, global_context) : nullptr; if (named_collection) { auto allowed_arguments{dictionary_allowed_keys}; diff --git a/src/Storages/NamedCollectionsHelpers.cpp b/src/Storages/NamedCollectionsHelpers.cpp index 83128ab025a..efd5af29f48 100644 --- a/src/Storages/NamedCollectionsHelpers.cpp +++ b/src/Storages/NamedCollectionsHelpers.cpp @@ -1,4 +1,5 @@ #include "NamedCollectionsHelpers.h" +#include #include #include #include @@ -15,19 +16,16 @@ namespace ErrorCodes namespace { - NamedCollectionPtr tryGetNamedCollectionFromASTs(ASTs asts, bool throw_unknown_collection) + std::optional<std::string> getCollectionName(ASTs asts) { if (asts.empty()) - return nullptr; + return std::nullopt; const auto * identifier = asts[0]->as<ASTIdentifier>(); if (!identifier) - return nullptr; + return std::nullopt; - const auto & collection_name = identifier->name(); - if (throw_unknown_collection) - return NamedCollectionFactory::instance().get(collection_name); - return NamedCollectionFactory::instance().tryGet(collection_name); + return identifier->name(); } std::optional>> getKeyValueFromAST(ASTPtr ast, bool fallback_to_ast_value, ContextPtr context) @@ -74,10 +72,21 @@ MutableNamedCollectionPtr tryGetNamedCollectionWithOverrides( NamedCollectionUtils::loadIfNot(); - auto collection = tryGetNamedCollectionFromASTs(asts, throw_unknown_collection); + auto collection_name = getCollectionName(asts); + if (!collection_name.has_value()) + return nullptr; + + NamedCollectionPtr collection; + if (throw_unknown_collection) + collection = NamedCollectionFactory::instance().get(*collection_name); + else + collection = NamedCollectionFactory::instance().tryGet(*collection_name); + if (!collection) return nullptr; + context->checkAccess(AccessType::USE_NAMED_COLLECTION, *collection_name); + auto collection_copy = collection->duplicate(); if (asts.size() == 1) @@ -106,12 +115,14 @@ MutableNamedCollectionPtr tryGetNamedCollectionWithOverrides( } MutableNamedCollectionPtr tryGetNamedCollectionWithOverrides( - const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix) + const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) { auto collection_name = config.getString(config_prefix + ".name", ""); if (collection_name.empty()) return nullptr; + context->checkAccess(AccessType::USE_NAMED_COLLECTION, collection_name); + const auto & collection = NamedCollectionFactory::instance().get(collection_name); auto collection_copy = collection->duplicate(); diff --git a/src/Storages/NamedCollectionsHelpers.h b/src/Storages/NamedCollectionsHelpers.h index 1473a3fbe48..15ed7c9e19b 100644 --- a/src/Storages/NamedCollectionsHelpers.h +++ b/src/Storages/NamedCollectionsHelpers.h @@ -22,7 +22,7 @@ MutableNamedCollectionPtr tryGetNamedCollectionWithOverrides( ASTs asts, ContextPtr context, bool throw_unknown_collection = true, std::vector> * complex_args = nullptr); /// Helper function to get named collection for dictionary source. /// Dictionaries have collection name as name argument of dict configuration and other arguments are overrides. 
-MutableNamedCollectionPtr tryGetNamedCollectionWithOverrides(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix); +MutableNamedCollectionPtr tryGetNamedCollectionWithOverrides(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context); HTTPHeaderEntries getHeadersFromNamedCollection(const NamedCollection & collection); diff --git a/tests/integration/helpers/0_common_instance_users.xml b/tests/integration/helpers/0_common_instance_users.xml index 3399ef5915a..6aae12400fd 100644 --- a/tests/integration/helpers/0_common_instance_users.xml +++ b/tests/integration/helpers/0_common_instance_users.xml @@ -1,7 +1,11 @@ - 1 + + GRANT ACCESS MANAGEMENT ON *.* WITH GRANT OPTION + GRANT ALL ON *.* WITH GRANT OPTION + GRANT USE NAMED COLLECTION ON * WITH GRANT OPTION + diff --git a/tests/integration/test_storage_s3/configs/access.xml b/tests/integration/test_storage_s3/configs/access.xml new file mode 100644 index 00000000000..8bded9104f6 --- /dev/null +++ b/tests/integration/test_storage_s3/configs/access.xml @@ -0,0 +1,19 @@ + + + + + default + default + + GRANT admin_role + + + + + + + GRANT USE NAMED COLLECTION ON * WITH GRANT OPTION + + + + diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index f983bd618e3..01dd4fd7856 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -55,6 +55,7 @@ def started_cluster(): "configs/named_collections.xml", "configs/schema_cache.xml", ], + user_configs=["configs/access.xml"], ) cluster.add_instance( "s3_max_redirects", @@ -921,22 +922,39 @@ def test_predefined_connection_configuration(started_cluster): instance = started_cluster.instances["dummy"] # type: ClickHouseInstance name = "test_table" - instance.query("drop table if exists {}".format(name)) + instance.query("CREATE USER user") + instance.query("GRANT CREATE ON *.* TO user") + instance.query("GRANT SOURCES ON *.* TO user") + instance.query("GRANT SELECT ON *.* TO user") + + instance.query(f"drop table if exists {name}", user="user") + error = instance.query_and_get_error( + f"CREATE TABLE {name} (id UInt32) ENGINE = S3(s3_conf1, format='CSV')" + ) + assert "To execute this query it's necessary to have grant USE NAMED COLLECTION ON s3_conf1" in error + error = instance.query_and_get_error( + f"CREATE TABLE {name} (id UInt32) ENGINE = S3(s3_conf1, format='CSV')", user="user" + ) + assert "To execute this query it's necessary to have grant USE NAMED COLLECTION ON s3_conf1" in error + + instance.query("GRANT USE NAMED COLLECTION ON s3_conf1 TO user", user="admin") instance.query( - "CREATE TABLE {} (id UInt32) ENGINE = S3(s3_conf1, format='CSV')".format(name) + f"CREATE TABLE {name} (id UInt32) ENGINE = S3(s3_conf1, format='CSV')", user="user" ) - instance.query("INSERT INTO {} SELECT number FROM numbers(10)".format(name)) - result = instance.query("SELECT * FROM {}".format(name)) + instance.query(f"INSERT INTO {name} SELECT number FROM numbers(10)") + result = instance.query(f"SELECT * FROM {name}") assert result == instance.query("SELECT number FROM numbers(10)") result = instance.query( - "SELECT * FROM s3(s3_conf1, format='CSV', structure='id UInt32')" + "SELECT * FROM s3(s3_conf1, format='CSV', structure='id UInt32')", user="user" ) assert result == instance.query("SELECT number FROM numbers(10)") - result = instance.query_and_get_error("SELECT * FROM s3(no_collection)") - assert "There is no named collection `no_collection`" 
in result + error = instance.query_and_get_error("SELECT * FROM s3(no_collection)") + assert "There is no named collection `no_collection`" in error + error = instance.query_and_get_error("SELECT * FROM s3(no_collection)", user="user") + assert "There is no named collection `no_collection`" in error result = "" From ad2b926248a4fd464f6f278045c9103a75d92ca7 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 6 Jun 2023 07:17:30 +0000 Subject: [PATCH 0237/1997] Fix jepsen runs in PRs --- tests/ci/jepsen_check.py | 8 ++------ tests/jepsen.clickhouse/resources/keeper_config.xml | 8 ++++---- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/tests/ci/jepsen_check.py b/tests/ci/jepsen_check.py index 9d35d2d6e35..c21fafa2605 100644 --- a/tests/ci/jepsen_check.py +++ b/tests/ci/jepsen_check.py @@ -25,6 +25,7 @@ from stopwatch import Stopwatch from tee_popen import TeePopen from upload_result_helper import upload_results from version_helper import get_version_from_repo +from build_check import get_release_or_pr JEPSEN_GROUP_NAME = "jepsen_group" @@ -210,12 +211,7 @@ if __name__ == "__main__": build_name = get_build_name_for_check(check_name) - if pr_info.number == 0: - version = get_version_from_repo() - release_or_pr = f"{version.major}.{version.minor}" - else: - # PR number for anything else - release_or_pr = str(pr_info.number) + release_or_pr, _ = get_release_or_pr(pr_info, get_version_from_repo()) # This check run separately from other checks because it requires exclusive # run (see .github/workflows/jepsen.yml) So we cannot add explicit diff --git a/tests/jepsen.clickhouse/resources/keeper_config.xml b/tests/jepsen.clickhouse/resources/keeper_config.xml index 52f2a0dbdc2..1972ef6b917 100644 --- a/tests/jepsen.clickhouse/resources/keeper_config.xml +++ b/tests/jepsen.clickhouse/resources/keeper_config.xml @@ -13,19 +13,19 @@ local - /var/lib/clickhouse/coordination/logs/ + /home/robot-clickhouse/db/coordination/logs/ local - /var/lib/clickhouse/coordination/latest_log/ + /home/robot-clickhouse/db/coordination/latest_log/ local - /var/lib/clickhouse/coordination/snapshots/ + /home/robot-clickhouse/db/coordination/snapshots/ local - /var/lib/clickhouse/coordination/latest_snapshot/ + /home/robot-clickhouse/db/coordination/latest_snapshot/ From 5ffbe2d9d4de6e47268be38ac84e5de45faded49 Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Tue, 6 Jun 2023 10:14:31 -0300 Subject: [PATCH 0238/1997] Update docs/en/sql-reference/data-types/ipv6.md Co-authored-by: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> --- docs/en/sql-reference/data-types/ipv6.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/data-types/ipv6.md b/docs/en/sql-reference/data-types/ipv6.md index 284a1f80854..97959308b58 100644 --- a/docs/en/sql-reference/data-types/ipv6.md +++ b/docs/en/sql-reference/data-types/ipv6.md @@ -6,7 +6,7 @@ sidebar_label: IPv6 ## IPv6 -IPv6 addresses. Stored in 16 bytes as UInt128. +IPv6 addresses. Stored in 16 bytes as UInt128 big-endian. 
### Basic Usage From d316add2f1f6ffa9cf6f2a1107a4d7d69960c72a Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Tue, 6 Jun 2023 15:24:29 +0000 Subject: [PATCH 0239/1997] Add integration test test_config_decryption --- .../test_config_decryption/__init__.py | 0 .../test_config_decryption/configs/config.xml | 12 +++++++ .../test_config_decryption/test.py | 31 +++++++++++++++++++ 3 files changed, 43 insertions(+) create mode 100644 tests/integration/test_config_decryption/__init__.py create mode 100644 tests/integration/test_config_decryption/configs/config.xml create mode 100644 tests/integration/test_config_decryption/test.py diff --git a/tests/integration/test_config_decryption/__init__.py b/tests/integration/test_config_decryption/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_config_decryption/configs/config.xml b/tests/integration/test_config_decryption/configs/config.xml new file mode 100644 index 00000000000..5c274128e39 --- /dev/null +++ b/tests/integration/test_config_decryption/configs/config.xml @@ -0,0 +1,12 @@ + + + + 00112233445566778899aabbccddeeff + + + 00112233445566778899aabbccddeeff00112233445566778899aabbccddeeff + + + 96260000000B0000000000E8FE3C087CED2205A5071078B29FD5C3B97F824911DED3217E980C + 97260000000B0000000000BFFF70C4DA718754C1DA0E2F25FF9246D4783F7FFEC4089EC1CC14 + diff --git a/tests/integration/test_config_decryption/test.py b/tests/integration/test_config_decryption/test.py new file mode 100644 index 00000000000..a3cb1bb57f3 --- /dev/null +++ b/tests/integration/test_config_decryption/test.py @@ -0,0 +1,31 @@ +import pytest +import os +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance("node", main_configs=["configs/config.xml"]) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def test_successful_decryption(started_cluster): + assert ( + node.query( + "select value from system.server_settings where name ='max_table_size_to_drop'" + ) + == "60000000000\n" + ) + assert ( + node.query( + "select value from system.server_settings where name ='max_partition_size_to_drop'" + ) + == "40000000000\n" + ) From c6acdd7008e625907e2207a5b4cff554b3490a9d Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 6 Jun 2023 18:53:31 +0200 Subject: [PATCH 0240/1997] Fix fast test, fix black check --- tests/integration/test_storage_s3/test.py | 18 +++++++++++++----- .../01271_show_privileges.reference | 1 + .../02117_show_create_table_system.reference | 6 +++--- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 01dd4fd7856..f1cbd3366b4 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -931,15 +931,23 @@ def test_predefined_connection_configuration(started_cluster): error = instance.query_and_get_error( f"CREATE TABLE {name} (id UInt32) ENGINE = S3(s3_conf1, format='CSV')" ) - assert "To execute this query it's necessary to have grant USE NAMED COLLECTION ON s3_conf1" in error - error = instance.query_and_get_error( - f"CREATE TABLE {name} (id UInt32) ENGINE = S3(s3_conf1, format='CSV')", user="user" + assert ( + "To execute this query it's necessary to have grant USE NAMED COLLECTION ON s3_conf1" + in error + ) + error = instance.query_and_get_error( + f"CREATE TABLE {name} (id UInt32) ENGINE = S3(s3_conf1, format='CSV')", 
+ user="user", + ) + assert ( + "To execute this query it's necessary to have grant USE NAMED COLLECTION ON s3_conf1" + in error ) - assert "To execute this query it's necessary to have grant USE NAMED COLLECTION ON s3_conf1" in error instance.query("GRANT USE NAMED COLLECTION ON s3_conf1 TO user", user="admin") instance.query( - f"CREATE TABLE {name} (id UInt32) ENGINE = S3(s3_conf1, format='CSV')", user="user" + f"CREATE TABLE {name} (id UInt32) ENGINE = S3(s3_conf1, format='CSV')", + user="user", ) instance.query(f"INSERT INTO {name} SELECT number FROM numbers(10)") diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index ec245d8b9e0..c32ac39a1f5 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -94,6 +94,7 @@ SHOW ACCESS [] \N ACCESS MANAGEMENT ACCESS MANAGEMENT [] \N ALL SHOW NAMED COLLECTIONS ['SHOW NAMED COLLECTIONS'] NAMED_COLLECTION NAMED COLLECTION CONTROL SHOW NAMED COLLECTIONS SECRETS ['SHOW NAMED COLLECTIONS SECRETS'] NAMED_COLLECTION NAMED COLLECTION CONTROL +USE NAMED COLLECTION ['USE NAMED COLLECTION'] NAMED_COLLECTION NAMED COLLECTION CONTROL NAMED COLLECTION CONTROL [] NAMED_COLLECTION ALL SYSTEM SHUTDOWN ['SYSTEM KILL','SHUTDOWN'] GLOBAL SYSTEM SYSTEM DROP DNS CACHE ['SYSTEM DROP DNS','DROP DNS CACHE','DROP DNS'] GLOBAL SYSTEM DROP CACHE diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 09cc62dac00..9630767a552 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -297,7 +297,7 @@ CREATE TABLE system.grants ( `user_name` Nullable(String), `role_name` Nullable(String), - `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 
66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD SYMBOLS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH' = 135, 'SYSTEM THREAD FUZZER' = 136, 'SYSTEM UNFREEZE' = 137, 'SYSTEM FAILPOINT' = 138, 'SYSTEM' = 139, 'dictGet' = 140, 'displaySecretsInShowAndSelect' = 141, 'addressToLine' = 142, 'addressToLineWithInlines' = 143, 'addressToSymbol' = 144, 'demangle' = 145, 'INTROSPECTION' = 146, 'FILE' = 147, 'URL' = 148, 'REMOTE' = 149, 'MONGO' = 150, 'MEILISEARCH' = 151, 'MYSQL' = 152, 'POSTGRES' = 153, 'SQLITE' = 154, 'ODBC' = 155, 'JDBC' = 156, 'HDFS' = 157, 'S3' = 158, 'HIVE' = 159, 'SOURCES' = 160, 'CLUSTER' = 161, 'ALL' = 162, 'NONE' = 163), + `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 
'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'USE NAMED COLLECTION' = 96, 'NAMED COLLECTION CONTROL' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD SYMBOLS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'MEILISEARCH' = 152, 'MYSQL' = 153, 'POSTGRES' = 154, 'SQLITE' = 155, 'ODBC' = 156, 'JDBC' = 157, 'HDFS' = 158, 'S3' = 159, 'HIVE' = 160, 'SOURCES' = 161, 'CLUSTER' = 162, 'ALL' = 163, 'NONE' = 164), `database` Nullable(String), `table` Nullable(String), `column` Nullable(String), @@ -581,10 +581,10 @@ ENGINE = SystemPartsColumns COMMENT 'SYSTEM TABLE is built on the fly.' 
CREATE TABLE system.privileges ( - `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD SYMBOLS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE 
CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH' = 135, 'SYSTEM THREAD FUZZER' = 136, 'SYSTEM UNFREEZE' = 137, 'SYSTEM FAILPOINT' = 138, 'SYSTEM' = 139, 'dictGet' = 140, 'displaySecretsInShowAndSelect' = 141, 'addressToLine' = 142, 'addressToLineWithInlines' = 143, 'addressToSymbol' = 144, 'demangle' = 145, 'INTROSPECTION' = 146, 'FILE' = 147, 'URL' = 148, 'REMOTE' = 149, 'MONGO' = 150, 'MEILISEARCH' = 151, 'MYSQL' = 152, 'POSTGRES' = 153, 'SQLITE' = 154, 'ODBC' = 155, 'JDBC' = 156, 'HDFS' = 157, 'S3' = 158, 'HIVE' = 159, 'SOURCES' = 160, 'CLUSTER' = 161, 'ALL' = 162, 'NONE' = 163), + `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'USE NAMED COLLECTION' = 96, 'NAMED COLLECTION CONTROL' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD SYMBOLS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 
'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'MEILISEARCH' = 152, 'MYSQL' = 153, 'POSTGRES' = 154, 'SQLITE' = 155, 'ODBC' = 156, 'JDBC' = 157, 'HDFS' = 158, 'S3' = 159, 'HIVE' = 160, 'SOURCES' = 161, 'CLUSTER' = 162, 'ALL' = 163, 'NONE' = 164), `aliases` Array(String), `level` Nullable(Enum8('GLOBAL' = 0, 'DATABASE' = 1, 'TABLE' = 2, 'DICTIONARY' = 3, 'VIEW' = 4, 'COLUMN' = 5, 'NAMED_COLLECTION' = 6)), - `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 
86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD SYMBOLS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH' = 135, 'SYSTEM THREAD FUZZER' = 136, 'SYSTEM UNFREEZE' = 137, 'SYSTEM FAILPOINT' = 138, 'SYSTEM' = 139, 'dictGet' = 140, 'displaySecretsInShowAndSelect' = 141, 'addressToLine' = 142, 'addressToLineWithInlines' = 143, 'addressToSymbol' = 144, 'demangle' = 145, 'INTROSPECTION' = 146, 'FILE' = 147, 'URL' = 148, 'REMOTE' = 149, 'MONGO' = 150, 'MEILISEARCH' = 151, 'MYSQL' = 152, 'POSTGRES' = 153, 'SQLITE' = 154, 'ODBC' = 155, 'JDBC' = 156, 'HDFS' = 157, 'S3' = 158, 'HIVE' = 159, 'SOURCES' = 160, 'CLUSTER' = 161, 'ALL' = 162, 'NONE' = 163)) + `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' 
= 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'USE NAMED COLLECTION' = 96, 'NAMED COLLECTION CONTROL' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD SYMBOLS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'MEILISEARCH' = 152, 'MYSQL' = 153, 'POSTGRES' = 154, 'SQLITE' = 155, 'ODBC' = 156, 'JDBC' = 157, 'HDFS' = 158, 'S3' = 159, 'HIVE' = 160, 'SOURCES' = 161, 'CLUSTER' = 162, 'ALL' = 163, 'NONE' = 164)) ) ENGINE = SystemPrivileges COMMENT 'SYSTEM TABLE is built on the fly.' 
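Note what this reference-file diff actually encodes: Enum16 values in system.privileges are assigned positionally, so inserting 'USE NAMED COLLECTION' = 96 shifts every later privilege up by one ('NAMED COLLECTION CONTROL' 96 -> 97, 'SYSTEM' 139 -> 140, 'NONE' 163 -> 164), and the `privilege` and `parent_group` columns must be renumbered in lockstep. A quick way to spot-check the new numbering on a running server (an illustrative query, not part of the patch):

SELECT privilege, CAST(privilege AS Int16) AS value
FROM system.privileges
WHERE privilege IN ('USE NAMED COLLECTION', 'NAMED COLLECTION CONTROL', 'SYSTEM', 'NONE')
ORDER BY value;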
From bea4e8e81f6a782edc3dc2a672ae3011843de305 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 7 Jun 2023 06:45:14 +0000 Subject: [PATCH 0241/1997] Add new files --- programs/keeper/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index f775e8a5a22..90f4f870df6 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -131,6 +131,8 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorageCommon.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/ObjectStorageIterator.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/StoredObject.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/registerDiskS3.cpp From 9cd0d5e6db0ce68ffdc320e2f73d17531fb54ec0 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 7 Jun 2023 11:49:41 +0000 Subject: [PATCH 0242/1997] move settings to server_settings --- programs/server/Server.cpp | 9 +++------ src/Core/ServerSettings.h | 2 ++ 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index a64676cfa01..5496720e5dc 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1438,15 +1438,12 @@ try /// This is needed to load proper values of background_pool_size etc. global_context->initializeBackgroundExecutorsIfNeeded(); - size_t async_insert_threads = config().getUInt("async_insert_threads", 16); - bool async_insert_queue_flush_on_shutdown = config().getBool("async_insert_queue_flush_on_shutdown", false); - - if (async_insert_threads) + if (server_settings.async_insert_threads) { global_context->setAsynchronousInsertQueue(std::make_shared( global_context, - async_insert_threads, - async_insert_queue_flush_on_shutdown)); + server_settings.async_insert_threads, + server_settings.async_insert_queue_flush_on_shutdown)); } size_t mark_cache_size = server_settings.mark_cache_size; diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index 1a9f226041b..ca27cbdbf19 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -48,6 +48,8 @@ namespace DB M(UInt64, merges_mutations_memory_usage_soft_limit, 0, "Limit on total memory usage for merges and mutations. Zero means Unlimited.", 0) \ M(Double, merges_mutations_memory_usage_to_ram_ratio, 0.5, "Same as merges_mutations_memory_usage_soft_limit but in to ram ratio. Allows to lower memory limit on low-memory systems.", 0) \ M(Bool, allow_use_jemalloc_memory, true, "Allows to use jemalloc memory.", 0) \ + M(UInt64, async_insert_threads, 16, "Maximum number of threads to actually parse and insert data in background. Zero means asynchronous mode is disabled", 0) \ + M(Bool, async_insert_queue_flush_on_shutdown, true, "If true queue of asynchronous inserts is flushed on graceful shutdown", 0) \ \ M(UInt64, max_concurrent_queries, 0, "Limit on total number of concurrently executed queries. Zero means Unlimited.", 0) \ M(UInt64, max_concurrent_insert_queries, 0, "Limit on total number of concurrently insert queries. 
Zero means Unlimited.", 0) \ From ece53808519ce6e42bbff2f2f3ac7d9ffccabe63 Mon Sep 17 00:00:00 2001 From: zvonand Date: Wed, 7 Jun 2023 15:48:38 +0200 Subject: [PATCH 0243/1997] add simple stateless for file() --- .../02771_complex_globs_in_storage_file_path.reference | 4 ++++ .../02771_complex_globs_in_storage_file_path.sql | 8 ++++++++ 2 files changed, 12 insertions(+) create mode 100644 tests/queries/0_stateless/02771_complex_globs_in_storage_file_path.reference create mode 100644 tests/queries/0_stateless/02771_complex_globs_in_storage_file_path.sql diff --git a/tests/queries/0_stateless/02771_complex_globs_in_storage_file_path.reference b/tests/queries/0_stateless/02771_complex_globs_in_storage_file_path.reference new file mode 100644 index 00000000000..e1b420ecf37 --- /dev/null +++ b/tests/queries/0_stateless/02771_complex_globs_in_storage_file_path.reference @@ -0,0 +1,4 @@ +This is file data1 data1.csv +This is file data2 data2.csv +This is file data1 data1.csv +This is file data2 data2.csv diff --git a/tests/queries/0_stateless/02771_complex_globs_in_storage_file_path.sql b/tests/queries/0_stateless/02771_complex_globs_in_storage_file_path.sql new file mode 100644 index 00000000000..c579c8d8698 --- /dev/null +++ b/tests/queries/0_stateless/02771_complex_globs_in_storage_file_path.sql @@ -0,0 +1,8 @@ +-- Tags: no-replicated-database, no-parallel + +INSERT INTO TABLE FUNCTION file('02771/dir1/subdir11/data1.csv', 'CSV', 's String') SELECT 'This is file data1' SETTINGS engine_file_truncate_on_insert=1; +INSERT INTO TABLE FUNCTION file('02771/dir2/subdir22/data2.csv', 'CSV', 's String') SELECT 'This is file data2' SETTINGS engine_file_truncate_on_insert=1; + +SELECT *, _file FROM file('02771/dir{?/subdir?1/da,2/subdir2?/da}ta1.csv', CSV); +SELECT *, _file FROM file('02771/dir{?/subdir?1/da,2/subdir2?/da}ta2.csv', CSV); +SELECT *, _file FROM file('02771/dir?/{subdir?1/data1,subdir2?/data2}.csv', CSV); From 2df0e0c66962ce91e47eb4c98bae4fabe1ce9cc1 Mon Sep 17 00:00:00 2001 From: serxa Date: Wed, 7 Jun 2023 16:25:52 +0000 Subject: [PATCH 0244/1997] Unify priorities for connections --- src/Client/ConnectionPool.cpp | 4 ++-- src/Client/ConnectionPool.h | 13 +++++++------ src/Client/ConnectionPoolWithFailover.cpp | 2 +- src/Client/ConnectionPoolWithFailover.h | 2 +- src/Common/GetPriorityForLoadBalancing.cpp | 15 ++++++++------- src/Common/GetPriorityForLoadBalancing.h | 2 +- src/Common/PoolWithFailoverBase.h | 9 +++++---- src/Common/ZooKeeper/ZooKeeper.cpp | 2 +- src/Common/ZooKeeper/ZooKeeper.h | 2 +- src/Databases/DatabaseReplicated.cpp | 2 +- src/Functions/hasColumnInTable.cpp | 2 +- .../tests/gtest_resource_manager_static.cpp | 4 ++-- src/Interpreters/Cluster.cpp | 2 +- src/Interpreters/Cluster.h | 5 +++-- src/Interpreters/ClusterDiscovery.cpp | 2 +- src/TableFunctions/TableFunctionRemote.cpp | 2 +- 16 files changed, 37 insertions(+), 33 deletions(-) diff --git a/src/Client/ConnectionPool.cpp b/src/Client/ConnectionPool.cpp index 8433b0833fa..5cabb1465d1 100644 --- a/src/Client/ConnectionPool.cpp +++ b/src/Client/ConnectionPool.cpp @@ -18,7 +18,7 @@ ConnectionPoolPtr ConnectionPoolFactory::get( String client_name, Protocol::Compression compression, Protocol::Secure secure, - Int64 priority) + Priority priority) { Key key{ max_connections, host, port, default_database, user, password, quota_key, cluster, cluster_secret, client_name, compression, secure, priority}; @@ -74,7 +74,7 @@ size_t ConnectionPoolFactory::KeyHash::operator()(const ConnectionPoolFactory::K hash_combine(seed, 
hash_value(k.client_name)); hash_combine(seed, hash_value(k.compression)); hash_combine(seed, hash_value(k.secure)); - hash_combine(seed, hash_value(k.priority)); + hash_combine(seed, hash_value(k.priority.value)); return seed; } diff --git a/src/Client/ConnectionPool.h b/src/Client/ConnectionPool.h index aacd0a063c7..b6d03daacfb 100644 --- a/src/Client/ConnectionPool.h +++ b/src/Client/ConnectionPool.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -34,7 +35,7 @@ public: const Settings * settings = nullptr, bool force_connected = true) = 0; - virtual Int64 getPriority() const { return 1; } + virtual Priority getPriority() const { return Priority{1}; } }; using ConnectionPoolPtr = std::shared_ptr; @@ -60,7 +61,7 @@ public: const String & client_name_, Protocol::Compression compression_, Protocol::Secure secure_, - Int64 priority_ = 1) + Priority priority_ = Priority{1}) : Base(max_connections_, &Poco::Logger::get("ConnectionPool (" + host_ + ":" + toString(port_) + ")")), host(host_), @@ -103,7 +104,7 @@ public: return host + ":" + toString(port); } - Int64 getPriority() const override + Priority getPriority() const override { return priority; } @@ -134,7 +135,7 @@ private: String client_name; Protocol::Compression compression; /// Whether to compress data when interacting with the server. Protocol::Secure secure; /// Whether to encrypt data when interacting with the server. - Int64 priority; /// priority from + Priority priority; /// priority from }; /** @@ -157,7 +158,7 @@ public: String client_name; Protocol::Compression compression; Protocol::Secure secure; - Int64 priority; + Priority priority; }; struct KeyHash @@ -180,7 +181,7 @@ public: String client_name, Protocol::Compression compression, Protocol::Secure secure, - Int64 priority); + Priority priority); private: mutable std::mutex mutex; using ConnectionPoolWeakPtr = std::weak_ptr; diff --git a/src/Client/ConnectionPoolWithFailover.cpp b/src/Client/ConnectionPoolWithFailover.cpp index 129bc10bc27..feb4c01c374 100644 --- a/src/Client/ConnectionPoolWithFailover.cpp +++ b/src/Client/ConnectionPoolWithFailover.cpp @@ -71,7 +71,7 @@ IConnectionPool::Entry ConnectionPoolWithFailover::get(const ConnectionTimeouts return Base::get(max_ignored_errors, fallback_to_stale_replicas, try_get_entry, get_priority); } -Int64 ConnectionPoolWithFailover::getPriority() const +Priority ConnectionPoolWithFailover::getPriority() const { return (*std::max_element(nested_pools.begin(), nested_pools.end(), [](const auto & a, const auto & b) { diff --git a/src/Client/ConnectionPoolWithFailover.h b/src/Client/ConnectionPoolWithFailover.h index 0273ce41589..75a0dafd977 100644 --- a/src/Client/ConnectionPoolWithFailover.h +++ b/src/Client/ConnectionPoolWithFailover.h @@ -48,7 +48,7 @@ public: const Settings * settings, bool force_connected) override; /// From IConnectionPool - Int64 getPriority() const override; /// From IConnectionPool + Priority getPriority() const override; /// From IConnectionPool /** Allocates up to the specified number of connections to work. * Connections provide access to different replicas of one shard. 
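All of these call sites now take the Priority type from the newly included src/Common/Priority.h, whose definition is not shown in this patch. Judging only by what the diff relies on — a public .value member (hash_combine(seed, hash_value(k.priority.value))), brace initialization like Priority{1}, and ordered comparison inside the failover pools — it behaves as a strong typedef over Int64. A minimal standalone sketch with those assumed properties:

#include <compare>
#include <cstdint>

// Strong typedef for priorities: lower value means higher priority.
// Wrapping the integer prevents accidentally passing an error count or
// a replica offset where a priority is expected.
struct Priority
{
    std::int64_t value = 0;

    // Defaulted three-way comparison lets Priority participate in the
    // (error count, slowdown count, config priority, priority, random)
    // tuple ordering used when choosing a replica.
    auto operator<=>(const Priority &) const = default;
};

The GetPriorityForLoadBalancing changes that follow then simply wrap each computed integer in Priority{...} instead of returning raw size_t/Int64 values.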
diff --git a/src/Common/GetPriorityForLoadBalancing.cpp b/src/Common/GetPriorityForLoadBalancing.cpp index 5da60fb1bae..c4d36acc70c 100644 --- a/src/Common/GetPriorityForLoadBalancing.cpp +++ b/src/Common/GetPriorityForLoadBalancing.cpp @@ -1,4 +1,5 @@ #include +#include namespace DB { @@ -8,23 +9,23 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -std::function GetPriorityForLoadBalancing::getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const +std::function GetPriorityForLoadBalancing::getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const { - std::function get_priority; + std::function get_priority; switch (load_balance) { case LoadBalancing::NEAREST_HOSTNAME: if (hostname_differences.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "It's a bug: hostname_differences is not initialized"); - get_priority = [this](size_t i) { return hostname_differences[i]; }; + get_priority = [this](size_t i) { return Priority{static_cast(hostname_differences[i])}; }; break; case LoadBalancing::IN_ORDER: - get_priority = [](size_t i) { return i; }; + get_priority = [](size_t i) { return Priority{static_cast(i)}; }; break; case LoadBalancing::RANDOM: break; case LoadBalancing::FIRST_OR_RANDOM: - get_priority = [offset](size_t i) -> size_t { return i != offset; }; + get_priority = [offset](size_t i) { return i != offset ? Priority{1} : Priority{0}; }; break; case LoadBalancing::ROUND_ROBIN: if (last_used >= pool_size) @@ -38,8 +39,8 @@ std::function GetPriorityForLoadBalancing::getPriorityFunc * */ get_priority = [this, pool_size](size_t i) { - ++i; - return i < last_used ? pool_size - i : i - last_used; + ++i; // To make `i` indexing start with 1 instead of 0 as `last_used` does + return Priority{static_cast(i < last_used ? pool_size - i : i - last_used)}; }; break; } diff --git a/src/Common/GetPriorityForLoadBalancing.h b/src/Common/GetPriorityForLoadBalancing.h index e57b02b5e90..8052185ac13 100644 --- a/src/Common/GetPriorityForLoadBalancing.h +++ b/src/Common/GetPriorityForLoadBalancing.h @@ -21,7 +21,7 @@ public: return !(*this == other); } - std::function getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const; + std::function getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const; std::vector hostname_differences; /// Distances from name of this host to the names of hosts of pools. diff --git a/src/Common/PoolWithFailoverBase.h b/src/Common/PoolWithFailoverBase.h index 646e10d6443..c6f44a7701a 100644 --- a/src/Common/PoolWithFailoverBase.h +++ b/src/Common/PoolWithFailoverBase.h @@ -13,6 +13,7 @@ #include #include #include +#include namespace DB @@ -34,7 +35,7 @@ namespace ProfileEvents /// This class provides a pool with fault tolerance. It is used for pooling of connections to replicated DB. /// Initialized by several PoolBase objects. /// When a connection is requested, tries to create or choose an alive connection from one of the nested pools. -/// Pools are tried in the order consistent with lexicographical order of (error count, priority, random number) tuples. +/// Pools are tried in the order consistent with lexicographical order of (error count, slowdown count, config priority, priority, random number) tuples. /// Number of tries for a single pool is limited by max_tries parameter. /// The client can set nested pool priority by passing a GetPriority functor. 
/// @@ -113,7 +114,7 @@ public: /// The client can provide this functor to affect load balancing - the index of a pool is passed to /// this functor. The pools with lower result value will be tried first. - using GetPriorityFunc = std::function; + using GetPriorityFunc = std::function; /// Returns at least min_entries and at most max_entries connections (at most one connection per nested pool). /// The method will throw if it is unable to get min_entries alive connections or @@ -336,9 +337,9 @@ struct PoolWithFailoverBase::PoolState /// The number of slowdowns that led to changing replica in HedgedRequestsFactory UInt64 slowdown_count = 0; /// Priority from the configuration. - Int64 config_priority = 1; + Priority config_priority{1}; /// Priority from the GetPriorityFunc. - Int64 priority = 0; + Priority priority{0}; UInt64 random = 0; void randomize() diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index a587ad6caf4..62807fe2433 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -179,7 +179,7 @@ ZooKeeper::ZooKeeper(const Poco::Util::AbstractConfiguration & config, const std std::vector ZooKeeper::shuffleHosts() const { - std::function get_priority = args.get_priority_load_balancing.getPriorityFunc(args.get_priority_load_balancing.load_balancing, 0, args.hosts.size()); + std::function get_priority = args.get_priority_load_balancing.getPriorityFunc(args.get_priority_load_balancing.load_balancing, 0, args.hosts.size()); std::vector shuffle_hosts; for (size_t i = 0; i < args.hosts.size(); ++i) { diff --git a/src/Common/ZooKeeper/ZooKeeper.h b/src/Common/ZooKeeper/ZooKeeper.h index 96f9914b597..d48ca0a4ef5 100644 --- a/src/Common/ZooKeeper/ZooKeeper.h +++ b/src/Common/ZooKeeper/ZooKeeper.h @@ -49,7 +49,7 @@ constexpr size_t MULTI_BATCH_SIZE = 100; struct ShuffleHost { String host; - Int64 priority = 0; + Priority priority; UInt64 random = 0; void randomize() diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 9bbf5b9565d..583607bda1d 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -252,7 +252,7 @@ ClusterPtr DatabaseReplicated::getClusterImpl() const treat_local_as_remote, treat_local_port_as_remote, cluster_auth_info.cluster_secure_connection, - /*priority=*/ 1, + Priority{1}, TSA_SUPPRESS_WARNING_FOR_READ(database_name), /// FIXME cluster_auth_info.cluster_secret}; diff --git a/src/Functions/hasColumnInTable.cpp b/src/Functions/hasColumnInTable.cpp index 4676b4083b7..66ed515e490 100644 --- a/src/Functions/hasColumnInTable.cpp +++ b/src/Functions/hasColumnInTable.cpp @@ -137,7 +137,7 @@ ColumnPtr FunctionHasColumnInTable::executeImpl(const ColumnsWithTypeAndName & a treat_local_as_remote, treat_local_port_as_remote, /* secure= */ false, - /* priority= */ 1, + /* priority= */ Priority{1}, /* cluster_name= */ "", /* password= */ "" }; diff --git a/src/IO/Resource/tests/gtest_resource_manager_static.cpp b/src/IO/Resource/tests/gtest_resource_manager_static.cpp index 091f6923714..976eac41a49 100644 --- a/src/IO/Resource/tests/gtest_resource_manager_static.cpp +++ b/src/IO/Resource/tests/gtest_resource_manager_static.cpp @@ -44,8 +44,8 @@ TEST(IOResourceStaticResourceManager, Smoke) TEST(IOResourceStaticResourceManager, Prioritization) { - std::optional last_priority; - auto check = [&] (Int64 priority) + std::optional last_priority; + auto check = [&] (Priority priority) { // Lock is not required here because this is called 
during request execution and we have max_requests = 1 if (last_priority) diff --git a/src/Interpreters/Cluster.cpp b/src/Interpreters/Cluster.cpp index 8c30dbe230f..edbef77ef02 100644 --- a/src/Interpreters/Cluster.cpp +++ b/src/Interpreters/Cluster.cpp @@ -108,7 +108,7 @@ Cluster::Address::Address( password = config.getString(config_prefix + ".password", ""); default_database = config.getString(config_prefix + ".default_database", ""); secure = ConfigHelper::getBool(config, config_prefix + ".secure", false, /* empty_as */true) ? Protocol::Secure::Enable : Protocol::Secure::Disable; - priority = config.getInt(config_prefix + ".priority", 1); + priority = Priority{config.getInt(config_prefix + ".priority", 1)}; const char * port_type = secure == Protocol::Secure::Enable ? "tcp_port_secure" : "tcp_port"; auto default_port = config.getInt(port_type, 0); diff --git a/src/Interpreters/Cluster.h b/src/Interpreters/Cluster.h index 4798384f29c..de10a445d01 100644 --- a/src/Interpreters/Cluster.h +++ b/src/Interpreters/Cluster.h @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -44,7 +45,7 @@ struct ClusterConnectionParameters bool treat_local_as_remote; bool treat_local_port_as_remote; bool secure = false; - Int64 priority = 1; + Priority priority{1}; String cluster_name; String cluster_secret; }; @@ -131,7 +132,7 @@ public: Protocol::Compression compression = Protocol::Compression::Enable; Protocol::Secure secure = Protocol::Secure::Disable; - Int64 priority = 1; + Priority priority{1}; Address() = default; diff --git a/src/Interpreters/ClusterDiscovery.cpp b/src/Interpreters/ClusterDiscovery.cpp index 884e3b87343..553488edf50 100644 --- a/src/Interpreters/ClusterDiscovery.cpp +++ b/src/Interpreters/ClusterDiscovery.cpp @@ -246,7 +246,7 @@ ClusterPtr ClusterDiscovery::makeCluster(const ClusterInfo & cluster_info) /* treat_local_as_remote= */ false, /* treat_local_port_as_remote= */ false, /// should be set only for clickhouse-local, but cluster discovery is not used there /* secure= */ secure, - /* priority= */ 1, + /* priority= */ Priority{1}, /* cluster_name= */ "", /* password= */ ""}; auto cluster = std::make_shared( diff --git a/src/TableFunctions/TableFunctionRemote.cpp b/src/TableFunctions/TableFunctionRemote.cpp index b2f09adf773..4143014a7b3 100644 --- a/src/TableFunctions/TableFunctionRemote.cpp +++ b/src/TableFunctions/TableFunctionRemote.cpp @@ -262,7 +262,7 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr treat_local_as_remote, treat_local_port_as_remote, secure, - /* priority= */ 1, + /* priority= */ Priority{1}, /* cluster_name= */ "", /* password= */ "" }; From 32372967e9814e629cbad2ce2ff57f82aba86e97 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 7 Jun 2023 16:55:14 +0200 Subject: [PATCH 0245/1997] fix --- src/Storages/StorageReplicatedMergeTree.cpp | 10 +++------- src/Storages/StorageReplicatedMergeTree.h | 2 +- .../0_stateless/02432_s3_parallel_parts_cleanup.sql | 6 +----- .../0_stateless/02448_clone_replica_lost_part.sql | 9 ++++++--- 4 files changed, 11 insertions(+), 16 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 6edd7531ec1..36bc3476e91 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -6743,14 +6743,12 @@ size_t StorageReplicatedMergeTree::clearOldPartsAndRemoveFromZK() if (parts.empty()) return total_parts_to_remove; - size_t res = 0; - NOEXCEPT_SCOPE({ res = 
clearOldPartsAndRemoveFromZKImpl(zookeeper, std::move(parts)); }); - return res; + NOEXCEPT_SCOPE({ clearOldPartsAndRemoveFromZKImpl(zookeeper, std::move(parts)); }); + return total_parts_to_remove; } -size_t StorageReplicatedMergeTree::clearOldPartsAndRemoveFromZKImpl(zkutil::ZooKeeperPtr zookeeper, DataPartsVector && parts) +void StorageReplicatedMergeTree::clearOldPartsAndRemoveFromZKImpl(zkutil::ZooKeeperPtr zookeeper, DataPartsVector && parts) { - DataPartsVector parts_to_delete_only_from_filesystem; // Only duplicates DataPartsVector parts_to_delete_completely; // All parts except duplicates DataPartsVector parts_to_retry_deletion; // Parts that should be retried due to network problems @@ -6861,8 +6859,6 @@ size_t StorageReplicatedMergeTree::clearOldPartsAndRemoveFromZKImpl(zkutil::ZooK /// Otherwise nobody will try to remove them again (see grabOldParts). delete_parts_from_fs_and_rollback_in_case_of_error(parts_to_remove_from_filesystem, "old"); } - - return total_parts_to_remove; } diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 368d7d1b948..290266ca00c 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -344,7 +344,7 @@ private: /// Delete old parts from disk and from ZooKeeper. Returns the number of removed parts size_t clearOldPartsAndRemoveFromZK(); - size_t clearOldPartsAndRemoveFromZKImpl(zkutil::ZooKeeperPtr zookeeper, DataPartsVector && parts); + void clearOldPartsAndRemoveFromZKImpl(zkutil::ZooKeeperPtr zookeeper, DataPartsVector && parts); template friend class ReplicatedMergeTreeSinkImpl; diff --git a/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql b/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql index 3f8aa545298..948ec9e9e8a 100644 --- a/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql +++ b/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql @@ -38,11 +38,7 @@ select count(), sum(n), sum(m) from rmt; -- New table can assign merges/mutations and can remove old parts create table rmt2 (n int, m int, k String) engine=ReplicatedMergeTree('/test/02432/{database}', '2') order by tuple() settings storage_policy = 's3_cache', allow_remote_fs_zero_copy_replication=1, -<<<<<<< HEAD - max_part_removal_threads=10, concurrent_part_removal_threshold=1, cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0, -======= - concurrent_part_removal_threshold=1, cleanup_delay_period=1, cleanup_delay_period_random_add=1, ->>>>>>> master + concurrent_part_removal_threshold=1, cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0, min_bytes_for_wide_part=0, min_rows_for_wide_part=0, max_replicated_merges_in_queue=1, old_parts_lifetime=0; diff --git a/tests/queries/0_stateless/02448_clone_replica_lost_part.sql b/tests/queries/0_stateless/02448_clone_replica_lost_part.sql index 1e99e1869cc..eb4d0f255a7 100644 --- a/tests/queries/0_stateless/02448_clone_replica_lost_part.sql +++ b/tests/queries/0_stateless/02448_clone_replica_lost_part.sql @@ -7,11 +7,11 @@ drop table if exists rmt2; create table rmt1 (n int) engine=ReplicatedMergeTree('/test/02448/{database}/rmt', '1') order by tuple() settings min_replicated_logs_to_keep=1, max_replicated_logs_to_keep=2, cleanup_delay_period=0, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0, old_parts_lifetime=0, max_parts_to_merge_at_once=4, - 
merge_selecting_sleep_ms=100, max_merge_selecting_sleep_ms=500; + merge_selecting_sleep_ms=1000, max_merge_selecting_sleep_ms=2000; create table rmt2 (n int) engine=ReplicatedMergeTree('/test/02448/{database}/rmt', '2') order by tuple() settings min_replicated_logs_to_keep=1, max_replicated_logs_to_keep=2, cleanup_delay_period=0, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0, old_parts_lifetime=0, max_parts_to_merge_at_once=4, - merge_selecting_sleep_ms=100, max_merge_selecting_sleep_ms=500; + merge_selecting_sleep_ms=1000, max_merge_selecting_sleep_ms=2000; -- insert part only on one replica system stop replicated sends rmt1; @@ -141,7 +141,10 @@ system sync replica rmt2; -- merge through gap optimize table rmt2; -- give it a chance to cleanup log -select sleep(2) format Null; -- increases probability of reproducing the issue + +select sleepEachRow(2) from url('http://localhost:8123/?param_tries={1..10}&query=' || encodeURLComponent( + 'select value from system.zookeeper where path=''//test/02448/' || currentDatabase() || '/rmt/replicas/1/is_lost'' and value=''1''' + ), 'LineAsString', 's String') settings max_threads=1 format Null; -- rmt1 will mimic rmt2, but will not be able to fetch parts for a while system stop replicated sends rmt2; From bfdb18619c3b31044eba28f15cd9a0d7141b790e Mon Sep 17 00:00:00 2001 From: zvonand Date: Wed, 7 Jun 2023 19:25:33 +0200 Subject: [PATCH 0246/1997] small docs update --- docs/en/sql-reference/table-functions/file.md | 2 +- docs/ru/sql-reference/table-functions/file.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/table-functions/file.md b/docs/en/sql-reference/table-functions/file.md index 28c2dc9f1f3..09e3df7cc6a 100644 --- a/docs/en/sql-reference/table-functions/file.md +++ b/docs/en/sql-reference/table-functions/file.md @@ -134,7 +134,7 @@ Multiple path components can have globs. For being processed file must exist and - `*` — Substitutes any number of any characters except `/` including empty string. - `?` — Substitutes any single character. -- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. +- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`, including `/`. - `{N..M}` — Substitutes any number in range from N to M including both borders. - `**` - Fetches all files inside the folder recursively. diff --git a/docs/ru/sql-reference/table-functions/file.md b/docs/ru/sql-reference/table-functions/file.md index 94bc734a8fb..f0fea630ad2 100644 --- a/docs/ru/sql-reference/table-functions/file.md +++ b/docs/ru/sql-reference/table-functions/file.md @@ -79,7 +79,7 @@ SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 U - `*` — заменяет любое количество любых символов кроме `/`, включая отсутствие символов. - `?` — заменяет ровно один любой символ. -- `{some_string,another_string,yet_another_one}` — заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`. +- `{some_string,another_string,yet_another_one}` — заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`, причём строка может содержать `/`. - `{N..M}` — заменяет любое число в интервале от `N` до `M` включительно (может содержать ведущие нули). Конструкция с `{}` аналогична табличной функции [remote](remote.md). 
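Both language versions of the doc now call out the one subtlety of brace expansion in file() paths: unlike `*` and `?`, the alternatives inside `{...}` may span a `/`, which is exactly what the stateless test added in patch 0243 exercises. A minimal illustration (hypothetical paths, in the spirit of 02771_complex_globs_in_storage_file_path.sql):

-- Each brace alternative crosses a directory boundary, which `*` and `?` cannot do.
SELECT *, _file FROM file('02771/{dir1/subdir11/data1,dir2/subdir22/data2}.csv', CSV);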
From ff1b069cdb9301f5c13bed69d70c484210693dea Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 7 Jun 2023 21:15:35 +0000 Subject: [PATCH 0247/1997] fix test --- tests/queries/0_stateless/02726_async_insert_flush_queue.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/02726_async_insert_flush_queue.sql b/tests/queries/0_stateless/02726_async_insert_flush_queue.sql index 33f40eef14e..98e78045b85 100644 --- a/tests/queries/0_stateless/02726_async_insert_flush_queue.sql +++ b/tests/queries/0_stateless/02726_async_insert_flush_queue.sql @@ -1,3 +1,5 @@ +-- Tags: no-parallel + DROP TABLE IF EXISTS t_async_inserts_flush; CREATE TABLE t_async_inserts_flush (a UInt64) ENGINE = Memory; From fa877f456185b5de5b5f2e36e775acdb8dec7f31 Mon Sep 17 00:00:00 2001 From: zvonand Date: Thu, 8 Jun 2023 16:05:14 +0200 Subject: [PATCH 0248/1997] cosmetic changes --- src/Common/DateLUT.h | 2 +- src/Common/LocalDate.h | 13 +++++-------- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/Common/DateLUT.h b/src/Common/DateLUT.h index 23698331afe..833b2291254 100644 --- a/src/Common/DateLUT.h +++ b/src/Common/DateLUT.h @@ -39,7 +39,7 @@ public: /// Timezone is passed in query_context, but on CH-Client we have no query context, /// and each time we modify client's global context - const auto global_context = DB::CurrentThread::get().getGlobalContext(); + const DB::ContextPtr global_context = DB::CurrentThread::get().getGlobalContext(); if (global_context) { context_timezone = extractTimezoneFromContext(global_context); diff --git a/src/Common/LocalDate.h b/src/Common/LocalDate.h index 4a383129ae4..2331a40fd12 100644 --- a/src/Common/LocalDate.h +++ b/src/Common/LocalDate.h @@ -24,9 +24,8 @@ private: unsigned char m_month; unsigned char m_day; - void init(time_t time) + void init(time_t time, const DateLUTImpl & date_lut) { - const auto & date_lut = DateLUT::instance(); const auto & values = date_lut.getValues(time); m_year = values.year; @@ -56,9 +55,9 @@ private: } public: - explicit LocalDate(time_t time) + explicit LocalDate(time_t time, const DateLUTImpl & time_zone = DateLUT::instance()) { - init(time); + init(time, time_zone); } LocalDate(DayNum day_num, const DateLUTImpl & time_zone = DateLUT::instance()) /// NOLINT @@ -99,15 +98,13 @@ public: LocalDate(const LocalDate &) noexcept = default; LocalDate & operator= (const LocalDate &) noexcept = default; - DayNum getDayNum() const + DayNum getDayNum(const DateLUTImpl & lut = DateLUT::instance()) const { - const auto & lut = DateLUT::instance(); return DayNum(lut.makeDayNum(m_year, m_month, m_day).toUnderType()); } - ExtendedDayNum getExtenedDayNum() const + ExtendedDayNum getExtenedDayNum(const DateLUTImpl & lut = DateLUT::instance()) const { - const auto & lut = DateLUT::instance(); return ExtendedDayNum (lut.makeDayNum(m_year, m_month, m_day).toUnderType()); } From 60499164b3acc8663a10836233926c0f997ac381 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 8 Jun 2023 14:35:11 +0000 Subject: [PATCH 0249/1997] fix tests --- tests/queries/0_stateless/02726_async_insert_flush_stress.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02726_async_insert_flush_stress.sh b/tests/queries/0_stateless/02726_async_insert_flush_stress.sh index 4685e49b96d..5fafb773d16 100755 --- a/tests/queries/0_stateless/02726_async_insert_flush_stress.sh +++ b/tests/queries/0_stateless/02726_async_insert_flush_stress.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long +# Tags: long, 
no-parallel set -e From 4d4e5c690e446db23f8a1ef7fc1e577df93e9373 Mon Sep 17 00:00:00 2001 From: zvonand Date: Thu, 8 Jun 2023 17:10:51 +0200 Subject: [PATCH 0250/1997] update docs spelling check failed --- docs/en/operations/settings/settings.md | 2 +- docs/en/sql-reference/functions/date-time-functions.md | 4 +--- utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 + 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 77c9238e4c7..6c9c8349519 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -4116,7 +4116,7 @@ SELECT *, timezone() FROM test_tz WHERE d = '2000-01-01 00:00:00' SETTINGS sessi This happens due to different parsing pipelines: - `toDateTime('2000-01-01 00:00:00')` creates a new DateTime in a usual way, and thus `session_timezone` setting from query context is applied. - - `2000-01-01 00:00:00` is parsed to a DateTime inheriting type of `d` column, including DateTime's time zone, and `session_timezone` has no impact on this value. + - `2000-01-01 00:00:00` is parsed to a DateTime inheriting type of `d` column, including its time zone, and `session_timezone` has no impact on this value. Possible values: diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 1a5b0dcabf9..89ac6d438ff 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -163,7 +163,7 @@ Type: [String](../../sql-reference/data-types/string.md). ## serverTimeZone Returns the default timezone of the server, i.e. the value of setting [timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). -If it is executed in the context of a distributed table, then it generates a normal column with values relevant to each shard. Otherwise it produces a constant value. +If it is executed in the context of a distributed table, then it generates a normal column with values relevant to each shard. Otherwise, it produces a constant value. **Syntax** @@ -171,8 +171,6 @@ If it is executed in the context of a distributed table, then it generates a nor serverTimeZone() ``` -Alias: `ServerTimezone`, `servertimezone`. - **Returned value** - Timezone. diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index ded7a4643a9..8301579b6a8 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -484,6 +484,7 @@ russian rw sasl schemas +servertimezone simdjson skippingerrors sparsehash From a1b1e12e5bb5d6a1937f9081eb43374afef60f9b Mon Sep 17 00:00:00 2001 From: zvonand Date: Thu, 8 Jun 2023 18:38:51 +0200 Subject: [PATCH 0251/1997] upd spell --- docs/en/sql-reference/functions/date-time-functions.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 89ac6d438ff..62bbb84053a 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -158,9 +158,9 @@ Type: [String](../../sql-reference/data-types/string.md). 
**See also** -- [serverTimeZone](#serverTimeZone) +- [serverTimezone](#serverTimeZone) -## serverTimeZone +## serverTimezone Returns the default timezone of the server, i.e. the value of setting [timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). If it is executed in the context of a distributed table, then it generates a normal column with values relevant to each shard. Otherwise, it produces a constant value. From f4bf42cc42a6a2d8966d711874286427956d51d7 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Thu, 8 Jun 2023 17:29:51 +0000 Subject: [PATCH 0252/1997] more fixes --- .../AggregateFunctionGroupArray.cpp | 1 + .../AggregateFunctionGroupUniqArray.cpp | 11 ++ .../AggregateFunctionTopK.cpp | 19 ++ .../AggregateFunctionUniq.h | 31 +++- .../AggregateFunctionUniqCombined.cpp | 5 + .../AggregateFunctionUniqCombined.h | 16 +- ...es_aggregate_functions_states.reference.j2 | 172 ++++++++++++++++++ ...ip_types_aggregate_functions_states.sql.j2 | 123 +++++++++++++ 8 files changed, 366 insertions(+), 12 deletions(-) create mode 100644 tests/queries/0_stateless/02751_ip_types_aggregate_functions_states.reference.j2 create mode 100644 tests/queries/0_stateless/02751_ip_types_aggregate_functions_states.sql.j2 diff --git a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp index 15f500b8bb6..bb1368b9ff8 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp @@ -25,6 +25,7 @@ IAggregateFunction * createWithNumericOrTimeType(const IDataType & argument_type WhichDataType which(argument_type); if (which.idx == TypeIndex::Date) return new AggregateFunctionTemplate(std::forward(args)...); if (which.idx == TypeIndex::DateTime) return new AggregateFunctionTemplate(std::forward(args)...); + if (which.idx == TypeIndex::IPv4) return new AggregateFunctionTemplate(std::forward(args)...); return createWithNumericType(argument_type, std::forward(args)...); } diff --git a/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp b/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp index 16f2feb71bf..9e8060d44cc 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB @@ -39,12 +40,22 @@ public: static DataTypePtr createResultType() { return std::make_shared(std::make_shared()); } }; +template +class AggregateFunctionGroupUniqArrayIPv4 : public AggregateFunctionGroupUniqArray +{ +public: + explicit AggregateFunctionGroupUniqArrayIPv4(const DataTypePtr & argument_type, const Array & parameters_, UInt64 max_elems_ = std::numeric_limits::max()) + : AggregateFunctionGroupUniqArray(argument_type, parameters_, createResultType(), max_elems_) {} + static DataTypePtr createResultType() { return std::make_shared(std::make_shared()); } +}; + template IAggregateFunction * createWithExtraTypes(const DataTypePtr & argument_type, TArgs && ... 
args) { WhichDataType which(argument_type); if (which.idx == TypeIndex::Date) return new AggregateFunctionGroupUniqArrayDate(argument_type, std::forward(args)...); else if (which.idx == TypeIndex::DateTime) return new AggregateFunctionGroupUniqArrayDateTime(argument_type, std::forward(args)...); + else if (which.idx == TypeIndex::IPv4) return new AggregateFunctionGroupUniqArrayIPv4(argument_type, std::forward(args)...); else { /// Check that we can use plain version of AggregateFunctionGroupUniqArrayGeneric diff --git a/src/AggregateFunctions/AggregateFunctionTopK.cpp b/src/AggregateFunctions/AggregateFunctionTopK.cpp index e568694df02..8f6652223cc 100644 --- a/src/AggregateFunctions/AggregateFunctionTopK.cpp +++ b/src/AggregateFunctions/AggregateFunctionTopK.cpp @@ -5,6 +5,7 @@ #include #include #include +#include static inline constexpr UInt64 TOP_K_MAX_SIZE = 0xFFFFFF; @@ -60,6 +61,22 @@ public: {} }; +template +class AggregateFunctionTopKIPv4 : public AggregateFunctionTopK +{ +public: + using AggregateFunctionTopK::AggregateFunctionTopK; + + AggregateFunctionTopKIPv4(UInt64 threshold_, UInt64 load_factor, const DataTypes & argument_types_, const Array & params) + : AggregateFunctionTopK( + threshold_, + load_factor, + argument_types_, + params, + std::make_shared(std::make_shared())) + {} +}; + template IAggregateFunction * createWithExtraTypes(const DataTypes & argument_types, UInt64 threshold, UInt64 load_factor, const Array & params) @@ -72,6 +89,8 @@ IAggregateFunction * createWithExtraTypes(const DataTypes & argument_types, UInt return new AggregateFunctionTopKDate(threshold, load_factor, argument_types, params); if (which.idx == TypeIndex::DateTime) return new AggregateFunctionTopKDateTime(threshold, load_factor, argument_types, params); + if (which.idx == TypeIndex::IPv4) + return new AggregateFunctionTopKIPv4(threshold, load_factor, argument_types, params); /// Check that we can use plain version of AggregateFunctionTopKGeneric if (argument_types[0]->isValueUnambiguouslyRepresentedInContiguousMemoryRegion()) diff --git a/src/AggregateFunctions/AggregateFunctionUniq.h b/src/AggregateFunctions/AggregateFunctionUniq.h index 03d999b47e2..de68e9076a0 100644 --- a/src/AggregateFunctions/AggregateFunctionUniq.h +++ b/src/AggregateFunctions/AggregateFunctionUniq.h @@ -167,6 +167,25 @@ struct AggregateFunctionUniqExactData static String getName() { return "uniqExact"; } }; +/// For historical reasons IPv6 is treated as FixedString(16) +template +struct AggregateFunctionUniqExactData +{ + using Key = UInt128; + + /// When creating, the hash table must be small. 
+ using SingleLevelSet = HashSet, HashTableAllocatorWithStackMemory>; + using TwoLevelSet = TwoLevelHashSet; + using Set = UniqExactSet; + + Set set; + + constexpr static bool is_able_to_parallelize_merge = is_able_to_parallelize_merge_; + constexpr static bool is_variadic = false; + + static String getName() { return "uniqExact"; } +}; + template struct AggregateFunctionUniqExactDataForVariadic : AggregateFunctionUniqExactData { @@ -275,12 +294,7 @@ struct Adder else if constexpr (std::is_same_v>) { const auto & column = *columns[0]; - if constexpr (!std::is_same_v) - { - data.set.template insert( - assert_cast &>(column).getData()[row_num]); - } - else + if constexpr (std::is_same_v || std::is_same_v) { StringRef value = column.getDataAt(row_num); @@ -291,6 +305,11 @@ struct Adder data.set.template insert(key); } + else + { + data.set.template insert( + assert_cast &>(column).getData()[row_num]); + } } #if USE_DATASKETCHES else if constexpr (std::is_same_v) diff --git a/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp b/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp index 1c59da59e83..8c2cb6ea0de 100644 --- a/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp +++ b/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include @@ -60,6 +61,10 @@ namespace return std::make_shared::template AggregateFunction>(argument_types, params); else if (which.isUUID()) return std::make_shared::template AggregateFunction>(argument_types, params); + else if (which.isIPv4()) + return std::make_shared::template AggregateFunction>(argument_types, params); + else if (which.isIPv6()) + return std::make_shared::template AggregateFunction>(argument_types, params); else if (which.isTuple()) { if (use_exact_hash_function) diff --git a/src/AggregateFunctions/AggregateFunctionUniqCombined.h b/src/AggregateFunctions/AggregateFunctionUniqCombined.h index d879e3b3dde..5e8fa69f9de 100644 --- a/src/AggregateFunctions/AggregateFunctionUniqCombined.h +++ b/src/AggregateFunctions/AggregateFunctionUniqCombined.h @@ -119,6 +119,10 @@ struct AggregateFunctionUniqCombinedData : public Aggr { }; +template +struct AggregateFunctionUniqCombinedData : public AggregateFunctionUniqCombinedDataWithKey +{ +}; template class AggregateFunctionUniqCombined final @@ -141,16 +145,16 @@ public: void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override { - if constexpr (!std::is_same_v) - { - const auto & value = assert_cast &>(*columns[0]).getElement(row_num); - this->data(place).set.insert(detail::AggregateFunctionUniqCombinedTraits::hash(value)); - } - else + if constexpr (std::is_same_v || std::is_same_v) { StringRef value = columns[0]->getDataAt(row_num); this->data(place).set.insert(CityHash_v1_0_2::CityHash64(value.data, value.size)); } + else + { + const auto & value = assert_cast &>(*columns[0]).getElement(row_num); + this->data(place).set.insert(detail::AggregateFunctionUniqCombinedTraits::hash(value)); + } } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override diff --git a/tests/queries/0_stateless/02751_ip_types_aggregate_functions_states.reference.j2 b/tests/queries/0_stateless/02751_ip_types_aggregate_functions_states.reference.j2 new file mode 100644 index 00000000000..481dd723b66 --- /dev/null +++ b/tests/queries/0_stateless/02751_ip_types_aggregate_functions_states.reference.j2 @@ -0,0 +1,172 @@ +----- hash / State / ip4 ----- +Row 1: +────── 
+minState: 12535288824949910799 +maxState: 18210943739258811465 +first_valueState: 12535288824949910799 +last_valueState: 18210943739258811465 +topKState: 1594227852744382511 +groupArrayState: 8025417272361615478 +groupUniqArrayState: 919082878249747568 +uniqState: 14828781561416784358 +uniqExactState: 11391659146320471795 +uniqCombinedState: 9631896280254268221 +uniqCombined64State: 5156097536649078816 +uniqHLL12State: 9696624347265201099 +uniqThetaState: 10464560810701154023 +----- hash / State / ip6 ----- +Row 1: +────── +minState: 9428555662807296659 +maxState: 18253481702148601156 +first_valueState: 9428555662807296659 +last_valueState: 18253481702148601156 +topKState: 8045294331733869941 +groupArrayState: 10451014709837753966 +groupUniqArrayState: 1954028114836070615 +uniqState: 14986562136250471284 +uniqExactState: 10032843621916709112 +uniqCombinedState: 6379274083567016598 +uniqCombined64State: 6379274083567016598 +uniqHLL12State: 9181286681186915812 +uniqThetaState: 2415188383468008881 +----- finalizeAggregation / State / ip4 ----- +Row 1: +────── +min: 59.154.201.255 +max: 59.154.202.48 +first_value: 59.154.201.255 +last_value: 59.154.202.48 +topK: ['59.154.202.48','59.154.202.5','59.154.202.26','59.154.202.25','59.154.202.24','59.154.202.23','59.154.202.22','59.154.202.21','59.154.202.27','59.154.202.19'] +groupArray: ['59.154.201.255','59.154.202.0','59.154.202.1','59.154.202.2','59.154.202.3','59.154.202.4','59.154.202.5','59.154.202.6','59.154.202.7','59.154.202.8','59.154.202.9','59.154.202.10','59.154.202.11','59.154.202.12','59.154.202.13','59.154.202.14','59.154.202.15','59.154.202.16','59.154.202.17','59.154.202.18','59.154.202.19','59.154.202.20','59.154.202.21','59.154.202.22','59.154.202.23','59.154.202.24','59.154.202.25','59.154.202.26','59.154.202.27','59.154.202.28','59.154.202.29','59.154.202.30','59.154.202.31','59.154.202.32','59.154.202.33','59.154.202.34','59.154.202.35','59.154.202.36','59.154.202.37','59.154.202.38','59.154.202.39','59.154.202.40','59.154.202.41','59.154.202.42','59.154.202.43','59.154.202.44','59.154.202.45','59.154.202.46','59.154.202.47','59.154.202.48'] +groupUniqArray: ['59.154.202.28','59.154.202.45','59.154.202.35','59.154.202.2','59.154.202.42','59.154.202.1','59.154.202.4','59.154.202.15','59.154.202.22','59.154.202.20','59.154.202.12','59.154.202.3','59.154.202.40','59.154.202.43','59.154.202.26','59.154.202.37','59.154.202.7','59.154.202.36','59.154.202.32','59.154.202.47','59.154.202.17','59.154.202.11','59.154.201.255','59.154.202.0','59.154.202.14','59.154.202.25','59.154.202.6','59.154.202.30','59.154.202.16','59.154.202.21','59.154.202.23','59.154.202.38','59.154.202.44','59.154.202.39','59.154.202.48','59.154.202.41','59.154.202.27','59.154.202.33','59.154.202.19','59.154.202.5','59.154.202.9','59.154.202.18','59.154.202.24','59.154.202.34','59.154.202.46','59.154.202.8','59.154.202.29','59.154.202.10','59.154.202.13','59.154.202.31'] +uniq: 50 +uniqExact: 50 +uniqCombined: 50 +uniqCombined64: 50 +uniqHLL12: 49 +uniqTheta: 50 +----- finalizeAggregation / State / ip6 ----- +Row 1: +────── +min: 8c:333c::8c:333c:0:0 +max: ff8b:333c::ff8b:333c:0:0 +first_value: 8c:333c::8c:333c:0:0 +last_value: ff8b:333c::ff8b:333c:0:0 +topK: ['ff8b:333c::ff8b:333c:0:0','68c:333c::68c:333c:0:0','e98b:333c::e98b:333c:0:0','e88b:333c::e88b:333c:0:0','e78b:333c::e78b:333c:0:0','e68b:333c::e68b:333c:0:0','e58b:333c::e58b:333c:0:0','e48b:333c::e48b:333c:0:0','ea8b:333c::ea8b:333c:0:0','e28b:333c::e28b:333c:0:0'] +groupArray: 
['8c:333c::8c:333c:0:0','18c:333c::18c:333c:0:0','28c:333c::28c:333c:0:0','38c:333c::38c:333c:0:0','48c:333c::48c:333c:0:0','58c:333c::58c:333c:0:0','68c:333c::68c:333c:0:0','78c:333c::78c:333c:0:0','88c:333c::88c:333c:0:0','98c:333c::98c:333c:0:0','a8c:333c::a8c:333c:0:0','b8c:333c::b8c:333c:0:0','c8c:333c::c8c:333c:0:0','d8c:333c::d8c:333c:0:0','e8c:333c::e8c:333c:0:0','dd8b:333c::dd8b:333c:0:0','de8b:333c::de8b:333c:0:0','df8b:333c::df8b:333c:0:0','e08b:333c::e08b:333c:0:0','e18b:333c::e18b:333c:0:0','e28b:333c::e28b:333c:0:0','e38b:333c::e38b:333c:0:0','e48b:333c::e48b:333c:0:0','e58b:333c::e58b:333c:0:0','e68b:333c::e68b:333c:0:0','e78b:333c::e78b:333c:0:0','e88b:333c::e88b:333c:0:0','e98b:333c::e98b:333c:0:0','ea8b:333c::ea8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','f08b:333c::f08b:333c:0:0','f18b:333c::f18b:333c:0:0','f28b:333c::f28b:333c:0:0','f38b:333c::f38b:333c:0:0','f48b:333c::f48b:333c:0:0','f58b:333c::f58b:333c:0:0','f68b:333c::f68b:333c:0:0','f78b:333c::f78b:333c:0:0','f88b:333c::f88b:333c:0:0','f98b:333c::f98b:333c:0:0','fa8b:333c::fa8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','fc8b:333c::fc8b:333c:0:0','fd8b:333c::fd8b:333c:0:0','fe8b:333c::fe8b:333c:0:0','ff8b:333c::ff8b:333c:0:0'] +groupUniqArray: ['58c:333c::58c:333c:0:0','f78b:333c::f78b:333c:0:0','f38b:333c::f38b:333c:0:0','18c:333c::18c:333c:0:0','e78b:333c::e78b:333c:0:0','e38b:333c::e38b:333c:0:0','e48b:333c::e48b:333c:0:0','e08b:333c::e08b:333c:0:0','df8b:333c::df8b:333c:0:0','f48b:333c::f48b:333c:0:0','68c:333c::68c:333c:0:0','28c:333c::28c:333c:0:0','f08b:333c::f08b:333c:0:0','fa8b:333c::fa8b:333c:0:0','88c:333c::88c:333c:0:0','c8c:333c::c8c:333c:0:0','fe8b:333c::fe8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','e98b:333c::e98b:333c:0:0','ed8b:333c::ed8b:333c:0:0','b8c:333c::b8c:333c:0:0','f98b:333c::f98b:333c:0:0','fd8b:333c::fd8b:333c:0:0','de8b:333c::de8b:333c:0:0','f58b:333c::f58b:333c:0:0','78c:333c::78c:333c:0:0','38c:333c::38c:333c:0:0','f18b:333c::f18b:333c:0:0','e58b:333c::e58b:333c:0:0','e18b:333c::e18b:333c:0:0','e68b:333c::e68b:333c:0:0','e28b:333c::e28b:333c:0:0','48c:333c::48c:333c:0:0','dd8b:333c::dd8b:333c:0:0','f68b:333c::f68b:333c:0:0','f28b:333c::f28b:333c:0:0','8c:333c::8c:333c:0:0','a8c:333c::a8c:333c:0:0','f88b:333c::f88b:333c:0:0','fc8b:333c::fc8b:333c:0:0','e8c:333c::e8c:333c:0:0','e88b:333c::e88b:333c:0:0','ec8b:333c::ec8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','98c:333c::98c:333c:0:0','d8c:333c::d8c:333c:0:0','ff8b:333c::ff8b:333c:0:0'] +uniq: 50 +uniqExact: 50 +uniqCombined: 50 +uniqCombined64: 50 +uniqHLL12: 50 +uniqTheta: 50 +----- hash / IfState / ip4 ----- +Row 1: +────── +minIfState: 12535288824949910799 +maxIfState: 18210943739258811465 +first_valueIfState: 12535288824949910799 +last_valueIfState: 18210943739258811465 +topKIfState: 1594227852744382511 +groupArrayIfState: 8025417272361615478 +groupUniqArrayIfState: 919082878249747568 +uniqIfState: 14828781561416784358 +uniqExactIfState: 11391659146320471795 +uniqCombinedIfState: 9631896280254268221 +uniqCombined64IfState: 5156097536649078816 +uniqHLL12IfState: 9696624347265201099 +uniqThetaIfState: 10464560810701154023 +----- hash / IfState / ip6 ----- +Row 1: +────── +minIfState: 9428555662807296659 +maxIfState: 18253481702148601156 +first_valueIfState: 9428555662807296659 +last_valueIfState: 18253481702148601156 +topKIfState: 8045294331733869941 
+groupArrayIfState: 10451014709837753966 +groupUniqArrayIfState: 1954028114836070615 +uniqIfState: 14986562136250471284 +uniqExactIfState: 10032843621916709112 +uniqCombinedIfState: 6379274083567016598 +uniqCombined64IfState: 6379274083567016598 +uniqHLL12IfState: 9181286681186915812 +uniqThetaIfState: 2415188383468008881 +----- finalizeAggregation / IfState / ip4 ----- +Row 1: +────── +min: 59.154.201.255 +max: 59.154.202.48 +first_value: 59.154.201.255 +last_value: 59.154.202.48 +topK: ['59.154.202.48','59.154.202.5','59.154.202.26','59.154.202.25','59.154.202.24','59.154.202.23','59.154.202.22','59.154.202.21','59.154.202.27','59.154.202.19'] +groupArray: ['59.154.201.255','59.154.202.0','59.154.202.1','59.154.202.2','59.154.202.3','59.154.202.4','59.154.202.5','59.154.202.6','59.154.202.7','59.154.202.8','59.154.202.9','59.154.202.10','59.154.202.11','59.154.202.12','59.154.202.13','59.154.202.14','59.154.202.15','59.154.202.16','59.154.202.17','59.154.202.18','59.154.202.19','59.154.202.20','59.154.202.21','59.154.202.22','59.154.202.23','59.154.202.24','59.154.202.25','59.154.202.26','59.154.202.27','59.154.202.28','59.154.202.29','59.154.202.30','59.154.202.31','59.154.202.32','59.154.202.33','59.154.202.34','59.154.202.35','59.154.202.36','59.154.202.37','59.154.202.38','59.154.202.39','59.154.202.40','59.154.202.41','59.154.202.42','59.154.202.43','59.154.202.44','59.154.202.45','59.154.202.46','59.154.202.47','59.154.202.48'] +groupUniqArray: ['59.154.202.28','59.154.202.45','59.154.202.35','59.154.202.2','59.154.202.42','59.154.202.1','59.154.202.4','59.154.202.15','59.154.202.22','59.154.202.20','59.154.202.12','59.154.202.3','59.154.202.40','59.154.202.43','59.154.202.26','59.154.202.37','59.154.202.7','59.154.202.36','59.154.202.32','59.154.202.47','59.154.202.17','59.154.202.11','59.154.201.255','59.154.202.0','59.154.202.14','59.154.202.25','59.154.202.6','59.154.202.30','59.154.202.16','59.154.202.21','59.154.202.23','59.154.202.38','59.154.202.44','59.154.202.39','59.154.202.48','59.154.202.41','59.154.202.27','59.154.202.33','59.154.202.19','59.154.202.5','59.154.202.9','59.154.202.18','59.154.202.24','59.154.202.34','59.154.202.46','59.154.202.8','59.154.202.29','59.154.202.10','59.154.202.13','59.154.202.31'] +uniq: 50 +uniqExact: 50 +uniqCombined: 50 +uniqCombined64: 50 +uniqHLL12: 49 +uniqTheta: 50 +----- finalizeAggregation / IfState / ip6 ----- +Row 1: +────── +min: 8c:333c::8c:333c:0:0 +max: ff8b:333c::ff8b:333c:0:0 +first_value: 8c:333c::8c:333c:0:0 +last_value: ff8b:333c::ff8b:333c:0:0 +topK: ['ff8b:333c::ff8b:333c:0:0','68c:333c::68c:333c:0:0','e98b:333c::e98b:333c:0:0','e88b:333c::e88b:333c:0:0','e78b:333c::e78b:333c:0:0','e68b:333c::e68b:333c:0:0','e58b:333c::e58b:333c:0:0','e48b:333c::e48b:333c:0:0','ea8b:333c::ea8b:333c:0:0','e28b:333c::e28b:333c:0:0'] +groupArray: 
['8c:333c::8c:333c:0:0','18c:333c::18c:333c:0:0','28c:333c::28c:333c:0:0','38c:333c::38c:333c:0:0','48c:333c::48c:333c:0:0','58c:333c::58c:333c:0:0','68c:333c::68c:333c:0:0','78c:333c::78c:333c:0:0','88c:333c::88c:333c:0:0','98c:333c::98c:333c:0:0','a8c:333c::a8c:333c:0:0','b8c:333c::b8c:333c:0:0','c8c:333c::c8c:333c:0:0','d8c:333c::d8c:333c:0:0','e8c:333c::e8c:333c:0:0','dd8b:333c::dd8b:333c:0:0','de8b:333c::de8b:333c:0:0','df8b:333c::df8b:333c:0:0','e08b:333c::e08b:333c:0:0','e18b:333c::e18b:333c:0:0','e28b:333c::e28b:333c:0:0','e38b:333c::e38b:333c:0:0','e48b:333c::e48b:333c:0:0','e58b:333c::e58b:333c:0:0','e68b:333c::e68b:333c:0:0','e78b:333c::e78b:333c:0:0','e88b:333c::e88b:333c:0:0','e98b:333c::e98b:333c:0:0','ea8b:333c::ea8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','f08b:333c::f08b:333c:0:0','f18b:333c::f18b:333c:0:0','f28b:333c::f28b:333c:0:0','f38b:333c::f38b:333c:0:0','f48b:333c::f48b:333c:0:0','f58b:333c::f58b:333c:0:0','f68b:333c::f68b:333c:0:0','f78b:333c::f78b:333c:0:0','f88b:333c::f88b:333c:0:0','f98b:333c::f98b:333c:0:0','fa8b:333c::fa8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','fc8b:333c::fc8b:333c:0:0','fd8b:333c::fd8b:333c:0:0','fe8b:333c::fe8b:333c:0:0','ff8b:333c::ff8b:333c:0:0'] +groupUniqArray: ['58c:333c::58c:333c:0:0','f78b:333c::f78b:333c:0:0','f38b:333c::f38b:333c:0:0','18c:333c::18c:333c:0:0','e78b:333c::e78b:333c:0:0','e38b:333c::e38b:333c:0:0','e48b:333c::e48b:333c:0:0','e08b:333c::e08b:333c:0:0','df8b:333c::df8b:333c:0:0','f48b:333c::f48b:333c:0:0','68c:333c::68c:333c:0:0','28c:333c::28c:333c:0:0','f08b:333c::f08b:333c:0:0','fa8b:333c::fa8b:333c:0:0','88c:333c::88c:333c:0:0','c8c:333c::c8c:333c:0:0','fe8b:333c::fe8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','e98b:333c::e98b:333c:0:0','ed8b:333c::ed8b:333c:0:0','b8c:333c::b8c:333c:0:0','f98b:333c::f98b:333c:0:0','fd8b:333c::fd8b:333c:0:0','de8b:333c::de8b:333c:0:0','f58b:333c::f58b:333c:0:0','78c:333c::78c:333c:0:0','38c:333c::38c:333c:0:0','f18b:333c::f18b:333c:0:0','e58b:333c::e58b:333c:0:0','e18b:333c::e18b:333c:0:0','e68b:333c::e68b:333c:0:0','e28b:333c::e28b:333c:0:0','48c:333c::48c:333c:0:0','dd8b:333c::dd8b:333c:0:0','f68b:333c::f68b:333c:0:0','f28b:333c::f28b:333c:0:0','8c:333c::8c:333c:0:0','a8c:333c::a8c:333c:0:0','f88b:333c::f88b:333c:0:0','fc8b:333c::fc8b:333c:0:0','e8c:333c::e8c:333c:0:0','e88b:333c::e88b:333c:0:0','ec8b:333c::ec8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','98c:333c::98c:333c:0:0','d8c:333c::d8c:333c:0:0','ff8b:333c::ff8b:333c:0:0'] +uniq: 50 +uniqExact: 50 +uniqCombined: 50 +uniqCombined64: 50 +uniqHLL12: 50 +uniqTheta: 50 +----- Arg / hash / State / ip4 ----- +Row 1: +────── +argMinState: 13774589216353164344 +argMaxState: 9177365218111013695 +----- Arg / hash / State / ip6 ----- +Row 1: +────── +argMinState: 7320668278649617037 +argMaxState: 16598449636475438091 +----- Arg / finalizeAggregation / State / ip4 ----- +Row 1: +────── +argMinState: 59.154.201.255 +argMaxState: 59.154.202.48 +----- Arg / finalizeAggregation / State / ip6 ----- +Row 1: +────── +argMinState: 8c:333c::8c:333c:0:0 +argMaxState: ff8b:333c::ff8b:333c:0:0 +----- hash / State / ip4 ----- +Row 1: +────── +anyState: 12535288824949910799 +anyHeavyState: 9327034461443333306 +anyLastState: 12535288824949910799 +----- hash / State / ip6 ----- +Row 1: +────── +anyState: 1383994153676807399 +anyHeavyState: 15555709096566410627 +anyLastState: 
1383994153676807399 +----- finalizeAggregation / State / ip4 ----- +Row 1: +────── +any: 59.154.201.255 +anyHeavy: 59.154.201.255 +anyLast: 59.154.201.255 +----- finalizeAggregation / State / ip6 ----- +Row 1: +────── +any: dd8b:333c::dd8b:333c:0:0 +anyHeavy: dd8b:333c::dd8b:333c:0:0 +anyLast: dd8b:333c::dd8b:333c:0:0 diff --git a/tests/queries/0_stateless/02751_ip_types_aggregate_functions_states.sql.j2 b/tests/queries/0_stateless/02751_ip_types_aggregate_functions_states.sql.j2 new file mode 100644 index 00000000000..133d5287fdb --- /dev/null +++ b/tests/queries/0_stateless/02751_ip_types_aggregate_functions_states.sql.j2 @@ -0,0 +1,123 @@ +{# this test checks backward compatibility of aggregate functions States against IPv4, IPv6 types #} + +{% set ip4_generator = "select number::UInt32::IPv4 ip from numbers(999999999,50) order by ip" %} +{% set ip6_generator = "SELECT toIPv6(IPv6NumToString(toFixedString(reinterpretAsFixedString(number)||reinterpretAsFixedString(number), 16))) AS ip FROM numbers(1010011101, 50) order by ip" %} +{% set ip_generators = {'ip4': ip4_generator, 'ip6': ip6_generator} %} + + +{% set agg_func_list = [ "min", "max", "first_value", "last_value", "topK", "groupArray", "groupUniqArray", "uniq", "uniqExact", "uniqCombined", "uniqCombined64", "uniqHLL12", "uniqTheta" ] %} + +{% for generator_name, ip_generator in ip_generators.items() %} + +select '----- hash / State / {{ generator_name }} -----'; +select + {% for func in agg_func_list -%} + cityHash64(hex( {{ func }}State(ip) )) AS {{ func }}State{{ "," if not loop.last }} + {% endfor -%} +from ( {{ ip_generator }} ) format Vertical; + +{% endfor -%} + + + +{% for generator_name, ip_generator in ip_generators.items() %} + +select '----- finalizeAggregation / State / {{ generator_name }} -----'; +select + {% for func in agg_func_list -%} + finalizeAggregation( {{ func }}State(ip) ) AS {{ func }}{{ "," if not loop.last }} + {% endfor -%} +from ( {{ ip_generator }} ) format Vertical; + +{% endfor -%} + + + +{% for generator_name, ip_generator in ip_generators.items() %} + +select '----- hash / IfState / {{ generator_name }} -----'; +select + {% for func in agg_func_list -%} + cityHash64(hex( {{ func }}IfState(ip, 1) )) AS {{ func }}IfState{{ "," if not loop.last }} + {% endfor -%} +from ( {{ ip_generator }} ) format Vertical; + +{% endfor -%} + + + +{% for generator_name, ip_generator in ip_generators.items() %} + +select '----- finalizeAggregation / IfState / {{ generator_name }} -----'; +select + {% for func in agg_func_list -%} + finalizeAggregation( {{ func }}IfState(ip, 1) ) AS {{ func }}{{ "," if not loop.last }} + {% endfor -%} +from ( {{ ip_generator }} ) format Vertical; + +{% endfor -%} + + + + +{% set agg_func_list = [ "argMin", "argMax" ] %} + +{% for generator_name, ip_generator in ip_generators.items() %} + +select '----- Arg / hash / State / {{ generator_name }} -----'; +select + {% for func in agg_func_list -%} + cityHash64(hex( {{ func }}State(ip, ip) )) AS {{ func }}State{{ "," if not loop.last }} + {% endfor -%} +from ( {{ ip_generator }} ) format Vertical; + +{% endfor -%} + + + + +{% for generator_name, ip_generator in ip_generators.items() %} + +select '----- Arg / finalizeAggregation / State / {{ generator_name }} -----'; +select + {% for func in agg_func_list -%} + finalizeAggregation( {{ func }}State(ip, ip) ) AS {{ func }}State{{ "," if not loop.last }} + {% endfor -%} +from ( {{ ip_generator }} ) format Vertical; + +{% endfor -%} + + +{# let's test functions with not deterministic result 
against 1 row, to make it deterministic #} +{% set ip4_generator = "select number::UInt32::IPv4 ip from numbers(999999999,1) order by ip" %} +{% set ip6_generator = "SELECT toIPv6(IPv6NumToString(toFixedString(reinterpretAsFixedString(number)||reinterpretAsFixedString(number), 16))) AS ip FROM numbers(1010011101, 1) order by ip" %} + +{% set ip_generators = {'ip4': ip4_generator, 'ip6': ip6_generator} %} + +{% set agg_func_list = [ "any", "anyHeavy", "anyLast" ] %} + + +{% for generator_name, ip_generator in ip_generators.items() %} + +select '----- hash / State / {{ generator_name }} -----'; +select + {% for func in agg_func_list -%} + cityHash64(hex( {{ func }}State(ip) )) AS {{ func }}State{{ "," if not loop.last }} + {% endfor -%} +from ( {{ ip_generator }} ) format Vertical; + +{% endfor -%} + + + +{% for generator_name, ip_generator in ip_generators.items() %} + +select '----- finalizeAggregation / State / {{ generator_name }} -----'; +select + {% for func in agg_func_list -%} + finalizeAggregation( {{ func }}State(ip) ) AS {{ func }}{{ "," if not loop.last }} + {% endfor -%} +from ( {{ ip_generator }} ) format Vertical; + +{% endfor -%} + From 6c48aba69392b68c08a4105d6c5ebd9dbf9392c1 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 8 Jun 2023 20:30:34 +0200 Subject: [PATCH 0253/1997] Fix tests --- tests/integration/helpers/0_common_instance_users.xml | 6 +----- .../test_dictionaries_mysql/configs/users.xml | 1 + tests/integration/test_dictionaries_mysql/test.py | 3 ++- .../test_dictionaries_postgresql/configs/users.xml | 10 ++++++++++ .../test_mysql_database_engine/configs/users.xml | 9 +++++++++ tests/integration/test_mysql_database_engine/test.py | 1 + .../configs/users.d/0a_users_no_default_access.xml | 9 +++++++++ .../test_postgresql_database_engine/configs/users.xml | 9 +++++++++ .../test_postgresql_database_engine/test.py | 2 +- .../configs/users.xml | 7 +++++++ .../test_redirect_url_storage/configs/users.xml | 9 +++++++++ tests/integration/test_redirect_url_storage/test.py | 1 + .../integration/test_storage_mongodb/configs/users.xml | 9 +++++++++ tests/integration/test_storage_mongodb/test.py | 1 + tests/integration/test_storage_mysql/configs/users.xml | 1 + tests/integration/test_storage_mysql/test.py | 1 + .../test_storage_postgresql/configs/users.xml | 9 +++++++++ tests/integration/test_storage_postgresql/test.py | 2 +- tests/integration/test_storage_s3/configs/users.xml | 9 +++++++++ tests/integration/test_storage_s3/test.py | 2 +- .../test_table_function_mongodb/configs/users.xml | 9 +++++++++ tests/integration/test_table_function_mongodb/test.py | 1 + 22 files changed, 102 insertions(+), 9 deletions(-) create mode 100644 tests/integration/test_dictionaries_postgresql/configs/users.xml create mode 100644 tests/integration/test_mysql_database_engine/configs/users.xml create mode 100644 tests/integration/test_named_collections/configs/users.d/0a_users_no_default_access.xml create mode 100644 tests/integration/test_postgresql_database_engine/configs/users.xml create mode 100644 tests/integration/test_redirect_url_storage/configs/users.xml create mode 100644 tests/integration/test_storage_mongodb/configs/users.xml create mode 100644 tests/integration/test_storage_postgresql/configs/users.xml create mode 100644 tests/integration/test_storage_s3/configs/users.xml create mode 100644 tests/integration/test_table_function_mongodb/configs/users.xml diff --git a/tests/integration/helpers/0_common_instance_users.xml b/tests/integration/helpers/0_common_instance_users.xml index 
6aae12400fd..3399ef5915a 100644 --- a/tests/integration/helpers/0_common_instance_users.xml +++ b/tests/integration/helpers/0_common_instance_users.xml @@ -1,11 +1,7 @@ - - GRANT ACCESS MANAGEMENT ON *.* WITH GRANT OPTION - GRANT ALL ON *.* WITH GRANT OPTION - GRANT USE NAMED COLLECTION ON * WITH GRANT OPTION - + 1 diff --git a/tests/integration/test_dictionaries_mysql/configs/users.xml b/tests/integration/test_dictionaries_mysql/configs/users.xml index 4555a2ed494..70c7d3bc2c1 100644 --- a/tests/integration/test_dictionaries_mysql/configs/users.xml +++ b/tests/integration/test_dictionaries_mysql/configs/users.xml @@ -12,6 +12,7 @@ default default + 1 diff --git a/tests/integration/test_dictionaries_mysql/test.py b/tests/integration/test_dictionaries_mysql/test.py index a12139a0bea..8252a2fd514 100644 --- a/tests/integration/test_dictionaries_mysql/test.py +++ b/tests/integration/test_dictionaries_mysql/test.py @@ -8,9 +8,10 @@ import logging DICTS = ["configs/dictionaries/mysql_dict1.xml", "configs/dictionaries/mysql_dict2.xml"] CONFIG_FILES = ["configs/remote_servers.xml", "configs/named_collections.xml"] +USER_CONFIGS = ["configs/users.xml"] cluster = ClickHouseCluster(__file__) instance = cluster.add_instance( - "instance", main_configs=CONFIG_FILES, with_mysql=True, dictionaries=DICTS + "instance", main_configs=CONFIG_FILES, user_configs=USER_CONFIGS, with_mysql=True, dictionaries=DICTS ) create_table_mysql_template = """ diff --git a/tests/integration/test_dictionaries_postgresql/configs/users.xml b/tests/integration/test_dictionaries_postgresql/configs/users.xml new file mode 100644 index 00000000000..beb08eb6ed4 --- /dev/null +++ b/tests/integration/test_dictionaries_postgresql/configs/users.xml @@ -0,0 +1,10 @@ + + + + + default + default + 1 + + + diff --git a/tests/integration/test_mysql_database_engine/configs/users.xml b/tests/integration/test_mysql_database_engine/configs/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_mysql_database_engine/configs/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_mysql_database_engine/test.py b/tests/integration/test_mysql_database_engine/test.py index 52a7b319551..18dde5307fd 100644 --- a/tests/integration/test_mysql_database_engine/test.py +++ b/tests/integration/test_mysql_database_engine/test.py @@ -12,6 +12,7 @@ cluster = ClickHouseCluster(__file__) clickhouse_node = cluster.add_instance( "node1", main_configs=["configs/remote_servers.xml", "configs/named_collections.xml"], + user_configs=["configs/users.xml"], with_mysql=True, stay_alive=True, ) diff --git a/tests/integration/test_named_collections/configs/users.d/0a_users_no_default_access.xml b/tests/integration/test_named_collections/configs/users.d/0a_users_no_default_access.xml new file mode 100644 index 00000000000..b8f38f04ca9 --- /dev/null +++ b/tests/integration/test_named_collections/configs/users.d/0a_users_no_default_access.xml @@ -0,0 +1,9 @@ + + + + + default + default + + + diff --git a/tests/integration/test_postgresql_database_engine/configs/users.xml b/tests/integration/test_postgresql_database_engine/configs/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_postgresql_database_engine/configs/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_postgresql_database_engine/test.py b/tests/integration/test_postgresql_database_engine/test.py index d9f06f0295b..68e6f444f73 100644 --- 
a/tests/integration/test_postgresql_database_engine/test.py +++ b/tests/integration/test_postgresql_database_engine/test.py @@ -8,7 +8,7 @@ from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( - "node1", main_configs=["configs/named_collections.xml"], with_postgres=True + "node1", main_configs=["configs/named_collections.xml"], user_configs=["configs/users.xml"], with_postgres=True ) postgres_table_template = """
diff --git a/tests/integration/test_postgresql_replica_database_engine_2/configs/users.xml b/tests/integration/test_postgresql_replica_database_engine_2/configs/users.xml index 26ea20e012f..e0c51962193 100644 --- a/tests/integration/test_postgresql_replica_database_engine_2/configs/users.xml +++ b/tests/integration/test_postgresql_replica_database_engine_2/configs/users.xml @@ -4,4 +4,11 @@ 1 + + + + default + 1 + +
diff --git a/tests/integration/test_redirect_url_storage/configs/users.xml b/tests/integration/test_redirect_url_storage/configs/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_redirect_url_storage/configs/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + +
diff --git a/tests/integration/test_redirect_url_storage/test.py b/tests/integration/test_redirect_url_storage/test.py index b2178655444..225a34c9109 100644 --- a/tests/integration/test_redirect_url_storage/test.py +++ b/tests/integration/test_redirect_url_storage/test.py @@ -9,6 +9,7 @@ cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( "node1", main_configs=["configs/named_collections.xml"], + user_configs=["configs/users.xml"], with_zookeeper=False, with_hdfs=True, )
diff --git a/tests/integration/test_storage_mongodb/configs/users.xml b/tests/integration/test_storage_mongodb/configs/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_storage_mongodb/configs/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + +
diff --git a/tests/integration/test_storage_mongodb/test.py b/tests/integration/test_storage_mongodb/test.py index 6ba5520704d..174ad908d60 100644 --- a/tests/integration/test_storage_mongodb/test.py +++ b/tests/integration/test_storage_mongodb/test.py @@ -17,6 +17,7 @@ def started_cluster(request): "configs_secure/config.d/ssl_conf.xml", "configs/named_collections.xml", ], + user_configs=["configs/users.xml"], with_mongo=True, with_mongo_secure=request.param, )
diff --git a/tests/integration/test_storage_mysql/configs/users.xml b/tests/integration/test_storage_mysql/configs/users.xml index d030ccb0e72..a11985dd113 100644 --- a/tests/integration/test_storage_mysql/configs/users.xml +++ b/tests/integration/test_storage_mysql/configs/users.xml @@ -12,6 +12,7 @@ ::/0 default + 1
diff --git a/tests/integration/test_storage_mysql/test.py b/tests/integration/test_storage_mysql/test.py index 49629575ec7..3e3132949e7 100644 --- a/tests/integration/test_storage_mysql/test.py +++ b/tests/integration/test_storage_mysql/test.py @@ -13,6 +13,7 @@ cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( "node1", main_configs=["configs/remote_servers.xml", "configs/named_collections.xml"], + user_configs=["configs/users.xml"], with_mysql=True, ) node2 = cluster.add_instance(
diff --git a/tests/integration/test_storage_postgresql/configs/users.xml b/tests/integration/test_storage_postgresql/configs/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++
b/tests/integration/test_storage_postgresql/configs/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_storage_postgresql/test.py b/tests/integration/test_storage_postgresql/test.py index d60a90ed7ce..2ce1bac3cff 100644 --- a/tests/integration/test_storage_postgresql/test.py +++ b/tests/integration/test_storage_postgresql/test.py @@ -7,7 +7,7 @@ from helpers.postgres_utility import get_postgres_conn cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( - "node1", main_configs=["configs/named_collections.xml"], with_postgres=True + "node1", main_configs=["configs/named_collections.xml"], user_configs=["configs/users.xml"], with_postgres=True ) node2 = cluster.add_instance( "node2", diff --git a/tests/integration/test_storage_s3/configs/users.xml b/tests/integration/test_storage_s3/configs/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_storage_s3/configs/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index f1cbd3366b4..75473f3c406 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -55,7 +55,7 @@ def started_cluster(): "configs/named_collections.xml", "configs/schema_cache.xml", ], - user_configs=["configs/access.xml"], + user_configs=["configs/access.xml", "configs/users.xml"], ) cluster.add_instance( "s3_max_redirects", diff --git a/tests/integration/test_table_function_mongodb/configs/users.xml b/tests/integration/test_table_function_mongodb/configs/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_table_function_mongodb/configs/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_table_function_mongodb/test.py b/tests/integration/test_table_function_mongodb/test.py index e0ad71b0079..3b6ace9d11b 100644 --- a/tests/integration/test_table_function_mongodb/test.py +++ b/tests/integration/test_table_function_mongodb/test.py @@ -16,6 +16,7 @@ def started_cluster(request): main_configs=[ "configs_secure/config.d/ssl_conf.xml", ], + user_configs=["configs/users.xml"], with_mongo_secure=request.param, ) cluster.start() From c96989ca14415707e5dc37958ca36093e8292f46 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Thu, 8 Jun 2023 19:37:52 +0000 Subject: [PATCH 0254/1997] no-fasttest because uniqTheta --- .../02751_ip_types_aggregate_functions_states.sql.j2 | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/queries/0_stateless/02751_ip_types_aggregate_functions_states.sql.j2 b/tests/queries/0_stateless/02751_ip_types_aggregate_functions_states.sql.j2 index 133d5287fdb..708eeab7724 100644 --- a/tests/queries/0_stateless/02751_ip_types_aggregate_functions_states.sql.j2 +++ b/tests/queries/0_stateless/02751_ip_types_aggregate_functions_states.sql.j2 @@ -1,3 +1,6 @@ +-- Tags: no-fasttest +-- no-fasttest because uniqTheta + {# this test checks backward compatibility of aggregate functions States against IPv4, IPv6 types #} {% set ip4_generator = "select number::UInt32::IPv4 ip from numbers(999999999,50) order by ip" %} From 5cf29fbf762e0efc51142afb3396a16414c121fc Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 9 Jun 2023 13:13:33 +0200 Subject: [PATCH 0255/1997] Fix black check --- tests/integration/test_dictionaries_mysql/test.py | 6 +++++- tests/integration/test_postgresql_database_engine/test.py | 5 ++++- 
tests/integration/test_storage_postgresql/test.py | 5 ++++- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_dictionaries_mysql/test.py b/tests/integration/test_dictionaries_mysql/test.py index 8252a2fd514..ee0d957b8a9 100644 --- a/tests/integration/test_dictionaries_mysql/test.py +++ b/tests/integration/test_dictionaries_mysql/test.py @@ -11,7 +11,11 @@ CONFIG_FILES = ["configs/remote_servers.xml", "configs/named_collections.xml"] USER_CONFIGS = ["configs/users.xml"] cluster = ClickHouseCluster(__file__) instance = cluster.add_instance( - "instance", main_configs=CONFIG_FILES, user_configs=USER_CONFIGS, with_mysql=True, dictionaries=DICTS + "instance", + main_configs=CONFIG_FILES, + user_configs=USER_CONFIGS, + with_mysql=True, + dictionaries=DICTS, ) create_table_mysql_template = """ diff --git a/tests/integration/test_postgresql_database_engine/test.py b/tests/integration/test_postgresql_database_engine/test.py index 68e6f444f73..59a464f9020 100644 --- a/tests/integration/test_postgresql_database_engine/test.py +++ b/tests/integration/test_postgresql_database_engine/test.py @@ -8,7 +8,10 @@ from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( - "node1", main_configs=["configs/named_collections.xml"], user_configs=["configs/users.xml"], with_postgres=True + "node1", + main_configs=["configs/named_collections.xml"], + user_configs=["configs/users.xml"], + with_postgres=True, ) postgres_table_template = """ diff --git a/tests/integration/test_storage_postgresql/test.py b/tests/integration/test_storage_postgresql/test.py index 2ce1bac3cff..0c8fc597b5c 100644 --- a/tests/integration/test_storage_postgresql/test.py +++ b/tests/integration/test_storage_postgresql/test.py @@ -7,7 +7,10 @@ from helpers.postgres_utility import get_postgres_conn cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( - "node1", main_configs=["configs/named_collections.xml"], user_configs=["configs/users.xml"], with_postgres=True + "node1", + main_configs=["configs/named_collections.xml"], + user_configs=["configs/users.xml"], + with_postgres=True, ) node2 = cluster.add_instance( "node2", From 056ca4f555fbbf4463de5be8642a2c01b6759192 Mon Sep 17 00:00:00 2001 From: jinjunzh Date: Wed, 24 May 2023 13:26:15 -0400 Subject: [PATCH 0256/1997] Add extensive testing cases for deflate qpl codec --- .../sql-reference/statements/create/table.md | 2 +- src/Client/Connection.cpp | 2 +- src/Compression/CompressionCodecDeflateQpl.h | 3 +- src/Compression/CompressionFactory.h | 4 +- .../CompressionFactoryAdditions.cpp | 14 ++-- src/Compression/ICompressionCodec.h | 3 + src/Core/Settings.h | 1 + src/Interpreters/InterpreterCreateQuery.cpp | 3 +- src/Server/TCPHandler.cpp | 2 +- src/Storages/AlterCommands.cpp | 8 +-- src/Storages/ColumnsDescription.cpp | 2 +- src/Storages/Distributed/DistributedSink.cpp | 2 +- src/Storages/TTLDescription.cpp | 2 +- .../deflateqpl_compression_by_default.xml | 11 ++++ .../configs/enable_deflateqpl_codec.xml | 7 ++ .../test_non_default_compression/test.py | 65 ++++++++++++++++++- ...04_test_alter_compression_codecs.reference | 31 ++++++--- .../00804_test_alter_compression_codecs.sql | 28 +++++--- 18 files changed, 153 insertions(+), 37 deletions(-) create mode 100644 tests/integration/test_non_default_compression/configs/deflateqpl_compression_by_default.xml create mode 100644 tests/integration/test_non_default_compression/configs/enable_deflateqpl_codec.xml diff --git 
a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md index de44a001472..b0865ad2896 100644 --- a/docs/en/sql-reference/statements/create/table.md +++ b/docs/en/sql-reference/statements/create/table.md @@ -380,7 +380,7 @@ High compression levels are useful for asymmetric scenarios, like compress once, `DEFLATE_QPL` — [Deflate compression algorithm](https://github.com/intel/qpl) implemented by Intel® Query Processing Library. Some limitations apply: -- DEFLATE_QPL is experimental and can only be used after setting configuration parameter `allow_experimental_codecs=1`. +- DEFLATE_QPL is disabled by default and can only be used after setting configuration parameter `enable_qpl_deflate=1`. - DEFLATE_QPL requires a ClickHouse build compiled with SSE 4.2 instructions (by default, this is the case). Refer to [Build Clickhouse with DEFLATE_QPL](/docs/en/development/building_and_benchmarking_deflate_qpl.md/#Build-Clickhouse-with-DEFLATE_QPL) for more details. - DEFLATE_QPL works best if the system has an Intel® IAA (In-Memory Analytics Accelerator) offloading device. Refer to [Accelerator Configuration](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#accelerator-configuration) and [Benchmark with DEFLATE_QPL](/docs/en/development/building_and_benchmarking_deflate_qpl.md/#Run-Benchmark-with-DEFLATE_QPL) for more details. - DEFLATE_QPL-compressed data can only be transferred between ClickHouse nodes compiled with SSE 4.2 enabled.
diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 2350a5039ab..68bc3b39a56 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -588,7 +588,7 @@ void Connection::sendQuery( if (method == "ZSTD") level = settings->network_zstd_compression_level; - CompressionCodecFactory::instance().validateCodec(method, level, !settings->allow_suspicious_codecs, settings->allow_experimental_codecs); + CompressionCodecFactory::instance().validateCodec(method, level, !settings->allow_suspicious_codecs, settings->allow_experimental_codecs, settings->enable_qpl_deflate); compression_codec = CompressionCodecFactory::instance().get(method, level); } else
diff --git a/src/Compression/CompressionCodecDeflateQpl.h b/src/Compression/CompressionCodecDeflateQpl.h index 7a1a764295d..13aa8733b54 100644 --- a/src/Compression/CompressionCodecDeflateQpl.h +++ b/src/Compression/CompressionCodecDeflateQpl.h @@ -98,7 +98,8 @@ public: protected: bool isCompression() const override { return true; } bool isGenericCompression() const override { return true; } - bool isExperimental() const override { return true; } + bool isExperimental() const override { return false; } + bool isDeflateQplCompression() const override { return true; } UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override;
diff --git a/src/Compression/CompressionFactory.h b/src/Compression/CompressionFactory.h index a4451f9ed2e..1fdaf4f1c71 100644 --- a/src/Compression/CompressionFactory.h +++ b/src/Compression/CompressionFactory.h @@ -40,10 +40,10 @@ public: CompressionCodecPtr getDefaultCodec() const; /// Validate codecs AST specified by user and parses codecs description (substitute default parameters) - ASTPtr validateCodecAndGetPreprocessedAST(const ASTPtr & ast, const DataTypePtr & column_type, bool sanity_check, bool allow_experimental_codecs) const; + ASTPtr
validateCodecAndGetPreprocessedAST(const ASTPtr & ast, const DataTypePtr & column_type, bool sanity_check, bool allow_experimental_codecs, bool enable_qpl_deflate) const; /// Validate codecs AST specified by user - void validateCodec(const String & family_name, std::optional level, bool sanity_check, bool allow_experimental_codecs) const; + void validateCodec(const String & family_name, std::optional level, bool sanity_check, bool allow_experimental_codecs, bool enable_qpl_deflate) const; /// Get codec by AST and possible column_type. Some codecs can use /// information about type to improve inner settings, but every codec should diff --git a/src/Compression/CompressionFactoryAdditions.cpp b/src/Compression/CompressionFactoryAdditions.cpp index 978a0fe5069..2630326238a 100644 --- a/src/Compression/CompressionFactoryAdditions.cpp +++ b/src/Compression/CompressionFactoryAdditions.cpp @@ -34,7 +34,7 @@ namespace ErrorCodes void CompressionCodecFactory::validateCodec( - const String & family_name, std::optional level, bool sanity_check, bool allow_experimental_codecs) const + const String & family_name, std::optional level, bool sanity_check, bool allow_experimental_codecs, bool enable_qpl_deflate) const { if (family_name.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Compression codec name cannot be empty"); @@ -43,13 +43,13 @@ void CompressionCodecFactory::validateCodec( { auto literal = std::make_shared(static_cast(*level)); validateCodecAndGetPreprocessedAST(makeASTFunction("CODEC", makeASTFunction(Poco::toUpper(family_name), literal)), - {}, sanity_check, allow_experimental_codecs); + {}, sanity_check, allow_experimental_codecs, enable_qpl_deflate); } else { auto identifier = std::make_shared(Poco::toUpper(family_name)); validateCodecAndGetPreprocessedAST(makeASTFunction("CODEC", identifier), - {}, sanity_check, allow_experimental_codecs); + {}, sanity_check, allow_experimental_codecs, enable_qpl_deflate); } } @@ -77,7 +77,7 @@ bool innerDataTypeIsFloat(const DataTypePtr & type) } ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST( - const ASTPtr & ast, const DataTypePtr & column_type, bool sanity_check, bool allow_experimental_codecs) const + const ASTPtr & ast, const DataTypePtr & column_type, bool sanity_check, bool allow_experimental_codecs, bool enable_qpl_deflate) const { if (const auto * func = ast->as()) { @@ -159,6 +159,12 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST( " You can enable it with the 'allow_experimental_codecs' setting.", codec_family_name); + if (!enable_qpl_deflate && result_codec->isDeflateQplCompression()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Codec {} is disabled by default." + " You can enable it with the 'enable_qpl_deflate' setting.", + codec_family_name); + codecs_descriptions->children.emplace_back(result_codec->getCodecDesc()); } diff --git a/src/Compression/ICompressionCodec.h b/src/Compression/ICompressionCodec.h index 44835ac19cb..d92ad3fc718 100644 --- a/src/Compression/ICompressionCodec.h +++ b/src/Compression/ICompressionCodec.h @@ -112,6 +112,9 @@ public: /// If it does nothing. virtual bool isNone() const { return false; } + /// This is a knob for Deflate QPL codec. 
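+    /// Codecs for which this returns true are rejected during validation unless the
+    /// query-level setting enable_qpl_deflate is set (see the check added above in
+    /// CompressionCodecFactory::validateCodecAndGetPreprocessedAST).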
+ virtual bool isDeflateQplCompression() const { return false; } + protected: /// This is used for fuzz testing friend int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size);
diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 464b9168a4c..c6a2069e6ae 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -319,6 +319,7 @@ class IColumn; M(Bool, allow_distributed_ddl, true, "If it is set to true, then a user is allowed to execute distributed DDL queries.", 0) \ M(Bool, allow_suspicious_codecs, false, "If it is set to true, allow to specify meaningless compression codecs.", 0) \ M(Bool, allow_experimental_codecs, false, "If it is set to true, allow to specify experimental compression codecs (but we don't have those yet and this option does nothing).", 0) \ + M(Bool, enable_qpl_deflate, false, "If it is set to true, allow to use deflate_qpl for compression.", 0) \ M(UInt64, query_profiler_real_time_period_ns, QUERY_PROFILER_DEFAULT_SAMPLE_RATE_NS, "Period for real clock timer of query profiler (in nanoseconds). Set 0 value to turn off the real clock query profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \ M(UInt64, query_profiler_cpu_time_period_ns, QUERY_PROFILER_DEFAULT_SAMPLE_RATE_NS, "Period for CPU clock timer of query profiler (in nanoseconds). Set 0 value to turn off the CPU clock query profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \ M(Bool, metrics_perf_events_enabled, false, "If enabled, some of the perf events will be measured throughout queries' execution.", 0) \
diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index ab9e1fb04d6..5c22b46b360 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -571,6 +571,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( bool sanity_check_compression_codecs = !attach && !context_->getSettingsRef().allow_suspicious_codecs; bool allow_experimental_codecs = attach || context_->getSettingsRef().allow_experimental_codecs; + bool enable_qpl_deflate = attach || context_->getSettingsRef().enable_qpl_deflate; ColumnsDescription res; auto name_type_it = column_names_and_types.begin(); @@ -631,7 +632,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( if (col_decl.default_specifier == "ALIAS") throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot specify codec for column type ALIAS"); column.codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST( - col_decl.codec, column.type, sanity_check_compression_codecs, allow_experimental_codecs); + col_decl.codec, column.type, sanity_check_compression_codecs, allow_experimental_codecs, enable_qpl_deflate); } if (col_decl.ttl)
diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 1ded7d97248..96c585e7d16 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1775,7 +1775,7 @@ void TCPHandler::initBlockOutput(const Block & block) if (state.compression == Protocol::Compression::Enable) { - CompressionCodecFactory::instance().validateCodec(method, level, !query_settings.allow_suspicious_codecs, query_settings.allow_experimental_codecs); + CompressionCodecFactory::instance().validateCodec(method, level, !query_settings.allow_suspicious_codecs, query_settings.allow_experimental_codecs,
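+            // Same gate as on the CREATE/ALTER path: the codec used for blocks sent
+            // back to the client is validated against enable_qpl_deflate as well.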
query_settings.enable_qpl_deflate); state.maybe_compressed_out = std::make_shared( *out, CompressionCodecFactory::instance().get(method, level)); diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index 5fd823b9e01..ecbddfc3e2a 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -388,7 +388,7 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) column.comment = *comment; if (codec) - column.codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(codec, data_type, false, true); + column.codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(codec, data_type, false, true, true); column.ttl = ttl; @@ -429,7 +429,7 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) else { if (codec) - column.codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(codec, data_type ? data_type : column.type, false, true); + column.codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(codec, data_type ? data_type : column.type, false, true, true); if (comment) column.comment = *comment; @@ -1067,7 +1067,7 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const "this column name is reserved for lightweight delete feature", backQuote(column_name)); if (command.codec) - CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(command.codec, command.data_type, !context->getSettingsRef().allow_suspicious_codecs, context->getSettingsRef().allow_experimental_codecs); + CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(command.codec, command.data_type, !context->getSettingsRef().allow_suspicious_codecs, context->getSettingsRef().allow_experimental_codecs, context->getSettingsRef().enable_qpl_deflate); all_columns.add(ColumnDescription(column_name, command.data_type)); } @@ -1093,7 +1093,7 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const { if (all_columns.hasAlias(column_name)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot specify codec for column type ALIAS"); - CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(command.codec, command.data_type, !context->getSettingsRef().allow_suspicious_codecs, context->getSettingsRef().allow_experimental_codecs); + CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(command.codec, command.data_type, !context->getSettingsRef().allow_suspicious_codecs, context->getSettingsRef().allow_experimental_codecs, context->getSettingsRef().enable_qpl_deflate); } auto column_default = all_columns.getDefault(column_name); if (column_default) diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index 8eabae7929c..045afd7e6e6 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -130,7 +130,7 @@ void ColumnDescription::readText(ReadBuffer & buf) comment = col_ast->comment->as().value.get(); if (col_ast->codec) - codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(col_ast->codec, type, false, true); + codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(col_ast->codec, type, false, true, true); if (col_ast->ttl) ttl = col_ast->ttl; diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index 720a951299a..ce1dbde8eae 100644 --- a/src/Storages/Distributed/DistributedSink.cpp 
+++ b/src/Storages/Distributed/DistributedSink.cpp @@ -733,7 +733,7 @@ void DistributedSink::writeToShard(const Cluster::ShardInfo & shard_info, const if (compression_method == "ZSTD") compression_level = settings.network_zstd_compression_level; - CompressionCodecFactory::instance().validateCodec(compression_method, compression_level, !settings.allow_suspicious_codecs, settings.allow_experimental_codecs); + CompressionCodecFactory::instance().validateCodec(compression_method, compression_level, !settings.allow_suspicious_codecs, settings.allow_experimental_codecs, settings.enable_qpl_deflate); CompressionCodecPtr compression_codec = CompressionCodecFactory::instance().get(compression_method, compression_level); /// tmp directory is used to ensure atomicity of transactions diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index e1a80800630..f5209cbdff6 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -285,7 +285,7 @@ TTLDescription TTLDescription::getTTLFromAST( { result.recompression_codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST( - ttl_element->recompression_codec, {}, !context->getSettingsRef().allow_suspicious_codecs, context->getSettingsRef().allow_experimental_codecs); + ttl_element->recompression_codec, {}, !context->getSettingsRef().allow_suspicious_codecs, context->getSettingsRef().allow_experimental_codecs, context->getSettingsRef().enable_qpl_deflate); } } diff --git a/tests/integration/test_non_default_compression/configs/deflateqpl_compression_by_default.xml b/tests/integration/test_non_default_compression/configs/deflateqpl_compression_by_default.xml new file mode 100644 index 00000000000..2ad6a0f1eff --- /dev/null +++ b/tests/integration/test_non_default_compression/configs/deflateqpl_compression_by_default.xml @@ -0,0 +1,11 @@ + + + + + 0 + 0 + + deflate_qpl + + + diff --git a/tests/integration/test_non_default_compression/configs/enable_deflateqpl_codec.xml b/tests/integration/test_non_default_compression/configs/enable_deflateqpl_codec.xml new file mode 100644 index 00000000000..46e9e43ca27 --- /dev/null +++ b/tests/integration/test_non_default_compression/configs/enable_deflateqpl_codec.xml @@ -0,0 +1,7 @@ + + + + 1 + + + diff --git a/tests/integration/test_non_default_compression/test.py b/tests/integration/test_non_default_compression/test.py index e0a67a5db95..e69b32daae0 100644 --- a/tests/integration/test_non_default_compression/test.py +++ b/tests/integration/test_non_default_compression/test.py @@ -41,7 +41,14 @@ node6 = cluster.add_instance( main_configs=["configs/allow_experimental_codecs.xml"], user_configs=["configs/allow_suspicious_codecs.xml"], ) - +node7 = cluster.add_instance( + "node7", + main_configs=["configs/deflateqpl_compression_by_default.xml"], + user_configs=[ + "configs/enable_deflateqpl_codec.xml", + "configs/allow_suspicious_codecs.xml", + ], +) @pytest.fixture(scope="module") def start_cluster(): @@ -244,3 +251,59 @@ def test_uncompressed_cache_plus_zstd_codec(start_cluster): ) == "10000\n" ) + +def test_preconfigured_deflateqpl_codec(start_cluster): + node7.query( + """ + CREATE TABLE compression_codec_multiple_with_key ( + somedate Date CODEC(ZSTD, ZSTD, ZSTD(12), LZ4HC(12), DEFLATE_QPL), + id UInt64 CODEC(LZ4, ZSTD, NONE, LZ4HC, DEFLATE_QPL), + data String CODEC(ZSTD(2), LZ4HC, NONE, LZ4, LZ4, DEFLATE_QPL), + somecolumn Float64 + ) ENGINE = MergeTree() PARTITION BY somedate ORDER BY id SETTINGS index_granularity = 2; + """ + ) + node7.query( + 
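+        # node7's deflateqpl_compression_by_default.xml makes deflate_qpl the
+        # server-wide default compression method, so the codec-less column
+        # `somecolumn` above is QPL-compressed as well, not only the columns
+        # that list DEFLATE_QPL explicitly in their codec chain.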
"INSERT INTO compression_codec_multiple_with_key VALUES(toDate('2018-10-12'), 100000, 'hello', 88.88), (toDate('2018-10-12'), 100002, 'world', 99.99), (toDate('2018-10-12'), 1111, '!', 777.777)" + ) + assert ( + node7.query( + "SELECT COUNT(*) FROM compression_codec_multiple_with_key WHERE id % 2 == 0" + ) + == "2\n" + ) + assert ( + node7.query( + "SELECT DISTINCT somecolumn FROM compression_codec_multiple_with_key ORDER BY id" + ) + == "777.777\n88.88\n99.99\n" + ) + assert ( + node7.query( + "SELECT data FROM compression_codec_multiple_with_key WHERE id >= 1112 AND somedate = toDate('2018-10-12') AND somecolumn <= 100" + ) + == "hello\nworld\n" + ) + + node7.query( + "INSERT INTO compression_codec_multiple_with_key SELECT toDate('2018-10-12'), number, toString(number), 1.0 FROM system.numbers LIMIT 10000" + ) + + assert ( + node7.query( + "SELECT COUNT(id) FROM compression_codec_multiple_with_key WHERE id % 10 == 0" + ) + == "1001\n" + ) + assert ( + node7.query( + "SELECT SUM(somecolumn) FROM compression_codec_multiple_with_key" + ) + == str(777.777 + 88.88 + 99.99 + 1.0 * 10000) + "\n" + ) + assert ( + node7.query( + "SELECT count(*) FROM compression_codec_multiple_with_key GROUP BY somedate" + ) + == "10003\n" + ) diff --git a/tests/queries/0_stateless/00804_test_alter_compression_codecs.reference b/tests/queries/0_stateless/00804_test_alter_compression_codecs.reference index cfbfadf1e67..a6afe11126c 100644 --- a/tests/queries/0_stateless/00804_test_alter_compression_codecs.reference +++ b/tests/queries/0_stateless/00804_test_alter_compression_codecs.reference @@ -12,13 +12,7 @@ CODEC(NONE) 2018-01-01 4 4 2018-01-01 5 5 2018-01-01 6 6 -2018-01-01 1 default_value -2018-01-01 2 default_value -2018-01-01 3 3 -2018-01-01 4 4 -2018-01-01 5 5 -2018-01-01 6 6 -CODEC(ZSTD(1), LZ4HC(0), LZ4, LZ4, NONE) +CODEC(DEFLATE_QPL) 2018-01-01 1 default_value 2018-01-01 2 default_value 2018-01-01 3 3 @@ -27,7 +21,26 @@ CODEC(ZSTD(1), LZ4HC(0), LZ4, LZ4, NONE) 2018-01-01 6 6 2018-01-01 7 7 2018-01-01 8 8 -CODEC(ZSTD(1), LZ4HC(0), LZ4, LZ4, NONE) -CODEC(NONE, LZ4, LZ4HC(0), ZSTD(1)) +2018-01-01 1 default_value +2018-01-01 2 default_value +2018-01-01 3 3 +2018-01-01 4 4 +2018-01-01 5 5 +2018-01-01 6 6 +2018-01-01 7 7 +2018-01-01 8 8 +CODEC(ZSTD(1), LZ4HC(0), LZ4, LZ4, DEFLATE_QPL, NONE) +2018-01-01 1 default_value +2018-01-01 2 default_value +2018-01-01 3 3 +2018-01-01 4 4 +2018-01-01 5 5 +2018-01-01 6 6 +2018-01-01 7 7 +2018-01-01 8 8 +2018-01-01 9 9 +2018-01-01 10 10 +CODEC(ZSTD(1), LZ4HC(0), LZ4, LZ4, DEFLATE_QPL, NONE) +CODEC(NONE, LZ4, LZ4HC(0), ZSTD(1), DEFLATE_QPL) 2 1 diff --git a/tests/queries/0_stateless/00804_test_alter_compression_codecs.sql b/tests/queries/0_stateless/00804_test_alter_compression_codecs.sql index 85e5f8b63ad..40a8bb4c7cb 100644 --- a/tests/queries/0_stateless/00804_test_alter_compression_codecs.sql +++ b/tests/queries/0_stateless/00804_test_alter_compression_codecs.sql @@ -25,15 +25,23 @@ INSERT INTO alter_compression_codec VALUES('2018-01-01', 5, '5'); INSERT INTO alter_compression_codec VALUES('2018-01-01', 6, '6'); SELECT * FROM alter_compression_codec ORDER BY id; -OPTIMIZE TABLE alter_compression_codec FINAL; -SELECT * FROM alter_compression_codec ORDER BY id; - -SET allow_suspicious_codecs = 1; -ALTER TABLE alter_compression_codec MODIFY COLUMN alter_column CODEC(ZSTD, LZ4HC, LZ4, LZ4, NONE); +SET enable_qpl_deflate = 1; +ALTER TABLE alter_compression_codec MODIFY COLUMN alter_column CODEC(DEFLATE_QPL); SELECT compression_codec FROM system.columns WHERE database = 
currentDatabase() AND table = 'alter_compression_codec' AND name = 'alter_column'; INSERT INTO alter_compression_codec VALUES('2018-01-01', 7, '7'); INSERT INTO alter_compression_codec VALUES('2018-01-01', 8, '8'); +SELECT * FROM alter_compression_codec ORDER BY id; + +OPTIMIZE TABLE alter_compression_codec FINAL; +SELECT * FROM alter_compression_codec ORDER BY id; + +SET allow_suspicious_codecs = 1; +ALTER TABLE alter_compression_codec MODIFY COLUMN alter_column CODEC(ZSTD, LZ4HC, LZ4, LZ4, DEFLATE_QPL, NONE); +SELECT compression_codec FROM system.columns WHERE database = currentDatabase() AND table = 'alter_compression_codec' AND name = 'alter_column'; + +INSERT INTO alter_compression_codec VALUES('2018-01-01', 9, '9'); +INSERT INTO alter_compression_codec VALUES('2018-01-01', 10, '10'); OPTIMIZE TABLE alter_compression_codec FINAL; SELECT * FROM alter_compression_codec ORDER BY id; @@ -54,15 +62,17 @@ ALTER TABLE alter_bad_codec ADD COLUMN alter_column DateTime DEFAULT '2019-01-01 ALTER TABLE alter_bad_codec ADD COLUMN alter_column DateTime DEFAULT '2019-01-01 00:00:00' CODEC(ZSTD(100)); -- { serverError 433 } +ALTER TABLE alter_bad_codec ADD COLUMN alter_column DateTime DEFAULT '2019-01-01 00:00:00' CODEC(DEFLATE_QPL(100)); -- { serverError 378 } + DROP TABLE IF EXISTS alter_bad_codec; DROP TABLE IF EXISTS large_alter_table_00804; DROP TABLE IF EXISTS store_of_hash_00804; CREATE TABLE large_alter_table_00804 ( - somedate Date CODEC(ZSTD, ZSTD, ZSTD(12), LZ4HC(12)), - id UInt64 CODEC(LZ4, ZSTD, NONE, LZ4HC), - data String CODEC(ZSTD(2), LZ4HC, NONE, LZ4, LZ4) + somedate Date CODEC(ZSTD, ZSTD, ZSTD(12), LZ4HC(12), DEFLATE_QPL), + id UInt64 CODEC(LZ4, ZSTD, NONE, LZ4HC, DEFLATE_QPL), + data String CODEC(ZSTD(2), LZ4HC, NONE, LZ4, LZ4, DEFLATE_QPL) ) ENGINE = MergeTree() PARTITION BY somedate ORDER BY id SETTINGS index_granularity = 2, index_granularity_bytes = '10Mi', min_bytes_for_wide_part = 0; INSERT INTO large_alter_table_00804 SELECT toDate('2019-01-01'), number, toString(number + rand()) FROM system.numbers LIMIT 300000; @@ -71,7 +81,7 @@ CREATE TABLE store_of_hash_00804 (hash UInt64) ENGINE = Memory(); INSERT INTO store_of_hash_00804 SELECT sum(cityHash64(*)) FROM large_alter_table_00804; -ALTER TABLE large_alter_table_00804 MODIFY COLUMN data CODEC(NONE, LZ4, LZ4HC, ZSTD); +ALTER TABLE large_alter_table_00804 MODIFY COLUMN data CODEC(NONE, LZ4, LZ4HC, ZSTD, DEFLATE_QPL); OPTIMIZE TABLE large_alter_table_00804; From d85bc02388317ed4b2743814bcc217baf1652971 Mon Sep 17 00:00:00 2001 From: jinjunzh Date: Wed, 24 May 2023 15:08:23 -0400 Subject: [PATCH 0257/1997] add function test for deflate_qpl --- ...4_test_custom_compression_codecs.reference | 8 ++-- .../00804_test_custom_compression_codecs.sql | 45 +++++++++++-------- ...m_compression_codes_log_storages.reference | 20 ++++----- ..._custom_compression_codes_log_storages.sql | 41 +++++++++-------- ...st_deflate_qpl_codec_compression.reference | 4 ++ ...804_test_deflate_qpl_codec_compression.sql | 32 +++++++++++++ ...804_test_delta_codec_compression.reference | 2 + .../00804_test_delta_codec_compression.sql | 38 ++++++++++++++++ 8 files changed, 140 insertions(+), 50 deletions(-) create mode 100644 tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.reference create mode 100644 tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.sql diff --git a/tests/queries/0_stateless/00804_test_custom_compression_codecs.reference b/tests/queries/0_stateless/00804_test_custom_compression_codecs.reference index 
7bd91e5a69b..a9cbe3d32d3 100644 --- a/tests/queries/0_stateless/00804_test_custom_compression_codecs.reference +++ b/tests/queries/0_stateless/00804_test_custom_compression_codecs.reference @@ -1,6 +1,6 @@ -1 hello 2018-12-14 1.1 aaa 5 -2 world 2018-12-15 2.2 bbb 6 -3 ! 2018-12-16 3.3 ccc 7 +1 hello 2018-12-14 1.1 aaa 5 qpl11 11 +2 world 2018-12-15 2.2 bbb 6 qpl22 22 +3 ! 2018-12-16 3.3 ccc 7 qpl33 33 2 1 world 2018-10-05 1.1 2 hello 2018-10-01 2.2 @@ -9,7 +9,7 @@ 10003 274972506.6 9175437371954010821 -CREATE TABLE default.compression_codec_multiple_more_types\n(\n `id` Decimal(38, 13) CODEC(ZSTD(1), LZ4, ZSTD(1), ZSTD(1), Delta(2), Delta(4), Delta(1), LZ4HC(0)),\n `data` FixedString(12) CODEC(ZSTD(1), ZSTD(1), NONE, NONE, NONE, LZ4HC(0)),\n `ddd.age` Array(UInt8) CODEC(LZ4, LZ4HC(0), NONE, NONE, NONE, ZSTD(1), Delta(8)),\n `ddd.Name` Array(String) CODEC(LZ4, LZ4HC(0), NONE, NONE, NONE, ZSTD(1), Delta(8))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.compression_codec_multiple_more_types\n(\n `id` Decimal(38, 13) CODEC(ZSTD(1), LZ4, ZSTD(1), ZSTD(1), Delta(2), Delta(4), Delta(1), LZ4HC(0), DEFLATE_QPL),\n `data` FixedString(12) CODEC(ZSTD(1), ZSTD(1), NONE, NONE, NONE, LZ4HC(0), DEFLATE_QPL),\n `ddd.age` Array(UInt8) CODEC(LZ4, LZ4HC(0), NONE, NONE, NONE, ZSTD(1), Delta(8), DEFLATE_QPL),\n `ddd.Name` Array(String) CODEC(LZ4, LZ4HC(0), NONE, NONE, NONE, ZSTD(1), Delta(8), DEFLATE_QPL)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 1.5555555555555 hello world! [77] ['John'] 7.1 xxxxxxxxxxxx [127] ['Henry'] ! diff --git a/tests/queries/0_stateless/00804_test_custom_compression_codecs.sql b/tests/queries/0_stateless/00804_test_custom_compression_codecs.sql index c080c2fc98e..44a0daada27 100644 --- a/tests/queries/0_stateless/00804_test_custom_compression_codecs.sql +++ b/tests/queries/0_stateless/00804_test_custom_compression_codecs.sql @@ -1,5 +1,6 @@ SET send_logs_level = 'fatal'; SET allow_suspicious_codecs = 1; +SET enable_qpl_deflate = 1; DROP TABLE IF EXISTS compression_codec; @@ -9,18 +10,20 @@ CREATE TABLE compression_codec( ddd Date CODEC(NONE), somenum Float64 CODEC(ZSTD(2)), somestr FixedString(3) CODEC(LZ4HC(7)), - othernum Int64 CODEC(Delta) + othernum Int64 CODEC(Delta), + qplstr String CODEC(DEFLATE_QPL), + qplnum UInt32 CODEC(DEFLATE_QPL), ) ENGINE = MergeTree() ORDER BY tuple(); -INSERT INTO compression_codec VALUES(1, 'hello', toDate('2018-12-14'), 1.1, 'aaa', 5); -INSERT INTO compression_codec VALUES(2, 'world', toDate('2018-12-15'), 2.2, 'bbb', 6); -INSERT INTO compression_codec VALUES(3, '!', toDate('2018-12-16'), 3.3, 'ccc', 7); +INSERT INTO compression_codec VALUES(1, 'hello', toDate('2018-12-14'), 1.1, 'aaa', 5, 'qpl11', 11); +INSERT INTO compression_codec VALUES(2, 'world', toDate('2018-12-15'), 2.2, 'bbb', 6,'qpl22', 22); +INSERT INTO compression_codec VALUES(3, '!', toDate('2018-12-16'), 3.3, 'ccc', 7, 'qpl33', 33); SELECT * FROM compression_codec ORDER BY id; OPTIMIZE TABLE compression_codec FINAL; -INSERT INTO compression_codec VALUES(2, '', toDate('2018-12-13'), 4.4, 'ddd', 8); +INSERT INTO compression_codec VALUES(2, '', toDate('2018-12-13'), 4.4, 'ddd', 8, 'qpl44', 44); DETACH TABLE compression_codec; ATTACH TABLE compression_codec; @@ -31,25 +34,31 @@ DROP TABLE IF EXISTS compression_codec; DROP TABLE IF EXISTS bad_codec; DROP TABLE IF EXISTS params_when_no_params; +DROP TABLE IF EXISTS params_when_no_params2; DROP TABLE IF EXISTS too_many_params; DROP TABLE IF EXISTS 
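-- Sketch of the property the multi-codec tables below check: DEFLATE_QPL composes
-- with delta and other general-purpose codecs in a single chain (illustrative names):
--   CREATE TABLE qpl_chain (n UInt64 CODEC(Delta(8), DEFLATE_QPL)) ENGINE = MergeTree ORDER BY tuple();
--   INSERT INTO qpl_chain SELECT number FROM numbers(1000);
--   SELECT sum(n) FROM qpl_chain; -- data must round-trip through both stages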
codec_multiple_direct_specification_1; DROP TABLE IF EXISTS codec_multiple_direct_specification_2; +DROP TABLE IF EXISTS codec_multiple_direct_specification_3; DROP TABLE IF EXISTS delta_bad_params1; DROP TABLE IF EXISTS delta_bad_params2; CREATE TABLE bad_codec(id UInt64 CODEC(adssadads)) ENGINE = MergeTree() order by tuple(); -- { serverError 432 } CREATE TABLE too_many_params(id UInt64 CODEC(ZSTD(2,3,4,5))) ENGINE = MergeTree() order by tuple(); -- { serverError 431 } CREATE TABLE params_when_no_params(id UInt64 CODEC(LZ4(1))) ENGINE = MergeTree() ORDER BY tuple(); -- { serverError 378 } +CREATE TABLE params_when_no_params2(id UInt64 CODEC(DEFLATE_QPL(1))) ENGINE = MergeTree() ORDER BY tuple(); -- { serverError 378 } CREATE TABLE codec_multiple_direct_specification_1(id UInt64 CODEC(MULTIPLE(LZ4, ZSTD))) ENGINE = MergeTree() ORDER BY tuple(); -- { serverError 432 } CREATE TABLE codec_multiple_direct_specification_2(id UInt64 CODEC(multiple(LZ4, ZSTD))) ENGINE = MergeTree() ORDER BY tuple(); -- { serverError 432 } +CREATE TABLE codec_multiple_direct_specification_3(id UInt64 CODEC(multiple(LZ4, DEFLATE_QPL))) ENGINE = MergeTree() ORDER BY tuple(); -- { serverError 432 } CREATE TABLE delta_bad_params1(id UInt64 CODEC(Delta(3))) ENGINE = MergeTree() ORDER BY tuple(); -- { serverError 433 } CREATE TABLE delta_bad_params2(id UInt64 CODEC(Delta(16))) ENGINE = MergeTree() ORDER BY tuple(); -- { serverError 433 } DROP TABLE IF EXISTS bad_codec; DROP TABLE IF EXISTS params_when_no_params; +DROP TABLE IF EXISTS params_when_no_params2; DROP TABLE IF EXISTS too_many_params; DROP TABLE IF EXISTS codec_multiple_direct_specification_1; DROP TABLE IF EXISTS codec_multiple_direct_specification_2; +DROP TABLE IF EXISTS codec_multiple_direct_specification_3; DROP TABLE IF EXISTS delta_bad_params1; DROP TABLE IF EXISTS delta_bad_params2; @@ -58,10 +67,10 @@ DROP TABLE IF EXISTS compression_codec_multiple; SET network_compression_method = 'lz4hc'; CREATE TABLE compression_codec_multiple ( - id UInt64 CODEC(LZ4, ZSTD, NONE, LZ4HC, Delta(4)), - data String CODEC(ZSTD(2), NONE, Delta(2), LZ4HC, LZ4, LZ4, Delta(8)), - ddd Date CODEC(NONE, NONE, NONE, Delta(1), LZ4, ZSTD, LZ4HC, LZ4HC), - somenum Float64 CODEC(Delta(4), LZ4, LZ4, ZSTD(2), LZ4HC(5), ZSTD(3), ZSTD) + id UInt64 CODEC(LZ4, ZSTD, NONE, LZ4HC, Delta(4), DEFLATE_QPL), + data String CODEC(ZSTD(2), NONE, Delta(2), LZ4HC, LZ4, LZ4, Delta(8), DEFLATE_QPL), + ddd Date CODEC(NONE, NONE, NONE, Delta(1), LZ4, ZSTD, LZ4HC, LZ4HC, DEFLATE_QPL), + somenum Float64 CODEC(Delta(4), LZ4, LZ4, ZSTD(2), LZ4HC(5), ZSTD(3), ZSTD, DEFLATE_QPL) ) ENGINE = MergeTree() ORDER BY tuple(); INSERT INTO compression_codec_multiple VALUES (1, 'world', toDate('2018-10-05'), 1.1), (2, 'hello', toDate('2018-10-01'), 2.2), (3, 'buy', toDate('2018-10-11'), 3.3); @@ -85,15 +94,15 @@ SELECT sum(cityHash64(*)) FROM compression_codec_multiple; DROP TABLE IF EXISTS compression_codec_multiple_more_types; CREATE TABLE compression_codec_multiple_more_types ( - id Decimal128(13) CODEC(ZSTD, LZ4, ZSTD, ZSTD, Delta(2), Delta(4), Delta(1), LZ4HC), - data FixedString(12) CODEC(ZSTD, ZSTD, Delta, Delta, Delta, NONE, NONE, NONE, LZ4HC), - ddd Nested (age UInt8, Name String) CODEC(LZ4, LZ4HC, NONE, NONE, NONE, ZSTD, Delta(8)) + id Decimal128(13) CODEC(ZSTD, LZ4, ZSTD, ZSTD, Delta(2), Delta(4), Delta(1), LZ4HC, DEFLATE_QPL), + data FixedString(12) CODEC(ZSTD, ZSTD, Delta, Delta, Delta, NONE, NONE, NONE, LZ4HC, DEFLATE_QPL), + ddd Nested (age UInt8, Name String) CODEC(LZ4, LZ4HC, NONE, NONE, NONE, ZSTD, 
Delta(8), DEFLATE_QPL) ) ENGINE = MergeTree() ORDER BY tuple(); -- { serverError 36 } CREATE TABLE compression_codec_multiple_more_types ( - id Decimal128(13) CODEC(ZSTD, LZ4, ZSTD, ZSTD, Delta(2), Delta(4), Delta(1), LZ4HC), - data FixedString(12) CODEC(ZSTD, ZSTD, NONE, NONE, NONE, LZ4HC), - ddd Nested (age UInt8, Name String) CODEC(LZ4, LZ4HC, NONE, NONE, NONE, ZSTD, Delta(8)) + id Decimal128(13) CODEC(ZSTD, LZ4, ZSTD, ZSTD, Delta(2), Delta(4), Delta(1), LZ4HC, DEFLATE_QPL), + data FixedString(12) CODEC(ZSTD, ZSTD, NONE, NONE, NONE, LZ4HC, DEFLATE_QPL), + ddd Nested (age UInt8, Name String) CODEC(LZ4, LZ4HC, NONE, NONE, NONE, ZSTD, Delta(8), DEFLATE_QPL) ) ENGINE = MergeTree() ORDER BY tuple(); SHOW CREATE TABLE compression_codec_multiple_more_types; @@ -109,9 +118,9 @@ SET network_compression_method = 'zstd'; SET network_zstd_compression_level = 5; CREATE TABLE compression_codec_multiple_with_key ( - somedate Date CODEC(ZSTD, ZSTD, ZSTD(12), LZ4HC(12), Delta, Delta), - id UInt64 CODEC(LZ4, ZSTD, Delta, NONE, LZ4HC, Delta), - data String CODEC(ZSTD(2), Delta(1), LZ4HC, NONE, LZ4, LZ4) + somedate Date CODEC(ZSTD, ZSTD, ZSTD(12), LZ4HC(12), Delta, Delta, DEFLATE_QPL), + id UInt64 CODEC(LZ4, ZSTD, Delta, NONE, LZ4HC, Delta, DEFLATE_QPL), + data String CODEC(ZSTD(2), Delta(1), LZ4HC, NONE, LZ4, LZ4, DEFLATE_QPL) ) ENGINE = MergeTree() PARTITION BY somedate ORDER BY id SETTINGS index_granularity = 2, index_granularity_bytes = '10Mi'; diff --git a/tests/queries/0_stateless/00804_test_custom_compression_codes_log_storages.reference b/tests/queries/0_stateless/00804_test_custom_compression_codes_log_storages.reference index 8145ca99829..d64b8a77eed 100644 --- a/tests/queries/0_stateless/00804_test_custom_compression_codes_log_storages.reference +++ b/tests/queries/0_stateless/00804_test_custom_compression_codes_log_storages.reference @@ -1,9 +1,9 @@ -CREATE TABLE default.compression_codec_log\n(\n `id` UInt64 CODEC(LZ4),\n `data` String CODEC(ZSTD(1)),\n `ddd` Date CODEC(NONE),\n `somenum` Float64 CODEC(ZSTD(2)),\n `somestr` FixedString(3) CODEC(LZ4HC(7)),\n `othernum` Int64 CODEC(Delta(8))\n)\nENGINE = Log -1 hello 2018-12-14 1.1 aaa 5 -2 world 2018-12-15 2.2 bbb 6 -3 ! 2018-12-16 3.3 ccc 7 +CREATE TABLE default.compression_codec_log\n(\n `id` UInt64 CODEC(LZ4),\n `data` String CODEC(ZSTD(1)),\n `ddd` Date CODEC(NONE),\n `somenum` Float64 CODEC(ZSTD(2)),\n `somestr` FixedString(3) CODEC(LZ4HC(7)),\n `othernum` Int64 CODEC(Delta(8)),\n `qplstr` String CODEC(DEFLATE_QPL),\n `qplnum` UInt32 CODEC(DEFLATE_QPL)\n)\nENGINE = Log +1 hello 2018-12-14 1.1 aaa 5 qpl11 11 +2 world 2018-12-15 2.2 bbb 6 qpl22 22 +3 ! 
2018-12-16 3.3 ccc 7 qpl33 33 2 -CREATE TABLE default.compression_codec_multiple_log\n(\n `id` UInt64 CODEC(LZ4, ZSTD(1), NONE, LZ4HC(0), Delta(4)),\n `data` String CODEC(ZSTD(2), NONE, Delta(2), LZ4HC(0), LZ4, LZ4, Delta(8)),\n `ddd` Date CODEC(NONE, NONE, NONE, Delta(1), LZ4, ZSTD(1), LZ4HC(0), LZ4HC(0)),\n `somenum` Float64 CODEC(Delta(4), LZ4, LZ4, ZSTD(2), LZ4HC(5), ZSTD(3), ZSTD(1))\n)\nENGINE = Log +CREATE TABLE default.compression_codec_multiple_log\n(\n `id` UInt64 CODEC(LZ4, ZSTD(1), NONE, LZ4HC(0), Delta(4), DEFLATE_QPL),\n `data` String CODEC(ZSTD(2), NONE, Delta(2), LZ4HC(0), LZ4, LZ4, Delta(8), DEFLATE_QPL),\n `ddd` Date CODEC(NONE, NONE, NONE, Delta(1), LZ4, ZSTD(1), LZ4HC(0), LZ4HC(0), DEFLATE_QPL),\n `somenum` Float64 CODEC(Delta(4), LZ4, LZ4, ZSTD(2), LZ4HC(5), ZSTD(3), ZSTD(1), DEFLATE_QPL)\n)\nENGINE = Log 1 world 2018-10-05 1.1 2 hello 2018-10-01 2.2 3 buy 2018-10-11 3.3 @@ -11,12 +11,12 @@ CREATE TABLE default.compression_codec_multiple_log\n(\n `id` UInt64 CODEC(LZ 10003 274972506.6 9175437371954010821 -CREATE TABLE default.compression_codec_tiny_log\n(\n `id` UInt64 CODEC(LZ4),\n `data` String CODEC(ZSTD(1)),\n `ddd` Date CODEC(NONE),\n `somenum` Float64 CODEC(ZSTD(2)),\n `somestr` FixedString(3) CODEC(LZ4HC(7)),\n `othernum` Int64 CODEC(Delta(8))\n)\nENGINE = TinyLog -1 hello 2018-12-14 1.1 aaa 5 -2 world 2018-12-15 2.2 bbb 6 -3 ! 2018-12-16 3.3 ccc 7 +CREATE TABLE default.compression_codec_tiny_log\n(\n `id` UInt64 CODEC(LZ4),\n `data` String CODEC(ZSTD(1)),\n `ddd` Date CODEC(NONE),\n `somenum` Float64 CODEC(ZSTD(2)),\n `somestr` FixedString(3) CODEC(LZ4HC(7)),\n `othernum` Int64 CODEC(Delta(8)),\n `qplstr` String CODEC(DEFLATE_QPL),\n `qplnum` UInt32 CODEC(DEFLATE_QPL)\n)\nENGINE = TinyLog +1 hello 2018-12-14 1.1 aaa 5 qpl11 11 +2 world 2018-12-15 2.2 bbb 6 qpl22 22 +3 ! 
2018-12-16 3.3 ccc 7 qpl33 33 2 -CREATE TABLE default.compression_codec_multiple_tiny_log\n(\n `id` UInt64 CODEC(LZ4, ZSTD(1), NONE, LZ4HC(0), Delta(4)),\n `data` String CODEC(ZSTD(2), NONE, Delta(2), LZ4HC(0), LZ4, LZ4, Delta(8)),\n `ddd` Date CODEC(NONE, NONE, NONE, Delta(1), LZ4, ZSTD(1), LZ4HC(0), LZ4HC(0)),\n `somenum` Float64 CODEC(Delta(4), LZ4, LZ4, ZSTD(2), LZ4HC(5), ZSTD(3), ZSTD(1))\n)\nENGINE = TinyLog +CREATE TABLE default.compression_codec_multiple_tiny_log\n(\n `id` UInt64 CODEC(LZ4, ZSTD(1), NONE, LZ4HC(0), Delta(4), DEFLATE_QPL),\n `data` String CODEC(ZSTD(2), NONE, Delta(2), LZ4HC(0), LZ4, LZ4, Delta(8), DEFLATE_QPL),\n `ddd` Date CODEC(NONE, NONE, NONE, Delta(1), LZ4, ZSTD(1), LZ4HC(0), LZ4HC(0), DEFLATE_QPL),\n `somenum` Float64 CODEC(Delta(4), LZ4, LZ4, ZSTD(2), LZ4HC(5), ZSTD(3), ZSTD(1), DEFLATE_QPL)\n)\nENGINE = TinyLog 1 world 2018-10-05 1.1 2 hello 2018-10-01 2.2 3 buy 2018-10-11 3.3 diff --git a/tests/queries/0_stateless/00804_test_custom_compression_codes_log_storages.sql b/tests/queries/0_stateless/00804_test_custom_compression_codes_log_storages.sql index fba6a216762..113f26732e7 100644 --- a/tests/queries/0_stateless/00804_test_custom_compression_codes_log_storages.sql +++ b/tests/queries/0_stateless/00804_test_custom_compression_codes_log_storages.sql @@ -1,5 +1,6 @@ SET send_logs_level = 'fatal'; SET allow_suspicious_codecs = 1; +SET enable_qpl_deflate = 1; -- copy-paste for storage log @@ -11,18 +12,20 @@ CREATE TABLE compression_codec_log( ddd Date CODEC(NONE), somenum Float64 CODEC(ZSTD(2)), somestr FixedString(3) CODEC(LZ4HC(7)), - othernum Int64 CODEC(Delta) + othernum Int64 CODEC(Delta), + qplstr String CODEC(DEFLATE_QPL), + qplnum UInt32 CODEC(DEFLATE_QPL), ) ENGINE = Log(); SHOW CREATE TABLE compression_codec_log; -INSERT INTO compression_codec_log VALUES(1, 'hello', toDate('2018-12-14'), 1.1, 'aaa', 5); -INSERT INTO compression_codec_log VALUES(2, 'world', toDate('2018-12-15'), 2.2, 'bbb', 6); -INSERT INTO compression_codec_log VALUES(3, '!', toDate('2018-12-16'), 3.3, 'ccc', 7); +INSERT INTO compression_codec_log VALUES(1, 'hello', toDate('2018-12-14'), 1.1, 'aaa', 5, 'qpl11', 11); +INSERT INTO compression_codec_log VALUES(2, 'world', toDate('2018-12-15'), 2.2, 'bbb', 6,'qpl22', 22); +INSERT INTO compression_codec_log VALUES(3, '!', toDate('2018-12-16'), 3.3, 'ccc', 7, 'qpl33', 33); SELECT * FROM compression_codec_log ORDER BY id; -INSERT INTO compression_codec_log VALUES(2, '', toDate('2018-12-13'), 4.4, 'ddd', 8); +INSERT INTO compression_codec_log VALUES(2, '', toDate('2018-12-13'), 4.4, 'ddd', 8, 'qpl44', 44); DETACH TABLE compression_codec_log; ATTACH TABLE compression_codec_log; @@ -34,10 +37,10 @@ DROP TABLE IF EXISTS compression_codec_log; DROP TABLE IF EXISTS compression_codec_multiple_log; CREATE TABLE compression_codec_multiple_log ( - id UInt64 CODEC(LZ4, ZSTD, NONE, LZ4HC, Delta(4)), - data String CODEC(ZSTD(2), NONE, Delta(2), LZ4HC, LZ4, LZ4, Delta(8)), - ddd Date CODEC(NONE, NONE, NONE, Delta(1), LZ4, ZSTD, LZ4HC, LZ4HC), - somenum Float64 CODEC(Delta(4), LZ4, LZ4, ZSTD(2), LZ4HC(5), ZSTD(3), ZSTD) + id UInt64 CODEC(LZ4, ZSTD, NONE, LZ4HC, Delta(4), DEFLATE_QPL), + data String CODEC(ZSTD(2), NONE, Delta(2), LZ4HC, LZ4, LZ4, Delta(8), DEFLATE_QPL), + ddd Date CODEC(NONE, NONE, NONE, Delta(1), LZ4, ZSTD, LZ4HC, LZ4HC, DEFLATE_QPL), + somenum Float64 CODEC(Delta(4), LZ4, LZ4, ZSTD(2), LZ4HC(5), ZSTD(3), ZSTD, DEFLATE_QPL) ) ENGINE = Log(); SHOW CREATE TABLE compression_codec_multiple_log; @@ -69,18 +72,20 @@ CREATE TABLE 
compression_codec_tiny_log( ddd Date CODEC(NONE), somenum Float64 CODEC(ZSTD(2)), somestr FixedString(3) CODEC(LZ4HC(7)), - othernum Int64 CODEC(Delta) + othernum Int64 CODEC(Delta), + qplstr String CODEC(DEFLATE_QPL), + qplnum UInt32 CODEC(DEFLATE_QPL), ) ENGINE = TinyLog(); SHOW CREATE TABLE compression_codec_tiny_log; -INSERT INTO compression_codec_tiny_log VALUES(1, 'hello', toDate('2018-12-14'), 1.1, 'aaa', 5); -INSERT INTO compression_codec_tiny_log VALUES(2, 'world', toDate('2018-12-15'), 2.2, 'bbb', 6); -INSERT INTO compression_codec_tiny_log VALUES(3, '!', toDate('2018-12-16'), 3.3, 'ccc', 7); +INSERT INTO compression_codec_tiny_log VALUES(1, 'hello', toDate('2018-12-14'), 1.1, 'aaa', 5, 'qpl11', 11); +INSERT INTO compression_codec_tiny_log VALUES(2, 'world', toDate('2018-12-15'), 2.2, 'bbb', 6, 'qpl22', 22); +INSERT INTO compression_codec_tiny_log VALUES(3, '!', toDate('2018-12-16'), 3.3, 'ccc', 7, 'qpl33', 33); SELECT * FROM compression_codec_tiny_log ORDER BY id; -INSERT INTO compression_codec_tiny_log VALUES(2, '', toDate('2018-12-13'), 4.4, 'ddd', 8); +INSERT INTO compression_codec_tiny_log VALUES(2, '', toDate('2018-12-13'), 4.4, 'ddd', 8, 'qpl44', 44); DETACH TABLE compression_codec_tiny_log; ATTACH TABLE compression_codec_tiny_log; @@ -92,10 +97,10 @@ DROP TABLE IF EXISTS compression_codec_tiny_log; DROP TABLE IF EXISTS compression_codec_multiple_tiny_log; CREATE TABLE compression_codec_multiple_tiny_log ( - id UInt64 CODEC(LZ4, ZSTD, NONE, LZ4HC, Delta(4)), - data String CODEC(ZSTD(2), NONE, Delta(2), LZ4HC, LZ4, LZ4, Delta(8)), - ddd Date CODEC(NONE, NONE, NONE, Delta(1), LZ4, ZSTD, LZ4HC, LZ4HC), - somenum Float64 CODEC(Delta(4), LZ4, LZ4, ZSTD(2), LZ4HC(5), ZSTD(3), ZSTD) + id UInt64 CODEC(LZ4, ZSTD, NONE, LZ4HC, Delta(4), DEFLATE_QPL), + data String CODEC(ZSTD(2), NONE, Delta(2), LZ4HC, LZ4, LZ4, Delta(8), DEFLATE_QPL), + ddd Date CODEC(NONE, NONE, NONE, Delta(1), LZ4, ZSTD, LZ4HC, LZ4HC, DEFLATE_QPL), + somenum Float64 CODEC(Delta(4), LZ4, LZ4, ZSTD(2), LZ4HC(5), ZSTD(3), ZSTD, DEFLATE_QPL) ) ENGINE = TinyLog(); SHOW CREATE TABLE compression_codec_multiple_tiny_log; diff --git a/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.reference b/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.reference new file mode 100644 index 00000000000..88d274d9cba --- /dev/null +++ b/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.reference @@ -0,0 +1,4 @@ +1 hello 2018-12-14 1.1 aaa 5 qpl11 11 +2 world 2018-12-15 2.2 bbb 6 qpl22 22 +3 ! 
2018-12-16 3.3 ccc 7 qpl33 33 +2 diff --git a/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.sql b/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.sql new file mode 100644 index 00000000000..fe23e49804d --- /dev/null +++ b/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.sql @@ -0,0 +1,32 @@ +SET send_logs_level = 'fatal'; +SET enable_qpl_deflate = 1; + +DROP TABLE IF EXISTS compression_codec; + +CREATE TABLE compression_codec( + id UInt64 CODEC(DEFLATE_QPL), + data String CODEC(DEFLATE_QPL), + ddd Date CODEC(DEFLATE_QPL), + somenum Float64 CODEC(DEFLATE_QPL), + somestr FixedString(3) CODEC(DEFLATE_QPL), + othernum Int64 CODEC(DEFLATE_QPL), + qplstr String CODEC(DEFLATE_QPL), + qplnum UInt32 CODEC(DEFLATE_QPL), +) ENGINE = MergeTree() ORDER BY tuple(); + +INSERT INTO compression_codec VALUES(1, 'hello', toDate('2018-12-14'), 1.1, 'aaa', 5, 'qpl11', 11); +INSERT INTO compression_codec VALUES(2, 'world', toDate('2018-12-15'), 2.2, 'bbb', 6,'qpl22', 22); +INSERT INTO compression_codec VALUES(3, '!', toDate('2018-12-16'), 3.3, 'ccc', 7, 'qpl33', 33); + +SELECT * FROM compression_codec ORDER BY id; + +OPTIMIZE TABLE compression_codec FINAL; + +INSERT INTO compression_codec VALUES(2, '', toDate('2018-12-13'), 4.4, 'ddd', 8, 'qpl44', 44); + +DETACH TABLE compression_codec; +ATTACH TABLE compression_codec; + +SELECT count(*) FROM compression_codec WHERE id = 2 GROUP BY id; + +DROP TABLE IF EXISTS compression_codec; diff --git a/tests/queries/0_stateless/00804_test_delta_codec_compression.reference b/tests/queries/0_stateless/00804_test_delta_codec_compression.reference index 949d37ed27a..37f9d4901b3 100644 --- a/tests/queries/0_stateless/00804_test_delta_codec_compression.reference +++ b/tests/queries/0_stateless/00804_test_delta_codec_compression.reference @@ -4,3 +4,5 @@ 1 32 1 +17 +1 diff --git a/tests/queries/0_stateless/00804_test_delta_codec_compression.sql b/tests/queries/0_stateless/00804_test_delta_codec_compression.sql index 25988f6474b..f9805246662 100644 --- a/tests/queries/0_stateless/00804_test_delta_codec_compression.sql +++ b/tests/queries/0_stateless/00804_test_delta_codec_compression.sql @@ -115,3 +115,41 @@ USING(key); DROP TABLE IF EXISTS delta_codec_string; DROP TABLE IF EXISTS default_codec_string; + +SET enable_qpl_deflate = 1; +DROP TABLE IF EXISTS delta_codec_string_qpl; +DROP TABLE IF EXISTS default_codec_string_qpl; + +CREATE TABLE delta_codec_string_qpl +( + id Float64 Codec(Delta, DEFLATE_QPL) +) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key=false; + +CREATE TABLE default_codec_string_qpl +( + id Float64 Codec(DEFLATE_QPL) +) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key=false; + +INSERT INTO delta_codec_string_qpl SELECT concat(toString(number), toString(number % 100)) FROM numbers(1547510400, 500000); +INSERT INTO default_codec_string_qpl SELECT * from delta_codec_string_qpl; + +OPTIMIZE TABLE delta_codec_string_qpl FINAL; +OPTIMIZE TABLE default_codec_string_qpl FINAL; + +SELECT + floor(big_size / small_size) as ratio +FROM + (SELECT 1 AS key, sum(bytes_on_disk) AS small_size FROM system.parts WHERE database = currentDatabase() and table = 'delta_codec_string_qpl' and active) +INNER JOIN + (SELECT 1 AS key, sum(bytes_on_disk) as big_size FROM system.parts WHERE database = currentDatabase() and table = 'default_codec_string_qpl' and active) USING(key); + +SELECT + 
small_hash == big_hash +FROM + (SELECT 1 AS key, sum(cityHash64(*)) AS small_hash FROM delta_codec_string_qpl) +INNER JOIN + (SELECT 1 AS key, sum(cityHash64(*)) AS big_hash FROM default_codec_string_qpl) +USING(key); + +DROP TABLE IF EXISTS delta_codec_string_qpl; +DROP TABLE IF EXISTS default_codec_string_qpl; From 31173ab55b0926f634c2fbfc06f7d2f34410a4ff Mon Sep 17 00:00:00 2001 From: jinjunzh Date: Wed, 24 May 2023 15:15:40 -0400 Subject: [PATCH 0258/1997] add sections of deflate_qpl for stress test and performance test --- tests/ci/stress.py | 1 + tests/performance/codecs_float_insert.xml | 2 ++ tests/performance/codecs_float_select.xml | 2 ++ tests/performance/codecs_int_insert.xml | 2 ++ tests/performance/codecs_int_select.xml | 2 ++ 5 files changed, 9 insertions(+) diff --git a/tests/ci/stress.py b/tests/ci/stress.py index b9044874071..b95cac9044e 100755 --- a/tests/ci/stress.py +++ b/tests/ci/stress.py @@ -20,6 +20,7 @@ def get_options(i, upgrade_check): '''--db-engine="Replicated('/test/db/test_{}', 's1', 'r1')"'''.format(i) ) client_options.append("allow_experimental_database_replicated=1") + client_options.append("enable_qpl_deflate=1") # If database name is not specified, new database is created for each functional test. # Run some threads with one database for all tests. diff --git a/tests/performance/codecs_float_insert.xml b/tests/performance/codecs_float_insert.xml index 64325d30189..25291f7f499 100644 --- a/tests/performance/codecs_float_insert.xml +++ b/tests/performance/codecs_float_insert.xml @@ -1,6 +1,7 @@ 1 + 1 @@ -10,6 +11,7 @@ NONE LZ4 ZSTD + DEFLATE_QPL DoubleDelta Gorilla FPC diff --git a/tests/performance/codecs_float_select.xml b/tests/performance/codecs_float_select.xml index 325c140d9a0..bb67987c75e 100644 --- a/tests/performance/codecs_float_select.xml +++ b/tests/performance/codecs_float_select.xml @@ -1,6 +1,7 @@ 1 + 1 @@ -10,6 +11,7 @@ NONE LZ4 ZSTD + DEFLATE_QPL DoubleDelta Gorilla FPC diff --git a/tests/performance/codecs_int_insert.xml b/tests/performance/codecs_int_insert.xml index 618e20160f8..1db9ee8f746 100644 --- a/tests/performance/codecs_int_insert.xml +++ b/tests/performance/codecs_int_insert.xml @@ -1,6 +1,7 @@ 1 + 1 @@ -10,6 +11,7 @@ NONE LZ4 ZSTD + DEFLATE_QPL Delta T64 DoubleDelta diff --git a/tests/performance/codecs_int_select.xml b/tests/performance/codecs_int_select.xml index 62c1ee16e7b..5dc7ab48704 100644 --- a/tests/performance/codecs_int_select.xml +++ b/tests/performance/codecs_int_select.xml @@ -1,6 +1,7 @@ 1 + 1 @@ -10,6 +11,7 @@ NONE LZ4 ZSTD + DEFLATE_QPL Delta T64 DoubleDelta From cbdb408ec8330c8ce469c68e979ca208c76d0629 Mon Sep 17 00:00:00 2001 From: jinjunzh Date: Fri, 26 May 2023 12:15:34 -0400 Subject: [PATCH 0259/1997] add USE_QPL for buildoptions --- src/Storages/System/StorageSystemBuildOptions.cpp.in | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Storages/System/StorageSystemBuildOptions.cpp.in b/src/Storages/System/StorageSystemBuildOptions.cpp.in index 3465e47449b..c2a188e7750 100644 --- a/src/Storages/System/StorageSystemBuildOptions.cpp.in +++ b/src/Storages/System/StorageSystemBuildOptions.cpp.in @@ -68,6 +68,7 @@ const char * auto_config_build[] "GIT_BRANCH", R"IRjaNsZIL9Yh7FQ4(@GIT_BRANCH@)IRjaNsZIL9Yh7FQ4", "GIT_DATE", "@GIT_DATE@", "GIT_COMMIT_SUBJECT", R"Gi17KJMlbGCjErEN(@GIT_COMMIT_SUBJECT@)Gi17KJMlbGCjErEN", + "USE_QPL", "@ENABLE_QPL@", nullptr, nullptr }; From f1192d59afa7ee2271d7ee6b5cb9d98bb27254a0 Mon Sep 17 00:00:00 2001 From: jinjunzh Date: Thu, 1 Jun 2023 12:42:22 -0400 Subject: [PATCH 
0260/1997] refine patch according to comments --- .../sql-reference/statements/create/table.md | 2 +- src/Client/Connection.cpp | 2 +- src/Compression/CompressionFactory.h | 4 +- .../CompressionFactoryAdditions.cpp | 12 +++--- src/Compression/ICompressionCodec.h | 6 +-- src/Core/Settings.h | 2 +- src/Interpreters/InterpreterCreateQuery.cpp | 4 +- src/Server/TCPHandler.cpp | 2 +- src/Storages/AlterCommands.cpp | 4 +- src/Storages/Distributed/DistributedSink.cpp | 2 +- src/Storages/TTLDescription.cpp | 2 +- tests/ci/stress.py | 2 +- .../configs/enable_deflateqpl_codec.xml | 2 +- .../test_non_default_compression/test.py | 32 ++++++++-------- tests/performance/codecs_float_insert.xml | 3 +- tests/performance/codecs_float_select.xml | 3 +- tests/performance/codecs_int_insert.xml | 3 +- tests/performance/codecs_int_select.xml | 3 +- ...04_test_alter_compression_codecs.reference | 4 +- .../00804_test_alter_compression_codecs.sql | 10 ++--- ...4_test_custom_compression_codecs.reference | 6 +-- .../00804_test_custom_compression_codecs.sql | 13 +++---- ..._custom_compression_codes_log_storages.sql | 2 +- ...st_deflate_qpl_codec_compression.reference | 6 +-- ...804_test_deflate_qpl_codec_compression.sql | 16 ++++---- ...804_test_delta_codec_compression.reference | 2 - .../00804_test_delta_codec_compression.sql | 38 ------------------- 27 files changed, 71 insertions(+), 116 deletions(-) diff --git a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md index b0865ad2896..d0e17410791 100644 --- a/docs/en/sql-reference/statements/create/table.md +++ b/docs/en/sql-reference/statements/create/table.md @@ -380,7 +380,7 @@ High compression levels are useful for asymmetric scenarios, like compress once, `DEFLATE_QPL` — [Deflate compression algorithm](https://github.com/intel/qpl) implemented by Intel® Query Processing Library. Some limitations apply: -- DEFLATE_QPL is disabled by default and can only be used after setting configuration parameter `enable_qpl_deflate=1`. +- DEFLATE_QPL is disabled by default and can only be used after setting configuration parameter `enable_qpl_deflate_codec=1`. - DEFLATE_QPL requires a ClickHouse build compiled with SSE 4.2 instructions (by default, this is the case). Refer to [Build Clickhouse with DEFLATE_QPL](/docs/en/development/building_and_benchmarking_deflate_qpl.md/#Build-Clickhouse-with-DEFLATE_QPL) for more details. - DEFLATE_QPL works best if the system has a Intel® IAA (In-Memory Analytics Accelerator) offloading device. Refer to [Accelerator Configuration](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#accelerator-configuration) and [Benchmark with DEFLATE_QPL](/docs/en/development/building_and_benchmarking_deflate_qpl.md/#Run-Benchmark-with-DEFLATE_QPL) for more details. - DEFLATE_QPL-compressed data can only be transferred between ClickHouse nodes compiled with SSE 4.2 enabled. 
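To make the gating described above concrete, here is a minimal SQL sketch of the workflow the 00804 tests in this patch exercise (the table name is illustrative; at this point in the series the switch is still called `enable_qpl_deflate_codec`, default 0):

SET enable_qpl_deflate_codec = 1; -- without this, declaring the codec fails with BAD_ARGUMENTS ("Codec DEFLATE_QPL is disabled by default.")

CREATE TABLE qpl_demo
(
    id UInt64 CODEC(DEFLATE_QPL),
    data String CODEC(DEFLATE_QPL)
) ENGINE = MergeTree() ORDER BY tuple();

INSERT INTO qpl_demo VALUES (1, 'hello');
SELECT * FROM qpl_demo ORDER BY id;
DROP TABLE qpl_demo;
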
diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 68bc3b39a56..ac8e6654e84 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -588,7 +588,7 @@ void Connection::sendQuery( if (method == "ZSTD") level = settings->network_zstd_compression_level; - CompressionCodecFactory::instance().validateCodec(method, level, !settings->allow_suspicious_codecs, settings->allow_experimental_codecs, settings->enable_qpl_deflate); + CompressionCodecFactory::instance().validateCodec(method, level, !settings->allow_suspicious_codecs, settings->allow_experimental_codecs, settings->enable_qpl_deflate_codec); compression_codec = CompressionCodecFactory::instance().get(method, level); } else diff --git a/src/Compression/CompressionFactory.h b/src/Compression/CompressionFactory.h index 1fdaf4f1c71..e020e51bb09 100644 --- a/src/Compression/CompressionFactory.h +++ b/src/Compression/CompressionFactory.h @@ -40,10 +40,10 @@ public: CompressionCodecPtr getDefaultCodec() const; /// Validate codecs AST specified by user and parses codecs description (substitute default parameters) - ASTPtr validateCodecAndGetPreprocessedAST(const ASTPtr & ast, const DataTypePtr & column_type, bool sanity_check, bool allow_experimental_codecs, bool enable_qpl_deflate) const; + ASTPtr validateCodecAndGetPreprocessedAST(const ASTPtr & ast, const DataTypePtr & column_type, bool sanity_check, bool allow_experimental_codecs, bool enable_qpl_deflate_codec) const; /// Validate codecs AST specified by user - void validateCodec(const String & family_name, std::optional level, bool sanity_check, bool allow_experimental_codecs, bool enable_qpl_deflate) const; + void validateCodec(const String & family_name, std::optional level, bool sanity_check, bool allow_experimental_codecs, bool enable_qpl_deflate_codec) const; /// Get codec by AST and possible column_type. 
Some codecs can use /// information about type to improve inner settings, but every codec should diff --git a/src/Compression/CompressionFactoryAdditions.cpp b/src/Compression/CompressionFactoryAdditions.cpp index 2630326238a..b4a2d96cf39 100644 --- a/src/Compression/CompressionFactoryAdditions.cpp +++ b/src/Compression/CompressionFactoryAdditions.cpp @@ -34,7 +34,7 @@ namespace ErrorCodes void CompressionCodecFactory::validateCodec( - const String & family_name, std::optional level, bool sanity_check, bool allow_experimental_codecs, bool enable_qpl_deflate) const + const String & family_name, std::optional level, bool sanity_check, bool allow_experimental_codecs, bool enable_qpl_deflate_codec) const { if (family_name.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Compression codec name cannot be empty"); @@ -43,13 +43,13 @@ void CompressionCodecFactory::validateCodec( { auto literal = std::make_shared(static_cast(*level)); validateCodecAndGetPreprocessedAST(makeASTFunction("CODEC", makeASTFunction(Poco::toUpper(family_name), literal)), - {}, sanity_check, allow_experimental_codecs, enable_qpl_deflate); + {}, sanity_check, allow_experimental_codecs, enable_qpl_deflate_codec); } else { auto identifier = std::make_shared(Poco::toUpper(family_name)); validateCodecAndGetPreprocessedAST(makeASTFunction("CODEC", identifier), - {}, sanity_check, allow_experimental_codecs, enable_qpl_deflate); + {}, sanity_check, allow_experimental_codecs, enable_qpl_deflate_codec); } } @@ -77,7 +77,7 @@ bool innerDataTypeIsFloat(const DataTypePtr & type) } ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST( - const ASTPtr & ast, const DataTypePtr & column_type, bool sanity_check, bool allow_experimental_codecs, bool enable_qpl_deflate) const + const ASTPtr & ast, const DataTypePtr & column_type, bool sanity_check, bool allow_experimental_codecs, bool enable_qpl_deflate_codec) const { if (const auto * func = ast->as()) { @@ -159,10 +159,10 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST( " You can enable it with the 'allow_experimental_codecs' setting.", codec_family_name); - if (!enable_qpl_deflate && result_codec->isDeflateQplCompression()) + if (!enable_qpl_deflate_codec && result_codec->isDeflateQplCompression()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Codec {} is disabled by default." - " You can enable it with the 'enable_qpl_deflate' setting.", + " You can enable it with the 'enable_qpl_deflate_codec' setting.", codec_family_name); codecs_descriptions->children.emplace_back(result_codec->getCodecDesc()); diff --git a/src/Compression/ICompressionCodec.h b/src/Compression/ICompressionCodec.h index d92ad3fc718..f7e8f4e43d2 100644 --- a/src/Compression/ICompressionCodec.h +++ b/src/Compression/ICompressionCodec.h @@ -109,12 +109,12 @@ public: /// It will not be allowed to use unless the user will turn off the safety switch. virtual bool isExperimental() const { return false; } - /// If it does nothing. - virtual bool isNone() const { return false; } - /// This is a knob for Deflate QPL codec. virtual bool isDeflateQplCompression() const { return false; } + /// If it does nothing. 
+ virtual bool isNone() const { return false; } + protected: /// This is used for fuzz testing friend int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size); diff --git a/src/Core/Settings.h b/src/Core/Settings.h index c6a2069e6ae..4aae8f5d572 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -319,7 +319,7 @@ class IColumn; M(Bool, allow_distributed_ddl, true, "If it is set to true, then a user is allowed to executed distributed DDL queries.", 0) \ M(Bool, allow_suspicious_codecs, false, "If it is set to true, allow to specify meaningless compression codecs.", 0) \ M(Bool, allow_experimental_codecs, false, "If it is set to true, allow to specify experimental compression codecs (but we don't have those yet and this option does nothing).", 0) \ - M(Bool, enable_qpl_deflate, false, "If it is set to true, allow to use deflate_qpl for compression.", 0) \ + M(Bool, enable_qpl_deflate_codec, false, "If it is set to true, allow usage of the DEFLATE_QPL codec.", 0) \ M(UInt64, query_profiler_real_time_period_ns, QUERY_PROFILER_DEFAULT_SAMPLE_RATE_NS, "Period for real clock timer of query profiler (in nanoseconds). Set 0 value to turn off the real clock query profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \ M(UInt64, query_profiler_cpu_time_period_ns, QUERY_PROFILER_DEFAULT_SAMPLE_RATE_NS, "Period for CPU clock timer of query profiler (in nanoseconds). Set 0 value to turn off the CPU clock query profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \ M(Bool, metrics_perf_events_enabled, false, "If enabled, some of the perf events will be measured throughout queries' execution.", 0) \ diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 5c22b46b360..ddb53bbbfaa 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -571,7 +571,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( bool sanity_check_compression_codecs = !attach && !context_->getSettingsRef().allow_suspicious_codecs; bool allow_experimental_codecs = attach || context_->getSettingsRef().allow_experimental_codecs; - bool enable_qpl_deflate = attach || context_->getSettingsRef().enable_qpl_deflate; + bool enable_qpl_deflate_codec = attach || context_->getSettingsRef().enable_qpl_deflate_codec; ColumnsDescription res; auto name_type_it = column_names_and_types.begin(); @@ -632,7 +632,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( if (col_decl.default_specifier == "ALIAS") throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot specify codec for column type ALIAS"); column.codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST( - col_decl.codec, column.type, sanity_check_compression_codecs, allow_experimental_codecs, enable_qpl_deflate); + col_decl.codec, column.type, sanity_check_compression_codecs, allow_experimental_codecs, enable_qpl_deflate_codec); } if (col_decl.ttl) diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 96c585e7d16..b43fef9dd54 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1775,7 +1775,7 @@ void TCPHandler::initBlockOutput(const Block & block) if (state.compression == Protocol::Compression::Enable) { - CompressionCodecFactory::instance().validateCodec(method, level, 
!query_settings.allow_suspicious_codecs, query_settings.allow_experimental_codecs, query_settings.enable_qpl_deflate); + CompressionCodecFactory::instance().validateCodec(method, level, !query_settings.allow_suspicious_codecs, query_settings.allow_experimental_codecs, query_settings.enable_qpl_deflate_codec); state.maybe_compressed_out = std::make_shared( *out, CompressionCodecFactory::instance().get(method, level)); diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index ecbddfc3e2a..73d7be8dc56 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -1067,7 +1067,7 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const "this column name is reserved for lightweight delete feature", backQuote(column_name)); if (command.codec) - CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(command.codec, command.data_type, !context->getSettingsRef().allow_suspicious_codecs, context->getSettingsRef().allow_experimental_codecs, context->getSettingsRef().enable_qpl_deflate); + CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(command.codec, command.data_type, !context->getSettingsRef().allow_suspicious_codecs, context->getSettingsRef().allow_experimental_codecs, context->getSettingsRef().enable_qpl_deflate_codec); all_columns.add(ColumnDescription(column_name, command.data_type)); } @@ -1093,7 +1093,7 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const { if (all_columns.hasAlias(column_name)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot specify codec for column type ALIAS"); - CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(command.codec, command.data_type, !context->getSettingsRef().allow_suspicious_codecs, context->getSettingsRef().allow_experimental_codecs, context->getSettingsRef().enable_qpl_deflate); + CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(command.codec, command.data_type, !context->getSettingsRef().allow_suspicious_codecs, context->getSettingsRef().allow_experimental_codecs, context->getSettingsRef().enable_qpl_deflate_codec); } auto column_default = all_columns.getDefault(column_name); if (column_default) diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index ce1dbde8eae..e383890d1f7 100644 --- a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -733,7 +733,7 @@ void DistributedSink::writeToShard(const Cluster::ShardInfo & shard_info, const if (compression_method == "ZSTD") compression_level = settings.network_zstd_compression_level; - CompressionCodecFactory::instance().validateCodec(compression_method, compression_level, !settings.allow_suspicious_codecs, settings.allow_experimental_codecs, settings.enable_qpl_deflate); + CompressionCodecFactory::instance().validateCodec(compression_method, compression_level, !settings.allow_suspicious_codecs, settings.allow_experimental_codecs, settings.enable_qpl_deflate_codec); CompressionCodecPtr compression_codec = CompressionCodecFactory::instance().get(compression_method, compression_level); /// tmp directory is used to ensure atomicity of transactions diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index f5209cbdff6..a437465b3fe 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -285,7 +285,7 @@ TTLDescription TTLDescription::getTTLFromAST( { result.recompression_codec = 
CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST( - ttl_element->recompression_codec, {}, !context->getSettingsRef().allow_suspicious_codecs, context->getSettingsRef().allow_experimental_codecs, context->getSettingsRef().enable_qpl_deflate); + ttl_element->recompression_codec, {}, !context->getSettingsRef().allow_suspicious_codecs, context->getSettingsRef().allow_experimental_codecs, context->getSettingsRef().enable_qpl_deflate_codec); } } diff --git a/tests/ci/stress.py b/tests/ci/stress.py index b95cac9044e..e5ceb251d0f 100755 --- a/tests/ci/stress.py +++ b/tests/ci/stress.py @@ -20,7 +20,7 @@ def get_options(i, upgrade_check): '''--db-engine="Replicated('/test/db/test_{}', 's1', 'r1')"'''.format(i) ) client_options.append("allow_experimental_database_replicated=1") - client_options.append("enable_qpl_deflate=1") + client_options.append("enable_qpl_deflate_codec=1") # If database name is not specified, new database is created for each functional test. # Run some threads with one database for all tests. diff --git a/tests/integration/test_non_default_compression/configs/enable_deflateqpl_codec.xml b/tests/integration/test_non_default_compression/configs/enable_deflateqpl_codec.xml index 46e9e43ca27..521b0fd663c 100644 --- a/tests/integration/test_non_default_compression/configs/enable_deflateqpl_codec.xml +++ b/tests/integration/test_non_default_compression/configs/enable_deflateqpl_codec.xml @@ -1,7 +1,7 @@ - 1 + 1 diff --git a/tests/integration/test_non_default_compression/test.py b/tests/integration/test_non_default_compression/test.py index e69b32daae0..e1a9c1ae540 100644 --- a/tests/integration/test_non_default_compression/test.py +++ b/tests/integration/test_non_default_compression/test.py @@ -38,16 +38,16 @@ node5 = cluster.add_instance( ) node6 = cluster.add_instance( "node6", - main_configs=["configs/allow_experimental_codecs.xml"], - user_configs=["configs/allow_suspicious_codecs.xml"], + main_configs=["configs/deflateqpl_compression_by_default.xml"], + user_configs=[ + "configs/allow_suspicious_codecs.xml", + "configs/enable_deflateqpl_codec.xml", + ], ) node7 = cluster.add_instance( "node7", - main_configs=["configs/deflateqpl_compression_by_default.xml"], - user_configs=[ - "configs/enable_deflateqpl_codec.xml", - "configs/allow_suspicious_codecs.xml", - ], + main_configs=["configs/allow_experimental_codecs.xml"], + user_configs=["configs/allow_suspicious_codecs.xml"], ) @pytest.fixture(scope="module") @@ -253,7 +253,7 @@ def test_uncompressed_cache_plus_zstd_codec(start_cluster): ) def test_preconfigured_deflateqpl_codec(start_cluster): - node7.query( + node6.query( """ CREATE TABLE compression_codec_multiple_with_key ( somedate Date CODEC(ZSTD, ZSTD, ZSTD(12), LZ4HC(12), DEFLATE_QPL), @@ -263,46 +263,46 @@ def test_preconfigured_deflateqpl_codec(start_cluster): ) ENGINE = MergeTree() PARTITION BY somedate ORDER BY id SETTINGS index_granularity = 2; """ ) - node7.query( + node6.query( "INSERT INTO compression_codec_multiple_with_key VALUES(toDate('2018-10-12'), 100000, 'hello', 88.88), (toDate('2018-10-12'), 100002, 'world', 99.99), (toDate('2018-10-12'), 1111, '!', 777.777)" ) assert ( - node7.query( + node6.query( "SELECT COUNT(*) FROM compression_codec_multiple_with_key WHERE id % 2 == 0" ) == "2\n" ) assert ( - node7.query( + node6.query( "SELECT DISTINCT somecolumn FROM compression_codec_multiple_with_key ORDER BY id" ) == "777.777\n88.88\n99.99\n" ) assert ( - node7.query( + node6.query( "SELECT data FROM compression_codec_multiple_with_key WHERE 
id >= 1112 AND somedate = toDate('2018-10-12') AND somecolumn <= 100" ) == "hello\nworld\n" ) - node7.query( + node6.query( "INSERT INTO compression_codec_multiple_with_key SELECT toDate('2018-10-12'), number, toString(number), 1.0 FROM system.numbers LIMIT 10000" ) assert ( - node7.query( + node6.query( "SELECT COUNT(id) FROM compression_codec_multiple_with_key WHERE id % 10 == 0" ) == "1001\n" ) assert ( - node7.query( + node6.query( "SELECT SUM(somecolumn) FROM compression_codec_multiple_with_key" ) == str(777.777 + 88.88 + 99.99 + 1.0 * 10000) + "\n" ) assert ( - node7.query( + node6.query( "SELECT count(*) FROM compression_codec_multiple_with_key GROUP BY somedate" ) == "10003\n" diff --git a/tests/performance/codecs_float_insert.xml b/tests/performance/codecs_float_insert.xml index 25291f7f499..be0935ad4cf 100644 --- a/tests/performance/codecs_float_insert.xml +++ b/tests/performance/codecs_float_insert.xml @@ -1,7 +1,7 @@ 1 - 1 + 1 @@ -11,7 +11,6 @@ NONE LZ4 ZSTD - DEFLATE_QPL DoubleDelta Gorilla FPC diff --git a/tests/performance/codecs_float_select.xml b/tests/performance/codecs_float_select.xml index bb67987c75e..844ab4508d8 100644 --- a/tests/performance/codecs_float_select.xml +++ b/tests/performance/codecs_float_select.xml @@ -1,7 +1,7 @@ 1 - 1 + 1 @@ -11,7 +11,6 @@ NONE LZ4 ZSTD - DEFLATE_QPL DoubleDelta Gorilla FPC diff --git a/tests/performance/codecs_int_insert.xml b/tests/performance/codecs_int_insert.xml index 1db9ee8f746..d5f12810118 100644 --- a/tests/performance/codecs_int_insert.xml +++ b/tests/performance/codecs_int_insert.xml @@ -1,7 +1,7 @@ 1 - 1 + 1 @@ -11,7 +11,6 @@ NONE LZ4 ZSTD - DEFLATE_QPL Delta T64 DoubleDelta diff --git a/tests/performance/codecs_int_select.xml b/tests/performance/codecs_int_select.xml index 5dc7ab48704..06b2c2a73f3 100644 --- a/tests/performance/codecs_int_select.xml +++ b/tests/performance/codecs_int_select.xml @@ -1,7 +1,7 @@ 1 - 1 + 1 @@ -11,7 +11,6 @@ NONE LZ4 ZSTD - DEFLATE_QPL Delta T64 DoubleDelta diff --git a/tests/queries/0_stateless/00804_test_alter_compression_codecs.reference b/tests/queries/0_stateless/00804_test_alter_compression_codecs.reference index a6afe11126c..5c77a102740 100644 --- a/tests/queries/0_stateless/00804_test_alter_compression_codecs.reference +++ b/tests/queries/0_stateless/00804_test_alter_compression_codecs.reference @@ -12,15 +12,13 @@ CODEC(NONE) 2018-01-01 4 4 2018-01-01 5 5 2018-01-01 6 6 -CODEC(DEFLATE_QPL) 2018-01-01 1 default_value 2018-01-01 2 default_value 2018-01-01 3 3 2018-01-01 4 4 2018-01-01 5 5 2018-01-01 6 6 -2018-01-01 7 7 -2018-01-01 8 8 +CODEC(DEFLATE_QPL) 2018-01-01 1 default_value 2018-01-01 2 default_value 2018-01-01 3 3 diff --git a/tests/queries/0_stateless/00804_test_alter_compression_codecs.sql b/tests/queries/0_stateless/00804_test_alter_compression_codecs.sql index 40a8bb4c7cb..5b8b73270a2 100644 --- a/tests/queries/0_stateless/00804_test_alter_compression_codecs.sql +++ b/tests/queries/0_stateless/00804_test_alter_compression_codecs.sql @@ -25,7 +25,10 @@ INSERT INTO alter_compression_codec VALUES('2018-01-01', 5, '5'); INSERT INTO alter_compression_codec VALUES('2018-01-01', 6, '6'); SELECT * FROM alter_compression_codec ORDER BY id; -SET enable_qpl_deflate = 1; +OPTIMIZE TABLE alter_compression_codec FINAL; +SELECT * FROM alter_compression_codec ORDER BY id; + +SET enable_qpl_deflate_codec = 1; ALTER TABLE alter_compression_codec MODIFY COLUMN alter_column CODEC(DEFLATE_QPL); SELECT compression_codec FROM system.columns WHERE database = currentDatabase() AND table = 
'alter_compression_codec' AND name = 'alter_column'; @@ -33,9 +36,6 @@ INSERT INTO alter_compression_codec VALUES('2018-01-01', 7, '7'); INSERT INTO alter_compression_codec VALUES('2018-01-01', 8, '8'); SELECT * FROM alter_compression_codec ORDER BY id; -OPTIMIZE TABLE alter_compression_codec FINAL; -SELECT * FROM alter_compression_codec ORDER BY id; - SET allow_suspicious_codecs = 1; ALTER TABLE alter_compression_codec MODIFY COLUMN alter_column CODEC(ZSTD, LZ4HC, LZ4, LZ4, DEFLATE_QPL, NONE); SELECT compression_codec FROM system.columns WHERE database = currentDatabase() AND table = 'alter_compression_codec' AND name = 'alter_column'; @@ -62,7 +62,7 @@ ALTER TABLE alter_bad_codec ADD COLUMN alter_column DateTime DEFAULT '2019-01-01 ALTER TABLE alter_bad_codec ADD COLUMN alter_column DateTime DEFAULT '2019-01-01 00:00:00' CODEC(ZSTD(100)); -- { serverError 433 } -ALTER TABLE alter_bad_codec ADD COLUMN alter_column DateTime DEFAULT '2019-01-01 00:00:00' CODEC(DEFLATE_QPL(100)); -- { serverError 378 } +ALTER TABLE alter_bad_codec ADD COLUMN alter_column DateTime DEFAULT '2019-01-01 00:00:00' CODEC(DEFLATE_QPL(100)); -- { serverError DATA_TYPE_CANNOT_HAVE_ARGUMENTS } DROP TABLE IF EXISTS alter_bad_codec; diff --git a/tests/queries/0_stateless/00804_test_custom_compression_codecs.reference b/tests/queries/0_stateless/00804_test_custom_compression_codecs.reference index a9cbe3d32d3..8b51d65004a 100644 --- a/tests/queries/0_stateless/00804_test_custom_compression_codecs.reference +++ b/tests/queries/0_stateless/00804_test_custom_compression_codecs.reference @@ -1,6 +1,6 @@ -1 hello 2018-12-14 1.1 aaa 5 qpl11 11 -2 world 2018-12-15 2.2 bbb 6 qpl22 22 -3 ! 2018-12-16 3.3 ccc 7 qpl33 33 +1 hello 2018-12-14 2018-12-14 1.1 aaa 5 +2 world 2018-12-15 2018-12-15 2.2 bbb 6 +3 ! 
2018-12-16 2018-12-16 3.3 ccc 7 2 1 world 2018-10-05 1.1 2 hello 2018-10-01 2.2 diff --git a/tests/queries/0_stateless/00804_test_custom_compression_codecs.sql b/tests/queries/0_stateless/00804_test_custom_compression_codecs.sql index 44a0daada27..47ec268bfec 100644 --- a/tests/queries/0_stateless/00804_test_custom_compression_codecs.sql +++ b/tests/queries/0_stateless/00804_test_custom_compression_codecs.sql @@ -1,6 +1,6 @@ SET send_logs_level = 'fatal'; SET allow_suspicious_codecs = 1; -SET enable_qpl_deflate = 1; +SET enable_qpl_deflate_codec = 1; DROP TABLE IF EXISTS compression_codec; @@ -8,22 +8,21 @@ CREATE TABLE compression_codec( id UInt64 CODEC(LZ4), data String CODEC(ZSTD), ddd Date CODEC(NONE), + ddd32 Date32 CODEC(DEFLATE_QPL), somenum Float64 CODEC(ZSTD(2)), somestr FixedString(3) CODEC(LZ4HC(7)), othernum Int64 CODEC(Delta), - qplstr String CODEC(DEFLATE_QPL), - qplnum UInt32 CODEC(DEFLATE_QPL), ) ENGINE = MergeTree() ORDER BY tuple(); -INSERT INTO compression_codec VALUES(1, 'hello', toDate('2018-12-14'), 1.1, 'aaa', 5, 'qpl11', 11); -INSERT INTO compression_codec VALUES(2, 'world', toDate('2018-12-15'), 2.2, 'bbb', 6,'qpl22', 22); -INSERT INTO compression_codec VALUES(3, '!', toDate('2018-12-16'), 3.3, 'ccc', 7, 'qpl33', 33); +INSERT INTO compression_codec VALUES(1, 'hello', toDate('2018-12-14'), toDate32('2018-12-14'), 1.1, 'aaa', 5); +INSERT INTO compression_codec VALUES(2, 'world', toDate('2018-12-15'), toDate32('2018-12-15'), 2.2, 'bbb', 6); +INSERT INTO compression_codec VALUES(3, '!', toDate('2018-12-16'), toDate32('2018-12-16'), 3.3, 'ccc', 7); SELECT * FROM compression_codec ORDER BY id; OPTIMIZE TABLE compression_codec FINAL; -INSERT INTO compression_codec VALUES(2, '', toDate('2018-12-13'), 4.4, 'ddd', 8, 'qpl44', 44); +INSERT INTO compression_codec VALUES(2, '', toDate('2018-12-13'), toDate32('2018-12-13'), 4.4, 'ddd', 8); DETACH TABLE compression_codec; ATTACH TABLE compression_codec; diff --git a/tests/queries/0_stateless/00804_test_custom_compression_codes_log_storages.sql b/tests/queries/0_stateless/00804_test_custom_compression_codes_log_storages.sql index 113f26732e7..bcd09277824 100644 --- a/tests/queries/0_stateless/00804_test_custom_compression_codes_log_storages.sql +++ b/tests/queries/0_stateless/00804_test_custom_compression_codes_log_storages.sql @@ -1,6 +1,6 @@ SET send_logs_level = 'fatal'; SET allow_suspicious_codecs = 1; -SET enable_qpl_deflate = 1; +SET enable_qpl_deflate_codec = 1; -- copy-paste for storage log diff --git a/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.reference b/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.reference index 88d274d9cba..276747f8233 100644 --- a/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.reference +++ b/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.reference @@ -1,4 +1,4 @@ -1 hello 2018-12-14 1.1 aaa 5 qpl11 11 -2 world 2018-12-15 2.2 bbb 6 qpl22 22 -3 ! 2018-12-16 3.3 ccc 7 qpl33 33 +1 hello 2018-12-14 2018-12-14 1.1 aaa 5 [1,2,3] {'k1':1,'k2':2} (1,2) +2 world 2018-12-15 2018-12-15 2.2 bbb 6 [4,5,6] {'k3':3,'k4':4} (3,4) +3 ! 
2018-12-16 2018-12-16 3.3 ccc 7 [7,8,9] {'k5':5,'k6':6} (5,6) 2 diff --git a/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.sql b/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.sql index fe23e49804d..64e66d47522 100644 --- a/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.sql +++ b/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.sql @@ -1,5 +1,5 @@ SET send_logs_level = 'fatal'; -SET enable_qpl_deflate = 1; +SET enable_qpl_deflate_codec = 1; DROP TABLE IF EXISTS compression_codec; @@ -7,22 +7,24 @@ CREATE TABLE compression_codec( id UInt64 CODEC(DEFLATE_QPL), data String CODEC(DEFLATE_QPL), ddd Date CODEC(DEFLATE_QPL), + ddd32 Date32 CODEC(DEFLATE_QPL), somenum Float64 CODEC(DEFLATE_QPL), somestr FixedString(3) CODEC(DEFLATE_QPL), othernum Int64 CODEC(DEFLATE_QPL), - qplstr String CODEC(DEFLATE_QPL), - qplnum UInt32 CODEC(DEFLATE_QPL), + somearray Array(UInt8) CODEC(DEFLATE_QPL), + somemap Map(String, UInt32) CODEC(DEFLATE_QPL), + sometuple Tuple(UInt16, UInt64) CODEC(DEFLATE_QPL), ) ENGINE = MergeTree() ORDER BY tuple(); -INSERT INTO compression_codec VALUES(1, 'hello', toDate('2018-12-14'), 1.1, 'aaa', 5, 'qpl11', 11); -INSERT INTO compression_codec VALUES(2, 'world', toDate('2018-12-15'), 2.2, 'bbb', 6,'qpl22', 22); -INSERT INTO compression_codec VALUES(3, '!', toDate('2018-12-16'), 3.3, 'ccc', 7, 'qpl33', 33); +INSERT INTO compression_codec VALUES(1, 'hello', toDate('2018-12-14'), toDate32('2018-12-14'), 1.1, 'aaa', 5, [1,2,3], map('k1',1,'k2',2), tuple(1,2)); +INSERT INTO compression_codec VALUES(2, 'world', toDate('2018-12-15'), toDate32('2018-12-15'), 2.2, 'bbb', 6, [4,5,6], map('k3',3,'k4',4), tuple(3,4)); +INSERT INTO compression_codec VALUES(3, '!', toDate('2018-12-16'), toDate32('2018-12-16'), 3.3, 'ccc', 7, [7,8,9], map('k5',5,'k6',6), tuple(5,6)); SELECT * FROM compression_codec ORDER BY id; OPTIMIZE TABLE compression_codec FINAL; -INSERT INTO compression_codec VALUES(2, '', toDate('2018-12-13'), 4.4, 'ddd', 8, 'qpl44', 44); +INSERT INTO compression_codec VALUES(2, '', toDate('2018-12-13'), toDate32('2018-12-13'), 4.4, 'ddd', 8, [10,11,12], map('k7',7,'k8',8), tuple(7,8)); DETACH TABLE compression_codec; ATTACH TABLE compression_codec; diff --git a/tests/queries/0_stateless/00804_test_delta_codec_compression.reference b/tests/queries/0_stateless/00804_test_delta_codec_compression.reference index 37f9d4901b3..949d37ed27a 100644 --- a/tests/queries/0_stateless/00804_test_delta_codec_compression.reference +++ b/tests/queries/0_stateless/00804_test_delta_codec_compression.reference @@ -4,5 +4,3 @@ 1 32 1 -17 -1 diff --git a/tests/queries/0_stateless/00804_test_delta_codec_compression.sql b/tests/queries/0_stateless/00804_test_delta_codec_compression.sql index f9805246662..25988f6474b 100644 --- a/tests/queries/0_stateless/00804_test_delta_codec_compression.sql +++ b/tests/queries/0_stateless/00804_test_delta_codec_compression.sql @@ -115,41 +115,3 @@ USING(key); DROP TABLE IF EXISTS delta_codec_string; DROP TABLE IF EXISTS default_codec_string; - -SET enable_qpl_deflate = 1; -DROP TABLE IF EXISTS delta_codec_string_qpl; -DROP TABLE IF EXISTS default_codec_string_qpl; - -CREATE TABLE delta_codec_string_qpl -( - id Float64 Codec(Delta, DEFLATE_QPL) -) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key=false; - -CREATE TABLE default_codec_string_qpl -( - id Float64 Codec(DEFLATE_QPL) -) ENGINE MergeTree() ORDER BY tuple() SETTINGS 
min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key=false; - -INSERT INTO delta_codec_string_qpl SELECT concat(toString(number), toString(number % 100)) FROM numbers(1547510400, 500000); -INSERT INTO default_codec_string_qpl SELECT * from delta_codec_string_qpl; - -OPTIMIZE TABLE delta_codec_string_qpl FINAL; -OPTIMIZE TABLE default_codec_string_qpl FINAL; - -SELECT - floor(big_size / small_size) as ratio -FROM - (SELECT 1 AS key, sum(bytes_on_disk) AS small_size FROM system.parts WHERE database = currentDatabase() and table = 'delta_codec_string_qpl' and active) -INNER JOIN - (SELECT 1 AS key, sum(bytes_on_disk) as big_size FROM system.parts WHERE database = currentDatabase() and table = 'default_codec_string_qpl' and active) USING(key); - -SELECT - small_hash == big_hash -FROM - (SELECT 1 AS key, sum(cityHash64(*)) AS small_hash FROM delta_codec_string_qpl) -INNER JOIN - (SELECT 1 AS key, sum(cityHash64(*)) AS big_hash FROM default_codec_string_qpl) -USING(key); - -DROP TABLE IF EXISTS delta_codec_string_qpl; -DROP TABLE IF EXISTS default_codec_string_qpl; From aae281eb7df6ce8e00d872d3ef0d0558781a5f1a Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 1 Jun 2023 15:49:52 +0200 Subject: [PATCH 0261/1997] Update codecs_float_insert.xml --- tests/performance/codecs_float_insert.xml | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/performance/codecs_float_insert.xml b/tests/performance/codecs_float_insert.xml index be0935ad4cf..64325d30189 100644 --- a/tests/performance/codecs_float_insert.xml +++ b/tests/performance/codecs_float_insert.xml @@ -1,7 +1,6 @@ 1 - 1 From dc93b6324ee505228b96791db629b7437f6db931 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 1 Jun 2023 15:50:28 +0200 Subject: [PATCH 0262/1997] Update codecs_float_select.xml --- tests/performance/codecs_float_select.xml | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/performance/codecs_float_select.xml b/tests/performance/codecs_float_select.xml index 844ab4508d8..325c140d9a0 100644 --- a/tests/performance/codecs_float_select.xml +++ b/tests/performance/codecs_float_select.xml @@ -1,7 +1,6 @@ 1 - 1 From 7043db669e4e445764d99cd749cfef99d3f437cf Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 1 Jun 2023 15:50:40 +0200 Subject: [PATCH 0263/1997] Update codecs_int_insert.xml --- tests/performance/codecs_int_insert.xml | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/performance/codecs_int_insert.xml b/tests/performance/codecs_int_insert.xml index d5f12810118..618e20160f8 100644 --- a/tests/performance/codecs_int_insert.xml +++ b/tests/performance/codecs_int_insert.xml @@ -1,7 +1,6 @@ 1 - 1 From 4d7364af97893c4457a86a064628ff478d900c05 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 1 Jun 2023 15:50:49 +0200 Subject: [PATCH 0264/1997] Update codecs_int_select.xml --- tests/performance/codecs_int_select.xml | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/performance/codecs_int_select.xml b/tests/performance/codecs_int_select.xml index 06b2c2a73f3..62c1ee16e7b 100644 --- a/tests/performance/codecs_int_select.xml +++ b/tests/performance/codecs_int_select.xml @@ -1,7 +1,6 @@ 1 - 1 From 1f928f2d3d0eea55ff1743cea386162fd87fed92 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 1 Jun 2023 15:53:48 +0200 Subject: [PATCH 0265/1997] Update StorageSystemBuildOptions.cpp.in --- src/Storages/System/StorageSystemBuildOptions.cpp.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/System/StorageSystemBuildOptions.cpp.in 
b/src/Storages/System/StorageSystemBuildOptions.cpp.in index c2a188e7750..c2d35c96ce5 100644 --- a/src/Storages/System/StorageSystemBuildOptions.cpp.in +++ b/src/Storages/System/StorageSystemBuildOptions.cpp.in @@ -64,11 +64,11 @@ const char * auto_config_build[] "USE_ARROW", "@USE_ARROW@", "USE_ORC", "@USE_ORC@", "USE_MSGPACK", "@USE_MSGPACK@", + "USE_QPL", "@ENABLE_QPL@", "GIT_HASH", "@GIT_HASH@", "GIT_BRANCH", R"IRjaNsZIL9Yh7FQ4(@GIT_BRANCH@)IRjaNsZIL9Yh7FQ4", "GIT_DATE", "@GIT_DATE@", "GIT_COMMIT_SUBJECT", R"Gi17KJMlbGCjErEN(@GIT_COMMIT_SUBJECT@)Gi17KJMlbGCjErEN", - "USE_QPL", "@ENABLE_QPL@", nullptr, nullptr }; From 1aa158909e434438733504d2dbcd9ea9d113e41b Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 9 Jun 2023 12:38:38 +0000 Subject: [PATCH 0266/1997] enable_qpl_deflate_codec --> enable_deflate_qpl_codec --- docs/en/sql-reference/statements/create/table.md | 2 +- src/Client/Connection.cpp | 2 +- src/Compression/CompressionFactory.h | 4 ++-- src/Compression/CompressionFactoryAdditions.cpp | 12 ++++++------ src/Core/Settings.h | 2 +- src/Interpreters/InterpreterCreateQuery.cpp | 4 ++-- src/Server/TCPHandler.cpp | 2 +- src/Storages/AlterCommands.cpp | 4 ++-- src/Storages/Distributed/DistributedSink.cpp | 2 +- src/Storages/TTLDescription.cpp | 2 +- tests/ci/stress.py | 2 +- .../configs/enable_deflateqpl_codec.xml | 2 +- .../00804_test_alter_compression_codecs.sql | 2 +- .../00804_test_custom_compression_codecs.sql | 2 +- ...04_test_custom_compression_codes_log_storages.sql | 2 +- .../00804_test_deflate_qpl_codec_compression.sql | 2 +- 16 files changed, 24 insertions(+), 24 deletions(-) diff --git a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md index d0e17410791..496ecdbda7b 100644 --- a/docs/en/sql-reference/statements/create/table.md +++ b/docs/en/sql-reference/statements/create/table.md @@ -380,7 +380,7 @@ High compression levels are useful for asymmetric scenarios, like compress once, `DEFLATE_QPL` — [Deflate compression algorithm](https://github.com/intel/qpl) implemented by Intel® Query Processing Library. Some limitations apply: -- DEFLATE_QPL is disabled by default and can only be used after setting configuration parameter `enable_qpl_deflate_codec=1`. +- DEFLATE_QPL is disabled by default and can only be used after setting configuration parameter `enable_deflate_qpl_codec = 1`. - DEFLATE_QPL requires a ClickHouse build compiled with SSE 4.2 instructions (by default, this is the case). Refer to [Build Clickhouse with DEFLATE_QPL](/docs/en/development/building_and_benchmarking_deflate_qpl.md/#Build-Clickhouse-with-DEFLATE_QPL) for more details. - DEFLATE_QPL works best if the system has a Intel® IAA (In-Memory Analytics Accelerator) offloading device. Refer to [Accelerator Configuration](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#accelerator-configuration) and [Benchmark with DEFLATE_QPL](/docs/en/development/building_and_benchmarking_deflate_qpl.md/#Run-Benchmark-with-DEFLATE_QPL) for more details. - DEFLATE_QPL-compressed data can only be transferred between ClickHouse nodes compiled with SSE 4.2 enabled. 
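The renamed setting also flows through ALTER-time validation (see the AlterCommands.cpp hunk in this commit). A short sketch under the new name, mirroring 00804_test_alter_compression_codecs.sql — the table name is illustrative:

SET enable_deflate_qpl_codec = 1;

CREATE TABLE qpl_alter_demo (id UInt64, s String CODEC(LZ4)) ENGINE = MergeTree() ORDER BY id;
ALTER TABLE qpl_alter_demo MODIFY COLUMN s CODEC(DEFLATE_QPL);

-- Confirm the codec took effect, as the test's reference output does:
SELECT compression_codec FROM system.columns
WHERE database = currentDatabase() AND table = 'qpl_alter_demo' AND name = 's';
-- Expected: CODEC(DEFLATE_QPL)

DROP TABLE qpl_alter_demo;
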
diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index ac8e6654e84..636532ade4b 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -588,7 +588,7 @@ void Connection::sendQuery( if (method == "ZSTD") level = settings->network_zstd_compression_level; - CompressionCodecFactory::instance().validateCodec(method, level, !settings->allow_suspicious_codecs, settings->allow_experimental_codecs, settings->enable_qpl_deflate_codec); + CompressionCodecFactory::instance().validateCodec(method, level, !settings->allow_suspicious_codecs, settings->allow_experimental_codecs, settings->enable_deflate_qpl_codec); compression_codec = CompressionCodecFactory::instance().get(method, level); } else diff --git a/src/Compression/CompressionFactory.h b/src/Compression/CompressionFactory.h index e020e51bb09..4f2627587a3 100644 --- a/src/Compression/CompressionFactory.h +++ b/src/Compression/CompressionFactory.h @@ -40,10 +40,10 @@ public: CompressionCodecPtr getDefaultCodec() const; /// Validate codecs AST specified by user and parses codecs description (substitute default parameters) - ASTPtr validateCodecAndGetPreprocessedAST(const ASTPtr & ast, const DataTypePtr & column_type, bool sanity_check, bool allow_experimental_codecs, bool enable_qpl_deflate_codec) const; + ASTPtr validateCodecAndGetPreprocessedAST(const ASTPtr & ast, const DataTypePtr & column_type, bool sanity_check, bool allow_experimental_codecs, bool enable_deflate_qpl_codec) const; /// Validate codecs AST specified by user - void validateCodec(const String & family_name, std::optional level, bool sanity_check, bool allow_experimental_codecs, bool enable_qpl_deflate_codec) const; + void validateCodec(const String & family_name, std::optional level, bool sanity_check, bool allow_experimental_codecs, bool enable_deflate_qpl_codec) const; /// Get codec by AST and possible column_type. 
Some codecs can use /// information about type to improve inner settings, but every codec should diff --git a/src/Compression/CompressionFactoryAdditions.cpp b/src/Compression/CompressionFactoryAdditions.cpp index b4a2d96cf39..46f7e2653c2 100644 --- a/src/Compression/CompressionFactoryAdditions.cpp +++ b/src/Compression/CompressionFactoryAdditions.cpp @@ -34,7 +34,7 @@ namespace ErrorCodes void CompressionCodecFactory::validateCodec( - const String & family_name, std::optional level, bool sanity_check, bool allow_experimental_codecs, bool enable_qpl_deflate_codec) const + const String & family_name, std::optional level, bool sanity_check, bool allow_experimental_codecs, bool enable_deflate_qpl_codec) const { if (family_name.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Compression codec name cannot be empty"); @@ -43,13 +43,13 @@ void CompressionCodecFactory::validateCodec( { auto literal = std::make_shared(static_cast(*level)); validateCodecAndGetPreprocessedAST(makeASTFunction("CODEC", makeASTFunction(Poco::toUpper(family_name), literal)), - {}, sanity_check, allow_experimental_codecs, enable_qpl_deflate_codec); + {}, sanity_check, allow_experimental_codecs, enable_deflate_qpl_codec); } else { auto identifier = std::make_shared(Poco::toUpper(family_name)); validateCodecAndGetPreprocessedAST(makeASTFunction("CODEC", identifier), - {}, sanity_check, allow_experimental_codecs, enable_qpl_deflate_codec); + {}, sanity_check, allow_experimental_codecs, enable_deflate_qpl_codec); } } @@ -77,7 +77,7 @@ bool innerDataTypeIsFloat(const DataTypePtr & type) } ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST( - const ASTPtr & ast, const DataTypePtr & column_type, bool sanity_check, bool allow_experimental_codecs, bool enable_qpl_deflate_codec) const + const ASTPtr & ast, const DataTypePtr & column_type, bool sanity_check, bool allow_experimental_codecs, bool enable_deflate_qpl_codec) const { if (const auto * func = ast->as()) { @@ -159,10 +159,10 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST( " You can enable it with the 'allow_experimental_codecs' setting.", codec_family_name); - if (!enable_qpl_deflate_codec && result_codec->isDeflateQplCompression()) + if (!enable_deflate_qpl_codec && result_codec->isDeflateQplCompression()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Codec {} is disabled by default." 
- " You can enable it with the 'enable_qpl_deflate_codec' setting.", + " You can enable it with the 'enable_deflate_qpl_codec' setting.", codec_family_name); codecs_descriptions->children.emplace_back(result_codec->getCodecDesc()); diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 4aae8f5d572..e0034174597 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -319,7 +319,7 @@ class IColumn; M(Bool, allow_distributed_ddl, true, "If it is set to true, then a user is allowed to executed distributed DDL queries.", 0) \ M(Bool, allow_suspicious_codecs, false, "If it is set to true, allow to specify meaningless compression codecs.", 0) \ M(Bool, allow_experimental_codecs, false, "If it is set to true, allow to specify experimental compression codecs (but we don't have those yet and this option does nothing).", 0) \ - M(Bool, enable_qpl_deflate_codec, false, "If it is set to true, allow usage of the DEFLATE_QPL codec.", 0) \ + M(Bool, enable_deflate_qpl_codec, false, "Enable/disable the DEFLATE_QPL codec.", 0) \ M(UInt64, query_profiler_real_time_period_ns, QUERY_PROFILER_DEFAULT_SAMPLE_RATE_NS, "Period for real clock timer of query profiler (in nanoseconds). Set 0 value to turn off the real clock query profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \ M(UInt64, query_profiler_cpu_time_period_ns, QUERY_PROFILER_DEFAULT_SAMPLE_RATE_NS, "Period for CPU clock timer of query profiler (in nanoseconds). Set 0 value to turn off the CPU clock query profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \ M(Bool, metrics_perf_events_enabled, false, "If enabled, some of the perf events will be measured throughout queries' execution.", 0) \ diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index ddb53bbbfaa..d0bb3dd389f 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -571,7 +571,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( bool sanity_check_compression_codecs = !attach && !context_->getSettingsRef().allow_suspicious_codecs; bool allow_experimental_codecs = attach || context_->getSettingsRef().allow_experimental_codecs; - bool enable_qpl_deflate_codec = attach || context_->getSettingsRef().enable_qpl_deflate_codec; + bool enable_deflate_qpl_codec = attach || context_->getSettingsRef().enable_deflate_qpl_codec; ColumnsDescription res; auto name_type_it = column_names_and_types.begin(); @@ -632,7 +632,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( if (col_decl.default_specifier == "ALIAS") throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot specify codec for column type ALIAS"); column.codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST( - col_decl.codec, column.type, sanity_check_compression_codecs, allow_experimental_codecs, enable_qpl_deflate_codec); + col_decl.codec, column.type, sanity_check_compression_codecs, allow_experimental_codecs, enable_deflate_qpl_codec); } if (col_decl.ttl) diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index b43fef9dd54..50e9d50e2f6 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1775,7 +1775,7 @@ void TCPHandler::initBlockOutput(const Block & block) if (state.compression == Protocol::Compression::Enable) { - 
CompressionCodecFactory::instance().validateCodec(method, level, !query_settings.allow_suspicious_codecs, query_settings.allow_experimental_codecs, query_settings.enable_qpl_deflate_codec); + CompressionCodecFactory::instance().validateCodec(method, level, !query_settings.allow_suspicious_codecs, query_settings.allow_experimental_codecs, query_settings.enable_deflate_qpl_codec); state.maybe_compressed_out = std::make_shared( *out, CompressionCodecFactory::instance().get(method, level)); diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index 73d7be8dc56..a9247f9b898 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -1067,7 +1067,7 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const "this column name is reserved for lightweight delete feature", backQuote(column_name)); if (command.codec) - CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(command.codec, command.data_type, !context->getSettingsRef().allow_suspicious_codecs, context->getSettingsRef().allow_experimental_codecs, context->getSettingsRef().enable_qpl_deflate_codec); + CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(command.codec, command.data_type, !context->getSettingsRef().allow_suspicious_codecs, context->getSettingsRef().allow_experimental_codecs, context->getSettingsRef().enable_deflate_qpl_codec); all_columns.add(ColumnDescription(column_name, command.data_type)); } @@ -1093,7 +1093,7 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const { if (all_columns.hasAlias(column_name)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot specify codec for column type ALIAS"); - CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(command.codec, command.data_type, !context->getSettingsRef().allow_suspicious_codecs, context->getSettingsRef().allow_experimental_codecs, context->getSettingsRef().enable_qpl_deflate_codec); + CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(command.codec, command.data_type, !context->getSettingsRef().allow_suspicious_codecs, context->getSettingsRef().allow_experimental_codecs, context->getSettingsRef().enable_deflate_qpl_codec); } auto column_default = all_columns.getDefault(column_name); if (column_default) diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index e383890d1f7..1e1c911920e 100644 --- a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -733,7 +733,7 @@ void DistributedSink::writeToShard(const Cluster::ShardInfo & shard_info, const if (compression_method == "ZSTD") compression_level = settings.network_zstd_compression_level; - CompressionCodecFactory::instance().validateCodec(compression_method, compression_level, !settings.allow_suspicious_codecs, settings.allow_experimental_codecs, settings.enable_qpl_deflate_codec); + CompressionCodecFactory::instance().validateCodec(compression_method, compression_level, !settings.allow_suspicious_codecs, settings.allow_experimental_codecs, settings.enale_deflate_qpl_codec); CompressionCodecPtr compression_codec = CompressionCodecFactory::instance().get(compression_method, compression_level); /// tmp directory is used to ensure atomicity of transactions diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index a437465b3fe..f601fed06ac 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ 
-285,7 +285,7 @@ TTLDescription TTLDescription::getTTLFromAST( { result.recompression_codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST( - ttl_element->recompression_codec, {}, !context->getSettingsRef().allow_suspicious_codecs, context->getSettingsRef().allow_experimental_codecs, context->getSettingsRef().enable_qpl_deflate_codec); + ttl_element->recompression_codec, {}, !context->getSettingsRef().allow_suspicious_codecs, context->getSettingsRef().allow_experimental_codecs, context->getSettingsRef().enable_deflate_qpl_codec); } } diff --git a/tests/ci/stress.py b/tests/ci/stress.py index e5ceb251d0f..6d17384c63f 100755 --- a/tests/ci/stress.py +++ b/tests/ci/stress.py @@ -20,7 +20,7 @@ def get_options(i, upgrade_check): '''--db-engine="Replicated('/test/db/test_{}', 's1', 'r1')"'''.format(i) ) client_options.append("allow_experimental_database_replicated=1") - client_options.append("enable_qpl_deflate_codec=1") + client_options.append("enable_deflate_qpl_codec=1") # If database name is not specified, new database is created for each functional test. # Run some threads with one database for all tests. diff --git a/tests/integration/test_non_default_compression/configs/enable_deflateqpl_codec.xml b/tests/integration/test_non_default_compression/configs/enable_deflateqpl_codec.xml index 521b0fd663c..24e101e0e3f 100644 --- a/tests/integration/test_non_default_compression/configs/enable_deflateqpl_codec.xml +++ b/tests/integration/test_non_default_compression/configs/enable_deflateqpl_codec.xml @@ -1,7 +1,7 @@ - 1 + 1 diff --git a/tests/queries/0_stateless/00804_test_alter_compression_codecs.sql b/tests/queries/0_stateless/00804_test_alter_compression_codecs.sql index 5b8b73270a2..fd9855e82d3 100644 --- a/tests/queries/0_stateless/00804_test_alter_compression_codecs.sql +++ b/tests/queries/0_stateless/00804_test_alter_compression_codecs.sql @@ -28,7 +28,7 @@ SELECT * FROM alter_compression_codec ORDER BY id; OPTIMIZE TABLE alter_compression_codec FINAL; SELECT * FROM alter_compression_codec ORDER BY id; -SET enable_qpl_deflate_codec = 1; +SET enable_deflate_qpl_codec = 1; ALTER TABLE alter_compression_codec MODIFY COLUMN alter_column CODEC(DEFLATE_QPL); SELECT compression_codec FROM system.columns WHERE database = currentDatabase() AND table = 'alter_compression_codec' AND name = 'alter_column'; diff --git a/tests/queries/0_stateless/00804_test_custom_compression_codecs.sql b/tests/queries/0_stateless/00804_test_custom_compression_codecs.sql index 47ec268bfec..89e77f758a7 100644 --- a/tests/queries/0_stateless/00804_test_custom_compression_codecs.sql +++ b/tests/queries/0_stateless/00804_test_custom_compression_codecs.sql @@ -1,6 +1,6 @@ SET send_logs_level = 'fatal'; SET allow_suspicious_codecs = 1; -SET enable_qpl_deflate_codec = 1; +SET enable_deflate_qpl_codec = 1; DROP TABLE IF EXISTS compression_codec; diff --git a/tests/queries/0_stateless/00804_test_custom_compression_codes_log_storages.sql b/tests/queries/0_stateless/00804_test_custom_compression_codes_log_storages.sql index bcd09277824..a629df2666d 100644 --- a/tests/queries/0_stateless/00804_test_custom_compression_codes_log_storages.sql +++ b/tests/queries/0_stateless/00804_test_custom_compression_codes_log_storages.sql @@ -1,6 +1,6 @@ SET send_logs_level = 'fatal'; SET allow_suspicious_codecs = 1; -SET enable_qpl_deflate_codec = 1; +SET enable_deflate_qpl_codec = 1; -- copy-paste for storage log diff --git a/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.sql 
b/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.sql index 64e66d47522..5a56fc0d576 100644 --- a/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.sql +++ b/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.sql @@ -1,5 +1,5 @@ SET send_logs_level = 'fatal'; -SET enable_qpl_deflate_codec = 1; +SET enable_deflate_qpl_codec = 1; DROP TABLE IF EXISTS compression_codec; From 07582d56f32d72a3f13d9e7303310ff1753c97f5 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 9 Jun 2023 13:18:13 +0000 Subject: [PATCH 0267/1997] Add config for pinning api version --- src/Coordination/FourLetterCommand.cpp | 2 +- src/Coordination/KeeperConstants.h | 4 +- src/Coordination/KeeperContext.cpp | 37 ++++++++ src/Coordination/KeeperContext.h | 6 ++ src/Coordination/KeeperServer.cpp | 2 + src/Coordination/KeeperSnapshotManager.cpp | 2 +- src/Coordination/KeeperStorage.cpp | 2 +- src/Coordination/tests/gtest_coordination.cpp | 2 +- .../__init__.py | 0 .../configs/enable_keeper.xml | 31 +++++++ .../test_keeper_api_version_config/test.py | 87 +++++++++++++++++++ 11 files changed, 169 insertions(+), 6 deletions(-) create mode 100644 src/Coordination/KeeperContext.cpp create mode 100644 tests/integration/test_keeper_api_version_config/__init__.py create mode 100644 tests/integration/test_keeper_api_version_config/configs/enable_keeper.xml create mode 100644 tests/integration/test_keeper_api_version_config/test.py diff --git a/src/Coordination/FourLetterCommand.cpp b/src/Coordination/FourLetterCommand.cpp index 7077e792fd8..a64969e3d31 100644 --- a/src/Coordination/FourLetterCommand.cpp +++ b/src/Coordination/FourLetterCommand.cpp @@ -486,7 +486,7 @@ String RecoveryCommand::run() String ApiVersionCommand::run() { - return toString(static_cast(Coordination::current_keeper_api_version)); + return toString(static_cast(Coordination::latest_keeper_api_version)); } String CreateSnapshotCommand::run() diff --git a/src/Coordination/KeeperConstants.h b/src/Coordination/KeeperConstants.h index 4b5a5b54be0..42161eee908 100644 --- a/src/Coordination/KeeperConstants.h +++ b/src/Coordination/KeeperConstants.h @@ -13,7 +13,7 @@ enum class KeeperApiVersion : uint8_t WITH_CHECK_NOT_EXISTS, }; -inline constexpr auto current_keeper_api_version = KeeperApiVersion::WITH_CHECK_NOT_EXISTS; +inline constexpr auto latest_keeper_api_version = KeeperApiVersion::WITH_CHECK_NOT_EXISTS; const std::string keeper_system_path = "/keeper"; const std::string keeper_api_version_path = keeper_system_path + "/api_version"; @@ -21,7 +21,7 @@ const std::string keeper_api_version_path = keeper_system_path + "/api_version"; using PathWithData = std::pair; const std::vector child_system_paths_with_data { - {keeper_api_version_path, toString(static_cast(current_keeper_api_version))} + {keeper_api_version_path, toString(static_cast(latest_keeper_api_version))} }; } diff --git a/src/Coordination/KeeperContext.cpp b/src/Coordination/KeeperContext.cpp new file mode 100644 index 00000000000..d420242e670 --- /dev/null +++ b/src/Coordination/KeeperContext.cpp @@ -0,0 +1,37 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +DECLARE_SETTING_ENUM(KeeperApiVersion); +IMPLEMENT_SETTING_ENUM(KeeperApiVersion, ErrorCodes::BAD_ARGUMENTS, + {{"ZOOKEEPER_COMPATIBLE", KeeperApiVersion::ZOOKEEPER_COMPATIBLE}, + {"WITH_FILTERED_LIST", KeeperApiVersion::WITH_FILTERED_LIST}, + {"WITH_MULTI_READ", 
KeeperApiVersion::WITH_MULTI_READ}, + {"WITH_CHECK_NOT_EXISTS", KeeperApiVersion::WITH_CHECK_NOT_EXISTS}}); + +void KeeperContext::initialize(const Poco::Util::AbstractConfiguration & config) +{ + for (const auto & [path, data] : child_system_paths_with_data) + system_nodes_with_data[std::string{path}] = data; + + if (config.has("keeper_server.api_version")) + { + auto version_string = config.getString("keeper_server.api_version"); + auto api_version = SettingFieldKeeperApiVersionTraits::fromString(version_string); + LOG_INFO(&Poco::Logger::get("KeeperContext"), "API version override used: {}", version_string); + system_nodes_with_data[keeper_api_version_path] = toString(static_cast(api_version)); + } +} + +} diff --git a/src/Coordination/KeeperContext.h b/src/Coordination/KeeperContext.h index 64fa8cea6ec..f26009c9af0 100644 --- a/src/Coordination/KeeperContext.h +++ b/src/Coordination/KeeperContext.h @@ -1,10 +1,14 @@ #pragma once +#include + namespace DB { struct KeeperContext { + void initialize(const Poco::Util::AbstractConfiguration & config); + enum class Phase : uint8_t { INIT, @@ -16,6 +20,8 @@ struct KeeperContext bool ignore_system_path_on_startup{false}; bool digest_enabled{true}; + + std::unordered_map system_nodes_with_data; }; using KeeperContextPtr = std::shared_ptr; diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 45db9e85fa5..897d7e05671 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -119,6 +119,8 @@ KeeperServer::KeeperServer( if (coordination_settings->quorum_reads) LOG_WARNING(log, "Quorum reads enabled, Keeper will work slower."); + keeper_context->initialize(config); + keeper_context->digest_enabled = config.getBool("keeper_server.digest_enabled", false); keeper_context->ignore_system_path_on_startup = config.getBool("keeper_server.ignore_system_path_on_startup", false); diff --git a/src/Coordination/KeeperSnapshotManager.cpp b/src/Coordination/KeeperSnapshotManager.cpp index 8b80db3e520..a2d9d8136cd 100644 --- a/src/Coordination/KeeperSnapshotManager.cpp +++ b/src/Coordination/KeeperSnapshotManager.cpp @@ -185,7 +185,7 @@ void KeeperStorageSnapshot::serialize(const KeeperStorageSnapshot & snapshot, Wr } /// Serialize data tree - writeBinary(snapshot.snapshot_container_size - child_system_paths_with_data.size(), out); + writeBinary(snapshot.snapshot_container_size - keeper_context->system_nodes_with_data.size(), out); size_t counter = 0; for (auto it = snapshot.begin; counter < snapshot.snapshot_container_size; ++counter) { diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 66d6b0f5843..8abcd062b7f 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -283,7 +283,7 @@ void KeeperStorage::initializeSystemNodes() } // insert child system nodes - for (const auto & [path, data] : child_system_paths_with_data) + for (const auto & [path, data] : keeper_context->system_nodes_with_data) { assert(keeper_api_version_path.starts_with(keeper_system_path)); Node child_system_node; diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index 453fd0f2e60..005c67ad261 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -2357,7 +2357,7 @@ TEST_P(CoordinationTest, TestCurrentApiVersion) uint8_t keeper_version{0}; DB::ReadBufferFromOwnString buf(get_response.data); DB::readIntText(keeper_version, buf); - 
EXPECT_EQ(keeper_version, static_cast(current_keeper_api_version)); + EXPECT_EQ(keeper_version, static_cast(latest_keeper_api_version)); } TEST_P(CoordinationTest, TestSystemNodeModify) diff --git a/tests/integration/test_keeper_api_version_config/__init__.py b/tests/integration/test_keeper_api_version_config/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_keeper_api_version_config/configs/enable_keeper.xml b/tests/integration/test_keeper_api_version_config/configs/enable_keeper.xml new file mode 100644 index 00000000000..c153d025598 --- /dev/null +++ b/tests/integration/test_keeper_api_version_config/configs/enable_keeper.xml @@ -0,0 +1,31 @@ + + + 9181 + 1 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + + 10 + 5 + 5000 + 10000 + trace + + + 0 + 0 + 0 + + + + + + + 1 + localhost + 9234 + + + + diff --git a/tests/integration/test_keeper_api_version_config/test.py b/tests/integration/test_keeper_api_version_config/test.py new file mode 100644 index 00000000000..34d3acc4f04 --- /dev/null +++ b/tests/integration/test_keeper_api_version_config/test.py @@ -0,0 +1,87 @@ +#!/usr/bin/env python3 + +import pytest +import os +from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils +from kazoo.client import KazooClient, KazooState + +CURRENT_TEST_DIR = os.path.dirname(os.path.abspath(__file__)) +cluster = ClickHouseCluster(__file__) + +# clickhouse itself will use external zookeeper +node = cluster.add_instance( + "node", + main_configs=["configs/enable_keeper.xml"], + stay_alive=True, +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + yield cluster + + finally: + cluster.shutdown() + + +def get_connection_zk(nodename, timeout=30.0): + _fake_zk_instance = KazooClient( + hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout + ) + _fake_zk_instance.start() + return _fake_zk_instance + + +def restart_clickhouse(api_version=None, expect_fail=True): + node.stop_clickhouse() + node.copy_file_to_container( + os.path.join(CURRENT_TEST_DIR, "configs/enable_keeper.xml"), + "/etc/clickhouse-server/config.d/enable_keeper.xml", + ) + + if api_version: + node.replace_in_config( + "/etc/clickhouse-server/config.d/enable_keeper.xml", + "", + f"{api_version}<\\/api_version>", + ) + + node.start_clickhouse(retry_start=not expect_fail) + keeper_utils.wait_until_connected(cluster, node) + + +def test_keeper_api_version(started_cluster): + restart_clickhouse() + + def assert_version(string_version, version_number): + node.wait_for_log_line( + f"Detected server's API version: {string_version}", look_behind_lines=1000 + ) + + try: + node_zk = get_connection_zk(node.name) + assert node_zk.get("/keeper/api_version")[0] == str(version_number).encode() + finally: + if node_zk: + node_zk.stop() + node_zk.close() + + assert_version("WITH_CHECK_NOT_EXISTS", 3) + + for i, version in enumerate( + [ + "ZOOKEEPER_COMPATIBLE", + "WITH_FILTERED_LIST", + "WITH_MULTI_READ", + "WITH_CHECK_NOT_EXISTS", + ] + ): + restart_clickhouse(version) + assert_version(version, i) + + with pytest.raises(Exception): + restart_clickhouse("INVALID_VERSION", expect_fail=True) From b8b70d78e15948dd278d298d42bc1e4ef2d4fc0d Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 9 Jun 2023 13:34:27 +0000 Subject: [PATCH 0268/1997] Build fix --- contrib/azure-cmake/CMakeLists.txt | 2 +- programs/keeper/CMakeLists.txt | 1 + src/Coordination/KeeperContext.cpp | 7 +++++-- 
src/Coordination/KeeperContext.h | 2 ++ 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/contrib/azure-cmake/CMakeLists.txt b/contrib/azure-cmake/CMakeLists.txt index 9c361db47ca..23e38e6b63d 100644 --- a/contrib/azure-cmake/CMakeLists.txt +++ b/contrib/azure-cmake/CMakeLists.txt @@ -1,6 +1,6 @@ option (ENABLE_AZURE_BLOB_STORAGE "Enable Azure blob storage" ${ENABLE_LIBRARIES}) -if (NOT ENABLE_AZURE_BLOB_STORAGE OR BUILD_STANDALONE_KEEPER OR OS_FREEBSD OR ARCH_PPC64LE) +if (NOT ENABLE_AZURE_BLOB_STORAGE OR OS_FREEBSD OR ARCH_PPC64LE) message(STATUS "Not using Azure blob storage") return() endif() diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index 1f1138f49eb..4f74cc06801 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -43,6 +43,7 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperDispatcher.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperLogStore.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperServer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperContext.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperSnapshotManager.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperSnapshotManagerS3.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperStateMachine.cpp diff --git a/src/Coordination/KeeperContext.cpp b/src/Coordination/KeeperContext.cpp index d420242e670..a750f2e1860 100644 --- a/src/Coordination/KeeperContext.cpp +++ b/src/Coordination/KeeperContext.cpp @@ -20,11 +20,14 @@ IMPLEMENT_SETTING_ENUM(KeeperApiVersion, ErrorCodes::BAD_ARGUMENTS, {"WITH_MULTI_READ", KeeperApiVersion::WITH_MULTI_READ}, {"WITH_CHECK_NOT_EXISTS", KeeperApiVersion::WITH_CHECK_NOT_EXISTS}}); -void KeeperContext::initialize(const Poco::Util::AbstractConfiguration & config) +KeeperContext::KeeperContext() { for (const auto & [path, data] : child_system_paths_with_data) system_nodes_with_data[std::string{path}] = data; - +} + +void KeeperContext::initialize(const Poco::Util::AbstractConfiguration & config) +{ if (config.has("keeper_server.api_version")) { auto version_string = config.getString("keeper_server.api_version"); diff --git a/src/Coordination/KeeperContext.h b/src/Coordination/KeeperContext.h index f26009c9af0..de502b6c566 100644 --- a/src/Coordination/KeeperContext.h +++ b/src/Coordination/KeeperContext.h @@ -7,6 +7,8 @@ namespace DB struct KeeperContext { + KeeperContext(); + void initialize(const Poco::Util::AbstractConfiguration & config); enum class Phase : uint8_t From f8791a0ea393120dbfba8eec8627edbc8d00deb8 Mon Sep 17 00:00:00 2001 From: Jordi Villar Date: Fri, 9 Jun 2023 15:36:48 +0200 Subject: [PATCH 0269/1997] SummingMergeTree support for DateTime64 --- src/DataTypes/DataTypeDateTime64.h | 2 ++ .../02785_summing_merge_tree_datetime64.reference | 1 + .../02785_summing_merge_tree_datetime64.sql | 12 ++++++++++++ 3 files changed, 15 insertions(+) create mode 100644 tests/queries/0_stateless/02785_summing_merge_tree_datetime64.reference create mode 100644 tests/queries/0_stateless/02785_summing_merge_tree_datetime64.sql diff --git a/src/DataTypes/DataTypeDateTime64.h b/src/DataTypes/DataTypeDateTime64.h index aaa99485040..64cedd798d1 100644 --- a/src/DataTypes/DataTypeDateTime64.h +++ b/src/DataTypes/DataTypeDateTime64.h @@ -37,6 +37,8 @@ public: bool canBeUsedAsVersion() const override { return true; } + bool isSummable() const override { return false; } + protected: SerializationPtr doGetDefaultSerialization() 
const override; }; diff --git a/tests/queries/0_stateless/02785_summing_merge_tree_datetime64.reference b/tests/queries/0_stateless/02785_summing_merge_tree_datetime64.reference new file mode 100644 index 00000000000..d395c4d6a0f --- /dev/null +++ b/tests/queries/0_stateless/02785_summing_merge_tree_datetime64.reference @@ -0,0 +1 @@ +1 2023-05-01 23:55:55.100 15 diff --git a/tests/queries/0_stateless/02785_summing_merge_tree_datetime64.sql b/tests/queries/0_stateless/02785_summing_merge_tree_datetime64.sql new file mode 100644 index 00000000000..1ed930ebbc7 --- /dev/null +++ b/tests/queries/0_stateless/02785_summing_merge_tree_datetime64.sql @@ -0,0 +1,12 @@ +DROP TABLE IF EXISTS summing_merge_tree_datetime64; + +CREATE TABLE summing_merge_tree_datetime64 ( `pk` UInt64, `timestamp` DateTime64(3), `value` UInt64 ) +ENGINE = SummingMergeTree() ORDER BY pk; + +INSERT INTO summing_merge_tree_datetime64 SELECT 1 pk, '2023-05-01 23:55:55.100' timestamp, 1 value; +INSERT INTO summing_merge_tree_datetime64 SELECT 1 pk, '2023-05-01 23:55:55.100' timestamp, 2 value; +INSERT INTO summing_merge_tree_datetime64 SELECT 1 pk, '2023-05-01 23:55:55.100' timestamp, 3 value; +INSERT INTO summing_merge_tree_datetime64 SELECT 1 pk, '2023-05-01 23:55:55.100' timestamp, 4 value; +INSERT INTO summing_merge_tree_datetime64 SELECT 1 pk, '2023-05-01 23:55:55.100' timestamp, 5 value; + +SELECT * FROM summing_merge_tree_datetime64 FINAL; From 1bce32c1cc1d9e2b0aeea93c01947646e18c52b3 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Fri, 9 Jun 2023 15:18:46 +0000 Subject: [PATCH 0270/1997] Add tests for wrong settings --- .../configs/config_invalid_chars.xml | 12 +++++++ .../configs/config_no_encryption_codecs.xml | 4 +++ .../configs/config_subnodes.xml | 10 ++++++ .../configs/config_wrong_method.xml | 12 +++++++ .../test_wrong_settings.py | 34 +++++++++++++++++++ 5 files changed, 72 insertions(+) create mode 100644 tests/integration/test_config_decryption/configs/config_invalid_chars.xml create mode 100644 tests/integration/test_config_decryption/configs/config_no_encryption_codecs.xml create mode 100644 tests/integration/test_config_decryption/configs/config_subnodes.xml create mode 100644 tests/integration/test_config_decryption/configs/config_wrong_method.xml create mode 100644 tests/integration/test_config_decryption/test_wrong_settings.py diff --git a/tests/integration/test_config_decryption/configs/config_invalid_chars.xml b/tests/integration/test_config_decryption/configs/config_invalid_chars.xml new file mode 100644 index 00000000000..49bf51b5bad --- /dev/null +++ b/tests/integration/test_config_decryption/configs/config_invalid_chars.xml @@ -0,0 +1,12 @@ + + + + 00112233445566778899aabbccddeeff + + + 00112233445566778899aabbccddeeff00112233445566778899aabbccddeeff + + + --96260000000B0000000000E8FE3C087CED2205A5071078B29FD5C3B97F824911DED3217E980C + 97260000000B0000000000BFFF70C4DA718754C1DA0E2F25FF9246D4783F7FFEC4089EC1CC14 + diff --git a/tests/integration/test_config_decryption/configs/config_no_encryption_codecs.xml b/tests/integration/test_config_decryption/configs/config_no_encryption_codecs.xml new file mode 100644 index 00000000000..07bf69d17c8 --- /dev/null +++ b/tests/integration/test_config_decryption/configs/config_no_encryption_codecs.xml @@ -0,0 +1,4 @@ + + 96260000000B0000000000E8FE3C087CED2205A5071078B29FD5C3B97F824911DED3217E980C + 97260000000B0000000000BFFF70C4DA718754C1DA0E2F25FF9246D4783F7FFEC4089EC1CC14 + diff --git a/tests/integration/test_config_decryption/configs/config_subnodes.xml 
b/tests/integration/test_config_decryption/configs/config_subnodes.xml new file mode 100644 index 00000000000..b0e519ff546 --- /dev/null +++ b/tests/integration/test_config_decryption/configs/config_subnodes.xml @@ -0,0 +1,10 @@ + + + + 00112233445566778899aabbccddeeff + + + + 96260000000B0000000000E8FE3C087CED2205A5071078B29FD5C3B97F824911DED3217E980C + + diff --git a/tests/integration/test_config_decryption/configs/config_wrong_method.xml b/tests/integration/test_config_decryption/configs/config_wrong_method.xml new file mode 100644 index 00000000000..b452ce6374c --- /dev/null +++ b/tests/integration/test_config_decryption/configs/config_wrong_method.xml @@ -0,0 +1,12 @@ + + + + 00112233445566778899aabbccddeeff + + + 00112233445566778899aabbccddeeff00112233445566778899aabbccddeeff + + + 96260000000B0000000000E8FE3C087CED2205A5071078B29FD5C3B97F824911DED3217E980C + 97260000000B0000000000BFFF70C4DA718754C1DA0E2F25FF9246D4783F7FFEC4089EC1CC14 + diff --git a/tests/integration/test_config_decryption/test_wrong_settings.py b/tests/integration/test_config_decryption/test_wrong_settings.py new file mode 100644 index 00000000000..c01f5050b00 --- /dev/null +++ b/tests/integration/test_config_decryption/test_wrong_settings.py @@ -0,0 +1,34 @@ +import pytest +from helpers.cluster import ClickHouseCluster + + +def start_clickhouse(config, err_msg): + cluster = ClickHouseCluster(__file__) + node = cluster.add_instance("node", main_configs=[config]) + caught_exception = "" + try: + cluster.start() + except Exception as e: + caught_exception = str(e) + assert caught_exception.find(err_msg) != -1 + + +def test_wrong_method(): + start_clickhouse("configs/config_wrong_method.xml", "Wrong encryption Method") + + +def test_invalid_chars(): + start_clickhouse( + "configs/config_invalid_chars.xml", + "Cannot read encrypted text, check for valid characters", + ) + + +def test_no_encryption_codecs(): + start_clickhouse( + "configs/config_no_encryption_codecs.xml", "There is no key 0 in config" + ) + + +def test_subnodes(): + start_clickhouse("configs/config_subnodes.xml", "should have only one text node") From 9a4043a4b4c97bcfb7eb345e0753b27228c2f4f7 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 9 Jun 2023 17:51:59 +0000 Subject: [PATCH 0271/1997] Fixing more tests. 
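CollectSets no longer plans IN-subqueries eagerly: it only stores the subquery as a query tree on SubqueryForSet, and addBuildSubqueriesForSetsStepIfNeeded builds the subquery plans later, scanning the result actions for ColumnSet columns so that subquery sets nothing references are dropped. StorageDistributed now builds the pipeline for external temporary tables directly from the query plan instead of going through interpreter.execute().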
--- src/Analyzer/Passes/QueryAnalysisPass.cpp | 1 - src/Interpreters/PreparedSets.h | 6 ++ src/Planner/CollectSets.cpp | 25 +++--- src/Planner/CollectSets.h | 2 +- src/Planner/Planner.cpp | 103 +++++++++++----------- src/Planner/Utils.cpp | 4 +- src/Storages/StorageDistributed.cpp | 11 ++- 7 files changed, 82 insertions(+), 70 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index b39aff86d32..bab64480901 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -2333,7 +2333,6 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveTableIdentifierFromDatabaseCatalog(con auto storage_lock = storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout); auto storage_snapshot = storage->getStorageSnapshot(storage->getInMemoryMetadataPtr(), context); - auto result = std::make_shared(std::move(storage), std::move(storage_lock), std::move(storage_snapshot)); if (is_temporary_table) result->setTemporaryTableName(table_name); diff --git a/src/Interpreters/PreparedSets.h b/src/Interpreters/PreparedSets.h index fa7f7069994..a119c24ad10 100644 --- a/src/Interpreters/PreparedSets.h +++ b/src/Interpreters/PreparedSets.h @@ -29,6 +29,9 @@ class Set; using SetPtr = std::shared_ptr; class InterpreterSelectWithUnionQuery; +class IQueryTreeNode; +using QueryTreeNodePtr = std::shared_ptr; + /// Represents a set in a query that might be referenced at analysis time and built later during execution. /// Also it can represent a constant set that is ready to use. /// At analysis stage the FutureSets are created but not necessarily filled. Then for non-constant sets there @@ -131,6 +134,7 @@ public: /// The source is obtained using the InterpreterSelectQuery subquery. 
std::unique_ptr source; + QueryTreeNodePtr query_tree; }; class FutureSetFromSubquery : public FutureSet, public std::enable_shared_from_this @@ -153,6 +157,8 @@ public: // void addStorage(StoragePtr storage) { subquery.table = std::move(storage); } + SubqueryForSet & getSubquery() { return subquery; } + private: //SetPtr set; SubqueryForSet subquery; diff --git a/src/Planner/CollectSets.cpp b/src/Planner/CollectSets.cpp index 5f44994c14b..b3e3f5f472a 100644 --- a/src/Planner/CollectSets.cpp +++ b/src/Planner/CollectSets.cpp @@ -28,9 +28,9 @@ namespace class CollectSetsVisitor : public ConstInDepthQueryTreeVisitor { public: - explicit CollectSetsVisitor(PlannerContext & planner_context_, const SelectQueryOptions & select_query_options_) + explicit CollectSetsVisitor(PlannerContext & planner_context_) //, const SelectQueryOptions & select_query_options_) : planner_context(planner_context_) - , select_query_options(select_query_options_) + //, select_query_options(select_query_options_) {} void visitImpl(const QueryTreeNodePtr & node) @@ -95,12 +95,12 @@ public: if (sets.getFuture(set_key)) return; - auto subquery_options = select_query_options.subquery(); - Planner subquery_planner( - in_second_argument, - subquery_options, - planner_context.getGlobalPlannerContext()); - subquery_planner.buildQueryPlanIfNeeded(); + // auto subquery_options = select_query_options.subquery(); + // Planner subquery_planner( + // in_second_argument, + // subquery_options, + // planner_context.getGlobalPlannerContext()); + // subquery_planner.buildQueryPlanIfNeeded(); // const auto & settings = planner_context.getQueryContext()->getSettingsRef(); // SizeLimits size_limits_for_set = {settings.max_rows_in_set, settings.max_bytes_in_set, settings.set_overflow_mode}; @@ -109,7 +109,8 @@ public: SubqueryForSet subquery_for_set; subquery_for_set.key = planner_context.createSetKey(in_second_argument); - subquery_for_set.source = std::make_unique(std::move(subquery_planner).extractQueryPlan()); + subquery_for_set.query_tree = in_second_argument; + //subquery_for_set.source = std::make_unique(std::move(subquery_planner).extractQueryPlan()); /// TODO sets.addFromSubquery(set_key, std::move(subquery_for_set), settings, nullptr); @@ -132,14 +133,14 @@ public: private: PlannerContext & planner_context; - const SelectQueryOptions & select_query_options; + //const SelectQueryOptions & select_query_options; }; } -void collectSets(const QueryTreeNodePtr & node, PlannerContext & planner_context, const SelectQueryOptions & select_query_options) +void collectSets(const QueryTreeNodePtr & node, PlannerContext & planner_context) //, const SelectQueryOptions & select_query_options) { - CollectSetsVisitor visitor(planner_context, select_query_options); + CollectSetsVisitor visitor(planner_context); //, select_query_options); visitor.visit(node); } diff --git a/src/Planner/CollectSets.h b/src/Planner/CollectSets.h index 57e662a392e..e0db802d3b4 100644 --- a/src/Planner/CollectSets.h +++ b/src/Planner/CollectSets.h @@ -12,6 +12,6 @@ struct SelectQueryOptions; /** Collect prepared sets and sets for subqueries that are necessary to execute IN function and its variations. * Collected sets are registered in planner context. 
*/ -void collectSets(const QueryTreeNodePtr & node, PlannerContext & planner_context, const SelectQueryOptions & select_query_options); +void collectSets(const QueryTreeNodePtr & node, PlannerContext & planner_context); //, const SelectQueryOptions & select_query_options); } diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index 3184e229c15..30510d05840 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include @@ -893,50 +894,62 @@ void addOffsetStep(QueryPlan & query_plan, const QueryAnalysisResult & query_ana query_plan.addStep(std::move(offsets_step)); } -// void addBuildSubqueriesForSetsStepIfNeeded(QueryPlan & query_plan, -// const SelectQueryOptions & select_query_options, -// const PlannerContextPtr & planner_context, -// const std::vector & result_actions_to_execute) -// { -// PreparedSets::SubqueriesForSets subqueries_for_sets; +void addBuildSubqueriesForSetsStepIfNeeded( + QueryPlan & query_plan, + const SelectQueryOptions & select_query_options, + const PlannerContextPtr & planner_context, + const std::vector & result_actions_to_execute) +{ + auto subqueries = planner_context->getPreparedSets().detachSubqueries(); + std::unordered_set useful_sets; -// for (const auto & actions_to_execute : result_actions_to_execute) -// { -// for (const auto & node : actions_to_execute->getNodes()) -// { -// const auto & set_key = node.result_name; -// auto * planner_set = planner_context->getSetOrNull(set_key); -// if (!planner_set) -// continue; + PreparedSets::SubqueriesForSets subqueries_for_sets; -// if (planner_set->getSet().isCreated() || !planner_set->getSubqueryNode()) -// continue; + for (const auto & actions_to_execute : result_actions_to_execute) + { + for (const auto & node : actions_to_execute->getNodes()) + { + if (node.column) + { + const IColumn * column = node.column.get(); + if (const auto * column_const = typeid_cast(column)) + column = &column_const->getDataColumn(); -// auto subquery_options = select_query_options.subquery(); -// Planner subquery_planner( -// planner_set->getSubqueryNode(), -// subquery_options, -// planner_context->getGlobalPlannerContext()); -// subquery_planner.buildQueryPlanIfNeeded(); + if (const auto * column_set = typeid_cast(column)) + useful_sets.insert(column_set->getData().get()); + } + } + } -// const auto & settings = planner_context->getQueryContext()->getSettingsRef(); -// SizeLimits size_limits_for_set = {settings.max_rows_in_set, settings.max_bytes_in_set, settings.set_overflow_mode}; -// bool tranform_null_in = settings.transform_null_in; -// auto set = std::make_shared(size_limits_for_set, false /*fill_set_elements*/, tranform_null_in); + auto predicate = [&useful_sets](const auto & set) { return !useful_sets.contains(set.set.get()); }; + auto it = std::remove_if(subqueries.begin(), subqueries.end(), std::move(predicate)); + subqueries.erase(it, subqueries.end()); -// SubqueryForSet subquery_for_set; -// subquery_for_set.key = set_key; -// subquery_for_set.set_in_progress = set; -// subquery_for_set.set = planner_set->getSet(); -// subquery_for_set.promise_to_fill_set = planner_set->extractPromiseToBuildSet(); -// subquery_for_set.source = std::make_unique(std::move(subquery_planner).extractQueryPlan()); + for (auto & subquery : subqueries) + { + auto & subquery_for_set = subquery.set->getSubquery(); + auto subquery_options = select_query_options.subquery(); + Planner subquery_planner( + subquery_for_set.query_tree, + subquery_options, + 
planner_context->getGlobalPlannerContext()); + subquery_planner.buildQueryPlanIfNeeded(); -// subqueries_for_sets.emplace(set_key, std::move(subquery_for_set)); -// } -// } + subquery_for_set.source = std::make_unique(std::move(subquery_planner).extractQueryPlan()); + } -// addCreatingSetsStep(query_plan, std::move(subqueries_for_sets), planner_context->getQueryContext()); -// } + //addCreatingSetsStep(query_plan, std::move(subqueries_for_sets), planner_context->getQueryContext()); + + if (!subqueries.empty()) + { + auto step = std::make_unique( + query_plan.getCurrentDataStream(), + std::move(subqueries), + planner_context->getQueryContext()); + + query_plan.addStep(std::move(step)); + } +} /// Support for `additional_result_filter` setting void addAdditionalFilterStepIfNeeded(QueryPlan & query_plan, @@ -1197,7 +1210,7 @@ void Planner::buildPlanForQueryNode() } checkStoragesSupportTransactions(planner_context); - collectSets(query_tree, *planner_context, select_query_options); + collectSets(query_tree, *planner_context); //, select_query_options); collectTableExpressionData(query_tree, planner_context); const auto & settings = query_context->getSettingsRef(); @@ -1497,20 +1510,8 @@ void Planner::buildPlanForQueryNode() if (!select_query_options.only_analyze) { - auto subqueries = planner_context->getPreparedSets().detachSubqueries(); - - if (!subqueries.empty()) - { - auto step = std::make_unique( - query_plan.getCurrentDataStream(), - std::move(subqueries), - planner_context->getQueryContext()); - - query_plan.addStep(std::move(step)); - } - //addCreatingSetsStep(query_plan, planner_context->getPreparedSets().detachSubqueries(planner_context->getQueryContext()), planner_context->getQueryContext()); - //addBuildSubqueriesForSetsStepIfNeeded(query_plan, select_query_options, planner_context, result_actions_to_execute); + addBuildSubqueriesForSetsStepIfNeeded(query_plan, select_query_options, planner_context, result_actions_to_execute); } } diff --git a/src/Planner/Utils.cpp b/src/Planner/Utils.cpp index 2b4febf58ea..372bb15822a 100644 --- a/src/Planner/Utils.cpp +++ b/src/Planner/Utils.cpp @@ -426,7 +426,7 @@ SelectQueryInfo buildSelectQueryInfo(const QueryTreeNodePtr & query_tree, const FilterDAGInfo buildFilterInfo(ASTPtr filter_expression, const QueryTreeNodePtr & table_expression, PlannerContextPtr & planner_context, - const SelectQueryOptions & select_query_options, + [[maybe_unused]] const SelectQueryOptions & select_query_options, NameSet table_expression_required_names_without_filter) { const auto & query_context = planner_context->getQueryContext(); @@ -444,7 +444,7 @@ FilterDAGInfo buildFilterInfo(ASTPtr filter_expression, } collectSourceColumns(filter_query_tree, planner_context); - collectSets(filter_query_tree, *planner_context, select_query_options); + collectSets(filter_query_tree, *planner_context); //, select_query_options); auto filter_actions_dag = std::make_shared(); diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index b0fb07d69ce..64a621e5710 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -898,9 +899,13 @@ QueryTreeNodePtr executeSubqueryNode(const QueryTreeNodePtr & subquery_node, temporary_table_expression_node->setTemporaryTableName(temporary_table_name); auto table_out = external_storage->write({}, external_storage->getInMemoryMetadataPtr(), mutable_context); - auto io = interpreter.execute(); - 
io.pipeline.complete(std::move(table_out)); - CompletedPipelineExecutor executor(io.pipeline); + + auto optimization_settings = QueryPlanOptimizationSettings::fromContext(mutable_context); + auto build_pipeline_settings = BuildQueryPipelineSettings::fromContext(mutable_context); + auto pipeline = QueryPipelineBuilder::getPipeline(std::move(*query_plan.buildQueryPipeline(optimization_settings, build_pipeline_settings))); + + pipeline.complete(std::move(table_out)); + CompletedPipelineExecutor executor(pipeline); executor.execute(); mutable_context->addExternalTable(temporary_table_name, std::move(external_storage_holder)); From e094bf3247b82c94098567a4f8f172e57fcdc017 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 9 Jun 2023 18:30:47 +0000 Subject: [PATCH 0272/1997] Resolving conflicts. --- src/Planner/CollectSets.cpp | 33 ++++++++++- .../QueryPlan/ReadFromMergeTree.cpp | 58 ++++++++++++++----- 2 files changed, 76 insertions(+), 15 deletions(-) diff --git a/src/Planner/CollectSets.cpp b/src/Planner/CollectSets.cpp index be4cb0e2e2b..68ad1ab78d3 100644 --- a/src/Planner/CollectSets.cpp +++ b/src/Planner/CollectSets.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -96,6 +97,36 @@ public: if (sets.getFuture(set_key)) return; + auto subquery_to_execute = in_second_argument; + + if (auto * table_node = in_second_argument->as()) + { + auto storage_snapshot = table_node->getStorageSnapshot(); + auto columns_to_select = storage_snapshot->getColumns(GetColumnsOptions(GetColumnsOptions::Ordinary)); + + size_t columns_to_select_size = columns_to_select.size(); + + auto column_nodes_to_select = std::make_shared(); + column_nodes_to_select->getNodes().reserve(columns_to_select_size); + + NamesAndTypes projection_columns; + projection_columns.reserve(columns_to_select_size); + + for (auto & column : columns_to_select) + { + column_nodes_to_select->getNodes().emplace_back(std::make_shared(column, subquery_to_execute)); + projection_columns.emplace_back(column.name, column.type); + } + + auto subquery_for_table = std::make_shared(Context::createCopy(planner_context.getQueryContext())); + subquery_for_table->setIsSubquery(true); + subquery_for_table->getProjectionNode() = std::move(column_nodes_to_select); + subquery_for_table->getJoinTree() = std::move(subquery_to_execute); + subquery_for_table->resolveProjectionColumns(std::move(projection_columns)); + + subquery_to_execute = std::move(subquery_for_table); + } + // auto subquery_options = select_query_options.subquery(); // Planner subquery_planner( // in_second_argument, @@ -110,7 +141,7 @@ public: SubqueryForSet subquery_for_set; subquery_for_set.key = planner_context.createSetKey(in_second_argument); - subquery_for_set.query_tree = in_second_argument; + subquery_for_set.query_tree = std::move(subquery_to_execute); //subquery_for_set.source = std::make_unique(std::move(subquery_planner).extractQueryPlan()); /// TODO diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 8483df797ef..8f610eb4380 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -37,6 +37,8 @@ #include #include #include +#include +#include #include #include @@ -100,6 +102,7 @@ namespace ErrorCodes extern const int INDEX_NOT_USED; extern const int LOGICAL_ERROR; extern const int TOO_MANY_ROWS; + extern const int CANNOT_PARSE_TEXT; } static MergeTreeReaderSettings getMergeTreeReaderSettings( @@ -1245,29 +1248,56 @@ 
static void buildIndexes( info = &*info_copy; } + std::unordered_set ignored_index_names; + + if (settings.ignore_data_skipping_indices.changed) + { + const auto & indices = settings.ignore_data_skipping_indices.toString(); + Tokens tokens(indices.data(), indices.data() + indices.size(), settings.max_query_size); + IParser::Pos pos(tokens, static_cast(settings.max_parser_depth)); + Expected expected; + + /// Use an unordered list rather than string vector + auto parse_single_id_or_literal = [&] + { + String str; + if (!parseIdentifierOrStringLiteral(pos, expected, str)) + return false; + + ignored_index_names.insert(std::move(str)); + return true; + }; + + if (!ParserList::parseUtil(pos, expected, parse_single_id_or_literal, false)) + throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Cannot parse ignore_data_skipping_indices ('{}')", indices); + } + UsefulSkipIndexes skip_indexes; using Key = std::pair; std::map merged; for (const auto & index : metadata_snapshot->getSecondaryIndices()) { - auto index_helper = MergeTreeIndexFactory::instance().get(index); - if (index_helper->isMergeable()) + if (!ignored_index_names.contains(index.name)) { - auto [it, inserted] = merged.emplace(Key{index_helper->index.type, index_helper->getGranularity()}, skip_indexes.merged_indices.size()); - if (inserted) + auto index_helper = MergeTreeIndexFactory::instance().get(index); + if (index_helper->isMergeable()) { - skip_indexes.merged_indices.emplace_back(); - skip_indexes.merged_indices.back().condition = index_helper->createIndexMergedCondition(*info, metadata_snapshot); - } + auto [it, inserted] = merged.emplace(Key{index_helper->index.type, index_helper->getGranularity()}, skip_indexes.merged_indices.size()); + if (inserted) + { + skip_indexes.merged_indices.emplace_back(); + skip_indexes.merged_indices.back().condition = index_helper->createIndexMergedCondition(*info, metadata_snapshot); + } - skip_indexes.merged_indices[it->second].addIndex(index_helper); - } - else - { - auto condition = index_helper->createIndexCondition(*info, context); - if (!condition->alwaysUnknownOrTrue()) - skip_indexes.useful_indices.emplace_back(index_helper, condition); + skip_indexes.merged_indices[it->second].addIndex(index_helper); + } + else + { + auto condition = index_helper->createIndexCondition(*info, context); + if (!condition->alwaysUnknownOrTrue()) + skip_indexes.useful_indices.emplace_back(index_helper, condition); + } } } From f0d4ce4770a00a8c0cd9857a485fc8bbc124a95b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 9 Jun 2023 22:05:21 +0300 Subject: [PATCH 0273/1997] Update 02785_summing_merge_tree_datetime64.sql --- .../queries/0_stateless/02785_summing_merge_tree_datetime64.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02785_summing_merge_tree_datetime64.sql b/tests/queries/0_stateless/02785_summing_merge_tree_datetime64.sql index 1ed930ebbc7..db00f189330 100644 --- a/tests/queries/0_stateless/02785_summing_merge_tree_datetime64.sql +++ b/tests/queries/0_stateless/02785_summing_merge_tree_datetime64.sql @@ -10,3 +10,4 @@ INSERT INTO summing_merge_tree_datetime64 SELECT 1 pk, '2023-05-01 23:55:55.100' INSERT INTO summing_merge_tree_datetime64 SELECT 1 pk, '2023-05-01 23:55:55.100' timestamp, 5 value; SELECT * FROM summing_merge_tree_datetime64 FINAL; +DROP TABLE summing_merge_tree_datetime64; From c538506f2e3ba0716dcc2f13f63bb4edc1f6f33e Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 9 Jun 2023 20:50:17 +0000 Subject: [PATCH 0274/1997] More fixes --- 
docs/en/sql-reference/statements/create/table.md | 4 ++++ src/Compression/CompressionCodecDeflateQpl.h | 3 +-- src/Compression/CompressionFactoryAdditions.cpp | 2 +- src/Compression/ICompressionCodec.h | 4 ++-- src/Storages/Distributed/DistributedSink.cpp | 2 +- 5 files changed, 9 insertions(+), 6 deletions(-) diff --git a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md index 496ecdbda7b..1a72f89fb1f 100644 --- a/docs/en/sql-reference/statements/create/table.md +++ b/docs/en/sql-reference/statements/create/table.md @@ -385,6 +385,10 @@ High compression levels are useful for asymmetric scenarios, like compress once, - DEFLATE_QPL works best if the system has an Intel® IAA (In-Memory Analytics Accelerator) offloading device. Refer to [Accelerator Configuration](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#accelerator-configuration) and [Benchmark with DEFLATE_QPL](/docs/en/development/building_and_benchmarking_deflate_qpl.md/#Run-Benchmark-with-DEFLATE_QPL) for more details. - DEFLATE_QPL-compressed data can only be transferred between ClickHouse nodes compiled with SSE 4.2 enabled. +:::note +DEFLATE_QPL is not available in ClickHouse Cloud. +::: + ### Specialized Codecs These codecs are designed to make compression more effective by using specific features of data. Some of these codecs do not compress data themselves. Instead, they prepare the data for a common purpose codec, which compresses it better than without this preparation. diff --git a/src/Compression/CompressionCodecDeflateQpl.h b/src/Compression/CompressionCodecDeflateQpl.h index 13aa8733b54..8d73568707e 100644 --- a/src/Compression/CompressionCodecDeflateQpl.h +++ b/src/Compression/CompressionCodecDeflateQpl.h @@ -98,8 +98,7 @@ public: protected: bool isCompression() const override { return true; } bool isGenericCompression() const override { return true; } - bool isExperimental() const override { return false; } - bool isDeflateQplCompression() const override { return true; } + bool isDeflateQpl() const override { return true; } UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; diff --git a/src/Compression/CompressionFactoryAdditions.cpp b/src/Compression/CompressionFactoryAdditions.cpp index 46f7e2653c2..98e9e7480da 100644 --- a/src/Compression/CompressionFactoryAdditions.cpp +++ b/src/Compression/CompressionFactoryAdditions.cpp @@ -159,7 +159,7 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST( " You can enable it with the 'allow_experimental_codecs' setting.", codec_family_name); - if (!enable_deflate_qpl_codec && result_codec->isDeflateQplCompression()) + if (!enable_deflate_qpl_codec && result_codec->isDeflateQpl()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Codec {} is disabled by default." " You can enable it with the 'enable_deflate_qpl_codec' setting.", codec_family_name); diff --git a/src/Compression/ICompressionCodec.h b/src/Compression/ICompressionCodec.h index f7e8f4e43d2..6630838fa64 100644 --- a/src/Compression/ICompressionCodec.h +++ b/src/Compression/ICompressionCodec.h @@ -109,8 +109,8 @@ public: /// It will not be allowed to use unless the user will turn off the safety switch. virtual bool isExperimental() const { return false; } - /// This is a knob for Deflate QPL codec.
- virtual bool isDeflateQplCompression() const { return false; } + /// Is this the DEFLATE_QPL codec? + virtual bool isDeflateQpl() const { return false; } /// If it does nothing. virtual bool isNone() const { return false; } diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index 1e1c911920e..875764f7633 100644 --- a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -733,7 +733,7 @@ void DistributedSink::writeToShard(const Cluster::ShardInfo & shard_info, const if (compression_method == "ZSTD") compression_level = settings.network_zstd_compression_level; - CompressionCodecFactory::instance().validateCodec(compression_method, compression_level, !settings.allow_suspicious_codecs, settings.allow_experimental_codecs, settings.enale_deflate_qpl_codec); + CompressionCodecFactory::instance().validateCodec(compression_method, compression_level, !settings.allow_suspicious_codecs, settings.allow_experimental_codecs, settings.enable_deflate_qpl_codec); CompressionCodecPtr compression_codec = CompressionCodecFactory::instance().get(compression_method, compression_level); /// tmp directory is used to ensure atomicity of transactions From 96d7b2efc9c0d4f40b919c5036fcfbe7445d10a1 Mon Sep 17 00:00:00 2001 From: tpanetti Date: Fri, 9 Jun 2023 13:50:30 -0700 Subject: [PATCH 0275/1997] Disable fasttest for MySQL Compatibility Type Conversion and refactor style for DataTypeNumberBase --- src/DataTypes/DataTypeNumberBase.cpp | 22 ------------------- .../02775_show_columns_mysql_compatibility.sh | 1 + 2 files changed, 1 insertion(+), 22 deletions(-) diff --git a/src/DataTypes/DataTypeNumberBase.cpp b/src/DataTypes/DataTypeNumberBase.cpp index e4c0fb96483..4cefc4945c6 100644 --- a/src/DataTypes/DataTypeNumberBase.cpp +++ b/src/DataTypes/DataTypeNumberBase.cpp @@ -15,50 +15,28 @@ template <typename T> String DataTypeNumberBase<T>::getSQLCompatibleName() const { if constexpr (std::is_same_v<T, Int8>) - { return "TINYINT"; - } else if constexpr (std::is_same_v<T, Int16>) - { return "SMALLINT"; - } else if constexpr (std::is_same_v<T, Int32>) - { return "INTEGER"; - } else if constexpr (std::is_same_v<T, Int64>) - { return "BIGINT"; - } else if constexpr (std::is_same_v<T, UInt8>) - { return "TINYINT UNSIGNED"; - } else if constexpr (std::is_same_v<T, UInt16>) - { return "SMALLINT UNSIGNED"; - } else if constexpr (std::is_same_v<T, UInt32>) - { return "INTEGER UNSIGNED"; - } else if constexpr (std::is_same_v<T, UInt64>) - { return "BIGINT UNSIGNED"; - } else if constexpr (std::is_same_v<T, Float32>) - { return "FLOAT"; - } else if constexpr (std::is_same_v<T, Float64>) - { return "DOUBLE"; - } /// Unsupported types are converted to TEXT else - { return "TEXT"; - } } template <typename T> diff --git a/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.sh b/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.sh index 938102cb5fc..6a546c47a38 100755 --- a/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.sh +++ b/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From 4a7761c16210c7e2eccc0b26e172ec8dc7e6c183 Mon Sep 17 00:00:00 2001 From: flynn Date: Sat, 10 Jun 2023 08:26:32 +0000 Subject: [PATCH 0276/1997] Add column is_obsolete for system.settings table and related system tables --- src/Interpreters/Context.cpp | 6 ++++-- src/Storages/System/StorageSystemMergeTreeSettings.cpp | 2 ++ src/Storages/System/StorageSystemServerSettings.cpp | 2 ++
src/Storages/System/StorageSystemSettings.cpp | 2 ++ tests/queries/0_stateless/01945_show_debug_warning.expect | 2 +- tests/queries/0_stateless/01945_system_warnings.reference | 2 +- tests/queries/0_stateless/01945_system_warnings.sh | 4 ++-- 7 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 995e78d8f0b..a12117b7677 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -781,8 +781,10 @@ Strings Context::getWarnings() const { if (setting.isValueChanged() && setting.isObsolete()) { - common_warnings.emplace_back("Some obsolete setting is changed. " - "Check 'select * from system.settings where changed' and read the changelog."); + common_warnings.emplace_back( + "Obsolete setting `" + setting.getName() + + "` is changed. " + "Check 'select * from system.settings where changed' and read the changelog."); break; } } diff --git a/src/Storages/System/StorageSystemMergeTreeSettings.cpp b/src/Storages/System/StorageSystemMergeTreeSettings.cpp index 6de3fb800f4..0ddd4546208 100644 --- a/src/Storages/System/StorageSystemMergeTreeSettings.cpp +++ b/src/Storages/System/StorageSystemMergeTreeSettings.cpp @@ -21,6 +21,7 @@ NamesAndTypesList SystemMergeTreeSettings::getNamesAndTypes() {"max", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>())}, {"readonly", std::make_shared<DataTypeUInt8>()}, {"type", std::make_shared<DataTypeString>()}, + {"is_obsolete", std::make_shared<DataTypeUInt8>()}, }; } @@ -52,6 +53,7 @@ void SystemMergeTreeSettings::fillData(MutableColumns & res_columns, res_columns[5]->insert(max); res_columns[6]->insert(writability == SettingConstraintWritability::CONST); res_columns[7]->insert(setting.getTypeName()); + res_columns[8]->insert(setting.isObsolete()); } } diff --git a/src/Storages/System/StorageSystemServerSettings.cpp b/src/Storages/System/StorageSystemServerSettings.cpp index ad52c6896ac..290b575465c 100644 --- a/src/Storages/System/StorageSystemServerSettings.cpp +++ b/src/Storages/System/StorageSystemServerSettings.cpp @@ -15,6 +15,7 @@ NamesAndTypesList StorageSystemServerSettings::getNamesAndTypes() {"changed", std::make_shared<DataTypeUInt8>()}, {"description", std::make_shared<DataTypeString>()}, {"type", std::make_shared<DataTypeString>()}, + {"is_obsolete", std::make_shared<DataTypeUInt8>()}, }; } @@ -33,6 +34,7 @@ void StorageSystemServerSettings::fillData(MutableColumns & res_columns, Context res_columns[3]->insert(setting.isValueChanged()); res_columns[4]->insert(setting.getDescription()); res_columns[5]->insert(setting.getTypeName()); + res_columns[6]->insert(setting.isObsolete()); } } diff --git a/src/Storages/System/StorageSystemSettings.cpp b/src/Storages/System/StorageSystemSettings.cpp index c54f7eef25f..dcb54eac0a0 100644 --- a/src/Storages/System/StorageSystemSettings.cpp +++ b/src/Storages/System/StorageSystemSettings.cpp @@ -21,6 +21,7 @@ NamesAndTypesList StorageSystemSettings::getNamesAndTypes() {"type", std::make_shared<DataTypeString>()}, {"default", std::make_shared<DataTypeString>()}, {"alias_for", std::make_shared<DataTypeString>()}, + {"is_obsolete", std::make_shared<DataTypeUInt8>()}, }; } @@ -51,6 +52,7 @@ void StorageSystemSettings::fillData(MutableColumns & res_columns, ContextPtr co res_columns[6]->insert(writability == SettingConstraintWritability::CONST); res_columns[7]->insert(setting.getTypeName()); res_columns[8]->insert(setting.getDefaultValueString()); + res_columns[10]->insert(setting.isObsolete()); }; const auto & settings_to_aliases = Settings::Traits::settingsToAliases(); diff --git a/tests/queries/0_stateless/01945_show_debug_warning.expect b/tests/queries/0_stateless/01945_show_debug_warning.expect index
4e6dd3e1b0f..3d5b1ca99a5 100755 --- a/tests/queries/0_stateless/01945_show_debug_warning.expect +++ b/tests/queries/0_stateless/01945_show_debug_warning.expect @@ -55,7 +55,7 @@ expect eof spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --max_memory_usage_for_all_queries=123 --history_file=$history_file" expect "Warnings:" -expect " * Some obsolete setting is changed." +expect " * Obsolete setting `max_memory_usage_for_all_queries` is changed." expect ":) " send -- "q\r" expect eof diff --git a/tests/queries/0_stateless/01945_system_warnings.reference b/tests/queries/0_stateless/01945_system_warnings.reference index 296a03447db..d6ae567289c 100644 --- a/tests/queries/0_stateless/01945_system_warnings.reference +++ b/tests/queries/0_stateless/01945_system_warnings.reference @@ -1,5 +1,5 @@ Server was built in debug mode. It will work slowly. 0 -Some obsolete setting is changed. Check \'select * from system.settings where changed\' and read the changelog. +Obsolete setting `multiple_joins_rewriter_version` is changed. Check \'select * from system.settings where changed\' and read the changelog. 1 1 diff --git a/tests/queries/0_stateless/01945_system_warnings.sh b/tests/queries/0_stateless/01945_system_warnings.sh index bf11cee2911..112baab614e 100755 --- a/tests/queries/0_stateless/01945_system_warnings.sh +++ b/tests/queries/0_stateless/01945_system_warnings.sh @@ -14,8 +14,8 @@ else echo "Server was built in debug mode. It will work slowly." fi -${CLICKHOUSE_CLIENT} -q "SELECT count() FROM system.warnings WHERE message LIKE '%obsolete setting%'" -${CLICKHOUSE_CLIENT} --multiple_joins_rewriter_version=42 -q "SELECT message FROM system.warnings WHERE message LIKE '%obsolete setting%'" +${CLICKHOUSE_CLIENT} -q "SELECT count() FROM system.warnings WHERE message LIKE '%Obsolete setting%'" +${CLICKHOUSE_CLIENT} --multiple_joins_rewriter_version=42 -q "SELECT message FROM system.warnings WHERE message LIKE '%Obsolete setting%'" # Avoid duplicated warnings ${CLICKHOUSE_CLIENT} -q "SELECT count() = countDistinct(message) FROM system.warnings" From 7bd1c183ebe535ec3f8799e82d73f9b064c967c8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 10 Jun 2023 13:16:02 +0300 Subject: [PATCH 0277/1997] Update test.py --- tests/integration/test_attach_table_normalizer/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_attach_table_normalizer/test.py b/tests/integration/test_attach_table_normalizer/test.py index ddbb02bf4ef..ba0068e9c59 100644 --- a/tests/integration/test_attach_table_normalizer/test.py +++ b/tests/integration/test_attach_table_normalizer/test.py @@ -24,7 +24,7 @@ def replace_substring_to_substr(node): "-c", "sed -i 's/substring/substr/g' /var/lib/clickhouse/metadata/default/file.sql", ], - user="root", + user="root" ) From 820673a5cf3f3f1c17b781496b3ab56f72f72c08 Mon Sep 17 00:00:00 2001 From: flynn Date: Sat, 10 Jun 2023 10:16:53 +0000 Subject: [PATCH 0278/1997] update test --- .../queries/0_stateless/01221_system_settings.reference | 4 ++-- .../0_stateless/02117_show_create_table_system.reference | 9 ++++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/01221_system_settings.reference b/tests/queries/0_stateless/01221_system_settings.reference index 399b3778b66..e9c2f3fec32 100644 --- a/tests/queries/0_stateless/01221_system_settings.reference +++ b/tests/queries/0_stateless/01221_system_settings.reference @@ -1,4 +1,4 @@ 
-send_timeout 300 0 Timeout for sending data to network, in seconds. If client needs to sent some data, but it did not able to send any bytes in this interval, exception is thrown. If you set this setting on client, the \'receive_timeout\' for the socket will be also set on the corresponding connection end on the server. \N \N 0 Seconds 300 -storage_policy default 0 Name of storage disk policy \N \N 0 String +send_timeout 300 0 Timeout for sending data to network, in seconds. If client needs to sent some data, but it did not able to send any bytes in this interval, exception is thrown. If you set this setting on client, the \'receive_timeout\' for the socket will be also set on the corresponding connection end on the server. \N \N 0 Seconds 300 0 +storage_policy default 0 Name of storage disk policy \N \N 0 String 0 1 1 diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index e864ba85018..38d00c15725 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -346,7 +346,8 @@ CREATE TABLE system.merge_tree_settings `min` Nullable(String), `max` Nullable(String), `readonly` UInt8, - `type` String + `type` String, + `is_obsolete` UInt8 ) ENGINE = SystemMergeTreeSettings COMMENT 'SYSTEM TABLE is built on the fly.' @@ -918,7 +919,8 @@ CREATE TABLE system.replicated_merge_tree_settings `min` Nullable(String), `max` Nullable(String), `readonly` UInt8, - `type` String + `type` String, + `is_obsolete` UInt8 ) ENGINE = SystemReplicatedMergeTreeSettings COMMENT 'SYSTEM TABLE is built on the fly.' @@ -993,7 +995,8 @@ CREATE TABLE system.settings `readonly` UInt8, `type` String, `default` String, - `alias_for` String + `alias_for` String, + `is_obsolete` UInt8 ) ENGINE = SystemSettings COMMENT 'SYSTEM TABLE is built on the fly.' 
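Note on the preceding patch 0278 (appended after its diff, not part of the commit message): it adds an is_obsolete UInt8 column to system.settings, system.merge_tree_settings, system.replicated_merge_tree_settings and system.server_settings, and changes the startup warning to name the concrete obsolete setting instead of the generic "Some obsolete setting is changed." text. Below is a compilable miniature of the new warning logic; SettingEntry is an invented stand-in type, and only the message wording and the changed-and-obsolete condition are taken from the diff.

    #include <iostream>
    #include <string>
    #include <vector>

    /// Invented stand-in for one entry of the settings collection; the real code
    /// iterates the Settings object and asks isValueChanged()/isObsolete() per setting.
    struct SettingEntry
    {
        std::string name;
        bool changed = false;
        bool obsolete = false;
    };

    /// Mirrors the patched branch of Context::getWarnings(): the first setting that is
    /// both changed and obsolete produces one warning naming that setting.
    std::vector<std::string> collectObsoleteSettingWarnings(const std::vector<SettingEntry> & settings)
    {
        std::vector<std::string> warnings;
        for (const auto & s : settings)
        {
            if (s.changed && s.obsolete)
            {
                warnings.push_back("Obsolete setting `" + s.name + "` is changed. "
                                   "Check 'select * from system.settings where changed' and read the changelog.");
                break;  /// at most one such warning is emitted, as in the diff
            }
        }
        return warnings;
    }

    int main()
    {
        std::vector<SettingEntry> settings =
        {
            {"max_threads", true, false},                     /// changed but still supported
            {"multiple_joins_rewriter_version", true, true},  /// changed and obsolete
        };
        for (const auto & w : collectObsoleteSettingWarnings(settings))
            std::cout << w << '\n';
        return 0;
    }

With the new column the offending settings can also be listed directly, for example: SELECT name FROM system.settings WHERE changed AND is_obsolete.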
From ff96c4c0d8898c15e1aea876267c65ec8b0c69f0 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sat, 10 Jun 2023 12:09:47 +0000 Subject: [PATCH 0279/1997] Fix black --- tests/integration/test_non_default_compression/test.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_non_default_compression/test.py b/tests/integration/test_non_default_compression/test.py index e1a9c1ae540..18e2eb43813 100644 --- a/tests/integration/test_non_default_compression/test.py +++ b/tests/integration/test_non_default_compression/test.py @@ -50,6 +50,7 @@ node7 = cluster.add_instance( user_configs=["configs/allow_suspicious_codecs.xml"], ) + @pytest.fixture(scope="module") def start_cluster(): try: @@ -252,6 +253,7 @@ def test_uncompressed_cache_plus_zstd_codec(start_cluster): == "10000\n" ) + def test_preconfigured_deflateqpl_codec(start_cluster): node6.query( """ @@ -268,7 +270,7 @@ def test_preconfigured_deflateqpl_codec(start_cluster): ) assert ( node6.query( - "SELECT COUNT(*) FROM compression_codec_multiple_with_key WHERE id % 2 == 0" + "SELECT COUNT(*) FROM compression_codec_multiple_with_key WHERE id % 2 == 0" ) == "2\n" ) @@ -296,9 +298,7 @@ def test_preconfigured_deflateqpl_codec(start_cluster): == "1001\n" ) assert ( - node6.query( - "SELECT SUM(somecolumn) FROM compression_codec_multiple_with_key" - ) + node6.query("SELECT SUM(somecolumn) FROM compression_codec_multiple_with_key") == str(777.777 + 88.88 + 99.99 + 1.0 * 10000) + "\n" ) assert ( From cb8c20722b8976fe0bc402498667b02c2585cc02 Mon Sep 17 00:00:00 2001 From: tpanetti Date: Sat, 10 Jun 2023 08:35:51 -0700 Subject: [PATCH 0280/1997] Rename setting and description for MySQL compatible types This renames the setting for MySQL compatible types from output_format_mysql_types to use_mysql_types_in_show_columns --- src/Core/Settings.h | 2 +- src/Storages/System/StorageSystemColumns.cpp | 4 ++-- .../0_stateless/02775_show_columns_mysql_compatibility.sh | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index b72fc037fbb..d47015ebb39 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -196,7 +196,7 @@ class IColumn; M(Bool, allow_experimental_inverted_index, false, "If it is set to true, allow to use experimental inverted index.", 0) \ \ M(UInt64, mysql_max_rows_to_insert, 65536, "The maximum number of rows in MySQL batch insertion of the MySQL storage engine", 0) \ - M(Bool, output_format_mysql_types, false, "Use MySQL converted types when connected via MySQL compatibility", 0) \ + M(Bool, use_mysql_types_in_show_columns, false, "Use MySQL converted types when connected via MySQL compatibility for show columns query", 0) \ \ M(UInt64, optimize_min_equality_disjunction_chain_length, 3, "The minimum length of the expression `expr = x1 OR ... 
expr = xN` for optimization ", 0) \ \ diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index 684c35709a4..e4ca6a15138 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -75,7 +75,7 @@ public: , columns_mask(std::move(columns_mask_)), max_block_size(max_block_size_) , databases(std::move(databases_)), tables(std::move(tables_)), storages(std::move(storages_)) , client_info_interface(context->getClientInfo().interface) - , use_mysql_types(context->getSettingsRef().output_format_mysql_types) + , use_mysql_types(context->getSettingsRef().use_mysql_types_in_show_columns) , total_tables(tables->size()), access(context->getAccess()) , query_id(context->getCurrentQueryId()), lock_acquire_timeout(context->getSettingsRef().lock_acquire_timeout) { @@ -133,7 +133,7 @@ protected: auto get_type_name = [this](const IDataType& type) -> std::string { - // Check if the output_format_mysql_types setting is enabled and client is connected via MySQL protocol + // Check if the use_mysql_types_in_show_columns setting is enabled and client is connected via MySQL protocol if (use_mysql_types && client_info_interface == DB::ClientInfo::Interface::MYSQL) { return type.getSQLCompatibleName(); diff --git a/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.sh b/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.sh index 6a546c47a38..51c9da2a842 100755 --- a/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.sh +++ b/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.sh @@ -127,7 +127,7 @@ TEMP_FILE=$(mktemp) cat < $TEMP_FILE SHOW COLUMNS FROM tab; -SET output_format_mysql_types=1; +SET use_mysql_types_in_show_columns=1; SHOW COLUMNS FROM tab; SHOW EXTENDED COLUMNS FROM tab; SHOW FULL COLUMNS FROM tab; From ffb941624bc971886212e0745716e79688a154a1 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sat, 10 Jun 2023 17:01:44 +0000 Subject: [PATCH 0281/1997] Exclude some tests with QPL from fasttest --- .../0_stateless/00804_test_alter_compression_codecs.sql | 3 +++ .../0_stateless/00804_test_custom_compression_codecs.sql | 3 +++ .../00804_test_custom_compression_codes_log_storages.sql | 3 +++ .../0_stateless/00804_test_deflate_qpl_codec_compression.sql | 3 +++ 4 files changed, 12 insertions(+) diff --git a/tests/queries/0_stateless/00804_test_alter_compression_codecs.sql b/tests/queries/0_stateless/00804_test_alter_compression_codecs.sql index fd9855e82d3..eb1abda9a21 100644 --- a/tests/queries/0_stateless/00804_test_alter_compression_codecs.sql +++ b/tests/queries/0_stateless/00804_test_alter_compression_codecs.sql @@ -1,3 +1,6 @@ +--Tags: no-fasttest +-- no-fasttest because DEFLATE_QPL isn't available in fasttest + SET send_logs_level = 'fatal'; DROP TABLE IF EXISTS alter_compression_codec; diff --git a/tests/queries/0_stateless/00804_test_custom_compression_codecs.sql b/tests/queries/0_stateless/00804_test_custom_compression_codecs.sql index 89e77f758a7..df74620a201 100644 --- a/tests/queries/0_stateless/00804_test_custom_compression_codecs.sql +++ b/tests/queries/0_stateless/00804_test_custom_compression_codecs.sql @@ -1,3 +1,6 @@ +--Tags: no-fasttest +-- no-fasttest because DEFLATE_QPL isn't available in fasttest + SET send_logs_level = 'fatal'; SET allow_suspicious_codecs = 1; SET enable_deflate_qpl_codec = 1; diff --git a/tests/queries/0_stateless/00804_test_custom_compression_codes_log_storages.sql 
b/tests/queries/0_stateless/00804_test_custom_compression_codes_log_storages.sql index a629df2666d..67c0074c58f 100644 --- a/tests/queries/0_stateless/00804_test_custom_compression_codes_log_storages.sql +++ b/tests/queries/0_stateless/00804_test_custom_compression_codes_log_storages.sql @@ -1,3 +1,6 @@ +--Tags: no-fasttest +-- no-fasttest because DEFLATE_QPL isn't available in fasttest + SET send_logs_level = 'fatal'; SET allow_suspicious_codecs = 1; SET enable_deflate_qpl_codec = 1; diff --git a/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.sql b/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.sql index 5a56fc0d576..a46272112a9 100644 --- a/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.sql +++ b/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.sql @@ -1,3 +1,6 @@ +--Tags: no-fasttest +-- no-fasttest because DEFLATE_QPL isn't available in fasttest + SET send_logs_level = 'fatal'; SET enable_deflate_qpl_codec = 1; From 2c97a94892358f7e6b4a7aa4b25c4d46dd0f0fc0 Mon Sep 17 00:00:00 2001 From: zvonand Date: Sun, 11 Jun 2023 01:50:17 +0200 Subject: [PATCH 0282/1997] fix hdfs + style update --- src/Storages/HDFS/StorageHDFS.cpp | 115 ++++++++++++++++++++++++++++-- src/Storages/StorageFile.cpp | 20 +++--- 2 files changed, 120 insertions(+), 15 deletions(-) diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 19c0840149b..379ee395939 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -63,23 +63,124 @@ namespace ErrorCodes } namespace { + Strings LSWithRegexpMatching(const String & path_for_ls, + const HDFSFSPtr & fs, + const String & for_match, + std::unordered_map<String, time_t> * last_mod_times); + + /// When `{...}` has any `/`s, it must be processed in a different way + Strings LSWithFoldedRegexpMatching(const String & path_for_ls, + const HDFSFSPtr & fs, + std::unordered_map<String, time_t> * last_mod_times, + const String & processed_suffix, + const String & suffix_with_globs, + const String & current_glob, + re2::RE2 & matcher, + const size_t max_depth, + const size_t next_slash_after_glob_pos) + { + /// We don't need to go all the way in every directory if max_depth is reached + /// as it is upper limit of depth by simply counting `/`s in curly braces + if (!max_depth) + return {}; + + HDFSFileInfo ls; + ls.file_info = hdfsListDirectory(fs.get(), path_for_ls.data(), &ls.length); + if (ls.file_info == nullptr && errno != ENOENT) // NOLINT + { + // ignore file not found exception, keep throw other exception, libhdfs3 doesn't have function to get exception type, so use errno.
+ throw Exception( + ErrorCodes::ACCESS_DENIED, "Cannot list directory {}: {}", path_for_ls, String(hdfsGetLastError())); + } + + Strings result; + if (!ls.file_info && ls.length > 0) + throw Exception(ErrorCodes::LOGICAL_ERROR, "file_info shouldn't be null"); + + for (int i = 0; i < ls.length; ++i) + { + const String full_path = String(ls.file_info[i].mName); + const size_t last_slash = full_path.rfind('/'); + const String dir_or_file_name = full_path.substr(last_slash); + const bool is_directory = ls.file_info[i].mKind == 'D'; + + if (re2::RE2::FullMatch(processed_suffix + dir_or_file_name, matcher)) + { + if (next_slash_after_glob_pos == std::string::npos) + { + result.push_back(String(ls.file_info[i].mName)); + if (last_mod_times) + (*last_mod_times)[result.back()] = ls.file_info[i].mLastMod; + } + else + { + Strings result_part = LSWithRegexpMatching(fs::path(full_path) / "" , fs, + suffix_with_globs.substr(next_slash_after_glob_pos), last_mod_times); + std::move(result_part.begin(), result_part.end(), std::back_inserter(result)); + } + } + else if (is_directory) + { + Strings result_part = LSWithFoldedRegexpMatching(fs::path(full_path).append(processed_suffix), + fs, last_mod_times, processed_suffix + dir_or_file_name, suffix_with_globs, current_glob, matcher, + max_depth - 1, next_slash_after_glob_pos); + std::move(result_part.begin(), result_part.end(), std::back_inserter(result)); + } + } + return result; + } + /* Recursive directory listing with matched paths as a result. * Have the same method in StorageFile. */ - Strings LSWithRegexpMatching(const String & path_for_ls, const HDFSFSPtr & fs, const String & for_match, std::unordered_map<String, time_t> * last_mod_times) + Strings LSWithRegexpMatching(const String & path_for_ls, + const HDFSFSPtr & fs, + const String & for_match, + std::unordered_map<String, time_t> * last_mod_times) { - const size_t first_glob = for_match.find_first_of("*?{"); + const size_t first_glob_pos = for_match.find_first_of("*?{"); + const bool has_glob = first_glob_pos != std::string::npos; - const size_t end_of_path_without_globs = for_match.substr(0, first_glob).rfind('/'); + const size_t end_of_path_without_globs = for_match.substr(0, first_glob_pos).rfind('/'); const String suffix_with_globs = for_match.substr(end_of_path_without_globs); /// begin with '/' const String prefix_without_globs = path_for_ls + for_match.substr(1, end_of_path_without_globs); /// ends with '/' - const size_t next_slash = suffix_with_globs.find('/', 1); - re2::RE2 matcher(makeRegexpPatternFromGlobs(suffix_with_globs.substr(0, next_slash))); + size_t slashes_in_glob = 0; + const size_t next_slash_after_glob_pos = [&](){ + if (!has_glob) + return suffix_with_globs.find('/', 1); + + size_t in_curly = 0; + for (std::string::const_iterator it = ++suffix_with_globs.begin(); it != suffix_with_globs.end(); it++) { + if (*it == '{') + ++in_curly; + else if (*it == '/') + { + if (in_curly) + ++slashes_in_glob; + else + return size_t(std::distance(suffix_with_globs.begin(), it)); + } + else if (*it == '}') + --in_curly; + } + return std::string::npos; + }(); + + const std::string current_glob = suffix_with_globs.substr(0, next_slash_after_glob_pos); + + re2::RE2 matcher(makeRegexpPatternFromGlobs(current_glob)); if (!matcher.ok()) throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP, "Cannot compile regex from glob ({}): {}", for_match, matcher.error()); + if (slashes_in_glob) + { + return LSWithFoldedRegexpMatching(fs::path(prefix_without_globs), fs, last_mod_times, + "", suffix_with_globs, current_glob, matcher, +
slashes_in_glob, next_slash_after_glob_pos); + } + HDFSFileInfo ls; ls.file_info = hdfsListDirectory(fs.get(), prefix_without_globs.data(), &ls.length); if (ls.file_info == nullptr && errno != ENOENT) // NOLINT @@ -96,7 +197,7 @@ namespace const String full_path = String(ls.file_info[i].mName); const size_t last_slash = full_path.rfind('/'); const String file_name = full_path.substr(last_slash); - const bool looking_for_directory = next_slash != std::string::npos; + const bool looking_for_directory = next_slash_after_glob_pos != std::string::npos; const bool is_directory = ls.file_info[i].mKind == 'D'; /// Condition with type of current file_info means what kind of path is it in current iteration of ls if (!is_directory && !looking_for_directory) @@ -112,7 +213,7 @@ namespace { if (re2::RE2::FullMatch(file_name, matcher)) { - Strings result_part = LSWithRegexpMatching(fs::path(full_path) / "", fs, suffix_with_globs.substr(next_slash), last_mod_times); + Strings result_part = LSWithRegexpMatching(fs::path(full_path) / "", fs, suffix_with_globs.substr(next_slash_after_glob_pos), last_mod_times); /// Recursion depth is limited by pattern. '*' works only for depth = 1, for depth = 2 pattern path is '*/*'. So we do not need additional check. std::move(result_part.begin(), result_part.end(), std::back_inserter(result)); } diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 53da509d383..74303b16ee9 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -99,10 +99,14 @@ void listFilesWithRegexpMatchingImpl( bool recursive = false); /// When `{...}` has any `/`s, it must be processed in a different way -void listFilesWithFoldedRegexpMatchingImpl(const std::string & start_dir, const std::string & processed_suffix, const std::string & suffix_with_globs, - const std::string & glob, re2::RE2 & matcher, size_t & total_bytes_to_read, - const size_t max_depth, const size_t next_slash_after_glob_pos, +void listFilesWithFoldedRegexpMatchingImpl(const std::string & path_for_ls, + const std::string & processed_suffix, + const std::string & suffix_with_globs, + const std::string & current_glob, + re2::RE2 & matcher, + size_t & total_bytes_to_read, + const size_t max_depth, + const size_t next_slash_after_glob_pos, std::vector<std::string> & result) { /// We don't need to go all the way in every directory if max_depth is reached @@ -111,7 +115,7 @@ void listFilesWithFoldedRegexpMatchingImpl(const std::string & start_dir, const return; const fs::directory_iterator end; - for (fs::directory_iterator it(start_dir + processed_suffix); it != end; ++it) + for (fs::directory_iterator it(path_for_ls); it != end; ++it) { const std::string full_path = it->path().string(); const size_t last_slash = full_path.rfind('/'); @@ -133,9 +137,9 @@ void listFilesWithFoldedRegexpMatchingImpl(const std::string & start_dir, const } else if (it->is_directory()) { - listFilesWithFoldedRegexpMatchingImpl(start_dir, processed_suffix + dir_or_file_name, suffix_with_globs, - glob, matcher, total_bytes_to_read, max_depth - 1, - next_slash_after_glob_pos, result); + listFilesWithFoldedRegexpMatchingImpl(fs::path(full_path).append(processed_suffix), processed_suffix + dir_or_file_name, + suffix_with_globs, current_glob, matcher, + total_bytes_to_read, max_depth - 1, next_slash_after_glob_pos, result); } } @@ -203,7 +207,7 @@ void listFilesWithRegexpMatchingImpl( if (slashes_in_glob) { - listFilesWithFoldedRegexpMatchingImpl(prefix_without_globs, "", suffix_with_globs, + listFilesWithFoldedRegexpMatchingImpl(fs::path(prefix_without_globs), "", 
suffix_with_globs, current_glob, matcher, total_bytes_to_read, slashes_in_glob, next_slash_after_glob_pos, result); return; From aad7712b18fa358f60c59565d3914fafc2a3f759 Mon Sep 17 00:00:00 2001 From: zvonand Date: Sun, 11 Jun 2023 02:09:05 +0200 Subject: [PATCH 0283/1997] add existing test --- tests/integration/test_storage_hdfs/test.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index edf5344e887..0b18c0180cc 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -85,6 +85,15 @@ def test_read_write_storage_with_globs(started_cluster): assert "in readonly mode" in str(ex) +def test_storage_with_multidirectory_glob(started_cluster): + hdfs_api = started_cluster.hdfs_api + for i in ["1", "2"]: + hdfs_api.write_data(f"/multiglob/p{i}/path{i}/postfix/data{i}", f"File{i}\t{i}{i}\n") + assert hdfs_api.read_data(f"/multiglob/p{i}/path{i}/postfix/data{i}") == f"File{i}\t{i}{i}\n" + + assert node1.query("SELECT * FROM hdfs('hdfs://hdfs1:9000/multiglob/{p1/path1,p2/path2}/postfix/data{1,2}', TSV)") == f"File1\t11\nFile2\t22\n" + + def test_read_write_table(started_cluster): hdfs_api = started_cluster.hdfs_api From 48e03ac92a457d612dd8b2e4838dce1e47e51109 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 11 Jun 2023 14:33:21 +0000 Subject: [PATCH 0284/1997] Revert "Exclude some tests with QPL from fasttest" This reverts commit ffb941624bc971886212e0745716e79688a154a1. --- .../0_stateless/00804_test_alter_compression_codecs.sql | 3 --- .../0_stateless/00804_test_custom_compression_codecs.sql | 3 --- .../00804_test_custom_compression_codes_log_storages.sql | 3 --- .../0_stateless/00804_test_deflate_qpl_codec_compression.sql | 3 --- 4 files changed, 12 deletions(-) diff --git a/tests/queries/0_stateless/00804_test_alter_compression_codecs.sql b/tests/queries/0_stateless/00804_test_alter_compression_codecs.sql index eb1abda9a21..fd9855e82d3 100644 --- a/tests/queries/0_stateless/00804_test_alter_compression_codecs.sql +++ b/tests/queries/0_stateless/00804_test_alter_compression_codecs.sql @@ -1,6 +1,3 @@ ---Tags: no-fasttest --- no-fasttest because DEFLATE_QPL isn't available in fasttest - SET send_logs_level = 'fatal'; DROP TABLE IF EXISTS alter_compression_codec; diff --git a/tests/queries/0_stateless/00804_test_custom_compression_codecs.sql b/tests/queries/0_stateless/00804_test_custom_compression_codecs.sql index df74620a201..89e77f758a7 100644 --- a/tests/queries/0_stateless/00804_test_custom_compression_codecs.sql +++ b/tests/queries/0_stateless/00804_test_custom_compression_codecs.sql @@ -1,6 +1,3 @@ ---Tags: no-fasttest --- no-fasttest because DEFLATE_QPL isn't available in fasttest - SET send_logs_level = 'fatal'; SET allow_suspicious_codecs = 1; SET enable_deflate_qpl_codec = 1; diff --git a/tests/queries/0_stateless/00804_test_custom_compression_codes_log_storages.sql b/tests/queries/0_stateless/00804_test_custom_compression_codes_log_storages.sql index 67c0074c58f..a629df2666d 100644 --- a/tests/queries/0_stateless/00804_test_custom_compression_codes_log_storages.sql +++ b/tests/queries/0_stateless/00804_test_custom_compression_codes_log_storages.sql @@ -1,6 +1,3 @@ ---Tags: no-fasttest --- no-fasttest because DEFLATE_QPL isn't available in fasttest - SET send_logs_level = 'fatal'; SET allow_suspicious_codecs = 1; SET enable_deflate_qpl_codec = 1; diff --git
a/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.sql b/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.sql index a46272112a9..5a56fc0d576 100644 --- a/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.sql +++ b/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.sql @@ -1,6 +1,3 @@ ---Tags: no-fasttest --- no-fasttest because DEFLATE_QPL isn't available in fasttest - SET send_logs_level = 'fatal'; SET enable_deflate_qpl_codec = 1; From d228411f41eabf7e443fbbb2f4148880a3da78fa Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 11 Jun 2023 14:39:15 +0000 Subject: [PATCH 0285/1997] Reset modified tests --- ...04_test_alter_compression_codecs.reference | 17 ++----- .../00804_test_alter_compression_codecs.sql | 22 +++------- ...4_test_custom_compression_codecs.reference | 8 ++-- .../00804_test_custom_compression_codecs.sql | 44 ++++++++----------- ...m_compression_codes_log_storages.reference | 20 ++++----- ..._custom_compression_codes_log_storages.sql | 41 ++++++++--------- ...804_test_deflate_qpl_codec_compression.sql | 4 ++ 7 files changed, 63 insertions(+), 93 deletions(-) diff --git a/tests/queries/0_stateless/00804_test_alter_compression_codecs.reference b/tests/queries/0_stateless/00804_test_alter_compression_codecs.reference index 5c77a102740..cfbfadf1e67 100644 --- a/tests/queries/0_stateless/00804_test_alter_compression_codecs.reference +++ b/tests/queries/0_stateless/00804_test_alter_compression_codecs.reference @@ -18,7 +18,7 @@ CODEC(NONE) 2018-01-01 4 4 2018-01-01 5 5 2018-01-01 6 6 -CODEC(DEFLATE_QPL) +CODEC(ZSTD(1), LZ4HC(0), LZ4, LZ4, NONE) 2018-01-01 1 default_value 2018-01-01 2 default_value 2018-01-01 3 3 @@ -27,18 +27,7 @@ CODEC(DEFLATE_QPL) 2018-01-01 6 6 2018-01-01 7 7 2018-01-01 8 8 -CODEC(ZSTD(1), LZ4HC(0), LZ4, LZ4, DEFLATE_QPL, NONE) -2018-01-01 1 default_value -2018-01-01 2 default_value -2018-01-01 3 3 -2018-01-01 4 4 -2018-01-01 5 5 -2018-01-01 6 6 -2018-01-01 7 7 -2018-01-01 8 8 -2018-01-01 9 9 -2018-01-01 10 10 -CODEC(ZSTD(1), LZ4HC(0), LZ4, LZ4, DEFLATE_QPL, NONE) -CODEC(NONE, LZ4, LZ4HC(0), ZSTD(1), DEFLATE_QPL) +CODEC(ZSTD(1), LZ4HC(0), LZ4, LZ4, NONE) +CODEC(NONE, LZ4, LZ4HC(0), ZSTD(1)) 2 1 diff --git a/tests/queries/0_stateless/00804_test_alter_compression_codecs.sql b/tests/queries/0_stateless/00804_test_alter_compression_codecs.sql index fd9855e82d3..85e5f8b63ad 100644 --- a/tests/queries/0_stateless/00804_test_alter_compression_codecs.sql +++ b/tests/queries/0_stateless/00804_test_alter_compression_codecs.sql @@ -28,20 +28,12 @@ SELECT * FROM alter_compression_codec ORDER BY id; OPTIMIZE TABLE alter_compression_codec FINAL; SELECT * FROM alter_compression_codec ORDER BY id; -SET enable_deflate_qpl_codec = 1; -ALTER TABLE alter_compression_codec MODIFY COLUMN alter_column CODEC(DEFLATE_QPL); +SET allow_suspicious_codecs = 1; +ALTER TABLE alter_compression_codec MODIFY COLUMN alter_column CODEC(ZSTD, LZ4HC, LZ4, LZ4, NONE); SELECT compression_codec FROM system.columns WHERE database = currentDatabase() AND table = 'alter_compression_codec' AND name = 'alter_column'; INSERT INTO alter_compression_codec VALUES('2018-01-01', 7, '7'); INSERT INTO alter_compression_codec VALUES('2018-01-01', 8, '8'); -SELECT * FROM alter_compression_codec ORDER BY id; - -SET allow_suspicious_codecs = 1; -ALTER TABLE alter_compression_codec MODIFY COLUMN alter_column CODEC(ZSTD, LZ4HC, LZ4, LZ4, DEFLATE_QPL, NONE); -SELECT compression_codec FROM system.columns WHERE database = currentDatabase() AND 
table = 'alter_compression_codec' AND name = 'alter_column'; - -INSERT INTO alter_compression_codec VALUES('2018-01-01', 9, '9'); -INSERT INTO alter_compression_codec VALUES('2018-01-01', 10, '10'); OPTIMIZE TABLE alter_compression_codec FINAL; SELECT * FROM alter_compression_codec ORDER BY id; @@ -62,17 +54,15 @@ ALTER TABLE alter_bad_codec ADD COLUMN alter_column DateTime DEFAULT '2019-01-01 ALTER TABLE alter_bad_codec ADD COLUMN alter_column DateTime DEFAULT '2019-01-01 00:00:00' CODEC(ZSTD(100)); -- { serverError 433 } -ALTER TABLE alter_bad_codec ADD COLUMN alter_column DateTime DEFAULT '2019-01-01 00:00:00' CODEC(DEFLATE_QPL(100)); -- { serverError DATA_TYPE_CANNOT_HAVE_ARGUMENTS } - DROP TABLE IF EXISTS alter_bad_codec; DROP TABLE IF EXISTS large_alter_table_00804; DROP TABLE IF EXISTS store_of_hash_00804; CREATE TABLE large_alter_table_00804 ( - somedate Date CODEC(ZSTD, ZSTD, ZSTD(12), LZ4HC(12), DEFLATE_QPL), - id UInt64 CODEC(LZ4, ZSTD, NONE, LZ4HC, DEFLATE_QPL), - data String CODEC(ZSTD(2), LZ4HC, NONE, LZ4, LZ4, DEFLATE_QPL) + somedate Date CODEC(ZSTD, ZSTD, ZSTD(12), LZ4HC(12)), + id UInt64 CODEC(LZ4, ZSTD, NONE, LZ4HC), + data String CODEC(ZSTD(2), LZ4HC, NONE, LZ4, LZ4) ) ENGINE = MergeTree() PARTITION BY somedate ORDER BY id SETTINGS index_granularity = 2, index_granularity_bytes = '10Mi', min_bytes_for_wide_part = 0; INSERT INTO large_alter_table_00804 SELECT toDate('2019-01-01'), number, toString(number + rand()) FROM system.numbers LIMIT 300000; @@ -81,7 +71,7 @@ CREATE TABLE store_of_hash_00804 (hash UInt64) ENGINE = Memory(); INSERT INTO store_of_hash_00804 SELECT sum(cityHash64(*)) FROM large_alter_table_00804; -ALTER TABLE large_alter_table_00804 MODIFY COLUMN data CODEC(NONE, LZ4, LZ4HC, ZSTD, DEFLATE_QPL); +ALTER TABLE large_alter_table_00804 MODIFY COLUMN data CODEC(NONE, LZ4, LZ4HC, ZSTD); OPTIMIZE TABLE large_alter_table_00804; diff --git a/tests/queries/0_stateless/00804_test_custom_compression_codecs.reference b/tests/queries/0_stateless/00804_test_custom_compression_codecs.reference index 8b51d65004a..7bd91e5a69b 100644 --- a/tests/queries/0_stateless/00804_test_custom_compression_codecs.reference +++ b/tests/queries/0_stateless/00804_test_custom_compression_codecs.reference @@ -1,6 +1,6 @@ -1 hello 2018-12-14 2018-12-14 1.1 aaa 5 -2 world 2018-12-15 2018-12-15 2.2 bbb 6 -3 ! 2018-12-16 2018-12-16 3.3 ccc 7 +1 hello 2018-12-14 1.1 aaa 5 +2 world 2018-12-15 2.2 bbb 6 +3 ! 
2018-12-16 3.3 ccc 7 2 1 world 2018-10-05 1.1 2 hello 2018-10-01 2.2 @@ -9,7 +9,7 @@ 10003 274972506.6 9175437371954010821 -CREATE TABLE default.compression_codec_multiple_more_types\n(\n `id` Decimal(38, 13) CODEC(ZSTD(1), LZ4, ZSTD(1), ZSTD(1), Delta(2), Delta(4), Delta(1), LZ4HC(0), DEFLATE_QPL),\n `data` FixedString(12) CODEC(ZSTD(1), ZSTD(1), NONE, NONE, NONE, LZ4HC(0), DEFLATE_QPL),\n `ddd.age` Array(UInt8) CODEC(LZ4, LZ4HC(0), NONE, NONE, NONE, ZSTD(1), Delta(8), DEFLATE_QPL),\n `ddd.Name` Array(String) CODEC(LZ4, LZ4HC(0), NONE, NONE, NONE, ZSTD(1), Delta(8), DEFLATE_QPL)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.compression_codec_multiple_more_types\n(\n `id` Decimal(38, 13) CODEC(ZSTD(1), LZ4, ZSTD(1), ZSTD(1), Delta(2), Delta(4), Delta(1), LZ4HC(0)),\n `data` FixedString(12) CODEC(ZSTD(1), ZSTD(1), NONE, NONE, NONE, LZ4HC(0)),\n `ddd.age` Array(UInt8) CODEC(LZ4, LZ4HC(0), NONE, NONE, NONE, ZSTD(1), Delta(8)),\n `ddd.Name` Array(String) CODEC(LZ4, LZ4HC(0), NONE, NONE, NONE, ZSTD(1), Delta(8))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 1.5555555555555 hello world! [77] ['John'] 7.1 xxxxxxxxxxxx [127] ['Henry'] ! diff --git a/tests/queries/0_stateless/00804_test_custom_compression_codecs.sql b/tests/queries/0_stateless/00804_test_custom_compression_codecs.sql index 89e77f758a7..c080c2fc98e 100644 --- a/tests/queries/0_stateless/00804_test_custom_compression_codecs.sql +++ b/tests/queries/0_stateless/00804_test_custom_compression_codecs.sql @@ -1,6 +1,5 @@ SET send_logs_level = 'fatal'; SET allow_suspicious_codecs = 1; -SET enable_deflate_qpl_codec = 1; DROP TABLE IF EXISTS compression_codec; @@ -8,21 +7,20 @@ CREATE TABLE compression_codec( id UInt64 CODEC(LZ4), data String CODEC(ZSTD), ddd Date CODEC(NONE), - ddd32 Date32 CODEC(DEFLATE_QPL), somenum Float64 CODEC(ZSTD(2)), somestr FixedString(3) CODEC(LZ4HC(7)), - othernum Int64 CODEC(Delta), + othernum Int64 CODEC(Delta) ) ENGINE = MergeTree() ORDER BY tuple(); -INSERT INTO compression_codec VALUES(1, 'hello', toDate('2018-12-14'), toDate32('2018-12-14'), 1.1, 'aaa', 5); -INSERT INTO compression_codec VALUES(2, 'world', toDate('2018-12-15'), toDate32('2018-12-15'), 2.2, 'bbb', 6); -INSERT INTO compression_codec VALUES(3, '!', toDate('2018-12-16'), toDate32('2018-12-16'), 3.3, 'ccc', 7); +INSERT INTO compression_codec VALUES(1, 'hello', toDate('2018-12-14'), 1.1, 'aaa', 5); +INSERT INTO compression_codec VALUES(2, 'world', toDate('2018-12-15'), 2.2, 'bbb', 6); +INSERT INTO compression_codec VALUES(3, '!', toDate('2018-12-16'), 3.3, 'ccc', 7); SELECT * FROM compression_codec ORDER BY id; OPTIMIZE TABLE compression_codec FINAL; -INSERT INTO compression_codec VALUES(2, '', toDate('2018-12-13'), toDate32('2018-12-13'), 4.4, 'ddd', 8); +INSERT INTO compression_codec VALUES(2, '', toDate('2018-12-13'), 4.4, 'ddd', 8); DETACH TABLE compression_codec; ATTACH TABLE compression_codec; @@ -33,31 +31,25 @@ DROP TABLE IF EXISTS compression_codec; DROP TABLE IF EXISTS bad_codec; DROP TABLE IF EXISTS params_when_no_params; -DROP TABLE IF EXISTS params_when_no_params2; DROP TABLE IF EXISTS too_many_params; DROP TABLE IF EXISTS codec_multiple_direct_specification_1; DROP TABLE IF EXISTS codec_multiple_direct_specification_2; -DROP TABLE IF EXISTS codec_multiple_direct_specification_3; DROP TABLE IF EXISTS delta_bad_params1; DROP TABLE IF EXISTS delta_bad_params2; CREATE TABLE bad_codec(id UInt64 CODEC(adssadads)) ENGINE = MergeTree() order by tuple(); -- { 
serverError 432 } CREATE TABLE too_many_params(id UInt64 CODEC(ZSTD(2,3,4,5))) ENGINE = MergeTree() order by tuple(); -- { serverError 431 } CREATE TABLE params_when_no_params(id UInt64 CODEC(LZ4(1))) ENGINE = MergeTree() ORDER BY tuple(); -- { serverError 378 } -CREATE TABLE params_when_no_params2(id UInt64 CODEC(DEFLATE_QPL(1))) ENGINE = MergeTree() ORDER BY tuple(); -- { serverError 378 } CREATE TABLE codec_multiple_direct_specification_1(id UInt64 CODEC(MULTIPLE(LZ4, ZSTD))) ENGINE = MergeTree() ORDER BY tuple(); -- { serverError 432 } CREATE TABLE codec_multiple_direct_specification_2(id UInt64 CODEC(multiple(LZ4, ZSTD))) ENGINE = MergeTree() ORDER BY tuple(); -- { serverError 432 } -CREATE TABLE codec_multiple_direct_specification_3(id UInt64 CODEC(multiple(LZ4, DEFLATE_QPL))) ENGINE = MergeTree() ORDER BY tuple(); -- { serverError 432 } CREATE TABLE delta_bad_params1(id UInt64 CODEC(Delta(3))) ENGINE = MergeTree() ORDER BY tuple(); -- { serverError 433 } CREATE TABLE delta_bad_params2(id UInt64 CODEC(Delta(16))) ENGINE = MergeTree() ORDER BY tuple(); -- { serverError 433 } DROP TABLE IF EXISTS bad_codec; DROP TABLE IF EXISTS params_when_no_params; -DROP TABLE IF EXISTS params_when_no_params2; DROP TABLE IF EXISTS too_many_params; DROP TABLE IF EXISTS codec_multiple_direct_specification_1; DROP TABLE IF EXISTS codec_multiple_direct_specification_2; -DROP TABLE IF EXISTS codec_multiple_direct_specification_3; DROP TABLE IF EXISTS delta_bad_params1; DROP TABLE IF EXISTS delta_bad_params2; @@ -66,10 +58,10 @@ DROP TABLE IF EXISTS compression_codec_multiple; SET network_compression_method = 'lz4hc'; CREATE TABLE compression_codec_multiple ( - id UInt64 CODEC(LZ4, ZSTD, NONE, LZ4HC, Delta(4), DEFLATE_QPL), - data String CODEC(ZSTD(2), NONE, Delta(2), LZ4HC, LZ4, LZ4, Delta(8), DEFLATE_QPL), - ddd Date CODEC(NONE, NONE, NONE, Delta(1), LZ4, ZSTD, LZ4HC, LZ4HC, DEFLATE_QPL), - somenum Float64 CODEC(Delta(4), LZ4, LZ4, ZSTD(2), LZ4HC(5), ZSTD(3), ZSTD, DEFLATE_QPL) + id UInt64 CODEC(LZ4, ZSTD, NONE, LZ4HC, Delta(4)), + data String CODEC(ZSTD(2), NONE, Delta(2), LZ4HC, LZ4, LZ4, Delta(8)), + ddd Date CODEC(NONE, NONE, NONE, Delta(1), LZ4, ZSTD, LZ4HC, LZ4HC), + somenum Float64 CODEC(Delta(4), LZ4, LZ4, ZSTD(2), LZ4HC(5), ZSTD(3), ZSTD) ) ENGINE = MergeTree() ORDER BY tuple(); INSERT INTO compression_codec_multiple VALUES (1, 'world', toDate('2018-10-05'), 1.1), (2, 'hello', toDate('2018-10-01'), 2.2), (3, 'buy', toDate('2018-10-11'), 3.3); @@ -93,15 +85,15 @@ SELECT sum(cityHash64(*)) FROM compression_codec_multiple; DROP TABLE IF EXISTS compression_codec_multiple_more_types; CREATE TABLE compression_codec_multiple_more_types ( - id Decimal128(13) CODEC(ZSTD, LZ4, ZSTD, ZSTD, Delta(2), Delta(4), Delta(1), LZ4HC, DEFLATE_QPL), - data FixedString(12) CODEC(ZSTD, ZSTD, Delta, Delta, Delta, NONE, NONE, NONE, LZ4HC, DEFLATE_QPL), - ddd Nested (age UInt8, Name String) CODEC(LZ4, LZ4HC, NONE, NONE, NONE, ZSTD, Delta(8), DEFLATE_QPL) + id Decimal128(13) CODEC(ZSTD, LZ4, ZSTD, ZSTD, Delta(2), Delta(4), Delta(1), LZ4HC), + data FixedString(12) CODEC(ZSTD, ZSTD, Delta, Delta, Delta, NONE, NONE, NONE, LZ4HC), + ddd Nested (age UInt8, Name String) CODEC(LZ4, LZ4HC, NONE, NONE, NONE, ZSTD, Delta(8)) ) ENGINE = MergeTree() ORDER BY tuple(); -- { serverError 36 } CREATE TABLE compression_codec_multiple_more_types ( - id Decimal128(13) CODEC(ZSTD, LZ4, ZSTD, ZSTD, Delta(2), Delta(4), Delta(1), LZ4HC, DEFLATE_QPL), - data FixedString(12) CODEC(ZSTD, ZSTD, NONE, NONE, NONE, LZ4HC, DEFLATE_QPL), - ddd Nested 
(age UInt8, Name String) CODEC(LZ4, LZ4HC, NONE, NONE, NONE, ZSTD, Delta(8), DEFLATE_QPL) + id Decimal128(13) CODEC(ZSTD, LZ4, ZSTD, ZSTD, Delta(2), Delta(4), Delta(1), LZ4HC), + data FixedString(12) CODEC(ZSTD, ZSTD, NONE, NONE, NONE, LZ4HC), + ddd Nested (age UInt8, Name String) CODEC(LZ4, LZ4HC, NONE, NONE, NONE, ZSTD, Delta(8)) ) ENGINE = MergeTree() ORDER BY tuple(); SHOW CREATE TABLE compression_codec_multiple_more_types; @@ -117,9 +109,9 @@ SET network_compression_method = 'zstd'; SET network_zstd_compression_level = 5; CREATE TABLE compression_codec_multiple_with_key ( - somedate Date CODEC(ZSTD, ZSTD, ZSTD(12), LZ4HC(12), Delta, Delta, DEFLATE_QPL), - id UInt64 CODEC(LZ4, ZSTD, Delta, NONE, LZ4HC, Delta, DEFLATE_QPL), - data String CODEC(ZSTD(2), Delta(1), LZ4HC, NONE, LZ4, LZ4, DEFLATE_QPL) + somedate Date CODEC(ZSTD, ZSTD, ZSTD(12), LZ4HC(12), Delta, Delta), + id UInt64 CODEC(LZ4, ZSTD, Delta, NONE, LZ4HC, Delta), + data String CODEC(ZSTD(2), Delta(1), LZ4HC, NONE, LZ4, LZ4) ) ENGINE = MergeTree() PARTITION BY somedate ORDER BY id SETTINGS index_granularity = 2, index_granularity_bytes = '10Mi'; diff --git a/tests/queries/0_stateless/00804_test_custom_compression_codes_log_storages.reference b/tests/queries/0_stateless/00804_test_custom_compression_codes_log_storages.reference index d64b8a77eed..8145ca99829 100644 --- a/tests/queries/0_stateless/00804_test_custom_compression_codes_log_storages.reference +++ b/tests/queries/0_stateless/00804_test_custom_compression_codes_log_storages.reference @@ -1,9 +1,9 @@ -CREATE TABLE default.compression_codec_log\n(\n `id` UInt64 CODEC(LZ4),\n `data` String CODEC(ZSTD(1)),\n `ddd` Date CODEC(NONE),\n `somenum` Float64 CODEC(ZSTD(2)),\n `somestr` FixedString(3) CODEC(LZ4HC(7)),\n `othernum` Int64 CODEC(Delta(8)),\n `qplstr` String CODEC(DEFLATE_QPL),\n `qplnum` UInt32 CODEC(DEFLATE_QPL)\n)\nENGINE = Log -1 hello 2018-12-14 1.1 aaa 5 qpl11 11 -2 world 2018-12-15 2.2 bbb 6 qpl22 22 -3 ! 2018-12-16 3.3 ccc 7 qpl33 33 +CREATE TABLE default.compression_codec_log\n(\n `id` UInt64 CODEC(LZ4),\n `data` String CODEC(ZSTD(1)),\n `ddd` Date CODEC(NONE),\n `somenum` Float64 CODEC(ZSTD(2)),\n `somestr` FixedString(3) CODEC(LZ4HC(7)),\n `othernum` Int64 CODEC(Delta(8))\n)\nENGINE = Log +1 hello 2018-12-14 1.1 aaa 5 +2 world 2018-12-15 2.2 bbb 6 +3 ! 
2018-12-16 3.3 ccc 7 2 -CREATE TABLE default.compression_codec_multiple_log\n(\n `id` UInt64 CODEC(LZ4, ZSTD(1), NONE, LZ4HC(0), Delta(4), DEFLATE_QPL),\n `data` String CODEC(ZSTD(2), NONE, Delta(2), LZ4HC(0), LZ4, LZ4, Delta(8), DEFLATE_QPL),\n `ddd` Date CODEC(NONE, NONE, NONE, Delta(1), LZ4, ZSTD(1), LZ4HC(0), LZ4HC(0), DEFLATE_QPL),\n `somenum` Float64 CODEC(Delta(4), LZ4, LZ4, ZSTD(2), LZ4HC(5), ZSTD(3), ZSTD(1), DEFLATE_QPL)\n)\nENGINE = Log +CREATE TABLE default.compression_codec_multiple_log\n(\n `id` UInt64 CODEC(LZ4, ZSTD(1), NONE, LZ4HC(0), Delta(4)),\n `data` String CODEC(ZSTD(2), NONE, Delta(2), LZ4HC(0), LZ4, LZ4, Delta(8)),\n `ddd` Date CODEC(NONE, NONE, NONE, Delta(1), LZ4, ZSTD(1), LZ4HC(0), LZ4HC(0)),\n `somenum` Float64 CODEC(Delta(4), LZ4, LZ4, ZSTD(2), LZ4HC(5), ZSTD(3), ZSTD(1))\n)\nENGINE = Log 1 world 2018-10-05 1.1 2 hello 2018-10-01 2.2 3 buy 2018-10-11 3.3 @@ -11,12 +11,12 @@ CREATE TABLE default.compression_codec_multiple_log\n(\n `id` UInt64 CODEC(LZ 10003 274972506.6 9175437371954010821 -CREATE TABLE default.compression_codec_tiny_log\n(\n `id` UInt64 CODEC(LZ4),\n `data` String CODEC(ZSTD(1)),\n `ddd` Date CODEC(NONE),\n `somenum` Float64 CODEC(ZSTD(2)),\n `somestr` FixedString(3) CODEC(LZ4HC(7)),\n `othernum` Int64 CODEC(Delta(8)),\n `qplstr` String CODEC(DEFLATE_QPL),\n `qplnum` UInt32 CODEC(DEFLATE_QPL)\n)\nENGINE = TinyLog -1 hello 2018-12-14 1.1 aaa 5 qpl11 11 -2 world 2018-12-15 2.2 bbb 6 qpl22 22 -3 ! 2018-12-16 3.3 ccc 7 qpl33 33 +CREATE TABLE default.compression_codec_tiny_log\n(\n `id` UInt64 CODEC(LZ4),\n `data` String CODEC(ZSTD(1)),\n `ddd` Date CODEC(NONE),\n `somenum` Float64 CODEC(ZSTD(2)),\n `somestr` FixedString(3) CODEC(LZ4HC(7)),\n `othernum` Int64 CODEC(Delta(8))\n)\nENGINE = TinyLog +1 hello 2018-12-14 1.1 aaa 5 +2 world 2018-12-15 2.2 bbb 6 +3 ! 
2018-12-16 3.3 ccc 7 2 -CREATE TABLE default.compression_codec_multiple_tiny_log\n(\n `id` UInt64 CODEC(LZ4, ZSTD(1), NONE, LZ4HC(0), Delta(4), DEFLATE_QPL),\n `data` String CODEC(ZSTD(2), NONE, Delta(2), LZ4HC(0), LZ4, LZ4, Delta(8), DEFLATE_QPL),\n `ddd` Date CODEC(NONE, NONE, NONE, Delta(1), LZ4, ZSTD(1), LZ4HC(0), LZ4HC(0), DEFLATE_QPL),\n `somenum` Float64 CODEC(Delta(4), LZ4, LZ4, ZSTD(2), LZ4HC(5), ZSTD(3), ZSTD(1), DEFLATE_QPL)\n)\nENGINE = TinyLog +CREATE TABLE default.compression_codec_multiple_tiny_log\n(\n `id` UInt64 CODEC(LZ4, ZSTD(1), NONE, LZ4HC(0), Delta(4)),\n `data` String CODEC(ZSTD(2), NONE, Delta(2), LZ4HC(0), LZ4, LZ4, Delta(8)),\n `ddd` Date CODEC(NONE, NONE, NONE, Delta(1), LZ4, ZSTD(1), LZ4HC(0), LZ4HC(0)),\n `somenum` Float64 CODEC(Delta(4), LZ4, LZ4, ZSTD(2), LZ4HC(5), ZSTD(3), ZSTD(1))\n)\nENGINE = TinyLog 1 world 2018-10-05 1.1 2 hello 2018-10-01 2.2 3 buy 2018-10-11 3.3 diff --git a/tests/queries/0_stateless/00804_test_custom_compression_codes_log_storages.sql b/tests/queries/0_stateless/00804_test_custom_compression_codes_log_storages.sql index a629df2666d..fba6a216762 100644 --- a/tests/queries/0_stateless/00804_test_custom_compression_codes_log_storages.sql +++ b/tests/queries/0_stateless/00804_test_custom_compression_codes_log_storages.sql @@ -1,6 +1,5 @@ SET send_logs_level = 'fatal'; SET allow_suspicious_codecs = 1; -SET enable_deflate_qpl_codec = 1; -- copy-paste for storage log @@ -12,20 +11,18 @@ CREATE TABLE compression_codec_log( ddd Date CODEC(NONE), somenum Float64 CODEC(ZSTD(2)), somestr FixedString(3) CODEC(LZ4HC(7)), - othernum Int64 CODEC(Delta), - qplstr String CODEC(DEFLATE_QPL), - qplnum UInt32 CODEC(DEFLATE_QPL), + othernum Int64 CODEC(Delta) ) ENGINE = Log(); SHOW CREATE TABLE compression_codec_log; -INSERT INTO compression_codec_log VALUES(1, 'hello', toDate('2018-12-14'), 1.1, 'aaa', 5, 'qpl11', 11); -INSERT INTO compression_codec_log VALUES(2, 'world', toDate('2018-12-15'), 2.2, 'bbb', 6,'qpl22', 22); -INSERT INTO compression_codec_log VALUES(3, '!', toDate('2018-12-16'), 3.3, 'ccc', 7, 'qpl33', 33); +INSERT INTO compression_codec_log VALUES(1, 'hello', toDate('2018-12-14'), 1.1, 'aaa', 5); +INSERT INTO compression_codec_log VALUES(2, 'world', toDate('2018-12-15'), 2.2, 'bbb', 6); +INSERT INTO compression_codec_log VALUES(3, '!', toDate('2018-12-16'), 3.3, 'ccc', 7); SELECT * FROM compression_codec_log ORDER BY id; -INSERT INTO compression_codec_log VALUES(2, '', toDate('2018-12-13'), 4.4, 'ddd', 8, 'qpl44', 44); +INSERT INTO compression_codec_log VALUES(2, '', toDate('2018-12-13'), 4.4, 'ddd', 8); DETACH TABLE compression_codec_log; ATTACH TABLE compression_codec_log; @@ -37,10 +34,10 @@ DROP TABLE IF EXISTS compression_codec_log; DROP TABLE IF EXISTS compression_codec_multiple_log; CREATE TABLE compression_codec_multiple_log ( - id UInt64 CODEC(LZ4, ZSTD, NONE, LZ4HC, Delta(4), DEFLATE_QPL), - data String CODEC(ZSTD(2), NONE, Delta(2), LZ4HC, LZ4, LZ4, Delta(8), DEFLATE_QPL), - ddd Date CODEC(NONE, NONE, NONE, Delta(1), LZ4, ZSTD, LZ4HC, LZ4HC, DEFLATE_QPL), - somenum Float64 CODEC(Delta(4), LZ4, LZ4, ZSTD(2), LZ4HC(5), ZSTD(3), ZSTD, DEFLATE_QPL) + id UInt64 CODEC(LZ4, ZSTD, NONE, LZ4HC, Delta(4)), + data String CODEC(ZSTD(2), NONE, Delta(2), LZ4HC, LZ4, LZ4, Delta(8)), + ddd Date CODEC(NONE, NONE, NONE, Delta(1), LZ4, ZSTD, LZ4HC, LZ4HC), + somenum Float64 CODEC(Delta(4), LZ4, LZ4, ZSTD(2), LZ4HC(5), ZSTD(3), ZSTD) ) ENGINE = Log(); SHOW CREATE TABLE compression_codec_multiple_log; @@ -72,20 +69,18 @@ CREATE TABLE 
compression_codec_tiny_log( ddd Date CODEC(NONE), somenum Float64 CODEC(ZSTD(2)), somestr FixedString(3) CODEC(LZ4HC(7)), - othernum Int64 CODEC(Delta), - qplstr String CODEC(DEFLATE_QPL), - qplnum UInt32 CODEC(DEFLATE_QPL), + othernum Int64 CODEC(Delta) ) ENGINE = TinyLog(); SHOW CREATE TABLE compression_codec_tiny_log; -INSERT INTO compression_codec_tiny_log VALUES(1, 'hello', toDate('2018-12-14'), 1.1, 'aaa', 5, 'qpl11', 11); -INSERT INTO compression_codec_tiny_log VALUES(2, 'world', toDate('2018-12-15'), 2.2, 'bbb', 6, 'qpl22', 22); -INSERT INTO compression_codec_tiny_log VALUES(3, '!', toDate('2018-12-16'), 3.3, 'ccc', 7, 'qpl33', 33); +INSERT INTO compression_codec_tiny_log VALUES(1, 'hello', toDate('2018-12-14'), 1.1, 'aaa', 5); +INSERT INTO compression_codec_tiny_log VALUES(2, 'world', toDate('2018-12-15'), 2.2, 'bbb', 6); +INSERT INTO compression_codec_tiny_log VALUES(3, '!', toDate('2018-12-16'), 3.3, 'ccc', 7); SELECT * FROM compression_codec_tiny_log ORDER BY id; -INSERT INTO compression_codec_tiny_log VALUES(2, '', toDate('2018-12-13'), 4.4, 'ddd', 8, 'qpl44', 44); +INSERT INTO compression_codec_tiny_log VALUES(2, '', toDate('2018-12-13'), 4.4, 'ddd', 8); DETACH TABLE compression_codec_tiny_log; ATTACH TABLE compression_codec_tiny_log; @@ -97,10 +92,10 @@ DROP TABLE IF EXISTS compression_codec_tiny_log; DROP TABLE IF EXISTS compression_codec_multiple_tiny_log; CREATE TABLE compression_codec_multiple_tiny_log ( - id UInt64 CODEC(LZ4, ZSTD, NONE, LZ4HC, Delta(4), DEFLATE_QPL), - data String CODEC(ZSTD(2), NONE, Delta(2), LZ4HC, LZ4, LZ4, Delta(8), DEFLATE_QPL), - ddd Date CODEC(NONE, NONE, NONE, Delta(1), LZ4, ZSTD, LZ4HC, LZ4HC, DEFLATE_QPL), - somenum Float64 CODEC(Delta(4), LZ4, LZ4, ZSTD(2), LZ4HC(5), ZSTD(3), ZSTD, DEFLATE_QPL) + id UInt64 CODEC(LZ4, ZSTD, NONE, LZ4HC, Delta(4)), + data String CODEC(ZSTD(2), NONE, Delta(2), LZ4HC, LZ4, LZ4, Delta(8)), + ddd Date CODEC(NONE, NONE, NONE, Delta(1), LZ4, ZSTD, LZ4HC, LZ4HC), + somenum Float64 CODEC(Delta(4), LZ4, LZ4, ZSTD(2), LZ4HC(5), ZSTD(3), ZSTD) ) ENGINE = TinyLog(); SHOW CREATE TABLE compression_codec_multiple_tiny_log; diff --git a/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.sql b/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.sql index 5a56fc0d576..78c57013eeb 100644 --- a/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.sql +++ b/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.sql @@ -1,3 +1,7 @@ +--Tags: no-fasttest, no-cpu-aarch64 +-- no-fasttest because DEFLATE_QPL isn't available in fasttest +-- no-cpu-aarch64 because DEFLATE_QPL is x86-only + SET send_logs_level = 'fatal'; SET enable_deflate_qpl_codec = 1; From d72751be27ba5f69337a0039f41e577c05a3ae7f Mon Sep 17 00:00:00 2001 From: alekseygolub Date: Sun, 11 Jun 2023 15:01:45 +0000 Subject: [PATCH 0286/1997] Added cache invalidation; Fix issues --- src/Databases/DatabaseFactory.cpp | 5 +-- src/Databases/DatabaseFilesystem.cpp | 40 ++++++++++++++----- src/Databases/DatabaseFilesystem.h | 8 +++- src/Databases/DatabaseHDFS.cpp | 6 +++ src/Databases/DatabaseHDFS.h | 5 ++- src/Databases/DatabaseS3.cpp | 14 +++---- src/Databases/DatabaseS3.h | 5 ++- .../0_stateless/02724_database_s3.reference | 4 -- .../queries/0_stateless/02724_database_s3.sh | 6 --- 9 files changed, 59 insertions(+), 34 deletions(-) diff --git a/src/Databases/DatabaseFactory.cpp b/src/Databases/DatabaseFactory.cpp index 26952cc574e..9d90c61bb41 100644 --- a/src/Databases/DatabaseFactory.cpp +++ 
b/src/Databases/DatabaseFactory.cpp @@ -15,10 +15,9 @@ #include #include #include -#include -#include -#include #include +#include +#include #include "config.h" diff --git a/src/Databases/DatabaseFilesystem.cpp b/src/Databases/DatabaseFilesystem.cpp index 8de609f0ca2..cf45240a5f0 100644 --- a/src/Databases/DatabaseFilesystem.cpp +++ b/src/Databases/DatabaseFilesystem.cpp @@ -94,14 +94,32 @@ bool DatabaseFilesystem::checkTableFilePath(const std::string & table_path, Cont return true; } -bool DatabaseFilesystem::isTableExist(const String & name, ContextPtr context_) const +StoragePtr DatabaseFilesystem::tryGetTableFromCache(const std::string & name) const { + StoragePtr table = nullptr; { std::lock_guard lock(mutex); - if (loaded_tables.find(name) != loaded_tables.end()) - return true; + auto it = loaded_tables.find(name); + if (it != loaded_tables.end()) + table = it->second; } + // invalidate cache if file no longer exists + if (table && !fs::exists(getTablePath(name))) + { + std::lock_guard lock(mutex); + loaded_tables.erase(name); + return nullptr; + } + + return table; +} + +bool DatabaseFilesystem::isTableExist(const String & name, ContextPtr context_) const +{ + if (tryGetTableFromCache(name)) + return true; + fs::path table_file_path(getTablePath(name)); return checkTableFilePath(table_file_path, context_, false); @@ -109,13 +127,9 @@ bool DatabaseFilesystem::isTableExist(const String & name, ContextPtr context_) StoragePtr DatabaseFilesystem::getTableImpl(const String & name, ContextPtr context_) const { - // Check if the table exists in the loaded tables map - { - std::lock_guard lock(mutex); - auto it = loaded_tables.find(name); - if (it != loaded_tables.end()) - return it->second; - } + // Check if table exists in loaded tables map + if (auto table = tryGetTableFromCache(name)) + return table; auto table_path = getTablePath(name); @@ -165,6 +179,12 @@ StoragePtr DatabaseFilesystem::tryGetTable(const String & name, ContextPtr conte } } +bool DatabaseFilesystem::empty() const +{ + std::lock_guard lock(mutex); + return loaded_tables.empty(); +} + ASTPtr DatabaseFilesystem::getCreateDatabaseQuery() const { auto settings = getContext()->getSettingsRef(); diff --git a/src/Databases/DatabaseFilesystem.h b/src/Databases/DatabaseFilesystem.h index 3d2ad695cc6..350ebfe37a3 100644 --- a/src/Databases/DatabaseFilesystem.h +++ b/src/Databases/DatabaseFilesystem.h @@ -31,7 +31,10 @@ public: StoragePtr tryGetTable(const String & name, ContextPtr context) const override; - bool empty() const override { return true; } + // Contains only temporary tables + bool shouldBeEmptyOnDetach() const override { return false; } + + bool empty() const override; bool isReadOnly() const override { return true; } @@ -45,13 +48,14 @@ public: protected: StoragePtr getTableImpl(const String & name, ContextPtr context) const; + StoragePtr tryGetTableFromCache(const std::string & name) const; + std::string getTablePath(const std::string & table_name) const; void addTable(const std::string & table_name, StoragePtr table_storage) const; bool checkTableFilePath(const std::string & table_path, ContextPtr context_, bool throw_on_error) const; - private: String path; mutable Tables loaded_tables TSA_GUARDED_BY(mutex); diff --git a/src/Databases/DatabaseHDFS.cpp b/src/Databases/DatabaseHDFS.cpp index 39c3f955bf5..34cb337cdbe 100644 --- a/src/Databases/DatabaseHDFS.cpp +++ b/src/Databases/DatabaseHDFS.cpp @@ -170,6 +170,12 @@ StoragePtr DatabaseHDFS::tryGetTable(const String & name, ContextPtr context_) c } } +bool 
DatabaseHDFS::empty() const +{ + std::lock_guard lock(mutex); + return loaded_tables.empty(); +} + ASTPtr DatabaseHDFS::getCreateDatabaseQuery() const { auto settings = getContext()->getSettingsRef(); diff --git a/src/Databases/DatabaseHDFS.h b/src/Databases/DatabaseHDFS.h index 9a506c5c8ac..c7071370b5e 100644 --- a/src/Databases/DatabaseHDFS.h +++ b/src/Databases/DatabaseHDFS.h @@ -33,7 +33,10 @@ public: StoragePtr tryGetTable(const String & name, ContextPtr context) const override; - bool empty() const override { return true; } + // Contains only temporary tables + bool shouldBeEmptyOnDetach() const override { return false; } + + bool empty() const override; bool isReadOnly() const override { return true; } diff --git a/src/Databases/DatabaseS3.cpp b/src/Databases/DatabaseS3.cpp index 96616426475..46f8a67687d 100644 --- a/src/Databases/DatabaseS3.cpp +++ b/src/Databases/DatabaseS3.cpp @@ -67,14 +67,8 @@ void DatabaseS3::addTable(const std::string & table_name, StoragePtr table_stora std::string DatabaseS3::getFullUrl(const std::string & name) const { - try - { - S3::URI uri(name); - } - catch (...) - { + if (!config.url_prefix.empty()) return (fs::path(config.url_prefix) / name).string(); - } return name; } @@ -181,6 +175,12 @@ StoragePtr DatabaseS3::tryGetTable(const String & name, ContextPtr context_) con } } +bool DatabaseS3::empty() const +{ + std::lock_guard lock(mutex); + return loaded_tables.empty(); +} + ASTPtr DatabaseS3::getCreateDatabaseQuery() const { auto settings = getContext()->getSettingsRef(); diff --git a/src/Databases/DatabaseS3.h b/src/Databases/DatabaseS3.h index 4e6910566df..f494925b09b 100644 --- a/src/Databases/DatabaseS3.h +++ b/src/Databases/DatabaseS3.h @@ -43,7 +43,10 @@ public: StoragePtr tryGetTable(const String & name, ContextPtr context) const override; - bool empty() const override { return true; } + // Contains only temporary tables + bool shouldBeEmptyOnDetach() const override { return false; } + + bool empty() const override; bool isReadOnly() const override { return true; } diff --git a/tests/queries/0_stateless/02724_database_s3.reference b/tests/queries/0_stateless/02724_database_s3.reference index 811e38b7f2b..425cca6a077 100644 --- a/tests/queries/0_stateless/02724_database_s3.reference +++ b/tests/queries/0_stateless/02724_database_s3.reference @@ -12,10 +12,6 @@ test1 13 14 15 16 17 18 0 0 0 -1 2 3 -4 5 6 -7 8 9 -0 0 0 10 11 12 13 14 15 16 17 18 diff --git a/tests/queries/0_stateless/02724_database_s3.sh b/tests/queries/0_stateless/02724_database_s3.sh index ac1b97beecf..79199b43571 100755 --- a/tests/queries/0_stateless/02724_database_s3.sh +++ b/tests/queries/0_stateless/02724_database_s3.sh @@ -32,12 +32,6 @@ USE test4; SELECT * FROM \"b.tsv\" """ -# check that database url_prefix is ignored if pass full url as table name -${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ -USE test4; -SELECT * FROM \"http://localhost:11111/test/a.tsv\" -""" - # Check named collection loading ${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ DROP DATABASE IF EXISTS test5; From 2419a7b90fd1effd8ebf8b5b4741a0325447cdec Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 11 Jun 2023 15:16:52 +0000 Subject: [PATCH 0287/1997] Fix tests --- .../00804_test_deflate_qpl_codec_compression.reference | 2 ++ .../00804_test_deflate_qpl_codec_compression.sql | 9 ++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.reference 
b/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.reference index 276747f8233..a2178f5eda7 100644 --- a/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.reference +++ b/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.reference @@ -1,4 +1,6 @@ +CREATE TABLE default.compression_codec\n(\n `id` UInt64 CODEC(DEFLATE_QPL),\n `data` String CODEC(DEFLATE_QPL),\n `ddd` Date CODEC(DEFLATE_QPL),\n `ddd32` Date32 CODEC(DEFLATE_QPL),\n `somenum` Float64 CODEC(DEFLATE_QPL),\n `somestr` FixedString(3) CODEC(DEFLATE_QPL),\n `othernum` Int64 CODEC(DEFLATE_QPL),\n `somearray` Array(UInt8) CODEC(DEFLATE_QPL),\n `somemap` Map(String, UInt32) CODEC(DEFLATE_QPL),\n `sometuple` Tuple(UInt16, UInt64) CODEC(DEFLATE_QPL)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 1 hello 2018-12-14 2018-12-14 1.1 aaa 5 [1,2,3] {'k1':1,'k2':2} (1,2) 2 world 2018-12-15 2018-12-15 2.2 bbb 6 [4,5,6] {'k3':3,'k4':4} (3,4) 3 ! 2018-12-16 2018-12-16 3.3 ccc 7 [7,8,9] {'k5':5,'k6':6} (5,6) 2 +10001 diff --git a/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.sql b/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.sql index 78c57013eeb..ff3c1812c86 100644 --- a/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.sql +++ b/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.sql @@ -2,7 +2,8 @@ -- no-fasttest because DEFLATE_QPL isn't available in fasttest -- no-cpu-aarch64 because DEFLATE_QPL is x86-only -SET send_logs_level = 'fatal'; +-- A bunch of random DDLs to test the DEFLATE_QPL codec. + SET enable_deflate_qpl_codec = 1; DROP TABLE IF EXISTS compression_codec; @@ -20,6 +21,8 @@ CREATE TABLE compression_codec( sometuple Tuple(UInt16, UInt64) CODEC(DEFLATE_QPL), ) ENGINE = MergeTree() ORDER BY tuple(); +SHOW CREATE TABLE compression_codec; + INSERT INTO compression_codec VALUES(1, 'hello', toDate('2018-12-14'), toDate32('2018-12-14'), 1.1, 'aaa', 5, [1,2,3], map('k1',1,'k2',2), tuple(1,2)); INSERT INTO compression_codec VALUES(2, 'world', toDate('2018-12-15'), toDate32('2018-12-15'), 2.2, 'bbb', 6, [4,5,6], map('k3',3,'k4',4), tuple(3,4)); INSERT INTO compression_codec VALUES(3, '!', toDate('2018-12-16'), toDate32('2018-12-16'), 3.3, 'ccc', 7, [7,8,9], map('k5',5,'k6',6), tuple(5,6)); @@ -35,4 +38,8 @@ ATTACH TABLE compression_codec; SELECT count(*) FROM compression_codec WHERE id = 2 GROUP BY id; +INSERT INTO compression_codec SELECT 3, '!', toDate('2018-12-16'), toDate32('2018-12-16'), 3.3, 'ccc', 7, [7,8,9], map('k5',5,'k6',6), tuple(5,6) FROM system.numbers LIMIT 10000; + +SELECT count(*) FROM compression_codec WHERE id = 3 GROUP BY id; + DROP TABLE IF EXISTS compression_codec; From 598501011f5cbedb42188b2f828c055d44a0fcd8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 11 Jun 2023 17:51:54 +0200 Subject: [PATCH 0288/1997] Relax "too many parts" further --- programs/server/Server.cpp | 1 - src/Loggers/OwnPatternFormatter.cpp | 1 - src/Storages/MergeTree/MergeTreeData.cpp | 10 +++++----- src/Storages/MergeTree/MergeTreeData.h | 2 +- src/Storages/MergeTree/MergeTreeSettings.h | 6 +++--- src/Storages/MergeTree/MergeTreeSink.cpp | 9 +++++++-- src/Storages/MergeTree/MergeTreeSink.h | 3 ++- src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp | 9 +++++++-- src/Storages/MergeTree/ReplicatedMergeTreeSink.h | 1 + 9 files changed, 26 insertions(+), 16 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index d0fc8aca5e8..cfef7f0a94a 100644 --- 
a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1705,7 +1705,6 @@ try #endif /// Must be done after initialization of `servers`, because async_metrics will access `servers` variable from its thread. - async_metrics.start(); { diff --git a/src/Loggers/OwnPatternFormatter.cpp b/src/Loggers/OwnPatternFormatter.cpp index ccf6c479b80..0c2256aaa1b 100644 --- a/src/Loggers/OwnPatternFormatter.cpp +++ b/src/Loggers/OwnPatternFormatter.cpp @@ -4,7 +4,6 @@ #include #include #include -#include #include diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 9cca471fddb..b42d130bf62 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -4315,14 +4315,14 @@ std::optional MergeTreeData::getMinPartDataVersion() const } -void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until, const ContextPtr & query_context) const +void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until, const ContextPtr & query_context, bool allow_throw) const { const auto settings = getSettings(); const auto & query_settings = query_context->getSettingsRef(); const size_t parts_count_in_total = getActivePartsCount(); - /// check if have too many parts in total - if (parts_count_in_total >= settings->max_parts_in_total) + /// Check if we have too many parts in total + if (allow_throw && parts_count_in_total >= settings->max_parts_in_total) { ProfileEvents::increment(ProfileEvents::RejectedInserts); throw Exception( @@ -4338,7 +4338,7 @@ void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until, const Contex if (settings->inactive_parts_to_throw_insert > 0 || settings->inactive_parts_to_delay_insert > 0) outdated_parts_count_in_partition = getMaxOutdatedPartsCountForPartition(); - if (settings->inactive_parts_to_throw_insert > 0 && outdated_parts_count_in_partition >= settings->inactive_parts_to_throw_insert) + if (allow_throw && settings->inactive_parts_to_throw_insert > 0 && outdated_parts_count_in_partition >= settings->inactive_parts_to_throw_insert) { ProfileEvents::increment(ProfileEvents::RejectedInserts); throw Exception( @@ -4362,7 +4362,7 @@ void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until, const Contex bool parts_are_large_enough_in_average = settings->max_avg_part_size_for_too_many_parts && average_part_size > settings->max_avg_part_size_for_too_many_parts; - if (parts_count_in_partition >= active_parts_to_throw_insert && !parts_are_large_enough_in_average) + if (allow_throw && parts_count_in_partition >= active_parts_to_throw_insert && !parts_are_large_enough_in_average) { ProfileEvents::increment(ProfileEvents::RejectedInserts); throw Exception( diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index b1e1e43bd0b..ebda82eeaed 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -557,7 +557,7 @@ public: /// If the table contains too many active parts, sleep for a while to give them time to merge. /// If until is non-null, wake up from the sleep earlier if the event happened. /// The decision to delay or throw is made according to settings 'parts_to_delay_insert' and 'parts_to_throw_insert'. 
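    /// A sketch of the call pattern the new parameter enables (mirroring the MergeTreeSink changes
    /// later in this patch): the first block of an INSERT may be rejected with "Too many parts",
    /// while the following blocks are only throttled, never rejected:
    ///     delayInsertOrThrowIfNeeded(nullptr, query_context, /*allow_throw=*/ true);   /// onStart()
    ///     delayInsertOrThrowIfNeeded(nullptr, query_context, /*allow_throw=*/ false);  /// consume(), once a block was written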
- void delayInsertOrThrowIfNeeded(Poco::Event * until, const ContextPtr & query_context) const; + void delayInsertOrThrowIfNeeded(Poco::Event * until, const ContextPtr & query_context, bool allow_throw) const; /// If the table contains too many unfinished mutations, sleep for a while to give them time to execute. /// If until is non-null, wake up from the sleep earlier if the event happened. diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 33aea358078..082b84be575 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -73,11 +73,11 @@ struct Settings; M(UInt64, max_delay_to_mutate_ms, 1000, "Max delay of mutating MergeTree table in milliseconds, if there are a lot of unfinished mutations", 0) \ \ /** Inserts settings. */ \ - M(UInt64, parts_to_delay_insert, 150, "If table contains at least that many active parts in single partition, artificially slow down insert into table. Disabled if set to 0", 0) \ + M(UInt64, parts_to_delay_insert, 1000, "If table contains at least that many active parts in single partition, artificially slow down insert into table. Disabled if set to 0", 0) \ M(UInt64, inactive_parts_to_delay_insert, 0, "If table contains at least that many inactive parts in single partition, artificially slow down insert into table.", 0) \ - M(UInt64, parts_to_throw_insert, 300, "If more than this number active parts in single partition, throw 'Too many parts ...' exception.", 0) \ + M(UInt64, parts_to_throw_insert, 3000, "If more than this number active parts in single partition, throw 'Too many parts ...' exception.", 0) \ M(UInt64, inactive_parts_to_throw_insert, 0, "If more than this number inactive parts in single partition, throw 'Too many inactive parts ...' exception.", 0) \ - M(UInt64, max_avg_part_size_for_too_many_parts, 10ULL * 1024 * 1024 * 1024, "The 'too many parts' check according to 'parts_to_delay_insert' and 'parts_to_throw_insert' will be active only if the average part size (in the relevant partition) is not larger than the specified threshold. If it is larger than the specified threshold, the INSERTs will be neither delayed or rejected. This allows to have hundreds of terabytes in a single table on a single server if the parts are successfully merged to larger parts. This does not affect the thresholds on inactive parts or total parts.", 0) \ + M(UInt64, max_avg_part_size_for_too_many_parts, 1ULL * 1024 * 1024 * 1024, "The 'too many parts' check according to 'parts_to_delay_insert' and 'parts_to_throw_insert' will be active only if the average part size (in the relevant partition) is not larger than the specified threshold. If it is larger than the specified threshold, the INSERTs will be neither delayed or rejected. This allows to have hundreds of terabytes in a single table on a single server if the parts are successfully merged to larger parts. This does not affect the thresholds on inactive parts or total parts.", 0) \ M(UInt64, max_delay_to_insert, 1, "Max delay of inserting data into MergeTree table in seconds, if there are a lot of unmerged parts in single partition.", 0) \ M(UInt64, min_delay_to_insert_ms, 10, "Min delay of inserting data into MergeTree table in milliseconds, if there are a lot of unmerged parts in single partition.", 0) \ M(UInt64, max_parts_in_total, 100000, "If more than this number active parts in all partitions in total, throw 'Too many parts ...' 
exception.", 0) \ diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index d62fe5024f4..36816904a81 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -45,9 +45,9 @@ MergeTreeSink::MergeTreeSink( void MergeTreeSink::onStart() { - /// Only check "too many parts" before write, + /// It's only allowed to throw "too many parts" before write, /// because interrupting long-running INSERT query in the middle is not convenient for users. - storage.delayInsertOrThrowIfNeeded(nullptr, context); + storage.delayInsertOrThrowIfNeeded(nullptr, context, true); } void MergeTreeSink::onFinish() @@ -57,6 +57,9 @@ void MergeTreeSink::onFinish() void MergeTreeSink::consume(Chunk chunk) { + if (num_blocks_processed > 0) + storage.delayInsertOrThrowIfNeeded(nullptr, context, false); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); if (!storage_snapshot->object_columns.empty()) convertDynamicColumnsToTuples(block, storage_snapshot); @@ -136,6 +139,8 @@ void MergeTreeSink::consume(Chunk chunk) finishDelayedChunk(); delayed_chunk = std::make_unique(); delayed_chunk->partitions = std::move(partitions); + + ++num_blocks_processed; } void MergeTreeSink::finishDelayedChunk() diff --git a/src/Storages/MergeTree/MergeTreeSink.h b/src/Storages/MergeTree/MergeTreeSink.h index 68f11d86a25..07ab3850df2 100644 --- a/src/Storages/MergeTree/MergeTreeSink.h +++ b/src/Storages/MergeTree/MergeTreeSink.h @@ -35,7 +35,8 @@ private: size_t max_parts_per_block; ContextPtr context; StorageSnapshotPtr storage_snapshot; - uint64_t chunk_dedup_seqnum = 0; /// input chunk ordinal number in case of dedup token + UInt64 chunk_dedup_seqnum = 0; /// input chunk ordinal number in case of dedup token + UInt64 num_blocks_processed = 0; /// We can delay processing for previous chunk and start writing a new one. struct DelayedChunk; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 28dad454afe..5fbd72ccddc 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -367,6 +367,9 @@ size_t ReplicatedMergeTreeSinkImpl::checkQuorumPrecondition(const template void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) { + if (num_blocks_processed > 0) + storage.delayInsertOrThrowIfNeeded(&storage.partial_shutdown_event, context, false); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); const auto & settings = context->getSettingsRef(); @@ -512,6 +515,8 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) /// TODO: we can also delay commit if there is no MVs. if (!settings.deduplicate_blocks_in_dependent_materialized_views) finishDelayedChunk(zookeeper); + + ++num_blocks_processed; } template<> @@ -1136,9 +1141,9 @@ std::pair, bool> ReplicatedMergeTreeSinkImpl:: template void ReplicatedMergeTreeSinkImpl::onStart() { - /// Only check "too many parts" before write, + /// It's only allowed to throw "too many parts" before write, /// because interrupting long-running INSERT query in the middle is not convenient for users. 
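    /// Unlike the plain MergeTreeSink, the replicated sink passes the table's shutdown event as the
    /// wake-up handle, so a throttled INSERT is woken early on table shutdown; sketched from the
    /// consume() change in this patch:
    ///     if (num_blocks_processed > 0)
    ///         storage.delayInsertOrThrowIfNeeded(&storage.partial_shutdown_event, context, false);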
-    storage.delayInsertOrThrowIfNeeded(&storage.partial_shutdown_event, context);
+    storage.delayInsertOrThrowIfNeeded(&storage.partial_shutdown_event, context, true);
 }
 
 template
diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h
index 8d9e2e14129..868590efa25 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h
+++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h
@@ -123,6 +123,7 @@ private:
     bool quorum_parallel = false;
     const bool deduplicate = true;
     bool last_block_is_duplicate = false;
+    UInt64 num_blocks_processed = 0;
 
     using Logger = Poco::Logger;
     Poco::Logger * log;

From 427c5cb1bafef9b52011f9d77e725b7a5ba85553 Mon Sep 17 00:00:00 2001
From: Andrey Zvonov
Date: Sun, 11 Jun 2023 18:42:10 +0200
Subject: [PATCH 0289/1997] fix integration test

---
 tests/integration/test_storage_hdfs/test.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py
index 0b18c0180cc..dde3dd257b1 100644
--- a/tests/integration/test_storage_hdfs/test.py
+++ b/tests/integration/test_storage_hdfs/test.py
@@ -91,7 +91,8 @@ def test_storage_with_multidirectory_glob(started_cluster):
         hdfs_api.write_data(f"/multiglob/p{i}/path{i}/postfix/data{i}", f"File{i}\t{i}{i}\n")
         assert hdfs_api.read_data(f"/multiglob/p{i}/path{i}/postfix/data{i}") == f"File{i}\t{i}{i}\n"
 
-    assert node1.query("SELECT * FROM hdfs('hdfs://hdfs1:9000/multiglob/{p1/path1,p2/path2}/postfix/data{1,2}', TSV)") == f"\File1\t11\nFile2\t22\n"
+    r = node1.query("SELECT * FROM hdfs('hdfs://hdfs1:9000/multiglob/{p1/path1,p2/path2}/postfix/data{1,2}', TSV)")
+    assert (r == f"File1\t11\nFile2\t22\n") or (r == f"File2\t22\nFile1\t11\n")
 
 
 def test_read_write_table(started_cluster):

From 1af062a53214168345838f79cba53ecb11cbc41e Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Mon, 12 Jun 2023 08:04:55 +0000
Subject: [PATCH 0290/1997] Un-flake 00804_test_deflate_qpl_codec_compression

---
 .../0_stateless/00804_test_deflate_qpl_codec_compression.sql | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.sql b/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.sql
index ff3c1812c86..8a256567e80 100644
--- a/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.sql
+++ b/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.sql
@@ -6,6 +6,10 @@
 SET enable_deflate_qpl_codec = 1;
 
+-- Suppress test failures because stderr contains warning "Initialization of hardware-assisted DeflateQpl failed, falling
+-- back to software DeflateQpl codec."
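+-- (A sketch of the mechanism, assuming the warning is logged at a level below 'fatal': with
+-- send_logs_level = 'fatal' the server forwards only fatal-level log entries to the client, so the
+-- warning never reaches the test's stderr and cannot perturb the reference comparison.)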
+SET send_logs_level = 'fatal';
+
 DROP TABLE IF EXISTS compression_codec;
 
 CREATE TABLE compression_codec(

From eb9cdbcf7d5d1cdcc3b45936b4045dd8eda8f818 Mon Sep 17 00:00:00 2001
From: zvonand
Date: Mon, 12 Jun 2023 11:41:36 +0200
Subject: [PATCH 0291/1997] fix File test being flaky

---
 src/Storages/StorageFile.cpp                     | 15 +++++++++++----
 .../02771_complex_globs_in_storage_file_path.sql |  4 +++-
 2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp
index 74303b16ee9..c7a57b7d4c9 100644
--- a/src/Storages/StorageFile.cpp
+++ b/src/Storages/StorageFile.cpp
@@ -109,8 +109,14 @@ void listFilesWithFoldedRegexpMatchingImpl(const std::string & path_for_ls,
                                            const size_t next_slash_after_glob_pos,
                                            std::vector & result)
 {
-    /// We don't need to go all the way in every directory if max_depth is reached
-    /// as it is upper limit of depth by simply counting `/`s in curly braces
+    /*
+     * When `{...}` has any `/`s, it must be processed in a different way:
+     * Basically, a path with globs is processed by listFilesWithRegexpMatchingImpl. In case it detects a multi-dir glob {.../..., .../...},
+     * listFilesWithFoldedRegexpMatchingImpl takes over from then on.
+     * It works a bit differently: it still recursively goes through subdirectories, but does not match every directory against the glob.
+     * Instead, it descends many levels (until the approximate max_depth is reached) and compares the accumulated multi-dir path to the glob.
+     * StorageHDFS.cpp has the same logic.
+     */
     if (!max_depth)
         return;
@@ -121,6 +127,7 @@ void listFilesWithFoldedRegexpMatchingImpl(const std::string & path_for_ls,
     const size_t last_slash = full_path.rfind('/');
     const String dir_or_file_name = full_path.substr(last_slash);
 
+    std::cerr << "\nprocessing file (full_path): " << full_path << "\n";
     if (re2::RE2::FullMatch(processed_suffix + dir_or_file_name, matcher))
     {
         if (next_slash_after_glob_pos == std::string::npos)
@@ -130,14 +137,14 @@ void listFilesWithFoldedRegexpMatchingImpl(const std::string & path_for_ls,
         }
         else
         {
-            listFilesWithRegexpMatchingImpl(fs::path(full_path).append(processed_suffix).append(it->path().string()) / "" ,
+            listFilesWithRegexpMatchingImpl(fs::path(full_path) / "" ,
                                             suffix_with_globs.substr(next_slash_after_glob_pos),
                                             total_bytes_to_read, result);
         }
     }
     else if (it->is_directory())
     {
-        listFilesWithFoldedRegexpMatchingImpl(fs::path(full_path).append(processed_suffix), processed_suffix + dir_or_file_name,
+        listFilesWithFoldedRegexpMatchingImpl(fs::path(full_path), processed_suffix + dir_or_file_name,
                                               suffix_with_globs, current_glob, matcher,
                                               total_bytes_to_read, max_depth - 1, next_slash_after_glob_pos, result);
     }
diff --git a/tests/queries/0_stateless/02771_complex_globs_in_storage_file_path.sql b/tests/queries/0_stateless/02771_complex_globs_in_storage_file_path.sql
index c579c8d8698..41d7d6fcc3f 100644
--- a/tests/queries/0_stateless/02771_complex_globs_in_storage_file_path.sql
+++ b/tests/queries/0_stateless/02771_complex_globs_in_storage_file_path.sql
@@ -5,4 +5,6 @@ INSERT INTO TABLE FUNCTION file('02771/dir2/subdir22/data2.csv', 'CSV', 's Strin
 SELECT *, _file FROM file('02771/dir{?/subdir?1/da,2/subdir2?/da}ta1.csv', CSV);
 SELECT *, _file FROM file('02771/dir{?/subdir?1/da,2/subdir2?/da}ta2.csv', CSV);
-SELECT *, _file FROM file('02771/dir?/{subdir?1/data1,subdir2?/data2}.csv', CSV);
+
+SELECT *, _file FROM file('02771/dir?/{subdir?1/data1,subdir2?/data2}.csv', CSV) WHERE _file == 'data1.csv';
+SELECT *, _file FROM file('02771/dir?/{subdir?1/data1,subdir2?/data2}.csv', CSV) WHERE _file == 'data2.csv';

From 7d7bd5bb66fbf1e0d3a1e35a0fe74231d6e5e7ed Mon Sep 17 00:00:00 2001
From: zvonand
Date: Mon, 12 Jun 2023 11:50:23 +0200
Subject: [PATCH 0292/1997] update comment describing workflow

---
 src/Storages/HDFS/StorageHDFS.cpp | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp
index 379ee395939..bd50b66ede5 100644
--- a/src/Storages/HDFS/StorageHDFS.cpp
+++ b/src/Storages/HDFS/StorageHDFS.cpp
@@ -63,12 +63,20 @@ namespace ErrorCodes
 }
 namespace
 {
+    /// Forward-declared to use in LSWithFoldedRegexpMatching without a circular dependency.
     Strings LSWithRegexpMatching(const String & path_for_ls,
                                  const HDFSFSPtr & fs,
                                  const String & for_match,
                                  std::unordered_map * last_mod_times);

-    /// When `{...}` has any `/`s, it must be processed in a different way
+    /*
+     * When `{...}` has any `/`s, it must be processed in a different way:
+     * Basically, a path with globs is processed by LSWithRegexpMatching. In case it detects a multi-dir glob {.../..., .../...},
+     * LSWithFoldedRegexpMatching takes over from then on.
+     * It works a bit differently: it still recursively goes through subdirectories, but does not match every directory against the glob.
+     * Instead, it descends many levels (until the approximate max_depth is reached) and compares the accumulated multi-dir path to the glob.
+     * StorageFile.cpp has the same logic.
+     */
     Strings LSWithFoldedRegexpMatching(const String & path_for_ls,
                                        const HDFSFSPtr & fs,
                                        std::unordered_map * last_mod_times,

From 3e6d393e17b0913d2664838a98a8f946bc8b7695 Mon Sep 17 00:00:00 2001
From: zvonand
Date: Mon, 12 Jun 2023 12:06:21 +0200
Subject: [PATCH 0293/1997] remove debug cerr

---
 src/Storages/StorageFile.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp
index c7a57b7d4c9..93228cf4d39 100644
--- a/src/Storages/StorageFile.cpp
+++ b/src/Storages/StorageFile.cpp
@@ -127,7 +127,6 @@ void listFilesWithFoldedRegexpMatchingImpl(const std::string & path_for_ls,
     const size_t last_slash = full_path.rfind('/');
     const String dir_or_file_name = full_path.substr(last_slash);
-    std::cerr << "\nprocessing file (full_path): " << full_path << "\n";
     if (re2::RE2::FullMatch(processed_suffix + dir_or_file_name, matcher))
     {
         if (next_slash_after_glob_pos == std::string::npos)
         {

From b634012feb40445079145639d23c44967c00547e Mon Sep 17 00:00:00 2001
From: Antonio Andelic
Date: Mon, 12 Jun 2023 10:57:03 +0000
Subject: [PATCH 0294/1997] Add feature flags for Keeper API

---
 src/Common/ZooKeeper/IKeeper.h                |   4 +-
 src/Common/ZooKeeper/TestKeeper.h             |   5 +-
 src/Common/ZooKeeper/ZooKeeper.cpp            |   4 +-
 src/Common/ZooKeeper/ZooKeeper.h              |   7 +-
 src/Common/ZooKeeper/ZooKeeperArgs.cpp        |   1 +
 src/Common/ZooKeeper/ZooKeeperImpl.cpp        |  60 +++++++----
 src/Common/ZooKeeper/ZooKeeperImpl.h          |   7 +-
 .../ZooKeeper/ZooKeeperWithFaultInjection.h   |   4 +-
 src/Coordination/CoordinationSettings.cpp     |   2 +-
 src/Coordination/FourLetterCommand.cpp        |  27 ++++-
 src/Coordination/FourLetterCommand.h          |  12 +++
 src/Coordination/KeeperConstants.h            |   9 +-
 src/Coordination/KeeperContext.cpp            |  46 ++++----
 src/Coordination/KeeperContext.h              |   3 +
 src/Coordination/KeeperDispatcher.cpp         |  47 ++++----
 src/Coordination/KeeperDispatcher.h           |   8 ++
 src/Coordination/KeeperFeatureFlags.cpp       | 102 ++++++++++++++++++
 src/Coordination/KeeperFeatureFlags.h         |  49 +++++++++
 src/Coordination/KeeperServer.cpp             |   8 +-
 src/Coordination/KeeperServer.h               |   1 +
 src/Coordination/KeeperStorage.cpp            |   4 +-
src/Coordination/tests/gtest_coordination.cpp | 15 +-- .../StorageSystemZooKeeperConnection.cpp | 4 +- .../test_keeper_api_version_config/test.py | 87 --------------- .../__init__.py | 0 .../configs/enable_keeper.xml | 2 +- .../test_keeper_feature_flags_config/test.py | 92 ++++++++++++++++ 27 files changed, 422 insertions(+), 188 deletions(-) create mode 100644 src/Coordination/KeeperFeatureFlags.cpp create mode 100644 src/Coordination/KeeperFeatureFlags.h delete mode 100644 tests/integration/test_keeper_api_version_config/test.py rename tests/integration/{test_keeper_api_version_config => test_keeper_feature_flags_config}/__init__.py (100%) rename tests/integration/{test_keeper_api_version_config => test_keeper_feature_flags_config}/configs/enable_keeper.xml (97%) create mode 100644 tests/integration/test_keeper_feature_flags_config/test.py diff --git a/src/Common/ZooKeeper/IKeeper.h b/src/Common/ZooKeeper/IKeeper.h index 3eb5819df90..369aacf16c7 100644 --- a/src/Common/ZooKeeper/IKeeper.h +++ b/src/Common/ZooKeeper/IKeeper.h @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include @@ -530,7 +530,7 @@ public: const Requests & requests, MultiCallback callback) = 0; - virtual DB::KeeperApiVersion getApiVersion() const = 0; + virtual bool isFeatureEnabled(DB::KeeperFeatureFlag feature_flag) const = 0; /// Expire session and finish all pending requests virtual void finalize(const String & reason) = 0; diff --git a/src/Common/ZooKeeper/TestKeeper.h b/src/Common/ZooKeeper/TestKeeper.h index 4bffa4e1d4f..9bbd018cfb1 100644 --- a/src/Common/ZooKeeper/TestKeeper.h +++ b/src/Common/ZooKeeper/TestKeeper.h @@ -11,6 +11,7 @@ #include #include #include +#include namespace Coordination @@ -92,9 +93,9 @@ public: void finalize(const String & reason) override; - DB::KeeperApiVersion getApiVersion() const override + bool isFeatureEnabled(DB::KeeperFeatureFlag) const override { - return KeeperApiVersion::ZOOKEEPER_COMPATIBLE; + return false; } struct Node diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index a587ad6caf4..826032fc56b 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -865,9 +865,9 @@ bool ZooKeeper::expired() return impl->isExpired(); } -DB::KeeperApiVersion ZooKeeper::getApiVersion() const +bool ZooKeeper::isFeatureEnabled(DB::KeeperFeatureFlag feature_flag) const { - return impl->getApiVersion(); + return impl->isFeatureEnabled(feature_flag); } Int64 ZooKeeper::getClientID() diff --git a/src/Common/ZooKeeper/ZooKeeper.h b/src/Common/ZooKeeper/ZooKeeper.h index 96f9914b597..a4a631b9d5a 100644 --- a/src/Common/ZooKeeper/ZooKeeper.h +++ b/src/Common/ZooKeeper/ZooKeeper.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -215,7 +216,7 @@ public: /// Returns true, if the session has expired. bool expired(); - DB::KeeperApiVersion getApiVersion() const; + bool isFeatureEnabled(DB::KeeperFeatureFlag feature_flag) const; /// Create a znode. /// Throw an exception if something went wrong. 
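    /// The migration pattern applied throughout this patch, sketched with names from the diff: a
    /// version-threshold comparison such as
    ///     if (getApiVersion() >= DB::KeeperApiVersion::WITH_MULTI_READ) { /* use multi-read */ }
    /// becomes a single capability query
    ///     if (isFeatureEnabled(DB::KeeperFeatureFlag::MULTI_READ)) { /* use multi-read */ }
    /// so capabilities no longer need to form a totally ordered version ladder.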
@@ -553,7 +554,7 @@ private: template MultiReadResponses multiRead(TIter start, TIter end, RequestFactory request_factory, AsyncFunction async_fun) { - if (getApiVersion() >= DB::KeeperApiVersion::WITH_MULTI_READ) + if (isFeatureEnabled(DB::KeeperFeatureFlag::MULTI_READ)) { Coordination::Requests requests; for (auto it = start; it != end; ++it) @@ -685,7 +686,7 @@ String getZooKeeperConfigName(const Poco::Util::AbstractConfiguration & config); template void addCheckNotExistsRequest(Coordination::Requests & requests, const Client & client, const std::string & path) { - if (client.getApiVersion() >= DB::KeeperApiVersion::WITH_CHECK_NOT_EXISTS) + if (client.isFeatureEnabled(DB::KeeperFeatureFlag::CHECK_NOT_EXISTS)) { auto request = std::make_shared(); request->path = path; diff --git a/src/Common/ZooKeeper/ZooKeeperArgs.cpp b/src/Common/ZooKeeper/ZooKeeperArgs.cpp index 0ebc7f667cb..e99285b0056 100644 --- a/src/Common/ZooKeeper/ZooKeeperArgs.cpp +++ b/src/Common/ZooKeeper/ZooKeeperArgs.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include namespace DB diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index 7f23ac00efe..bf0d1871244 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -354,7 +354,8 @@ ZooKeeper::ZooKeeper( send_thread = ThreadFromGlobalPool([this] { sendThread(); }); receive_thread = ThreadFromGlobalPool([this] { receiveThread(); }); - initApiVersion(); + initFeatureFlags(); + keeper_feature_flags.logFlags(log); ProfileEvents::increment(ProfileEvents::ZooKeeperInit); } @@ -1089,41 +1090,58 @@ void ZooKeeper::pushRequest(RequestInfo && info) ProfileEvents::increment(ProfileEvents::ZooKeeperTransactions); } -KeeperApiVersion ZooKeeper::getApiVersion() const +bool ZooKeeper::isFeatureEnabled(KeeperFeatureFlag feature_flag) const { - return keeper_api_version; + return keeper_feature_flags.isEnabled(feature_flag); } -void ZooKeeper::initApiVersion() +void ZooKeeper::initFeatureFlags() { - auto promise = std::make_shared>(); - auto future = promise->get_future(); - - auto callback = [promise](const Coordination::GetResponse & response) mutable + const auto try_get = [&](const std::string & path, const std::string & description) -> std::optional { - promise->set_value(response); + auto promise = std::make_shared>(); + auto future = promise->get_future(); + + auto callback = [promise](const Coordination::GetResponse & response) mutable + { + promise->set_value(response); + }; + + get(path, std::move(callback), {}); + if (future.wait_for(std::chrono::milliseconds(args.operation_timeout_ms)) != std::future_status::ready) + { + LOG_TRACE(log, "Failed to get {}: timeout", description); + return std::nullopt; + } + + auto response = future.get(); + + if (response.error != Coordination::Error::ZOK) + { + LOG_TRACE(log, "Failed to get {}", description); + return std::nullopt; + } + + return std::move(response.data); }; - get(keeper_api_version_path, std::move(callback), {}); - if (future.wait_for(std::chrono::milliseconds(args.operation_timeout_ms)) != std::future_status::ready) + if (auto feature_flags = try_get(keeper_api_feature_flags_path, "feature flags"); feature_flags.has_value()) { - LOG_TRACE(log, "Failed to get API version: timeout"); + keeper_feature_flags.setFeatureFlags(std::move(*feature_flags)); return; } - auto response = future.get(); + auto keeper_api_version_string = try_get(keeper_api_version_path, "API version"); - if (response.error != Coordination::Error::ZOK) - { - 
LOG_TRACE(log, "Failed to get API version"); + if (!keeper_api_version_string.has_value()) return; - } + DB::ReadBufferFromOwnString buf(*keeper_api_version_string); uint8_t keeper_version{0}; - DB::ReadBufferFromOwnString buf(response.data); DB::readIntText(keeper_version, buf); - keeper_api_version = static_cast(keeper_version); + auto keeper_api_version = static_cast(keeper_version); LOG_TRACE(log, "Detected server's API version: {}", keeper_api_version); + keeper_feature_flags.fromApiVersion(keeper_api_version); } @@ -1243,7 +1261,7 @@ void ZooKeeper::list( WatchCallback watch) { std::shared_ptr request{nullptr}; - if (keeper_api_version < Coordination::KeeperApiVersion::WITH_FILTERED_LIST) + if (!isFeatureEnabled(KeeperFeatureFlag::FILTERED_LIST)) { if (list_request_type != ListRequestType::ALL) throw Exception(Error::ZBADARGUMENTS, "Filtered list request type cannot be used because it's not supported by the server"); @@ -1308,7 +1326,7 @@ void ZooKeeper::multi( { ZooKeeperMultiRequest request(requests, default_acls); - if (request.getOpNum() == OpNum::MultiRead && keeper_api_version < Coordination::KeeperApiVersion::WITH_MULTI_READ) + if (request.getOpNum() == OpNum::MultiRead && !isFeatureEnabled(KeeperFeatureFlag::MULTI_READ)) throw Exception(Error::ZBADARGUMENTS, "MultiRead request type cannot be used because it's not supported by the server"); RequestInfo request_info; diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.h b/src/Common/ZooKeeper/ZooKeeperImpl.h index 944c5032fac..ae6bef067e3 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.h +++ b/src/Common/ZooKeeper/ZooKeeperImpl.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -181,7 +182,7 @@ public: const Requests & requests, MultiCallback callback) override; - DB::KeeperApiVersion getApiVersion() const override; + bool isFeatureEnabled(KeeperFeatureFlag feature_flag) const override; /// Without forcefully invalidating (finalizing) ZooKeeper session before /// establishing a new one, there was a possibility that server is using @@ -312,12 +313,12 @@ private: void logOperationIfNeeded(const ZooKeeperRequestPtr & request, const ZooKeeperResponsePtr & response = nullptr, bool finalize = false, UInt64 elapsed_ms = 0); - void initApiVersion(); + void initFeatureFlags(); CurrentMetrics::Increment active_session_metric_increment{CurrentMetrics::ZooKeeperSession}; std::shared_ptr zk_log; - DB::KeeperApiVersion keeper_api_version{DB::KeeperApiVersion::ZOOKEEPER_COMPATIBLE}; + DB::KeeperFeatureFlags keeper_feature_flags; }; } diff --git a/src/Common/ZooKeeper/ZooKeeperWithFaultInjection.h b/src/Common/ZooKeeper/ZooKeeperWithFaultInjection.h index bf99cb76798..9d02d674010 100644 --- a/src/Common/ZooKeeper/ZooKeeperWithFaultInjection.h +++ b/src/Common/ZooKeeper/ZooKeeperWithFaultInjection.h @@ -402,9 +402,9 @@ public: ephemeral_nodes.clear(); } - KeeperApiVersion getApiVersion() const + bool isFeatureEnabled(KeeperFeatureFlag feature_flag) const { - return keeper->getApiVersion(); + return keeper->isFeatureEnabled(feature_flag); } private: diff --git a/src/Coordination/CoordinationSettings.cpp b/src/Coordination/CoordinationSettings.cpp index 7a66134f43f..edbdfd4f9fc 100644 --- a/src/Coordination/CoordinationSettings.cpp +++ b/src/Coordination/CoordinationSettings.cpp @@ -36,7 +36,7 @@ void CoordinationSettings::loadFromConfig(const String & config_elem, const Poco } -const String KeeperConfigurationAndSettings::DEFAULT_FOUR_LETTER_WORD_CMD = 
"conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro,rcvr,apiv,csnp,lgif,rqld,rclc,clrs"; +const String KeeperConfigurationAndSettings::DEFAULT_FOUR_LETTER_WORD_CMD = "conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro,rcvr,apiv,csnp,lgif,rqld,rclc,clrs,ftfl"; KeeperConfigurationAndSettings::KeeperConfigurationAndSettings() : server_id(NOT_EXIST) diff --git a/src/Coordination/FourLetterCommand.cpp b/src/Coordination/FourLetterCommand.cpp index a64969e3d31..55120e70d99 100644 --- a/src/Coordination/FourLetterCommand.cpp +++ b/src/Coordination/FourLetterCommand.cpp @@ -9,6 +9,7 @@ #include #include #include +#include "Coordination/KeeperFeatureFlags.h" #include #include #include @@ -153,6 +154,9 @@ void FourLetterCommandFactory::registerCommands(KeeperDispatcher & keeper_dispat FourLetterCommandPtr clean_resources_command = std::make_shared(keeper_dispatcher); factory.registerCommand(clean_resources_command); + FourLetterCommandPtr feature_flags_command = std::make_shared(keeper_dispatcher); + factory.registerCommand(feature_flags_command); + factory.initializeAllowList(keeper_dispatcher); factory.setInitialize(true); } @@ -486,7 +490,7 @@ String RecoveryCommand::run() String ApiVersionCommand::run() { - return toString(static_cast(Coordination::latest_keeper_api_version)); + return toString(0); } String CreateSnapshotCommand::run() @@ -535,4 +539,25 @@ String CleanResourcesCommand::run() return "ok"; } +String FeatureFlagsCommand::run() +{ + const auto & feature_flags = keeper_dispatcher.getKeeperContext()->feature_flags; + + StringBuffer ret; + + auto append = [&ret] (String key, uint8_t value) -> void + { + writeText(key, ret); + writeText('\t', ret); + writeText(std::to_string(value), ret); + writeText('\n', ret); + }; + + for (const auto feature : all_keeper_feature_flags) + append(SettingFieldKeeperFeatureFlagTraits::toString(feature), feature_flags.isEnabled(feature)); + + return ret.str(); + +} + } diff --git a/src/Coordination/FourLetterCommand.h b/src/Coordination/FourLetterCommand.h index c1a91303c05..eb2cf9419ae 100644 --- a/src/Coordination/FourLetterCommand.h +++ b/src/Coordination/FourLetterCommand.h @@ -401,4 +401,16 @@ struct CleanResourcesCommand : public IFourLetterCommand ~CleanResourcesCommand() override = default; }; +struct FeatureFlagsCommand : public IFourLetterCommand +{ + explicit FeatureFlagsCommand(KeeperDispatcher & keeper_dispatcher_) + : IFourLetterCommand(keeper_dispatcher_) + { + } + + String name() override { return "ftfl"; } + String run() override; + ~FeatureFlagsCommand() override = default; +}; + } diff --git a/src/Coordination/KeeperConstants.h b/src/Coordination/KeeperConstants.h index 42161eee908..6d9c03ca8a9 100644 --- a/src/Coordination/KeeperConstants.h +++ b/src/Coordination/KeeperConstants.h @@ -5,6 +5,7 @@ namespace DB { +/// left for backwards compatibility enum class KeeperApiVersion : uint8_t { ZOOKEEPER_COMPATIBLE = 0, @@ -13,15 +14,9 @@ enum class KeeperApiVersion : uint8_t WITH_CHECK_NOT_EXISTS, }; -inline constexpr auto latest_keeper_api_version = KeeperApiVersion::WITH_CHECK_NOT_EXISTS; const std::string keeper_system_path = "/keeper"; const std::string keeper_api_version_path = keeper_system_path + "/api_version"; - -using PathWithData = std::pair; -const std::vector child_system_paths_with_data -{ - {keeper_api_version_path, toString(static_cast(latest_keeper_api_version))} -}; +const std::string keeper_api_feature_flags_path = keeper_system_path + "/feature_flags"; } diff --git a/src/Coordination/KeeperContext.cpp 
b/src/Coordination/KeeperContext.cpp index a750f2e1860..dc408afc19a 100644 --- a/src/Coordination/KeeperContext.cpp +++ b/src/Coordination/KeeperContext.cpp @@ -1,40 +1,42 @@ #include #include -#include -#include #include -#include namespace DB { -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - -DECLARE_SETTING_ENUM(KeeperApiVersion); -IMPLEMENT_SETTING_ENUM(KeeperApiVersion, ErrorCodes::BAD_ARGUMENTS, - {{"ZOOKEEPER_COMPATIBLE", KeeperApiVersion::ZOOKEEPER_COMPATIBLE}, - {"WITH_FILTERED_LIST", KeeperApiVersion::WITH_FILTERED_LIST}, - {"WITH_MULTI_READ", KeeperApiVersion::WITH_MULTI_READ}, - {"WITH_CHECK_NOT_EXISTS", KeeperApiVersion::WITH_CHECK_NOT_EXISTS}}); - KeeperContext::KeeperContext() { - for (const auto & [path, data] : child_system_paths_with_data) - system_nodes_with_data[std::string{path}] = data; + /// enable by default some feature flags + feature_flags.enableFeatureFlag(KeeperFeatureFlag::FILTERED_LIST); + feature_flags.enableFeatureFlag(KeeperFeatureFlag::MULTI_READ); + system_nodes_with_data[keeper_api_feature_flags_path] = feature_flags.getFeatureFlags(); } void KeeperContext::initialize(const Poco::Util::AbstractConfiguration & config) { - if (config.has("keeper_server.api_version")) + digest_enabled = config.getBool("keeper_server.digest_enabled", false); + ignore_system_path_on_startup = config.getBool("keeper_server.ignore_system_path_on_startup", false); + + static const std::string feature_flags_key = "keeper_server.feature_flags"; + if (config.has(feature_flags_key)) { - auto version_string = config.getString("keeper_server.api_version"); - auto api_version = SettingFieldKeeperApiVersionTraits::fromString(version_string); - LOG_INFO(&Poco::Logger::get("KeeperContext"), "API version override used: {}", version_string); - system_nodes_with_data[keeper_api_version_path] = toString(static_cast(api_version)); + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(feature_flags_key, keys); + for (const auto & key : keys) + { + auto feature_flag = SettingFieldKeeperFeatureFlagTraits::fromString(key); + auto is_enabled = config.getBool(feature_flags_key + "." 
+ key); + if (is_enabled) + feature_flags.enableFeatureFlag(feature_flag); + else + feature_flags.disableFeatureFlag(feature_flag); + } + + system_nodes_with_data[keeper_api_feature_flags_path] = feature_flags.getFeatureFlags(); } + + feature_flags.logFlags(&Poco::Logger::get("KeeperContext")); } } diff --git a/src/Coordination/KeeperContext.h b/src/Coordination/KeeperContext.h index de502b6c566..9b7000fa726 100644 --- a/src/Coordination/KeeperContext.h +++ b/src/Coordination/KeeperContext.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace DB { @@ -24,6 +25,8 @@ struct KeeperContext bool digest_enabled{true}; std::unordered_map system_nodes_with_data; + + KeeperFeatureFlags feature_flags; }; using KeeperContextPtr = std::shared_ptr; diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index d64134f3024..7318a492b35 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -336,28 +336,39 @@ void KeeperDispatcher::initialize(const Poco::Util::AbstractConfiguration & conf snapshot_s3.startup(config, macros); - server = std::make_unique(configuration_and_settings, config, responses_queue, snapshots_queue, snapshot_s3, [this](const KeeperStorage::RequestForSession & request_for_session) - { - /// check if we have queue of read requests depending on this request to be committed - std::lock_guard lock(read_request_queue_mutex); - if (auto it = read_request_queue.find(request_for_session.session_id); it != read_request_queue.end()) + keeper_context = std::make_shared(); + keeper_context->initialize(config); + + server = std::make_unique( + configuration_and_settings, + config, + responses_queue, + snapshots_queue, + keeper_context, + snapshot_s3, + [this](const KeeperStorage::RequestForSession & request_for_session) { - auto & xid_to_request_queue = it->second; - - if (auto request_queue_it = xid_to_request_queue.find(request_for_session.request->xid); request_queue_it != xid_to_request_queue.end()) + /// check if we have queue of read requests depending on this request to be committed + std::lock_guard lock(read_request_queue_mutex); + if (auto it = read_request_queue.find(request_for_session.session_id); it != read_request_queue.end()) { - for (const auto & read_request : request_queue_it->second) - { - if (server->isLeaderAlive()) - server->putLocalReadRequest(read_request); - else - addErrorResponses({read_request}, Coordination::Error::ZCONNECTIONLOSS); - } + auto & xid_to_request_queue = it->second; - xid_to_request_queue.erase(request_queue_it); + if (auto request_queue_it = xid_to_request_queue.find(request_for_session.request->xid); + request_queue_it != xid_to_request_queue.end()) + { + for (const auto & read_request : request_queue_it->second) + { + if (server->isLeaderAlive()) + server->putLocalReadRequest(read_request); + else + addErrorResponses({read_request}, Coordination::Error::ZCONNECTIONLOSS); + } + + xid_to_request_queue.erase(request_queue_it); + } } - } - }); + }); try { diff --git a/src/Coordination/KeeperDispatcher.h b/src/Coordination/KeeperDispatcher.h index 4b8b134cf8f..1759f55d981 100644 --- a/src/Coordination/KeeperDispatcher.h +++ b/src/Coordination/KeeperDispatcher.h @@ -81,6 +81,8 @@ private: KeeperSnapshotManagerS3 snapshot_s3; + KeeperContextPtr keeper_context; + /// Thread put requests to raft void requestThread(); /// Thread put responses for subscribed sessions @@ -198,6 +200,12 @@ public: return configuration_and_settings; } + const KeeperContextPtr & getKeeperContext() 
const + { + return keeper_context; + } + + void incrementPacketsSent() { keeper_stats.incrementPacketsSent(); diff --git a/src/Coordination/KeeperFeatureFlags.cpp b/src/Coordination/KeeperFeatureFlags.cpp new file mode 100644 index 00000000000..216dca014d4 --- /dev/null +++ b/src/Coordination/KeeperFeatureFlags.cpp @@ -0,0 +1,102 @@ +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +namespace +{ + +std::pair getByteAndBitIndex(size_t num) +{ + size_t byte_idx = num / 8; + auto bit_idx = (7 - num % 8); + return {byte_idx, bit_idx}; +} + +} + +IMPLEMENT_SETTING_ENUM(KeeperFeatureFlag, ErrorCodes::BAD_ARGUMENTS, + {{"filtered_list", KeeperFeatureFlag::FILTERED_LIST}, + {"multi_read", KeeperFeatureFlag::MULTI_READ}, + {"check_not_exists", KeeperFeatureFlag::CHECK_NOT_EXISTS}}); + +KeeperFeatureFlags::KeeperFeatureFlags() +{ + /// get byte idx of largest value + auto [byte_idx, _] = getByteAndBitIndex(all_keeper_feature_flags.size() - 1); + feature_flags = std::string(byte_idx + 1, 0); +} + +KeeperFeatureFlags::KeeperFeatureFlags(std::string feature_flags_) + : feature_flags(std::move(feature_flags_)) +{} + +void KeeperFeatureFlags::fromApiVersion(KeeperApiVersion keeper_api_version) +{ + if (keeper_api_version == KeeperApiVersion::ZOOKEEPER_COMPATIBLE) + return; + + if (keeper_api_version >= KeeperApiVersion::WITH_FILTERED_LIST) + enableFeatureFlag(KeeperFeatureFlag::FILTERED_LIST); + + if (keeper_api_version >= KeeperApiVersion::WITH_MULTI_READ) + enableFeatureFlag(KeeperFeatureFlag::MULTI_READ); + + if (keeper_api_version >= KeeperApiVersion::WITH_CHECK_NOT_EXISTS) + enableFeatureFlag(KeeperFeatureFlag::CHECK_NOT_EXISTS); +} + +bool KeeperFeatureFlags::isEnabled(KeeperFeatureFlag feature_flag) const +{ + auto [byte_idx, bit_idx] = getByteAndBitIndex(feature_flag); + + if (byte_idx > feature_flags.size()) + return false; + + return feature_flags[byte_idx] & (1 << bit_idx); +} + +void KeeperFeatureFlags::setFeatureFlags(std::string feature_flags_) +{ + feature_flags = std::move(feature_flags_); +} + +void KeeperFeatureFlags::enableFeatureFlag(KeeperFeatureFlag feature_flag) +{ + auto [byte_idx, bit_idx] = getByteAndBitIndex(feature_flag); + chassert(byte_idx < feature_flags.size()); + + feature_flags[byte_idx] |= (1 << bit_idx); +} + +void KeeperFeatureFlags::disableFeatureFlag(KeeperFeatureFlag feature_flag) +{ + auto [byte_idx, bit_idx] = getByteAndBitIndex(feature_flag); + chassert(byte_idx < feature_flags.size()); + + feature_flags[byte_idx] &= ~(1 << bit_idx); +} + +const std::string & KeeperFeatureFlags::getFeatureFlags() const +{ + return feature_flags; +} + +void KeeperFeatureFlags::logFlags(Poco::Logger * log) const +{ + for (const auto & feature_flag : all_keeper_feature_flags) + { + auto is_enabled = isEnabled(feature_flag); + LOG_INFO(log, "Keeper feature flag {}: {}", SettingFieldKeeperFeatureFlagTraits::toString(feature_flag), is_enabled ? 
"enabled" : "disabled"); + } +} + +} diff --git a/src/Coordination/KeeperFeatureFlags.h b/src/Coordination/KeeperFeatureFlags.h new file mode 100644 index 00000000000..cdd4704a7ca --- /dev/null +++ b/src/Coordination/KeeperFeatureFlags.h @@ -0,0 +1,49 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +enum KeeperFeatureFlag +{ + FILTERED_LIST = 0, + MULTI_READ, + CHECK_NOT_EXISTS, +}; + +static inline constexpr std::array all_keeper_feature_flags +{ + KeeperFeatureFlag::FILTERED_LIST, + KeeperFeatureFlag::MULTI_READ, + KeeperFeatureFlag::CHECK_NOT_EXISTS, +}; + +DECLARE_SETTING_ENUM(KeeperFeatureFlag); + +class KeeperFeatureFlags +{ +public: + KeeperFeatureFlags(); + + explicit KeeperFeatureFlags(std::string feature_flags_); + + /// backwards compatibility + void fromApiVersion(KeeperApiVersion keeper_api_version); + + bool isEnabled(KeeperFeatureFlag feature) const; + + void setFeatureFlags(std::string feature_flags_); + const std::string & getFeatureFlags() const; + + void enableFeatureFlag(KeeperFeatureFlag feature); + void disableFeatureFlag(KeeperFeatureFlag feature); + + void logFlags(Poco::Logger * log) const; +private: + std::string feature_flags; +}; + +} diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index b3150f11cfb..43719e5a69e 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -108,23 +108,19 @@ KeeperServer::KeeperServer( const Poco::Util::AbstractConfiguration & config, ResponsesQueue & responses_queue_, SnapshotsQueue & snapshots_queue_, + KeeperContextPtr keeper_context_, KeeperSnapshotManagerS3 & snapshot_manager_s3, KeeperStateMachine::CommitCallback commit_callback) : server_id(configuration_and_settings_->server_id) , coordination_settings(configuration_and_settings_->coordination_settings) , log(&Poco::Logger::get("KeeperServer")) , is_recovering(config.getBool("keeper_server.force_recovery", false)) - , keeper_context{std::make_shared()} + , keeper_context{std::move(keeper_context_)} , create_snapshot_on_exit(config.getBool("keeper_server.create_snapshot_on_exit", true)) { if (coordination_settings->quorum_reads) LOG_WARNING(log, "Quorum reads enabled, Keeper will work slower."); - keeper_context->initialize(config); - - keeper_context->digest_enabled = config.getBool("keeper_server.digest_enabled", false); - keeper_context->ignore_system_path_on_startup = config.getBool("keeper_server.ignore_system_path_on_startup", false); - state_machine = nuraft::cs_new( responses_queue_, snapshots_queue_, diff --git a/src/Coordination/KeeperServer.h b/src/Coordination/KeeperServer.h index db4e9c1962e..8f416b1f48c 100644 --- a/src/Coordination/KeeperServer.h +++ b/src/Coordination/KeeperServer.h @@ -72,6 +72,7 @@ public: const Poco::Util::AbstractConfiguration & config_, ResponsesQueue & responses_queue_, SnapshotsQueue & snapshots_queue_, + KeeperContextPtr keeper_context_, KeeperSnapshotManagerS3 & snapshot_manager_s3, KeeperStateMachine::CommitCallback commit_callback); diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 8abcd062b7f..ecf795f4068 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -285,7 +285,7 @@ void KeeperStorage::initializeSystemNodes() // insert child system nodes for (const auto & [path, data] : keeper_context->system_nodes_with_data) { - assert(keeper_api_version_path.starts_with(keeper_system_path)); + assert(path.starts_with(keeper_system_path)); Node child_system_node; 
child_system_node.setData(data); auto [map_key, _] = container.insert(std::string{path}, child_system_node); @@ -1060,7 +1060,7 @@ struct KeeperStorageGetRequestProcessor final : public KeeperStorageRequestProce ProfileEvents::increment(ProfileEvents::KeeperGetRequest); Coordination::ZooKeeperGetRequest & request = dynamic_cast(*zk_request); - if (request.path == Coordination::keeper_api_version_path) + if (request.path == Coordination::keeper_api_feature_flags_path) return {}; if (!storage.uncommitted_state.getNode(request.path)) diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index 005c67ad261..2793b23c572 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -2,7 +2,9 @@ #include #include "Common/ZooKeeper/IKeeper.h" +#include "Coordination/KeeperConstants.h" #include "Coordination/KeeperContext.h" +#include "Coordination/KeeperFeatureFlags.h" #include "Coordination/KeeperStorage.h" #include "Core/Defines.h" #include "IO/WriteHelpers.h" @@ -2346,18 +2348,19 @@ TEST_P(CoordinationTest, TestDurableState) } } -TEST_P(CoordinationTest, TestCurrentApiVersion) +TEST_P(CoordinationTest, TestFeatureFlags) { using namespace Coordination; KeeperStorage storage{500, "", keeper_context}; auto request = std::make_shared(); - request->path = DB::keeper_api_version_path; + request->path = DB::keeper_api_feature_flags_path; auto responses = storage.processRequest(request, 0, std::nullopt, true, true); const auto & get_response = getSingleResponse(responses); - uint8_t keeper_version{0}; - DB::ReadBufferFromOwnString buf(get_response.data); - DB::readIntText(keeper_version, buf); - EXPECT_EQ(keeper_version, static_cast(latest_keeper_api_version)); + DB::KeeperFeatureFlags feature_flags; + feature_flags.setFeatureFlags(get_response.data); + ASSERT_TRUE(feature_flags.isEnabled(KeeperFeatureFlag::FILTERED_LIST)); + ASSERT_TRUE(feature_flags.isEnabled(KeeperFeatureFlag::MULTI_READ)); + ASSERT_FALSE(feature_flags.isEnabled(KeeperFeatureFlag::CHECK_NOT_EXISTS)); } TEST_P(CoordinationTest, TestSystemNodeModify) diff --git a/src/Storages/System/StorageSystemZooKeeperConnection.cpp b/src/Storages/System/StorageSystemZooKeeperConnection.cpp index 559e12ad5ee..9a6a592f2c4 100644 --- a/src/Storages/System/StorageSystemZooKeeperConnection.cpp +++ b/src/Storages/System/StorageSystemZooKeeperConnection.cpp @@ -31,7 +31,7 @@ void StorageSystemZooKeeperConnection::fillData(MutableColumns & res_columns, Co res_columns[3]->insert(context->getZooKeeper()->getConnectedZooKeeperIndex()); res_columns[4]->insert(context->getZooKeeperSessionUptime()); res_columns[5]->insert(context->getZooKeeper()->expired()); - res_columns[6]->insert(context->getZooKeeper()->getApiVersion()); + res_columns[6]->insert(0); res_columns[7]->insert(context->getZooKeeper()->getClientID()); for (const auto & elem : context->getAuxiliaryZooKeepers()) @@ -42,7 +42,7 @@ void StorageSystemZooKeeperConnection::fillData(MutableColumns & res_columns, Co res_columns[3]->insert(elem.second->getConnectedZooKeeperIndex()); res_columns[4]->insert(elem.second->getSessionUptime()); res_columns[5]->insert(elem.second->expired()); - res_columns[6]->insert(elem.second->getApiVersion()); + res_columns[6]->insert(0); res_columns[7]->insert(elem.second->getClientID()); } diff --git a/tests/integration/test_keeper_api_version_config/test.py b/tests/integration/test_keeper_api_version_config/test.py deleted file mode 100644 index 34d3acc4f04..00000000000 
--- a/tests/integration/test_keeper_api_version_config/test.py +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env python3 - -import pytest -import os -from helpers.cluster import ClickHouseCluster -import helpers.keeper_utils as keeper_utils -from kazoo.client import KazooClient, KazooState - -CURRENT_TEST_DIR = os.path.dirname(os.path.abspath(__file__)) -cluster = ClickHouseCluster(__file__) - -# clickhouse itself will use external zookeeper -node = cluster.add_instance( - "node", - main_configs=["configs/enable_keeper.xml"], - stay_alive=True, -) - - -@pytest.fixture(scope="module") -def started_cluster(): - try: - cluster.start() - - yield cluster - - finally: - cluster.shutdown() - - -def get_connection_zk(nodename, timeout=30.0): - _fake_zk_instance = KazooClient( - hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout - ) - _fake_zk_instance.start() - return _fake_zk_instance - - -def restart_clickhouse(api_version=None, expect_fail=True): - node.stop_clickhouse() - node.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "configs/enable_keeper.xml"), - "/etc/clickhouse-server/config.d/enable_keeper.xml", - ) - - if api_version: - node.replace_in_config( - "/etc/clickhouse-server/config.d/enable_keeper.xml", - "", - f"{api_version}<\\/api_version>", - ) - - node.start_clickhouse(retry_start=not expect_fail) - keeper_utils.wait_until_connected(cluster, node) - - -def test_keeper_api_version(started_cluster): - restart_clickhouse() - - def assert_version(string_version, version_number): - node.wait_for_log_line( - f"Detected server's API version: {string_version}", look_behind_lines=1000 - ) - - try: - node_zk = get_connection_zk(node.name) - assert node_zk.get("/keeper/api_version")[0] == str(version_number).encode() - finally: - if node_zk: - node_zk.stop() - node_zk.close() - - assert_version("WITH_CHECK_NOT_EXISTS", 3) - - for i, version in enumerate( - [ - "ZOOKEEPER_COMPATIBLE", - "WITH_FILTERED_LIST", - "WITH_MULTI_READ", - "WITH_CHECK_NOT_EXISTS", - ] - ): - restart_clickhouse(version) - assert_version(version, i) - - with pytest.raises(Exception): - restart_clickhouse("INVALID_VERSION", expect_fail=True) diff --git a/tests/integration/test_keeper_api_version_config/__init__.py b/tests/integration/test_keeper_feature_flags_config/__init__.py similarity index 100% rename from tests/integration/test_keeper_api_version_config/__init__.py rename to tests/integration/test_keeper_feature_flags_config/__init__.py diff --git a/tests/integration/test_keeper_api_version_config/configs/enable_keeper.xml b/tests/integration/test_keeper_feature_flags_config/configs/enable_keeper.xml similarity index 97% rename from tests/integration/test_keeper_api_version_config/configs/enable_keeper.xml rename to tests/integration/test_keeper_feature_flags_config/configs/enable_keeper.xml index c153d025598..53a169c4c3c 100644 --- a/tests/integration/test_keeper_api_version_config/configs/enable_keeper.xml +++ b/tests/integration/test_keeper_feature_flags_config/configs/enable_keeper.xml @@ -18,7 +18,7 @@ 0 - + diff --git a/tests/integration/test_keeper_feature_flags_config/test.py b/tests/integration/test_keeper_feature_flags_config/test.py new file mode 100644 index 00000000000..432c413cbe8 --- /dev/null +++ b/tests/integration/test_keeper_feature_flags_config/test.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python3 + +import pytest +import os +from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils +from kazoo.client import KazooClient, KazooState + 
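+# A note on the new "ftfl" four-letter command this test drives: each response line has the form
+# "<flag_name>\t<0|1>", e.g. "multi_read\t1", matching FeatureFlagsCommand::run() above. As a worked
+# example of the storage side: with only FILTERED_LIST (bit 0) and MULTI_READ (bit 1) enabled, the
+# "/keeper/feature_flags" node holds the single byte 0b11000000 (0xC0), bits assigned MSB-first.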
+CURRENT_TEST_DIR = os.path.dirname(os.path.abspath(__file__)) +cluster = ClickHouseCluster(__file__) + +# clickhouse itself will use external zookeeper +node = cluster.add_instance( + "node", + main_configs=["configs/enable_keeper.xml"], + stay_alive=True, +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + yield cluster + + finally: + cluster.shutdown() + + +def get_connection_zk(nodename, timeout=30.0): + _fake_zk_instance = KazooClient( + hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout + ) + _fake_zk_instance.start() + return _fake_zk_instance + + +def restart_clickhouse(feature_flags=[], expect_fail=True): + node.stop_clickhouse() + node.copy_file_to_container( + os.path.join(CURRENT_TEST_DIR, "configs/enable_keeper.xml"), + "/etc/clickhouse-server/config.d/enable_keeper.xml", + ) + + if len(feature_flags) > 0: + feature_flags_config = "" + + for feature, is_enabled in feature_flags: + feature_flags_config += f"<{feature}>{is_enabled}<\\/{feature}>" + + feature_flags_config += "<\\/feature_flags>" + + node.replace_in_config( + "/etc/clickhouse-server/config.d/enable_keeper.xml", + "", + feature_flags_config + ) + + node.start_clickhouse(retry_start=not expect_fail) + keeper_utils.wait_until_connected(cluster, node) + + +def test_keeper_feature_flags(started_cluster): + restart_clickhouse() + + def assert_feature_flags(feature_flags): + res = keeper_utils.send_4lw_cmd(started_cluster, node, "ftfl") + + for feature, is_enabled in feature_flags: + node.wait_for_log_line( + f"ZooKeeperClient: Keeper feature flag {feature}: {'enabled' if is_enabled else 'disabled'}", look_behind_lines=1000 + ) + + node.wait_for_log_line( + f"KeeperContext: Keeper feature flag {feature}: {'enabled' if is_enabled else 'disabled'}", look_behind_lines=1000 + ) + + assert f"{feature}\t{1 if is_enabled else 0}" in res + + assert_feature_flags([("filtered_list", 1), ("multi_read", 1), ("check_not_exists", 0)]) + + feature_flags = [("multi_read", 0), ("check_not_exists", 1)] + restart_clickhouse(feature_flags) + assert_feature_flags(feature_flags + [("filtered_list", 1)]) + + feature_flags = [("multi_read", 0), ("check_not_exists", 0), ("filtered_list", 0)] + restart_clickhouse(feature_flags) + assert_feature_flags(feature_flags) + + with pytest.raises(Exception): + restart_clickhouse([("invalid_feature", 1)], expect_fail=True) From eb16745033fd34b5c9c32124b6bb870f7c795f9c Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 12 Jun 2023 11:04:07 +0000 Subject: [PATCH 0295/1997] Collect sets from indexHint actions as well. 
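Without this change, sets referenced only inside an indexHint() call were not collected: indexHint keeps its argument expressions in a nested ActionsDAG (reachable through the FunctionToFunctionBaseAdaptor and FunctionIndexHint::getActions()), so a flat scan of the outer DAG missed them and the corresponding subqueries-for-sets were pruned as unused. The fix factors the scan into collectSetsFromActionsDAG and recurses into the hint's own actions. A rough Python analogue of that recursion, with simplified stand-in structures rather than the real ActionsDAG types:

    # Walk a DAG harvesting set columns; descend into each indexHint's nested DAG.
    def collect_sets(dag: dict, useful_sets: set) -> None:
        for node in dag["nodes"]:
            if node.get("column_set") is not None:      # node carries a prepared set
                useful_sets.add(node["column_set"])
            if node.get("function") == "indexHint":     # the hint owns its own actions
                collect_sets(node["actions"], useful_sets)

    sets = set()
    collect_sets({"nodes": [{"function": "indexHint",
                             "actions": {"nodes": [{"column_set": "set_from_subquery"}]}}]},
                 sets)
    assert sets == {"set_from_subquery"}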
--- src/Planner/Planner.cpp | 44 ++++++++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index c1f472bb5a8..4ac81e28f92 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -9,6 +9,7 @@ #include #include +#include #include #include @@ -895,6 +896,34 @@ void addOffsetStep(QueryPlan & query_plan, const QueryAnalysisResult & query_ana query_plan.addStep(std::move(offsets_step)); } +void collectSetsFromActionsDAG(const ActionsDAGPtr & dag, std::unordered_set & useful_sets) +{ + for (const auto & node : dag->getNodes()) + { + if (node.column) + { + const IColumn * column = node.column.get(); + if (const auto * column_const = typeid_cast(column)) + column = &column_const->getDataColumn(); + + if (const auto * column_set = typeid_cast(column)) + useful_sets.insert(column_set->getData().get()); + } + + if (node.type == ActionsDAG::ActionType::FUNCTION && node.function_base->getName() == "indexHint") + { + ActionsDAG::NodeRawConstPtrs children; + if (const auto * adaptor = typeid_cast(node.function_base.get())) + { + if (const auto * index_hint = typeid_cast(adaptor->getFunction().get())) + { + collectSetsFromActionsDAG(index_hint->getActions(), useful_sets); + } + } + } + } +} + void addBuildSubqueriesForSetsStepIfNeeded( QueryPlan & query_plan, const SelectQueryOptions & select_query_options, @@ -907,20 +936,7 @@ void addBuildSubqueriesForSetsStepIfNeeded( PreparedSets::SubqueriesForSets subqueries_for_sets; for (const auto & actions_to_execute : result_actions_to_execute) - { - for (const auto & node : actions_to_execute->getNodes()) - { - if (node.column) - { - const IColumn * column = node.column.get(); - if (const auto * column_const = typeid_cast(column)) - column = &column_const->getDataColumn(); - - if (const auto * column_set = typeid_cast(column)) - useful_sets.insert(column_set->getData().get()); - } - } - } + collectSetsFromActionsDAG(actions_to_execute, useful_sets); auto predicate = [&useful_sets](const auto & set) { return !useful_sets.contains(set.set.get()); }; auto it = std::remove_if(subqueries.begin(), subqueries.end(), std::move(predicate)); From 708a99a6ea63409ce33f83d450592eaa42411ebb Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 12 Jun 2023 11:19:38 +0000 Subject: [PATCH 0296/1997] Automatic style fix --- .../test_keeper_feature_flags_config/test.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/tests/integration/test_keeper_feature_flags_config/test.py b/tests/integration/test_keeper_feature_flags_config/test.py index 432c413cbe8..bb7252e9ec8 100644 --- a/tests/integration/test_keeper_feature_flags_config/test.py +++ b/tests/integration/test_keeper_feature_flags_config/test.py @@ -45,7 +45,7 @@ def restart_clickhouse(feature_flags=[], expect_fail=True): if len(feature_flags) > 0: feature_flags_config = "" - + for feature, is_enabled in feature_flags: feature_flags_config += f"<{feature}>{is_enabled}<\\/{feature}>" @@ -54,7 +54,7 @@ def restart_clickhouse(feature_flags=[], expect_fail=True): node.replace_in_config( "/etc/clickhouse-server/config.d/enable_keeper.xml", "", - feature_flags_config + feature_flags_config, ) node.start_clickhouse(retry_start=not expect_fail) @@ -69,16 +69,20 @@ def test_keeper_feature_flags(started_cluster): for feature, is_enabled in feature_flags: node.wait_for_log_line( - f"ZooKeeperClient: Keeper feature flag {feature}: {'enabled' if is_enabled else 'disabled'}", 
look_behind_lines=1000 + f"ZooKeeperClient: Keeper feature flag {feature}: {'enabled' if is_enabled else 'disabled'}", + look_behind_lines=1000, ) node.wait_for_log_line( - f"KeeperContext: Keeper feature flag {feature}: {'enabled' if is_enabled else 'disabled'}", look_behind_lines=1000 + f"KeeperContext: Keeper feature flag {feature}: {'enabled' if is_enabled else 'disabled'}", + look_behind_lines=1000, ) assert f"{feature}\t{1 if is_enabled else 0}" in res - assert_feature_flags([("filtered_list", 1), ("multi_read", 1), ("check_not_exists", 0)]) + assert_feature_flags( + [("filtered_list", 1), ("multi_read", 1), ("check_not_exists", 0)] + ) feature_flags = [("multi_read", 0), ("check_not_exists", 1)] restart_clickhouse(feature_flags) From cd1a3916a6ea755b24b475983f6f67447cebdd6a Mon Sep 17 00:00:00 2001 From: Andrey Zvonov Date: Mon, 12 Jun 2023 13:28:17 +0200 Subject: [PATCH 0297/1997] cleanup HDFS --- src/Storages/HDFS/StorageHDFS.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index bd50b66ede5..7e836c028a0 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -129,7 +129,7 @@ namespace } else if (is_directory) { - Strings result_part = LSWithFoldedRegexpMatching(fs::path(full_path).append(processed_suffix), + Strings result_part = LSWithFoldedRegexpMatching(fs::path(full_path), fs, last_mod_times, processed_suffix + dir_or_file_name, suffix_with_globs, current_glob, matcher, max_depth - 1, next_slash_after_glob_pos); std::move(result_part.begin(), result_part.end(), std::back_inserter(result)); From 5cec4c3161b84e32341ef723dc8cea2b38343b69 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 12 Jun 2023 11:34:40 +0000 Subject: [PATCH 0298/1997] Fallback to parsing big integer from String instead of exception in Parquet format --- src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp | 9 +++------ .../02786_parquet_big_integer_compatibility.reference | 1 + .../02786_parquet_big_integer_compatibility.sh | 9 +++++++++ 3 files changed, 13 insertions(+), 6 deletions(-) create mode 100644 tests/queries/0_stateless/02786_parquet_big_integer_compatibility.reference create mode 100755 tests/queries/0_stateless/02786_parquet_big_integer_compatibility.sh diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index 0b4700c9d4c..5a7306111a5 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -202,13 +202,10 @@ static ColumnWithTypeAndName readColumnWithBigNumberFromBinaryData(std::shared_p for (size_t i = 0; i != chunk_length; ++i) { + /// If at least one value size is not equal to the size if big integer, fallback to reading String column and further cast to result type. 
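The comment just added describes the new fallback: instead of throwing when a fixed-width big-integer column contains a binary value of the wrong length, the reader re-reads the column as String and lets the subsequent cast parse the text, which is exactly what the 02786 test below round-trips. The decision rule, sketched in Python with an assumed little-endian byte order rather than the real Arrow reader:

    # Sketch of the fallback rule, not ClickHouse code: one mismatched value length
    # sends the whole column down the string-parsing path.
    def read_big_int_column(values: list[bytes], width_bytes: int) -> list[int]:
        if any(len(v) != width_bytes for v in values):
            # fallback: interpret payloads as decimal text, as the String path would
            return [int(v.decode()) for v in values]
        return [int.from_bytes(v, "little") for v in values]  # byte order assumed

    assert read_big_int_column([b"424242"], 32) == [424242]                # 6 != 32, text path
    assert read_big_int_column([(42).to_bytes(32, "little")], 32) == [42]  # exact width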
if (!chunk.IsNull(i) && chunk.value_length(i) != sizeof(ValueType)) - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Cannot insert data into {} column from binary value, expected data with size {}, got {}", - column_type->getName(), - sizeof(ValueType), - chunk.value_length(i)); + return readColumnWithStringData(arrow_column, column_name); + total_size += chunk_length; } } diff --git a/tests/queries/0_stateless/02786_parquet_big_integer_compatibility.reference b/tests/queries/0_stateless/02786_parquet_big_integer_compatibility.reference new file mode 100644 index 00000000000..7764974255b --- /dev/null +++ b/tests/queries/0_stateless/02786_parquet_big_integer_compatibility.reference @@ -0,0 +1 @@ +424242424242424242424242424242424242424242424242424242 diff --git a/tests/queries/0_stateless/02786_parquet_big_integer_compatibility.sh b/tests/queries/0_stateless/02786_parquet_big_integer_compatibility.sh new file mode 100755 index 00000000000..8865b2e7aab --- /dev/null +++ b/tests/queries/0_stateless/02786_parquet_big_integer_compatibility.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "select toString(424242424242424242424242424242424242424242424242424242::UInt256) as x format Parquet" | $CLICKHOUSE_LOCAL --input-format=Parquet --structure='x UInt256' -q "select * from table" + From a7e6264d567ffa7456d00df017675f0acf4ca90f Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 12 Jun 2023 13:05:44 +0000 Subject: [PATCH 0299/1997] Add backward compatibility --- programs/keeper/CMakeLists.txt | 1 + src/Common/ZooKeeper/CMakeLists.txt | 2 ++ src/Coordination/FourLetterCommand.cpp | 2 +- src/Coordination/KeeperConstants.h | 1 - src/Coordination/KeeperContext.cpp | 4 ++++ src/Storages/System/StorageSystemZooKeeperConnection.cpp | 4 ++-- 6 files changed, 10 insertions(+), 4 deletions(-) diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index 4f74cc06801..555c6431865 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -44,6 +44,7 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperLogStore.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperServer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperContext.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperFeatureFlags.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperSnapshotManager.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperSnapshotManagerS3.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperStateMachine.cpp diff --git a/src/Common/ZooKeeper/CMakeLists.txt b/src/Common/ZooKeeper/CMakeLists.txt index a9a335d1461..3f7e87ff4a7 100644 --- a/src/Common/ZooKeeper/CMakeLists.txt +++ b/src/Common/ZooKeeper/CMakeLists.txt @@ -2,6 +2,8 @@ include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") add_headers_and_sources(clickhouse_common_zookeeper .) 
+list(APPEND clickhouse_common_zookeeper_sources ${CMAKE_CURRENT_SOURCE_DIR}/../../../src/Coordination/KeeperFeatureFlags.cpp) + # for clickhouse server add_library(clickhouse_common_zookeeper ${clickhouse_common_zookeeper_headers} ${clickhouse_common_zookeeper_sources}) target_compile_definitions (clickhouse_common_zookeeper PRIVATE -DZOOKEEPER_LOG) diff --git a/src/Coordination/FourLetterCommand.cpp b/src/Coordination/FourLetterCommand.cpp index 55120e70d99..10d13657fb0 100644 --- a/src/Coordination/FourLetterCommand.cpp +++ b/src/Coordination/FourLetterCommand.cpp @@ -490,7 +490,7 @@ String RecoveryCommand::run() String ApiVersionCommand::run() { - return toString(0); + return toString(static_cast(KeeperApiVersion::WITH_MULTI_READ)); } String CreateSnapshotCommand::run() diff --git a/src/Coordination/KeeperConstants.h b/src/Coordination/KeeperConstants.h index 6d9c03ca8a9..84cbb0ab7c5 100644 --- a/src/Coordination/KeeperConstants.h +++ b/src/Coordination/KeeperConstants.h @@ -14,7 +14,6 @@ enum class KeeperApiVersion : uint8_t WITH_CHECK_NOT_EXISTS, }; - const std::string keeper_system_path = "/keeper"; const std::string keeper_api_version_path = keeper_system_path + "/api_version"; const std::string keeper_api_feature_flags_path = keeper_system_path + "/feature_flags"; diff --git a/src/Coordination/KeeperContext.cpp b/src/Coordination/KeeperContext.cpp index dc408afc19a..e6f30c81310 100644 --- a/src/Coordination/KeeperContext.cpp +++ b/src/Coordination/KeeperContext.cpp @@ -11,6 +11,10 @@ KeeperContext::KeeperContext() feature_flags.enableFeatureFlag(KeeperFeatureFlag::FILTERED_LIST); feature_flags.enableFeatureFlag(KeeperFeatureFlag::MULTI_READ); system_nodes_with_data[keeper_api_feature_flags_path] = feature_flags.getFeatureFlags(); + + + /// for older clients, the default is equivalent to WITH_MULTI_READ version + system_nodes_with_data[keeper_api_version_path] = toString(static_cast(KeeperApiVersion::WITH_MULTI_READ)); } void KeeperContext::initialize(const Poco::Util::AbstractConfiguration & config) diff --git a/src/Storages/System/StorageSystemZooKeeperConnection.cpp b/src/Storages/System/StorageSystemZooKeeperConnection.cpp index 9a6a592f2c4..99872be6ba0 100644 --- a/src/Storages/System/StorageSystemZooKeeperConnection.cpp +++ b/src/Storages/System/StorageSystemZooKeeperConnection.cpp @@ -31,7 +31,7 @@ void StorageSystemZooKeeperConnection::fillData(MutableColumns & res_columns, Co res_columns[3]->insert(context->getZooKeeper()->getConnectedZooKeeperIndex()); res_columns[4]->insert(context->getZooKeeperSessionUptime()); res_columns[5]->insert(context->getZooKeeper()->expired()); - res_columns[6]->insert(0); + res_columns[6]->insert(static_cast(KeeperApiVersion::WITH_MULTI_READ)); res_columns[7]->insert(context->getZooKeeper()->getClientID()); for (const auto & elem : context->getAuxiliaryZooKeepers()) @@ -42,7 +42,7 @@ void StorageSystemZooKeeperConnection::fillData(MutableColumns & res_columns, Co res_columns[3]->insert(elem.second->getConnectedZooKeeperIndex()); res_columns[4]->insert(elem.second->getSessionUptime()); res_columns[5]->insert(elem.second->expired()); - res_columns[6]->insert(0); + res_columns[6]->insert(static_cast(KeeperApiVersion::WITH_MULTI_READ)); res_columns[7]->insert(elem.second->getClientID()); } From 25f08f8d194f77d0ee56e7c5132b9d5c4244a30f Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 12 Jun 2023 15:35:44 +0200 Subject: [PATCH 0300/1997] Remove duplicate include --- src/Coordination/Changelog.cpp | 1 - 1 file changed, 1 deletion(-) diff 
--git a/src/Coordination/Changelog.cpp b/src/Coordination/Changelog.cpp index cc2ea491e17..ffa22a6b888 100644 --- a/src/Coordination/Changelog.cpp +++ b/src/Coordination/Changelog.cpp @@ -14,7 +14,6 @@ #include #include #include #include From e9763caa0eb7078cd28e3765d0da1e0a9b4a204b Mon Sep 17 00:00:00 2001 From: flynn Date: Mon, 12 Jun 2023 14:21:58 +0000 Subject: [PATCH 0301/1997] fix --- src/Interpreters/Context.cpp | 18 +++++++++++++----- .../01945_system_warnings.reference | 2 +- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index a12117b7677..823c3d678df 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -777,17 +777,25 @@ Strings Context::getWarnings() const auto lock = getLock(); common_warnings = shared->warnings; } + String res = "Obsolete settings ["; + size_t obsolete_settings_count = 0; for (const auto & setting : settings) { if (setting.isValueChanged() && setting.isObsolete()) { - common_warnings.emplace_back( - "Obsolete setting `" + setting.getName() - + "` is changed. " - "Check 'select * from system.settings where changed' and read the changelog."); - break; + res += (obsolete_settings_count ? ", `" : "`") + setting.getName() + "`"; + ++obsolete_settings_count; } } + + if (obsolete_settings_count) + { + res = res + "]" + (obsolete_settings_count == 1 ? " is" : " are") + + " changed. " + "Please check 'select * from system.settings where changed and is_obsolete' and read the changelog."; + common_warnings.emplace_back(res); + } + return common_warnings; } diff --git a/tests/queries/0_stateless/01945_system_warnings.reference b/tests/queries/0_stateless/01945_system_warnings.reference index d6ae567289c..3e7edacd275 100644 --- a/tests/queries/0_stateless/01945_system_warnings.reference +++ b/tests/queries/0_stateless/01945_system_warnings.reference @@ -1,5 +1,5 @@ Server was built in debug mode. It will work slowly. 0 -Obsolete setting `multiple_joins_rewriter_version` is changed. Check \'select * from system.settings where changed\' and read the changelog. +Obsolete settings [`multiple_joins_rewriter_version`] is changed. Check \'select * from system.settings where changed\' and read the changelog. 1 1 From 18f4f1a5238c64f3b45e1d6781ef2c7104ab842d Mon Sep 17 00:00:00 2001 From: flynn Date: Mon, 12 Jun 2023 15:11:19 +0000 Subject: [PATCH 0302/1997] udpate test --- tests/queries/0_stateless/01945_show_debug_warning.expect | 2 +- tests/queries/0_stateless/01945_system_warnings.reference | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01945_show_debug_warning.expect b/tests/queries/0_stateless/01945_show_debug_warning.expect index 3d5b1ca99a5..f0c97acb1f5 100755 --- a/tests/queries/0_stateless/01945_show_debug_warning.expect +++ b/tests/queries/0_stateless/01945_show_debug_warning.expect @@ -55,7 +55,7 @@ expect eof spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --max_memory_usage_for_all_queries=123 --history_file=$history_file" expect "Warnings:" -expect " * Obsolete setting `max_memory_usage_for_all_queries` is changed." +expect " * Obsolete settings [`max_memory_usage_for_all_queries`] is changed." 
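The Context.cpp hunk above folds every changed obsolete setting into a single bracketed warning instead of emitting one warning and stopping at the first match; the expect and reference updates in this and the next few commits chase the exact wording as the quoting and verb agreement settle. The shape the message builder converges on, approximated in Python:

    # Approximation of Context::getWarnings' obsolete-settings aggregation, using the
    # wording this series ends up with (single-quoted names, is/are agreement).
    def obsolete_settings_warning(changed_obsolete: list[str]):
        if not changed_obsolete:
            return None
        names = ", ".join(f"'{name}'" for name in changed_obsolete)
        verb = "is" if len(changed_obsolete) == 1 else "are"
        return (f"Obsolete settings [{names}] {verb} changed. Please check "
                "'select * from system.settings where changed and is_obsolete' "
                "and read the changelog.")

    assert obsolete_settings_warning(["multiple_joins_rewriter_version"]).startswith(
        "Obsolete settings ['multiple_joins_rewriter_version'] is changed")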
expect ":) " send -- "q\r" expect eof diff --git a/tests/queries/0_stateless/01945_system_warnings.reference b/tests/queries/0_stateless/01945_system_warnings.reference index 3e7edacd275..0c05d5d7049 100644 --- a/tests/queries/0_stateless/01945_system_warnings.reference +++ b/tests/queries/0_stateless/01945_system_warnings.reference @@ -1,5 +1,5 @@ Server was built in debug mode. It will work slowly. 0 -Obsolete settings [`multiple_joins_rewriter_version`] is changed. Check \'select * from system.settings where changed\' and read the changelog. +Obsolete settings [`multiple_joins_rewriter_version`] is changed. Please check \'select * from system.settings where changed and is_obsolete\' and read the changelog. 1 1 From 252a10c670977c93b8808d8b98a8679714d6e9a3 Mon Sep 17 00:00:00 2001 From: tpanetti Date: Mon, 12 Jun 2023 08:19:06 -0700 Subject: [PATCH 0303/1997] Add "no-parallel" tag to MySQL Compatible Types test to fix test issue --- .../0_stateless/02775_show_columns_mysql_compatibility.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.sh b/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.sh index 51c9da2a842..e324926e2e7 100755 --- a/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.sh +++ b/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest +# Tags: no-fasttest, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From b76ba13250ad5b0abe728875be0e41667450cd5f Mon Sep 17 00:00:00 2001 From: flynn Date: Mon, 12 Jun 2023 15:41:46 +0000 Subject: [PATCH 0304/1997] fix --- tests/queries/0_stateless/01945_show_debug_warning.expect | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01945_show_debug_warning.expect b/tests/queries/0_stateless/01945_show_debug_warning.expect index f0c97acb1f5..617e54a375e 100755 --- a/tests/queries/0_stateless/01945_show_debug_warning.expect +++ b/tests/queries/0_stateless/01945_show_debug_warning.expect @@ -55,7 +55,7 @@ expect eof spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --max_memory_usage_for_all_queries=123 --history_file=$history_file" expect "Warnings:" -expect " * Obsolete settings [`max_memory_usage_for_all_queries`] is changed." +expect " * Obsolete settings [`max_memory_usage_for_all_queries`] is changed. Please check \'select * from system.settings where changed and is_obsolete\' and read the changelog." 
expect ":) " send -- "q\r" expect eof From 326a3a3e8d719aebdc9ef9ee79f8b5fc8645183e Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Mon, 12 Jun 2023 16:46:10 +0000 Subject: [PATCH 0305/1997] Use query tree to rewrite the query --- src/Storages/StorageDistributed.cpp | 335 +--------------- src/Storages/StorageReplicatedMergeTree.cpp | 22 +- src/Storages/buildQueryTreeForShard.cpp | 372 ++++++++++++++++++ src/Storages/buildQueryTreeForShard.h | 15 + ...02771_parallel_replicas_analyzer.reference | 2 +- 5 files changed, 406 insertions(+), 340 deletions(-) create mode 100644 src/Storages/buildQueryTreeForShard.cpp create mode 100644 src/Storages/buildQueryTreeForShard.h diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index b91ad0b963a..1ec45ce3d57 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -81,6 +81,7 @@ #include #include +#include #include #include @@ -650,264 +651,6 @@ StorageSnapshotPtr StorageDistributed::getStorageSnapshotForQuery( namespace { -/// Visitor that collect column source to columns mapping from query and all subqueries -class CollectColumnSourceToColumnsVisitor : public InDepthQueryTreeVisitor -{ -public: - struct Columns - { - NameSet column_names; - NamesAndTypes columns; - - void addColumn(NameAndTypePair column) - { - if (column_names.contains(column.name)) - return; - - column_names.insert(column.name); - columns.push_back(std::move(column)); - } - }; - - const std::unordered_map & getColumnSourceToColumns() const - { - return column_source_to_columns; - } - - void visitImpl(QueryTreeNodePtr & node) - { - auto * column_node = node->as(); - if (!column_node) - return; - - auto column_source = column_node->getColumnSourceOrNull(); - if (!column_source) - return; - - auto it = column_source_to_columns.find(column_source); - if (it == column_source_to_columns.end()) - { - auto [insert_it, _] = column_source_to_columns.emplace(column_source, Columns()); - it = insert_it; - } - - it->second.addColumn(column_node->getColumn()); - } - -private: - std::unordered_map column_source_to_columns; -}; - -/** Visitor that rewrites IN and JOINs in query and all subqueries according to distributed_product_mode and - * prefer_global_in_and_join settings. - * - * Additionally collects GLOBAL JOIN and GLOBAL IN query nodes. - * - * If distributed_product_mode = deny, then visitor throws exception if there are multiple distributed tables. - * If distributed_product_mode = local, then visitor collects replacement map for tables that must be replaced - * with local tables. - * If distributed_product_mode = global or prefer_global_in_and_join setting is true, then visitor rewrites JOINs and IN functions that - * contain distributed tables to GLOBAL JOINs and GLOBAL IN functions. - * If distributed_product_mode = allow, then visitor does not rewrite query if there are multiple distributed tables. 
- */ -class DistributedProductModeRewriteInJoinVisitor : public InDepthQueryTreeVisitorWithContext -{ -public: - using Base = InDepthQueryTreeVisitorWithContext; - using Base::Base; - - explicit DistributedProductModeRewriteInJoinVisitor(const ContextPtr & context_) - : Base(context_) - {} - - struct InFunctionOrJoin - { - QueryTreeNodePtr query_node; - size_t subquery_depth = 0; - }; - - const std::unordered_map & getReplacementMap() const - { - return replacement_map; - } - - const std::vector & getGlobalInOrJoinNodes() const - { - return global_in_or_join_nodes; - } - - static bool needChildVisit(QueryTreeNodePtr & parent, QueryTreeNodePtr & child) - { - auto * function_node = parent->as(); - if (function_node && isNameOfGlobalInFunction(function_node->getFunctionName())) - return false; - - auto * join_node = parent->as(); - if (join_node && join_node->getLocality() == JoinLocality::Global && join_node->getRightTableExpression() == child) - return false; - - return true; - } - - void visitImpl(QueryTreeNodePtr & node) - { - auto * function_node = node->as(); - auto * join_node = node->as(); - - if ((function_node && isNameOfGlobalInFunction(function_node->getFunctionName())) || - (join_node && join_node->getLocality() == JoinLocality::Global)) - { - InFunctionOrJoin in_function_or_join_entry; - in_function_or_join_entry.query_node = node; - in_function_or_join_entry.subquery_depth = getSubqueryDepth(); - global_in_or_join_nodes.push_back(std::move(in_function_or_join_entry)); - return; - } - - if ((function_node && isNameOfLocalInFunction(function_node->getFunctionName())) || - (join_node && join_node->getLocality() != JoinLocality::Global)) - { - InFunctionOrJoin in_function_or_join_entry; - in_function_or_join_entry.query_node = node; - in_function_or_join_entry.subquery_depth = getSubqueryDepth(); - in_function_or_join_stack.push_back(in_function_or_join_entry); - return; - } - - if (node->getNodeType() == QueryTreeNodeType::TABLE) - tryRewriteTableNodeIfNeeded(node); - } - - void leaveImpl(QueryTreeNodePtr & node) - { - if (!in_function_or_join_stack.empty() && node.get() == in_function_or_join_stack.back().query_node.get()) - in_function_or_join_stack.pop_back(); - } - -private: - void tryRewriteTableNodeIfNeeded(const QueryTreeNodePtr & table_node) - { - const auto & table_node_typed = table_node->as(); - const auto * distributed_storage = typeid_cast(table_node_typed.getStorage().get()); - if (!distributed_storage) - return; - - bool distributed_valid_for_rewrite = distributed_storage->getShardCount() >= 2; - if (!distributed_valid_for_rewrite) - return; - - auto distributed_product_mode = getSettings().distributed_product_mode; - - if (distributed_product_mode == DistributedProductMode::LOCAL) - { - StorageID remote_storage_id = StorageID{distributed_storage->getRemoteDatabaseName(), - distributed_storage->getRemoteTableName()}; - auto resolved_remote_storage_id = getContext()->resolveStorageID(remote_storage_id); - const auto & distributed_storage_columns = table_node_typed.getStorageSnapshot()->metadata->getColumns(); - auto storage = std::make_shared(resolved_remote_storage_id, distributed_storage_columns); - auto replacement_table_expression = std::make_shared(std::move(storage), getContext()); - replacement_map.emplace(table_node.get(), std::move(replacement_table_expression)); - } - else if ((distributed_product_mode == DistributedProductMode::GLOBAL || getSettings().prefer_global_in_and_join) && - !in_function_or_join_stack.empty()) - { - auto * in_or_join_node_to_modify 
= in_function_or_join_stack.back().query_node.get(); - - if (auto * in_function_to_modify = in_or_join_node_to_modify->as()) - { - auto global_in_function_name = getGlobalInFunctionNameForLocalInFunctionName(in_function_to_modify->getFunctionName()); - auto global_in_function_resolver = FunctionFactory::instance().get(global_in_function_name, getContext()); - in_function_to_modify->resolveAsFunction(global_in_function_resolver->build(in_function_to_modify->getArgumentColumns())); - } - else if (auto * join_node_to_modify = in_or_join_node_to_modify->as()) - { - join_node_to_modify->setLocality(JoinLocality::Global); - } - - global_in_or_join_nodes.push_back(in_function_or_join_stack.back()); - } - else if (distributed_product_mode == DistributedProductMode::ALLOW) - { - return; - } - else if (distributed_product_mode == DistributedProductMode::DENY) - { - throw Exception(ErrorCodes::DISTRIBUTED_IN_JOIN_SUBQUERY_DENIED, - "Double-distributed IN/JOIN subqueries is denied (distributed_product_mode = 'deny'). " - "You may rewrite query to use local tables " - "in subqueries, or use GLOBAL keyword, or set distributed_product_mode to suitable value."); - } - } - - std::vector in_function_or_join_stack; - std::unordered_map replacement_map; - std::vector global_in_or_join_nodes; -}; - -/** Execute subquery node and put result in mutable context temporary table. - * Returns table node that is initialized with temporary table storage. - */ -TableNodePtr executeSubqueryNode(const QueryTreeNodePtr & subquery_node, - ContextMutablePtr & mutable_context, - size_t subquery_depth) -{ - auto subquery_hash = subquery_node->getTreeHash(); - String temporary_table_name = fmt::format("_data_{}_{}", subquery_hash.first, subquery_hash.second); - - const auto & external_tables = mutable_context->getExternalTables(); - auto external_table_it = external_tables.find(temporary_table_name); - if (external_table_it != external_tables.end()) - { - auto temporary_table_expression_node = std::make_shared(external_table_it->second, mutable_context); - temporary_table_expression_node->setTemporaryTableName(temporary_table_name); - return temporary_table_expression_node; - } - - auto subquery_options = SelectQueryOptions(QueryProcessingStage::Complete, subquery_depth, true /*is_subquery*/); - auto context_copy = Context::createCopy(mutable_context); - updateContextForSubqueryExecution(context_copy); - - InterpreterSelectQueryAnalyzer interpreter(subquery_node, context_copy, subquery_options); - auto & query_plan = interpreter.getQueryPlan(); - - auto sample_block_with_unique_names = query_plan.getCurrentDataStream().header; - makeUniqueColumnNamesInBlock(sample_block_with_unique_names); - - if (!blocksHaveEqualStructure(sample_block_with_unique_names, query_plan.getCurrentDataStream().header)) - { - auto actions_dag = ActionsDAG::makeConvertingActions( - query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(), - sample_block_with_unique_names.getColumnsWithTypeAndName(), - ActionsDAG::MatchColumnsMode::Position); - auto converting_step = std::make_unique(query_plan.getCurrentDataStream(), std::move(actions_dag)); - query_plan.addStep(std::move(converting_step)); - } - - Block sample = interpreter.getSampleBlock(); - NamesAndTypesList columns = sample.getNamesAndTypesList(); - - auto external_storage_holder = TemporaryTableHolder( - mutable_context, - ColumnsDescription{columns}, - ConstraintsDescription{}, - nullptr /*query*/, - true /*create_for_global_subquery*/); - - StoragePtr external_storage = 
external_storage_holder.getTable(); - auto temporary_table_expression_node = std::make_shared(external_storage, mutable_context); - temporary_table_expression_node->setTemporaryTableName(temporary_table_name); - - auto table_out = external_storage->write({}, external_storage->getInMemoryMetadataPtr(), mutable_context, /*async_insert=*/false); - auto io = interpreter.execute(); - io.pipeline.complete(std::move(table_out)); - CompletedPipelineExecutor executor(io.pipeline); - executor.execute(); - - mutable_context->addExternalTable(temporary_table_name, std::move(external_storage_holder)); - - return temporary_table_expression_node; -} - QueryTreeNodePtr buildQueryTreeDistributed(SelectQueryInfo & query_info, const StorageSnapshotPtr & distributed_storage_snapshot, const StorageID & remote_storage_id, @@ -963,81 +706,7 @@ QueryTreeNodePtr buildQueryTreeDistributed(SelectQueryInfo & query_info, auto query_tree_to_modify = query_info.query_tree->cloneAndReplace(query_info.table_expression, std::move(replacement_table_expression)); - CollectColumnSourceToColumnsVisitor collect_column_source_to_columns_visitor; - collect_column_source_to_columns_visitor.visit(query_tree_to_modify); - - const auto & column_source_to_columns = collect_column_source_to_columns_visitor.getColumnSourceToColumns(); - - DistributedProductModeRewriteInJoinVisitor visitor(query_info.planner_context->getQueryContext()); - visitor.visit(query_tree_to_modify); - - auto replacement_map = visitor.getReplacementMap(); - const auto & global_in_or_join_nodes = visitor.getGlobalInOrJoinNodes(); - - for (const auto & global_in_or_join_node : global_in_or_join_nodes) - { - if (auto * join_node = global_in_or_join_node.query_node->as()) - { - auto join_right_table_expression = join_node->getRightTableExpression(); - auto join_right_table_expression_node_type = join_right_table_expression->getNodeType(); - - QueryTreeNodePtr subquery_node; - - if (join_right_table_expression_node_type == QueryTreeNodeType::QUERY || - join_right_table_expression_node_type == QueryTreeNodeType::UNION) - { - subquery_node = join_right_table_expression; - } - else if (join_right_table_expression_node_type == QueryTreeNodeType::TABLE || - join_right_table_expression_node_type == QueryTreeNodeType::TABLE_FUNCTION) - { - const auto & columns = column_source_to_columns.at(join_right_table_expression).columns; - subquery_node = buildSubqueryToReadColumnsFromTableExpression(columns, - join_right_table_expression, - planner_context->getQueryContext()); - } - else - { - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Expected JOIN right table expression to be table, table function, query or union node. 
Actual {}", - join_right_table_expression->formatASTForErrorMessage()); - } - - auto temporary_table_expression_node = executeSubqueryNode(subquery_node, - planner_context->getMutableQueryContext(), - global_in_or_join_node.subquery_depth); - temporary_table_expression_node->setAlias(join_right_table_expression->getAlias()); - - replacement_map.emplace(join_right_table_expression.get(), std::move(temporary_table_expression_node)); - continue; - } - else if (auto * in_function_node = global_in_or_join_node.query_node->as()) - { - auto & in_function_subquery_node = in_function_node->getArguments().getNodes().at(1); - auto in_function_node_type = in_function_subquery_node->getNodeType(); - if (in_function_node_type != QueryTreeNodeType::QUERY && in_function_node_type != QueryTreeNodeType::UNION) - continue; - - auto temporary_table_expression_node = executeSubqueryNode(in_function_subquery_node, - planner_context->getMutableQueryContext(), - global_in_or_join_node.subquery_depth); - - in_function_subquery_node = std::move(temporary_table_expression_node); - } - else - { - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Expected global IN or JOIN query node. Actual {}", - global_in_or_join_node.query_node->formatASTForErrorMessage()); - } - } - - if (!replacement_map.empty()) - query_tree_to_modify = query_tree_to_modify->cloneAndReplace(replacement_map); - - removeGroupingFunctionSpecializations(query_tree_to_modify); - - return query_tree_to_modify; + return buildQueryTreeForShard(query_info, query_tree_to_modify); } } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 61d1442df92..fafb3b124f2 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -21,6 +21,7 @@ #include +#include #include #include #include @@ -74,6 +75,8 @@ #include #include +#include + #include #include #include @@ -4734,20 +4737,27 @@ void StorageReplicatedMergeTree::read( { auto table_id = getStorageID(); - const auto & modified_query_ast = ClusterProxy::rewriteSelectQuery( - local_context, query_info.query, - table_id.database_name, table_id.table_name, /*remote_table_function_ptr*/nullptr); - - auto cluster = local_context->getCluster(local_context->getSettingsRef().cluster_for_parallel_replicas); + ASTPtr modified_query_ast; Block header; if (local_context->getSettingsRef().allow_experimental_analyzer) + { + auto modified_query_tree = buildQueryTreeForShard(query_info, query_info.query_tree); + header = InterpreterSelectQueryAnalyzer::getSampleBlock( - modified_query_ast, local_context, SelectQueryOptions(processed_stage).analyze()); + modified_query_tree, local_context, SelectQueryOptions(processed_stage).analyze()); + modified_query_ast = queryNodeToSelectQuery(modified_query_tree); + } else + { header = InterpreterSelectQuery(modified_query_ast, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); + modified_query_ast = ClusterProxy::rewriteSelectQuery(local_context, query_info.query, + table_id.database_name, table_id.table_name, /*remote_table_function_ptr*/nullptr); + } + + auto cluster = local_context->getCluster(local_context->getSettingsRef().cluster_for_parallel_replicas); ClusterProxy::SelectStreamFactory select_stream_factory = ClusterProxy::SelectStreamFactory( diff --git a/src/Storages/buildQueryTreeForShard.cpp b/src/Storages/buildQueryTreeForShard.cpp new file mode 100644 index 00000000000..a42d67d9aa7 --- /dev/null +++ b/src/Storages/buildQueryTreeForShard.cpp @@ 
-0,0 +1,372 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int DISTRIBUTED_IN_JOIN_SUBQUERY_DENIED; +} + +namespace +{ + +/// Visitor that collect column source to columns mapping from query and all subqueries +class CollectColumnSourceToColumnsVisitor : public InDepthQueryTreeVisitor +{ +public: + struct Columns + { + NameSet column_names; + NamesAndTypes columns; + + void addColumn(NameAndTypePair column) + { + if (column_names.contains(column.name)) + return; + + column_names.insert(column.name); + columns.push_back(std::move(column)); + } + }; + + const std::unordered_map & getColumnSourceToColumns() const + { + return column_source_to_columns; + } + + void visitImpl(QueryTreeNodePtr & node) + { + auto * column_node = node->as(); + if (!column_node) + return; + + auto column_source = column_node->getColumnSourceOrNull(); + if (!column_source) + return; + + auto it = column_source_to_columns.find(column_source); + if (it == column_source_to_columns.end()) + { + auto [insert_it, _] = column_source_to_columns.emplace(column_source, Columns()); + it = insert_it; + } + + it->second.addColumn(column_node->getColumn()); + } + +private: + std::unordered_map column_source_to_columns; +}; + +/** Visitor that rewrites IN and JOINs in query and all subqueries according to distributed_product_mode and + * prefer_global_in_and_join settings. + * + * Additionally collects GLOBAL JOIN and GLOBAL IN query nodes. + * + * If distributed_product_mode = deny, then visitor throws exception if there are multiple distributed tables. + * If distributed_product_mode = local, then visitor collects replacement map for tables that must be replaced + * with local tables. + * If distributed_product_mode = global or prefer_global_in_and_join setting is true, then visitor rewrites JOINs and IN functions that + * contain distributed tables to GLOBAL JOINs and GLOBAL IN functions. + * If distributed_product_mode = allow, then visitor does not rewrite query if there are multiple distributed tables. 
+ */ +class DistributedProductModeRewriteInJoinVisitor : public InDepthQueryTreeVisitorWithContext +{ +public: + using Base = InDepthQueryTreeVisitorWithContext; + using Base::Base; + + explicit DistributedProductModeRewriteInJoinVisitor(const ContextPtr & context_) + : Base(context_) + {} + + struct InFunctionOrJoin + { + QueryTreeNodePtr query_node; + size_t subquery_depth = 0; + }; + + const std::unordered_map & getReplacementMap() const + { + return replacement_map; + } + + const std::vector & getGlobalInOrJoinNodes() const + { + return global_in_or_join_nodes; + } + + static bool needChildVisit(QueryTreeNodePtr & parent, QueryTreeNodePtr & child) + { + auto * function_node = parent->as(); + if (function_node && isNameOfGlobalInFunction(function_node->getFunctionName())) + return false; + + auto * join_node = parent->as(); + if (join_node && join_node->getLocality() == JoinLocality::Global && join_node->getRightTableExpression() == child) + return false; + + return true; + } + + void visitImpl(QueryTreeNodePtr & node) + { + auto * function_node = node->as(); + auto * join_node = node->as(); + + if ((function_node && isNameOfGlobalInFunction(function_node->getFunctionName())) || + (join_node && join_node->getLocality() == JoinLocality::Global)) + { + InFunctionOrJoin in_function_or_join_entry; + in_function_or_join_entry.query_node = node; + in_function_or_join_entry.subquery_depth = getSubqueryDepth(); + global_in_or_join_nodes.push_back(std::move(in_function_or_join_entry)); + return; + } + + if ((function_node && isNameOfLocalInFunction(function_node->getFunctionName())) || + (join_node && join_node->getLocality() != JoinLocality::Global)) + { + InFunctionOrJoin in_function_or_join_entry; + in_function_or_join_entry.query_node = node; + in_function_or_join_entry.subquery_depth = getSubqueryDepth(); + in_function_or_join_stack.push_back(in_function_or_join_entry); + return; + } + + if (node->getNodeType() == QueryTreeNodeType::TABLE) + tryRewriteTableNodeIfNeeded(node); + } + + void leaveImpl(QueryTreeNodePtr & node) + { + if (!in_function_or_join_stack.empty() && node.get() == in_function_or_join_stack.back().query_node.get()) + in_function_or_join_stack.pop_back(); + } + +private: + void tryRewriteTableNodeIfNeeded(const QueryTreeNodePtr & table_node) + { + const auto & table_node_typed = table_node->as(); + const auto * distributed_storage = typeid_cast(table_node_typed.getStorage().get()); + if (!distributed_storage) + return; + + bool distributed_valid_for_rewrite = distributed_storage->getShardCount() >= 2; + if (!distributed_valid_for_rewrite) + return; + + auto distributed_product_mode = getSettings().distributed_product_mode; + + if (distributed_product_mode == DistributedProductMode::LOCAL) + { + StorageID remote_storage_id = StorageID{distributed_storage->getRemoteDatabaseName(), + distributed_storage->getRemoteTableName()}; + auto resolved_remote_storage_id = getContext()->resolveStorageID(remote_storage_id); + const auto & distributed_storage_columns = table_node_typed.getStorageSnapshot()->metadata->getColumns(); + auto storage = std::make_shared(resolved_remote_storage_id, distributed_storage_columns); + auto replacement_table_expression = std::make_shared(std::move(storage), getContext()); + replacement_map.emplace(table_node.get(), std::move(replacement_table_expression)); + } + else if ((distributed_product_mode == DistributedProductMode::GLOBAL || getSettings().prefer_global_in_and_join) && + !in_function_or_join_stack.empty()) + { + auto * in_or_join_node_to_modify 
= in_function_or_join_stack.back().query_node.get(); + + if (auto * in_function_to_modify = in_or_join_node_to_modify->as()) + { + auto global_in_function_name = getGlobalInFunctionNameForLocalInFunctionName(in_function_to_modify->getFunctionName()); + auto global_in_function_resolver = FunctionFactory::instance().get(global_in_function_name, getContext()); + in_function_to_modify->resolveAsFunction(global_in_function_resolver->build(in_function_to_modify->getArgumentColumns())); + } + else if (auto * join_node_to_modify = in_or_join_node_to_modify->as()) + { + join_node_to_modify->setLocality(JoinLocality::Global); + } + + global_in_or_join_nodes.push_back(in_function_or_join_stack.back()); + } + else if (distributed_product_mode == DistributedProductMode::ALLOW) + { + return; + } + else if (distributed_product_mode == DistributedProductMode::DENY) + { + throw Exception(ErrorCodes::DISTRIBUTED_IN_JOIN_SUBQUERY_DENIED, + "Double-distributed IN/JOIN subqueries is denied (distributed_product_mode = 'deny'). " + "You may rewrite query to use local tables " + "in subqueries, or use GLOBAL keyword, or set distributed_product_mode to suitable value."); + } + } + + std::vector in_function_or_join_stack; + std::unordered_map replacement_map; + std::vector global_in_or_join_nodes; +}; + +/** Execute subquery node and put result in mutable context temporary table. + * Returns table node that is initialized with temporary table storage. + */ +TableNodePtr executeSubqueryNode(const QueryTreeNodePtr & subquery_node, + ContextMutablePtr & mutable_context, + size_t subquery_depth) +{ + auto subquery_hash = subquery_node->getTreeHash(); + String temporary_table_name = fmt::format("_data_{}_{}", subquery_hash.first, subquery_hash.second); + + const auto & external_tables = mutable_context->getExternalTables(); + auto external_table_it = external_tables.find(temporary_table_name); + if (external_table_it != external_tables.end()) + { + auto temporary_table_expression_node = std::make_shared(external_table_it->second, mutable_context); + temporary_table_expression_node->setTemporaryTableName(temporary_table_name); + return temporary_table_expression_node; + } + + auto subquery_options = SelectQueryOptions(QueryProcessingStage::Complete, subquery_depth, true /*is_subquery*/); + auto context_copy = Context::createCopy(mutable_context); + updateContextForSubqueryExecution(context_copy); + + InterpreterSelectQueryAnalyzer interpreter(subquery_node, context_copy, subquery_options); + auto & query_plan = interpreter.getQueryPlan(); + + auto sample_block_with_unique_names = query_plan.getCurrentDataStream().header; + makeUniqueColumnNamesInBlock(sample_block_with_unique_names); + + if (!blocksHaveEqualStructure(sample_block_with_unique_names, query_plan.getCurrentDataStream().header)) + { + auto actions_dag = ActionsDAG::makeConvertingActions( + query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(), + sample_block_with_unique_names.getColumnsWithTypeAndName(), + ActionsDAG::MatchColumnsMode::Position); + auto converting_step = std::make_unique(query_plan.getCurrentDataStream(), std::move(actions_dag)); + query_plan.addStep(std::move(converting_step)); + } + + Block sample = interpreter.getSampleBlock(); + NamesAndTypesList columns = sample.getNamesAndTypesList(); + + auto external_storage_holder = TemporaryTableHolder( + mutable_context, + ColumnsDescription{columns}, + ConstraintsDescription{}, + nullptr /*query*/, + true /*create_for_global_subquery*/); + + StoragePtr external_storage = 
external_storage_holder.getTable(); + auto temporary_table_expression_node = std::make_shared(external_storage, mutable_context); + temporary_table_expression_node->setTemporaryTableName(temporary_table_name); + + auto table_out = external_storage->write({}, external_storage->getInMemoryMetadataPtr(), mutable_context, /*async_insert=*/false); + auto io = interpreter.execute(); + io.pipeline.complete(std::move(table_out)); + CompletedPipelineExecutor executor(io.pipeline); + executor.execute(); + + mutable_context->addExternalTable(temporary_table_name, std::move(external_storage_holder)); + + return temporary_table_expression_node; +} + +} + +QueryTreeNodePtr buildQueryTreeForShard(SelectQueryInfo & query_info, QueryTreeNodePtr query_tree_to_modify) +{ + auto & planner_context = query_info.planner_context; + const auto & query_context = planner_context->getQueryContext(); + + CollectColumnSourceToColumnsVisitor collect_column_source_to_columns_visitor; + collect_column_source_to_columns_visitor.visit(query_tree_to_modify); + + const auto & column_source_to_columns = collect_column_source_to_columns_visitor.getColumnSourceToColumns(); + + DistributedProductModeRewriteInJoinVisitor visitor(query_info.planner_context->getQueryContext()); + visitor.visit(query_tree_to_modify); + + auto replacement_map = visitor.getReplacementMap(); + const auto & global_in_or_join_nodes = visitor.getGlobalInOrJoinNodes(); + + for (const auto & global_in_or_join_node : global_in_or_join_nodes) + { + if (auto * join_node = global_in_or_join_node.query_node->as()) + { + auto join_right_table_expression = join_node->getRightTableExpression(); + auto join_right_table_expression_node_type = join_right_table_expression->getNodeType(); + + QueryTreeNodePtr subquery_node; + + if (join_right_table_expression_node_type == QueryTreeNodeType::QUERY || + join_right_table_expression_node_type == QueryTreeNodeType::UNION) + { + subquery_node = join_right_table_expression; + } + else if (join_right_table_expression_node_type == QueryTreeNodeType::TABLE || + join_right_table_expression_node_type == QueryTreeNodeType::TABLE_FUNCTION) + { + const auto & columns = column_source_to_columns.at(join_right_table_expression).columns; + subquery_node = buildSubqueryToReadColumnsFromTableExpression(columns, + join_right_table_expression, + planner_context->getQueryContext()); + } + else + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Expected JOIN right table expression to be table, table function, query or union node. 
Actual {}", + join_right_table_expression->formatASTForErrorMessage()); + } + + auto temporary_table_expression_node = executeSubqueryNode(subquery_node, + planner_context->getMutableQueryContext(), + global_in_or_join_node.subquery_depth); + temporary_table_expression_node->setAlias(join_right_table_expression->getAlias()); + + replacement_map.emplace(join_right_table_expression.get(), std::move(temporary_table_expression_node)); + continue; + } + else if (auto * in_function_node = global_in_or_join_node.query_node->as()) + { + auto & in_function_subquery_node = in_function_node->getArguments().getNodes().at(1); + auto in_function_node_type = in_function_subquery_node->getNodeType(); + if (in_function_node_type != QueryTreeNodeType::QUERY && in_function_node_type != QueryTreeNodeType::UNION) + continue; + + auto temporary_table_expression_node = executeSubqueryNode(in_function_subquery_node, + planner_context->getMutableQueryContext(), + global_in_or_join_node.subquery_depth); + + in_function_subquery_node = std::move(temporary_table_expression_node); + } + else + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Expected global IN or JOIN query node. Actual {}", + global_in_or_join_node.query_node->formatASTForErrorMessage()); + } + } + + if (!replacement_map.empty()) + query_tree_to_modify = query_tree_to_modify->cloneAndReplace(replacement_map); + + removeGroupingFunctionSpecializations(query_tree_to_modify); + + return query_tree_to_modify; +} + +} diff --git a/src/Storages/buildQueryTreeForShard.h b/src/Storages/buildQueryTreeForShard.h new file mode 100644 index 00000000000..05d63faeb9f --- /dev/null +++ b/src/Storages/buildQueryTreeForShard.h @@ -0,0 +1,15 @@ +#pragma once + +#include + +namespace DB +{ + +struct SelectQueryInfo; + +class IQueryTreeNode; +using QueryTreeNodePtr = std::shared_ptr; + +QueryTreeNodePtr buildQueryTreeForShard(SelectQueryInfo & query_info, QueryTreeNodePtr query_tree_to_modify); + +} diff --git a/tests/queries/0_stateless/02771_parallel_replicas_analyzer.reference b/tests/queries/0_stateless/02771_parallel_replicas_analyzer.reference index 4e93c530f7b..f688db940d9 100644 --- a/tests/queries/0_stateless/02771_parallel_replicas_analyzer.reference +++ b/tests/queries/0_stateless/02771_parallel_replicas_analyzer.reference @@ -9,4 +9,4 @@ 7885388429666205427 8124171311239967992 1 1 -- Simple query with analyzer and pure parallel replicas\nSELECT number\nFROM join_inner_table__fuzz_146_replicated\n SETTINGS\n allow_experimental_analyzer = 1,\n max_parallel_replicas = 2,\n cluster_for_parallel_replicas = \'test_cluster_one_shard_three_replicas_localhost\',\n allow_experimental_parallel_reading_from_replicas = 1,\n use_hedged_requests = 0; -0 2 SELECT `default`.`join_inner_table__fuzz_146_replicated`.`number` AS `number` FROM `default`.`join_inner_table__fuzz_146_replicated` +0 2 SELECT `join_inner_table__fuzz_146_replicated`.`number` AS `number` FROM `default`.`join_inner_table__fuzz_146_replicated` SETTINGS allow_experimental_analyzer = 1, max_parallel_replicas = 2, cluster_for_parallel_replicas = \'test_cluster_one_shard_three_replicas_localhost\', allow_experimental_parallel_reading_from_replicas = 1, use_hedged_requests = 0 From d05f89f8f5ec3793256cae1557e2af60650290cf Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Mon, 12 Jun 2023 17:33:15 +0000 Subject: [PATCH 0306/1997] Fix style --- src/Storages/StorageDistributed.cpp | 1 - src/Storages/buildQueryTreeForShard.cpp | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 1ec45ce3d57..0472ce6f832 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -154,7 +154,6 @@ namespace ErrorCodes extern const int DISTRIBUTED_TOO_MANY_PENDING_BYTES; extern const int ARGUMENT_OUT_OF_BOUND; extern const int TOO_LARGE_DISTRIBUTED_DEPTH; - extern const int DISTRIBUTED_IN_JOIN_SUBQUERY_DENIED; } namespace ActionLocks diff --git a/src/Storages/buildQueryTreeForShard.cpp b/src/Storages/buildQueryTreeForShard.cpp index a42d67d9aa7..fa4730cbe84 100644 --- a/src/Storages/buildQueryTreeForShard.cpp +++ b/src/Storages/buildQueryTreeForShard.cpp @@ -21,6 +21,7 @@ namespace DB namespace ErrorCodes { + extern const int LOGICAL_ERROR; extern const int DISTRIBUTED_IN_JOIN_SUBQUERY_DENIED; } From 70252321750b6a8d3ab6c41f658b76705a2e55b9 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 12 Jun 2023 18:59:07 +0000 Subject: [PATCH 0307/1997] Fixing cache for sets. --- src/Processors/Transforms/CreatingSetsTransform.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Processors/Transforms/CreatingSetsTransform.cpp b/src/Processors/Transforms/CreatingSetsTransform.cpp index a1e43525ab1..3139fa5ed98 100644 --- a/src/Processors/Transforms/CreatingSetsTransform.cpp +++ b/src/Processors/Transforms/CreatingSetsTransform.cpp @@ -86,7 +86,7 @@ void CreatingSetsTransform::startSubquery() } } - if (subquery.set) + if (subquery.set && !set_from_cache) LOG_TRACE(log, "Creating set, key: {}", subquery.key); if (subquery.table) LOG_TRACE(log, "Filling temporary table."); @@ -97,7 +97,7 @@ void CreatingSetsTransform::startSubquery() /// TODO: make via port table_out = QueryPipeline(subquery.table->write({}, subquery.table->getInMemoryMetadataPtr(), nullptr, /*async_insert=*/false)); - done_with_set = !subquery.set; + done_with_set = !subquery.set || set_from_cache; done_with_table = !subquery.table; if ((done_with_set && !set_from_cache) /*&& done_with_join*/ && done_with_table) @@ -175,10 +175,10 @@ void CreatingSetsTransform::consume(Chunk chunk) Chunk CreatingSetsTransform::generate() { - if (subquery.set) + if (subquery.set && !set_from_cache) { subquery.set->finishInsert(); - subquery.promise_to_fill_set.set_value(subquery.set); + //subquery.promise_to_fill_set.set_value(subquery.set); if (promise_to_build) promise_to_build->set_value(subquery.set); } From 2148f29a40f44f387b2cfbd9d3496bf9bc0b7e8d Mon Sep 17 00:00:00 2001 From: pufit Date: Thu, 8 Jun 2023 20:29:27 -0400 Subject: [PATCH 0308/1997] More accurate DNS resolve for the keeper connection --- src/Common/ZooKeeper/ZooKeeper.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index a587ad6caf4..e078470476a 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -13,6 +13,7 @@ #include #include #include "Common/ZooKeeper/IKeeper.h" +#include #include #include #include @@ -80,8 +81,12 @@ void ZooKeeper::init(ZooKeeperArgs args_) if (secure) host_string.erase(0, strlen("secure://")); - LOG_TEST(log, "Adding ZooKeeper host {} ({})", host_string, Poco::Net::SocketAddress{host_string}.toString()); - nodes.emplace_back(Coordination::ZooKeeper::Node{Poco::Net::SocketAddress{host_string}, secure}); + /// We want to resolve all hosts without DNS cache for keeper connection. 
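Dropping each host from the resolver cache before connecting (the calls continue just below) means a Keeper endpoint that was redeployed with a new IP is re-resolved at session setup instead of reusing a stale cached address. The same idea expressed in Python, going to the OS resolver directly so nothing is cached at process level (port 9181 is the keeper client port used in the tests above):

    # Sketch: resolve every keeper endpoint freshly at connection time.
    import socket

    def resolve_keeper_hosts(host_strings: list[str], default_port: int = 9181):
        addresses = []
        for host_string in host_strings:
            host, _, port = host_string.partition(":")
            infos = socket.getaddrinfo(host, int(port or default_port),
                                       type=socket.SOCK_STREAM)
            addresses.append(infos[0][4][:2])  # (ip, port) of the first answer
        return addresses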
+ Coordination::DNSResolver::instance().removeHostFromCache(host_string); + + auto address = Coordination::DNSResolver::instance().resolveAddress(host_string); + LOG_TEST(log, "Adding ZooKeeper host {} ({})", host_string, address.toString()); + nodes.emplace_back(Coordination::ZooKeeper::Node{address, secure}); } catch (const Poco::Net::HostNotFoundException & e) { From bbf0548007432dc5482cd28fda4c31e57dd5c24f Mon Sep 17 00:00:00 2001 From: flynn Date: Tue, 13 Jun 2023 02:48:28 +0000 Subject: [PATCH 0309/1997] fix test --- tests/queries/0_stateless/01945_show_debug_warning.expect | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01945_show_debug_warning.expect b/tests/queries/0_stateless/01945_show_debug_warning.expect index 617e54a375e..9a8e22aa26f 100755 --- a/tests/queries/0_stateless/01945_show_debug_warning.expect +++ b/tests/queries/0_stateless/01945_show_debug_warning.expect @@ -55,7 +55,7 @@ expect eof spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --max_memory_usage_for_all_queries=123 --history_file=$history_file" expect "Warnings:" -expect " * Obsolete settings [`max_memory_usage_for_all_queries`] is changed. Please check \'select * from system.settings where changed and is_obsolete\' and read the changelog." +expect " * Obsolete settings [\`max_memory_usage_for_all_queries\`] is changed. Please check \'select * from system.settings where changed and is_obsolete\' and read the changelog." expect ":) " send -- "q\r" expect eof From 6ad6c6afa3bdf1cd95e1454bad9e7eb75db7b0ab Mon Sep 17 00:00:00 2001 From: flynn Date: Tue, 13 Jun 2023 04:13:16 +0000 Subject: [PATCH 0310/1997] fix --- src/Interpreters/Context.cpp | 2 +- tests/queries/0_stateless/01945_show_debug_warning.expect | 2 +- tests/queries/0_stateless/01945_system_warnings.reference | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 823c3d678df..1b8c52ee06b 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -783,7 +783,7 @@ Strings Context::getWarnings() const { if (setting.isValueChanged() && setting.isObsolete()) { - res += (obsolete_settings_count ? ", `" : "`") + setting.getName() + "`"; + res += (obsolete_settings_count ? ", '" : "'") + setting.getName() + "'"; ++obsolete_settings_count; } } diff --git a/tests/queries/0_stateless/01945_show_debug_warning.expect b/tests/queries/0_stateless/01945_show_debug_warning.expect index 9a8e22aa26f..5315c56bde8 100755 --- a/tests/queries/0_stateless/01945_show_debug_warning.expect +++ b/tests/queries/0_stateless/01945_show_debug_warning.expect @@ -55,7 +55,7 @@ expect eof spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --max_memory_usage_for_all_queries=123 --history_file=$history_file" expect "Warnings:" -expect " * Obsolete settings [\`max_memory_usage_for_all_queries\`] is changed. Please check \'select * from system.settings where changed and is_obsolete\' and read the changelog." +expect " * Obsolete settings [\'max_memory_usage_for_all_queries\'] is changed. Please check \'select * from system.settings where changed and is_obsolete\' and read the changelog." 
 expect ":) "
 send -- "q\r"
 expect eof

diff --git a/tests/queries/0_stateless/01945_system_warnings.reference b/tests/queries/0_stateless/01945_system_warnings.reference
index 0c05d5d7049..dcb296c61aa 100644
--- a/tests/queries/0_stateless/01945_system_warnings.reference
+++ b/tests/queries/0_stateless/01945_system_warnings.reference
@@ -1,5 +1,5 @@
 Server was built in debug mode. It will work slowly.
 0
-Obsolete settings [`multiple_joins_rewriter_version`] is changed. Please check \'select * from system.settings where changed and is_obsolete\' and read the changelog.
+Obsolete settings [\'multiple_joins_rewriter_version\'] is changed. Please check \'select * from system.settings where changed and is_obsolete\' and read the changelog.
 1
 1

From 404bfe773ef726b63e944b70a8b4253907637b8c Mon Sep 17 00:00:00 2001
From: flynn
Date: Tue, 13 Jun 2023 06:28:47 +0000
Subject: [PATCH 0311/1997] fix

---
 tests/queries/0_stateless/01945_show_debug_warning.expect | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/01945_show_debug_warning.expect b/tests/queries/0_stateless/01945_show_debug_warning.expect
index 5315c56bde8..9be0eb6e399 100755
--- a/tests/queries/0_stateless/01945_show_debug_warning.expect
+++ b/tests/queries/0_stateless/01945_show_debug_warning.expect
@@ -55,7 +55,7 @@ expect eof
 spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --max_memory_usage_for_all_queries=123 --history_file=$history_file"
 expect "Warnings:"
-expect " * Obsolete settings [\'max_memory_usage_for_all_queries\'] is changed. Please check \'select * from system.settings where changed and is_obsolete\' and read the changelog."
+expect " * Obsolete settings"
 expect ":) "
 send -- "q\r"
 expect eof

From 6f1c4865372b408d0f773a8dd9b1db8e63b5e4bb Mon Sep 17 00:00:00 2001
From: Antonio Andelic
Date: Tue, 13 Jun 2023 10:25:53 +0200
Subject: [PATCH 0312/1997] Fix build

---
 src/Coordination/Changelog.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Coordination/Changelog.cpp b/src/Coordination/Changelog.cpp
index ffa22a6b888..94062140bac 100644
--- a/src/Coordination/Changelog.cpp
+++ b/src/Coordination/Changelog.cpp
@@ -13,7 +13,7 @@
 #include
 #include
 #include
-#include
+#include
 #include

From bbf69755adab474fbd166209ab7675537d1a9aeeb Mon Sep 17 00:00:00 2001
From: Manas Alekar
Date: Tue, 13 Jun 2023 01:18:36 -0700
Subject: [PATCH 0313/1997] Address some usability issues with INTO OUTFILE usage.
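
INTO OUTFILE now refuses to overwrite an existing file unless APPEND or
TRUNCATE is given, and APPEND cannot be combined with TRUNCATE. A sketch of
the intended usage (table and file names are illustrative):

    -- errors if result.csv already exists
    SELECT number FROM numbers(10) INTO OUTFILE 'result.csv' FORMAT CSV
    -- replaces the file's contents instead
    SELECT number FROM numbers(10) INTO OUTFILE 'result.csv' TRUNCATE FORMAT CSV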
---
 src/Client/ClientBase.cpp | 20 ++++++++++++++++++++
 src/Parsers/ASTQueryWithOutput.cpp | 2 ++
 src/Parsers/ASTQueryWithOutput.h | 1 +
 src/Parsers/ParserQueryWithOutput.cpp | 6 ++++++
 4 files changed, 29 insertions(+)

diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp
index 6f295c11070..fc108b8f57d 100644
--- a/src/Client/ClientBase.cpp
+++ b/src/Client/ClientBase.cpp
@@ -568,6 +568,13 @@ try
     CompressionMethod compression_method = chooseCompressionMethod(out_file, compression_method_string);
     UInt64 compression_level = 3;

+    if (query_with_output->is_outfile_append && query_with_output->is_outfile_truncate)
+    {
+        throw Exception(
+            ErrorCodes::BAD_ARGUMENTS,
+            "Cannot use INTO OUTFILE with APPEND and TRUNCATE simultaneously.");
+    }
+
     if (query_with_output->is_outfile_append && compression_method != CompressionMethod::None)
     {
         throw Exception(
@@ -589,9 +596,22 @@ try
             range.second);
     }

+    std::error_code ec;
+    if (std::filesystem::is_regular_file(out_file, ec))
+    {
+        if (!query_with_output->is_outfile_append && !query_with_output->is_outfile_truncate)
+        {
+            throw Exception(
+                ErrorCodes::CANNOT_OPEN_FILE,
+                "File {} exists, consider using 'INTO OUTFILE ... APPEND' or 'INTO OUTFILE ... TRUNCATE' if appropriate.",
+                out_file);
+        }
+    }
     auto flags = O_WRONLY | O_EXCL;
     if (query_with_output->is_outfile_append)
         flags |= O_APPEND;
+    else if (query_with_output->is_outfile_truncate)
+        flags |= O_TRUNC;
     else
         flags |= O_CREAT;
diff --git a/src/Parsers/ASTQueryWithOutput.cpp b/src/Parsers/ASTQueryWithOutput.cpp
index 5f717715a69..4bf1e6cb231 100644
--- a/src/Parsers/ASTQueryWithOutput.cpp
+++ b/src/Parsers/ASTQueryWithOutput.cpp
@@ -39,6 +39,8 @@ void ASTQueryWithOutput::formatImpl(const FormatSettings & s, FormatState & stat
         s.ostr << (s.hilite ? hilite_keyword : "");
         if (is_outfile_append)
             s.ostr << " APPEND";
+        if (is_outfile_truncate)
+            s.ostr << " TRUNCATE";
         if (is_into_outfile_with_stdout)
             s.ostr << " AND STDOUT";
         s.ostr << (s.hilite ? hilite_none : "");
diff --git a/src/Parsers/ASTQueryWithOutput.h b/src/Parsers/ASTQueryWithOutput.h
index 7db021405e7..6f9cafc89a9 100644
--- a/src/Parsers/ASTQueryWithOutput.h
+++ b/src/Parsers/ASTQueryWithOutput.h
@@ -17,6 +17,7 @@ public:
     ASTPtr out_file;
     bool is_into_outfile_with_stdout = false;
     bool is_outfile_append = false;
+    bool is_outfile_truncate = false;
     ASTPtr format;
     ASTPtr settings_ast;
     ASTPtr compression;
diff --git a/src/Parsers/ParserQueryWithOutput.cpp b/src/Parsers/ParserQueryWithOutput.cpp
index 6796f4528c4..2bfe7353be4 100644
--- a/src/Parsers/ParserQueryWithOutput.cpp
+++ b/src/Parsers/ParserQueryWithOutput.cpp
@@ -109,6 +109,12 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
             query_with_output.is_outfile_append = true;
         }

+        ParserKeyword s_truncate("TRUNCATE");
+        if (s_truncate.ignore(pos, expected))
+        {
+            query_with_output.is_outfile_truncate = true;
+        }
+
         ParserKeyword s_stdout("AND STDOUT");
         if (s_stdout.ignore(pos, expected))
         {

From e7d1dfb704caa283174823ba8ff59b6c10ae0e1d Mon Sep 17 00:00:00 2001
From: flynn
Date: Tue, 13 Jun 2023 08:30:07 +0000
Subject: [PATCH 0314/1997] fix

---
 tests/queries/0_stateless/01945_system_warnings.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/queries/0_stateless/01945_system_warnings.sh b/tests/queries/0_stateless/01945_system_warnings.sh
index 112baab614e..e44fe0ad6b5 100755
--- a/tests/queries/0_stateless/01945_system_warnings.sh
+++ b/tests/queries/0_stateless/01945_system_warnings.sh
@@ -1,4 +1,5 @@
 #!/usr/bin/env bash
+# Tags: no-parallel

 CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh

From 2c018f5261553dd6106639f22c148fbdd61d8fc4 Mon Sep 17 00:00:00 2001
From: Sema Checherinda
Date: Sat, 3 Jun 2023 20:59:04 +0200
Subject: [PATCH 0315/1997] do not call finalize after exception

---
 src/Storages/StorageS3.cpp | 11 +-
 tests/integration/helpers/client.py | 2 +
 tests/integration/helpers/cluster.py | 2 +
 .../integration/helpers/s3_mocks/broken_s3.py | 241 +++++++++++++++---
 .../test_checking_s3_blobs_paranoid/test.py | 98 ++++++-
 tests/integration/test_merge_tree_s3/test.py | 222 +++++++++++++++-
 6 files changed, 530 insertions(+), 46 deletions(-)

diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp
index f1a7bcb71a2..dfa5ea2667a 100644
--- a/src/Storages/StorageS3.cpp
+++ b/src/Storages/StorageS3.cpp
@@ -794,7 +794,7 @@ public:
     void onException() override
     {
         std::lock_guard lock(cancel_mutex);
-        finalize();
+        release();
     }

     void onFinish() override
@@ -824,6 +824,15 @@ private:
         }
     }

+    void release()
+    {
+        if (!writer)
+            return;
+
+        writer.reset();
+        write_buf.reset();
+    }
+
     Block sample_block;
     std::optional format_settings;
     std::unique_ptr write_buf;
diff --git a/tests/integration/helpers/client.py b/tests/integration/helpers/client.py
index c2676ac08a6..fdeedb9a80d 100644
--- a/tests/integration/helpers/client.py
+++ b/tests/integration/helpers/client.py
@@ -121,6 +121,7 @@ class Client:
         user=None,
         password=None,
         database=None,
+        query_id=None,
    ):
        return self.get_query_request(
            sql,
@@ -130,6 +131,7 @@ class Client:
            user=user,
            password=password,
            database=database,
+ query_id=None, ): logging.debug(f"Executing query {sql} on {self.name}") return self.client.query_and_get_error( @@ -3386,6 +3387,7 @@ class ClickHouseInstance: user=user, password=password, database=database, + query_id=query_id, ) def query_and_get_error_with_retry( diff --git a/tests/integration/helpers/s3_mocks/broken_s3.py b/tests/integration/helpers/s3_mocks/broken_s3.py index 026a3c6f515..8ff4f9e9203 100644 --- a/tests/integration/helpers/s3_mocks/broken_s3.py +++ b/tests/integration/helpers/s3_mocks/broken_s3.py @@ -6,10 +6,10 @@ import time import urllib.parse import http.server import socketserver +import string -UPSTREAM_HOST = "minio1" -UPSTREAM_PORT = 9001 +INF_COUNT = 100000000 class MockControl: @@ -28,31 +28,88 @@ class MockControl: ], nothrow=True, ) - assert response == "OK" + assert response == "OK", response + + def setup_error_at_object_upload(self, count=None, after=None): + url = f"http://localhost:{self._port}/mock_settings/error_at_object_upload?nothing=1" + + if count is not None: + url += f"&count={count}" + + if after is not None: + url += f"&after={after}" - def setup_fail_upload(self, part_length): response = self._cluster.exec_in_container( self._cluster.get_container_id(self._container), [ "curl", "-s", - f"http://localhost:{self._port}/mock_settings/error_at_put?when_length_bigger={part_length}", + url, ], nothrow=True, ) - assert response == "OK" + assert response == "OK", response + + def setup_error_at_part_upload(self, count=None, after=None): + url = f"http://localhost:{self._port}/mock_settings/error_at_part_upload?nothing=1" + + if count is not None: + url += f"&count={count}" + + if after is not None: + url += f"&after={after}" - def setup_fake_upload(self, part_length): response = self._cluster.exec_in_container( self._cluster.get_container_id(self._container), [ "curl", "-s", - f"http://localhost:{self._port}/mock_settings/fake_put?when_length_bigger={part_length}", + url, ], nothrow=True, ) - assert response == "OK" + assert response == "OK", response + + def setup_error_at_create_multi_part_upload(self, count=None): + url = f"http://localhost:{self._port}/mock_settings/error_at_create_multi_part_upload" + + if count is not None: + url += f"?count={count}" + + response = self._cluster.exec_in_container( + self._cluster.get_container_id(self._container), + [ + "curl", + "-s", + url, + ], + nothrow=True, + ) + assert response == "OK", response + + def setup_fake_puts(self, part_length): + response = self._cluster.exec_in_container( + self._cluster.get_container_id(self._container), + [ + "curl", + "-s", + f"http://localhost:{self._port}/mock_settings/fake_puts?when_length_bigger={part_length}", + ], + nothrow=True, + ) + assert response == "OK", response + + def setup_fake_multpartuploads(self): + response = self._cluster.exec_in_container( + self._cluster.get_container_id(self._container), + [ + "curl", + "-s", + f"http://localhost:{self._port}/mock_settings/setup_fake_multpartuploads?", + ], + nothrow=True, + ) + assert response == "OK", response def setup_slow_answers( self, minimal_length=0, timeout=None, probability=None, count=None @@ -77,7 +134,7 @@ class MockControl: ["curl", "-s", url], nothrow=True, ) - assert response == "OK" + assert response == "OK", response class _ServerRuntime: @@ -88,7 +145,7 @@ class _ServerRuntime: self.probability = probability_ if probability_ is not None else 1 self.timeout = timeout_ if timeout_ is not None else 0.1 self.minimal_length = minimal_length_ if minimal_length_ is not None else 0 - self.count = 
count_ if count_ is not None else 2**32 + self.count = count_ if count_ is not None else INF_COUNT def __str__(self): return ( @@ -109,12 +166,32 @@ class _ServerRuntime: return _runtime.slow_put.timeout return None + class CountAfter: + def __init__(self, count_=None, after_=None): + self.count = count_ if count_ is not None else INF_COUNT + self.after = after_ if after_ is not None else 0 + + def __str__(self): + return f"count:{self.count} after:{self.after}" + + def has_effect(self): + if self.after: + self.after -= 1 + if self.after == 0: + if self.count: + self.count -= 1 + return True + return False + def __init__(self): self.lock = threading.Lock() - self.error_at_put_when_length_bigger = None + self.error_at_part_upload = None + self.error_at_object_upload = None self.fake_put_when_length_bigger = None self.fake_uploads = dict() self.slow_put = None + self.fake_multipart_upload = None + self.error_at_create_multi_part_upload = None def register_fake_upload(self, upload_id, key): with self.lock: @@ -127,10 +204,14 @@ class _ServerRuntime: return False def reset(self): - self.error_at_put_when_length_bigger = None - self.fake_put_when_length_bigger = None - self.fake_uploads = dict() - self.slow_put = None + with self.lock: + self.error_at_part_upload = None + self.error_at_object_upload = None + self.fake_put_when_length_bigger = None + self.fake_uploads = dict() + self.slow_put = None + self.fake_multipart_upload = None + self.error_at_create_multi_part_upload = None _runtime = _ServerRuntime() @@ -141,6 +222,13 @@ def _and_then(value, func): return None if value is None else func(value) +def get_random_string(length): + # choose from all lowercase letter + letters = string.ascii_lowercase + result_str = "".join(random.choice(letters) for i in range(length)) + return result_str + + class RequestHandler(http.server.BaseHTTPRequestHandler): def _ok(self): self.send_response(200) @@ -166,19 +254,30 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): self._read_out() self.send_response(307) - url = f"http://{UPSTREAM_HOST}:{UPSTREAM_PORT}{self.path}" + url = ( + f"http://{self.server.upstream_host}:{self.server.upstream_port}{self.path}" + ) self.send_header("Location", url) self.end_headers() self.wfile.write(b"Redirected") def _error(self, data): self._read_out() - self.send_response(500) self.send_header("Content-Type", "text/xml") self.end_headers() self.wfile.write(bytes(data, "UTF-8")) + def _error_expected_500(self): + self._error( + '' + "" + "ExpectedError" + "mock s3 injected error" + "txfbd566d03042474888193-00608d7537" + "" + ) + def _fake_put_ok(self): self._read_out() @@ -188,6 +287,28 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): self.send_header("Content-Length", 0) self.end_headers() + def _fake_uploads(self, path, upload_id): + self._read_out() + + parts = [x for x in path.split("/") if x] + bucket = parts[0] + key = "/".join(parts[1:]) + data = ( + '\n' + "\n" + f"{bucket}" + f"{key}" + f"{upload_id}" + "" + ) + + self.send_response(200) + self.send_header("Content-Type", "text/xml") + self.send_header("Content-Length", len(data)) + self.end_headers() + + self.wfile.write(bytes(data, "UTF-8")) + def _fake_post_ok(self, path): self._read_out() @@ -219,18 +340,29 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): if len(path) < 2: return self._error("_mock_settings: wrong command") - if path[1] == "error_at_put": + if path[1] == "error_at_part_upload": params = urllib.parse.parse_qs(parts.query, keep_blank_values=False) - 
_runtime.error_at_put_when_length_bigger = int( - params.get("when_length_bigger", [1024 * 1024])[0] + _runtime.error_at_part_upload = _ServerRuntime.CountAfter( + count_=_and_then(params.get("count", [None])[0], int), + after_=_and_then(params.get("after", [None])[0], int), ) return self._ok() - if path[1] == "fake_put": + + if path[1] == "error_at_object_upload": + params = urllib.parse.parse_qs(parts.query, keep_blank_values=False) + _runtime.error_at_object_upload = _ServerRuntime.CountAfter( + count_=_and_then(params.get("count", [None])[0], int), + after_=_and_then(params.get("after", [None])[0], int), + ) + return self._ok() + + if path[1] == "fake_puts": params = urllib.parse.parse_qs(parts.query, keep_blank_values=False) _runtime.fake_put_when_length_bigger = int( params.get("when_length_bigger", [1024 * 1024])[0] ) return self._ok() + if path[1] == "slow_put": params = urllib.parse.parse_qs(parts.query, keep_blank_values=False) _runtime.slow_put = _ServerRuntime.SlowPut( @@ -241,6 +373,18 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): ) self.log_message("set slow put %s", _runtime.slow_put) return self._ok() + + if path[1] == "setup_fake_multpartuploads": + _runtime.fake_multipart_upload = True + return self._ok() + + if path[1] == "error_at_create_multi_part_upload": + params = urllib.parse.parse_qs(parts.query, keep_blank_values=False) + _runtime.error_at_create_multi_part_upload = int( + params.get("count", [INF_COUNT])[0] + ) + return self._ok() + if path[1] == "reset": _runtime.reset() return self._ok() @@ -265,33 +409,42 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): self.log_message("slow put %s", timeout) time.sleep(timeout) - if _runtime.error_at_put_when_length_bigger is not None: - if content_length > _runtime.error_at_put_when_length_bigger: - return self._error( - '' - "" - "ExpectedError" - "mock s3 injected error" - "txfbd566d03042474888193-00608d7537" - "" - ) - parts = urllib.parse.urlsplit(self.path) params = urllib.parse.parse_qs(parts.query, keep_blank_values=False) upload_id = params.get("uploadId", [None])[0] - if _runtime.fake_put_when_length_bigger is not None: - if content_length > _runtime.fake_put_when_length_bigger: - if upload_id is not None: - _runtime.register_fake_upload(upload_id, parts.path) - return self._fake_put_ok() + + if upload_id is not None: + if _runtime.error_at_part_upload is not None: + if _runtime.error_at_part_upload.has_effect(): + return self._error_expected_500() + if _runtime.fake_multipart_upload: + if _runtime.is_fake_upload(upload_id, parts.path): + return self._fake_put_ok() + else: + if _runtime.error_at_object_upload is not None: + if _runtime.error_at_object_upload.has_effect(): + return self._error_expected_500() + if _runtime.fake_put_when_length_bigger is not None: + if content_length > _runtime.fake_put_when_length_bigger: + return self._fake_put_ok() return self._redirect() def do_POST(self): parts = urllib.parse.urlsplit(self.path) - params = urllib.parse.parse_qs(parts.query, keep_blank_values=False) - upload_id = params.get("uploadId", [None])[0] + params = urllib.parse.parse_qs(parts.query, keep_blank_values=True) + uploads = params.get("uploads", [None])[0] + if uploads is not None: + if _runtime.error_at_create_multi_part_upload: + _runtime.error_at_create_multi_part_upload -= 1 + return self._error_expected_500() + if _runtime.fake_multipart_upload: + upload_id = get_random_string(5) + _runtime.register_fake_upload(upload_id, parts.path) + return self._fake_uploads(parts.path, 
upload_id) + + upload_id = params.get("uploadId", [None])[0] if _runtime.is_fake_upload(upload_id, parts.path): return self._fake_post_ok(parts.path) @@ -307,7 +460,15 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): class _ThreadedHTTPServer(socketserver.ThreadingMixIn, http.server.HTTPServer): """Handle requests in a separate thread.""" + def set_upstream(self, upstream_host, upstream_port): + self.upstream_host = upstream_host + self.upstream_port = upstream_port + if __name__ == "__main__": httpd = _ThreadedHTTPServer(("0.0.0.0", int(sys.argv[1])), RequestHandler) + if len(sys.argv) == 4: + httpd.set_upstream(sys.argv[2], sys.argv[3]) + else: + httpd.set_upstream("minio1", 9001) httpd.serve_forever() diff --git a/tests/integration/test_checking_s3_blobs_paranoid/test.py b/tests/integration/test_checking_s3_blobs_paranoid/test.py index 042d57a0c43..c0f184815c9 100644 --- a/tests/integration/test_checking_s3_blobs_paranoid/test.py +++ b/tests/integration/test_checking_s3_blobs_paranoid/test.py @@ -54,7 +54,7 @@ def test_upload_after_check_works(cluster, broken_s3): """ ) - broken_s3.setup_fake_upload(1) + broken_s3.setup_fake_puts(1) error = node.query_and_get_error( "INSERT INTO s3_upload_after_check_works VALUES (1, 'Hello')" @@ -63,3 +63,99 @@ def test_upload_after_check_works(cluster, broken_s3): assert "Code: 499" in error, error assert "Immediately after upload" in error, error assert "suddenly disappeared" in error, error + + +def get_counters(node, query_id, log_type="ExceptionWhileProcessing"): + node.query("SYSTEM FLUSH LOGS") + return [ + int(x) + for x in node.query( + f""" + SELECT + ProfileEvents['S3CreateMultipartUpload'], + ProfileEvents['S3UploadPart'], + ProfileEvents['S3WriteRequestsErrors'] + FROM system.query_log + WHERE query_id='{query_id}' + AND type='{log_type}' + """ + ).split() + if x + ] + + +def test_upload_s3_fail_create_multi_part_upload(cluster, broken_s3): + node = cluster.instances["node"] + + broken_s3.setup_error_at_create_multi_part_upload() + + insert_query_id = "INSERT_INTO_TABLE_FUNCTION_FAIL_CREATE_MPU" + error = node.query_and_get_error( + """ + INSERT INTO + TABLE FUNCTION s3( + 'http://resolver:8083/root/data/test_upload_s3_fail_create_multi_part_upload', + 'minio', 'minio123', + 'CSV', auto, 'none' + ) + SELECT + * + FROM system.numbers + LIMIT 100000000 + SETTINGS + s3_max_single_part_upload_size=100, + s3_min_upload_part_size=100 + """, + query_id=insert_query_id, + ) + + assert "Code: 499" in error, error + assert "mock s3 injected error" in error, error + assert "DB::WriteBufferFromS3::createMultipartUpload()" in error, error + + count_create_multi_part_uploads, count_upload_parts, count_s3_errors = get_counters( + node, insert_query_id + ) + assert count_create_multi_part_uploads == 1 + assert count_upload_parts == 0 + assert count_s3_errors == 1 + + +def test_upload_s3_fail_upload_part_when_multi_part_upload(cluster, broken_s3): + node = cluster.instances["node"] + + broken_s3.setup_fake_multpartuploads() + broken_s3.setup_error_at_part_upload(count=1, after=2) + + insert_query_id = "INSERT_INTO_TABLE_FUNCTION_FAIL_UPLOAD_PART" + error = node.query_and_get_error( + """ + INSERT INTO + TABLE FUNCTION s3( + 'http://resolver:8083/root/data/test_upload_s3_fail_upload_part_when_multi_part_upload', + 'minio', 'minio123', + 'CSV', auto, 'none' + ) + SELECT + * + FROM system.numbers + LIMIT 100000000 + SETTINGS + s3_max_single_part_upload_size=100, + s3_min_upload_part_size=100 + """, + query_id=insert_query_id, + ) + + assert "Code: 
499" in error, error + assert "mock s3 injected error" in error, error + assert "DB::WriteBufferFromS3::writePart" in error, error + + count_create_multi_part_uploads, count_upload_parts, count_s3_errors = get_counters( + node, insert_query_id + ) + assert count_create_multi_part_uploads == 1 + assert count_upload_parts >= 2 + assert ( + count_s3_errors == 2 + ) # the second is cancel multipart upload, s3_mock just redirects this request diff --git a/tests/integration/test_merge_tree_s3/test.py b/tests/integration/test_merge_tree_s3/test.py index 2ccd517923a..f3f44f1452c 100644 --- a/tests/integration/test_merge_tree_s3/test.py +++ b/tests/integration/test_merge_tree_s3/test.py @@ -862,7 +862,9 @@ def test_merge_canceled_by_s3_errors(cluster, broken_s3, node_name, storage_poli min_key = node.query("SELECT min(key) FROM test_merge_canceled_by_s3_errors") assert int(min_key) == 0, min_key - broken_s3.setup_fail_upload(50000) + broken_s3.setup_error_at_object_upload() + broken_s3.setup_fake_multpartuploads() + broken_s3.setup_error_at_part_upload() node.query("SYSTEM START MERGES test_merge_canceled_by_s3_errors") @@ -905,7 +907,7 @@ def test_merge_canceled_by_s3_errors_when_move(cluster, broken_s3, node_name): settings={"materialize_ttl_after_modify": 0}, ) - broken_s3.setup_fail_upload(10000) + broken_s3.setup_error_at_object_upload(count=1, after=1) node.query("SYSTEM START MERGES merge_canceled_by_s3_errors_when_move") @@ -941,7 +943,7 @@ def test_s3_engine_heavy_write_check_mem( " ENGINE S3('http://resolver:8083/root/data/test-upload.csv', 'minio', 'minio123', 'CSV')", ) - broken_s3.setup_fake_upload(1000) + broken_s3.setup_fake_multpartuploads() broken_s3.setup_slow_answers(10 * 1024 * 1024, timeout=15, count=10) query_id = f"INSERT_INTO_S3_ENGINE_QUERY_ID_{in_flight}" @@ -987,7 +989,7 @@ def test_s3_disk_heavy_write_check_mem(cluster, broken_s3, node_name): ) node.query("SYSTEM STOP MERGES s3_test") - broken_s3.setup_fake_upload(1000) + broken_s3.setup_fake_multpartuploads() broken_s3.setup_slow_answers(10 * 1024 * 1024, timeout=10, count=50) query_id = f"INSERT_INTO_S3_DISK_QUERY_ID" @@ -1013,3 +1015,215 @@ def test_s3_disk_heavy_write_check_mem(cluster, broken_s3, node_name): assert int(result) > 0.8 * memory check_no_objects_after_drop(cluster, node_name=node_name) + + +def get_memory_usage(node, query_id): + node.query("SYSTEM FLUSH LOGS") + memory_usage = node.query( + "SELECT memory_usage" + " FROM system.query_log" + f" WHERE query_id='{query_id}'" + " AND type='QueryFinish'" + ) + return int(memory_usage) + + +def get_memory_usages(node, query_ids): + node.query("SYSTEM FLUSH LOGS") + result = [] + for query_id in query_ids: + memory_usage = node.query( + "SELECT memory_usage" + " FROM system.query_log" + f" WHERE query_id='{query_id}'" + " AND type='QueryFinish'" + ) + result.append(int(memory_usage)) + return result + + +@pytest.mark.parametrize("node_name", ["node"]) +def test_heavy_insert_select_check_memory(cluster, broken_s3, node_name): + node = cluster.instances[node_name] + + node.query( + """ + CREATE TABLE central_query_log + ( + control_plane_id UUID, + pod_id LowCardinality(String), + scrape_ts_microseconds DateTime64(6) CODEC(Delta(8), LZ4), + event_date Date, + event_time DateTime, + payload Array(String), + payload_01 String, + payload_02 String, + payload_03 String, + payload_04 String, + payload_05 String, + payload_06 String, + payload_07 String, + payload_08 String, + payload_09 String, + payload_10 String, + payload_11 String, + payload_12 String, + 
payload_13 String, + payload_14 String, + payload_15 String, + payload_16 String, + payload_17 String, + payload_18 String, + payload_19 String + ) + ENGINE=MergeTree() + PARTITION BY toYYYYMM(event_date) + ORDER BY (control_plane_id, event_date, pod_id) + SETTINGS + storage_policy='s3' + """ + ) + + node.query("SYSTEM STOP MERGES central_query_log") + + write_count = 2 + write_query_ids = [] + for x in range(write_count): + query_id = f"INSERT_INTO_TABLE_RANDOM_DATA_QUERY_ID_{x}" + write_query_ids.append(query_id) + node.query( + """ + INSERT INTO central_query_log + SELECT + control_plane_id, + pod_id, + toStartOfHour(event_time) + toIntervalSecond(randUniform(0,60)) as scrape_ts_microseconds, + toDate(event_time) as event_date, + event_time, + payload, + payload[1] as payload_01, + payload[2] as payload_02, + payload[3] as payload_03, + payload[4] as payload_04, + payload[5] as payload_05, + payload[6] as payload_06, + payload[7] as payload_07, + payload[8] as payload_08, + payload[9] as payload_09, + payload[10] as payload_10, + payload[11] as payload_11, + payload[12] as payload_12, + payload[13] as payload_13, + payload[14] as payload_14, + payload[15] as payload_15, + payload[16] as payload_16, + payload[17] as payload_17, + payload[18] as payload_18, + payload[19] as payload_19 + FROM + ( + SELECT + control_plane_id, + substring(payload[1], 1, 5) as pod_id, + toDateTime('2022-12-12 00:00:00') + + toIntervalDay(floor(randUniform(0,3))) + + toIntervalHour(floor(randUniform(0,24))) + + toIntervalSecond(floor(randUniform(0,60))) + as event_time, + payload + FROM + generateRandom( + 'control_plane_id UUID, payload Array(String)', + NULL, + 100, + 100 + ) + LIMIT 10000 + ) + SETTINGS + max_insert_block_size=256000000, + min_insert_block_size_rows=1000000, + min_insert_block_size_bytes=256000000 + """, + query_id=query_id, + ) + + memory = 845346116 + for memory_usage, query_id in zip( + get_memory_usages(node, write_query_ids), write_query_ids + ): + assert int(memory_usage) < 1.2 * memory, f"{memory_usage} : {query_id}" + assert int(memory_usage) > 0.8 * memory, f"{memory_usage} : {query_id}" + + broken_s3.setup_slow_answers(minimal_length=1000, timeout=5, count=20) + broken_s3.setup_fake_multpartuploads() + + insert_query_id = f"INSERT_INTO_S3_FUNCTION_QUERY_ID" + node.query( + """ + INSERT INTO + TABLE FUNCTION s3( + 'http://resolver:8083/root/data/test-upload_{_partition_id}.csv.gz', + 'minio', 'minio123', + 'CSV', auto, 'gzip' + ) + PARTITION BY formatDateTime(subtractHours(toDateTime('2022-12-13 00:00:00'), 1),'%Y-%m-%d_%H:00') + WITH toDateTime('2022-12-13 00:00:00') as time_point + SELECT + * + FROM central_query_log + WHERE + event_date >= subtractDays(toDate(time_point), 1) + AND scrape_ts_microseconds >= subtractHours(toStartOfHour(time_point), 12) + AND scrape_ts_microseconds < toStartOfDay(time_point) + SETTINGS + s3_max_inflight_parts_for_one_file=1 + """, + query_id=insert_query_id, + ) + + query_id = f"SELECT_QUERY_ID" + total = node.query( + """ + SELECT + count() + FROM central_query_log + """, + query_id=query_id, + ) + assert int(total) == 10000 * write_count + + query_id = f"SELECT_WHERE_QUERY_ID" + selected = node.query( + """ + WITH toDateTime('2022-12-13 00:00:00') as time_point + SELECT + count() + FROM central_query_log + WHERE + event_date >= subtractDays(toDate(time_point), 1) + AND scrape_ts_microseconds >= subtractHours(toStartOfHour(time_point), 12) + AND scrape_ts_microseconds < toStartOfDay(time_point) + """, + query_id=query_id, + ) + assert 
int(selected) < 4500, selected + assert int(selected) > 2500, selected + + node.query("SYSTEM FLUSH LOGS") + profile_events = node.query( + f""" + SELECT ProfileEvents + FROM system.query_log + WHERE query_id='{insert_query_id}' + AND type='QueryFinish' + """ + ) + + memory_usage = get_memory_usage(node, insert_query_id) + memory = 123507857 + assert int(memory_usage) < 1.2 * memory, f"{memory_usage} {profile_events}" + assert int(memory_usage) > 0.8 * memory, f"{memory_usage} {profile_events}" + + node.query(f"DROP TABLE IF EXISTS central_query_log SYNC") + remove_all_s3_objects(cluster) From 6c776f4483382afa395bb5929e1b1351468795ec Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 13 Jun 2023 12:40:53 +0200 Subject: [PATCH 0316/1997] Better --- src/Access/Common/AccessType.h | 14 ++++++------- src/Access/UsersConfigAccessStorage.cpp | 2 +- src/Storages/NamedCollectionsHelpers.cpp | 4 ++-- tests/integration/test_storage_s3/test.py | 25 ++++++++++++++++++++--- 4 files changed, 32 insertions(+), 13 deletions(-) diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index 3a94a5037b2..16ee5177d66 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -70,7 +70,7 @@ enum class AccessType M(ALTER_FREEZE_PARTITION, "FREEZE PARTITION, UNFREEZE", TABLE, ALTER_TABLE) \ \ M(ALTER_DATABASE_SETTINGS, "ALTER DATABASE SETTING, ALTER MODIFY DATABASE SETTING, MODIFY DATABASE SETTING", DATABASE, ALTER_DATABASE) /* allows to execute ALTER MODIFY SETTING */\ - M(ALTER_NAMED_COLLECTION, "", NAMED_COLLECTION, NAMED_COLLECTION_CONTROL) /* allows to execute ALTER NAMED COLLECTION */\ + M(ALTER_NAMED_COLLECTION, "", NAMED_COLLECTION, NAMED_COLLECTION_ADMIN) /* allows to execute ALTER NAMED COLLECTION */\ \ M(ALTER_TABLE, "", GROUP, ALTER) \ M(ALTER_DATABASE, "", GROUP, ALTER) \ @@ -92,7 +92,7 @@ enum class AccessType M(CREATE_ARBITRARY_TEMPORARY_TABLE, "", GLOBAL, CREATE) /* allows to create and manipulate temporary tables with arbitrary table engine */\ M(CREATE_FUNCTION, "", GLOBAL, CREATE) /* allows to execute CREATE FUNCTION */ \ - M(CREATE_NAMED_COLLECTION, "", NAMED_COLLECTION, NAMED_COLLECTION_CONTROL) /* allows to execute CREATE NAMED COLLECTION */ \ + M(CREATE_NAMED_COLLECTION, "", NAMED_COLLECTION, NAMED_COLLECTION_ADMIN) /* allows to execute CREATE NAMED COLLECTION */ \ M(CREATE, "", GROUP, ALL) /* allows to execute {CREATE|ATTACH} */ \ \ M(DROP_DATABASE, "", DATABASE, DROP) /* allows to execute {DROP|DETACH} DATABASE */\ @@ -101,7 +101,7 @@ enum class AccessType implicitly enabled by the grant DROP_TABLE */\ M(DROP_DICTIONARY, "", DICTIONARY, DROP) /* allows to execute {DROP|DETACH} DICTIONARY */\ M(DROP_FUNCTION, "", GLOBAL, DROP) /* allows to execute DROP FUNCTION */\ - M(DROP_NAMED_COLLECTION, "", NAMED_COLLECTION, NAMED_COLLECTION_CONTROL) /* allows to execute DROP NAMED COLLECTION */\ + M(DROP_NAMED_COLLECTION, "", NAMED_COLLECTION, NAMED_COLLECTION_ADMIN) /* allows to execute DROP NAMED COLLECTION */\ M(DROP, "", GROUP, ALL) /* allows to execute {DROP|DETACH} */\ \ M(UNDROP_TABLE, "", TABLE, ALL) /* allows to execute {UNDROP} TABLE */\ @@ -140,10 +140,10 @@ enum class AccessType M(SHOW_SETTINGS_PROFILES, "SHOW PROFILES, SHOW CREATE SETTINGS PROFILE, SHOW CREATE PROFILE", GLOBAL, SHOW_ACCESS) \ M(SHOW_ACCESS, "", GROUP, ACCESS_MANAGEMENT) \ M(ACCESS_MANAGEMENT, "", GROUP, ALL) \ - M(SHOW_NAMED_COLLECTIONS, "SHOW NAMED COLLECTIONS", NAMED_COLLECTION, NAMED_COLLECTION_CONTROL) \ - M(SHOW_NAMED_COLLECTIONS_SECRETS, "SHOW NAMED COLLECTIONS SECRETS", 
NAMED_COLLECTION, NAMED_COLLECTION_CONTROL) \ - M(USE_NAMED_COLLECTION, "USE NAMED COLLECTION", NAMED_COLLECTION, NAMED_COLLECTION_CONTROL) \ - M(NAMED_COLLECTION_CONTROL, "", NAMED_COLLECTION, ALL) \ + M(SHOW_NAMED_COLLECTIONS, "SHOW NAMED COLLECTIONS", NAMED_COLLECTION, NAMED_COLLECTION_ADMIN) \ + M(SHOW_NAMED_COLLECTIONS_SECRETS, "SHOW NAMED COLLECTIONS SECRETS", NAMED_COLLECTION, NAMED_COLLECTION_ADMIN) \ + M(USE_NAMED_COLLECTION, "NAMED COLLECTION USAGE, NAMED COLLECTION", NAMED_COLLECTION, NAMED_COLLECTION_ADMIN) \ + M(NAMED_COLLECTION_ADMIN, "NAMED COLLECTION CONTROL", NAMED_COLLECTION, ALL) \ \ M(SYSTEM_SHUTDOWN, "SYSTEM KILL, SHUTDOWN", GLOBAL, SYSTEM) \ M(SYSTEM_DROP_DNS_CACHE, "SYSTEM DROP DNS, DROP DNS CACHE, DROP DNS", GLOBAL, SYSTEM_DROP_CACHE) \ diff --git a/src/Access/UsersConfigAccessStorage.cpp b/src/Access/UsersConfigAccessStorage.cpp index 187258d0fcd..15765045c97 100644 --- a/src/Access/UsersConfigAccessStorage.cpp +++ b/src/Access/UsersConfigAccessStorage.cpp @@ -328,7 +328,7 @@ namespace if (!named_collection_control) { - user->access.revoke(AccessType::NAMED_COLLECTION_CONTROL); + user->access.revoke(AccessType::NAMED_COLLECTION_ADMIN); } if (!show_named_collections_secrets) diff --git a/src/Storages/NamedCollectionsHelpers.cpp b/src/Storages/NamedCollectionsHelpers.cpp index efd5af29f48..29d47e131a6 100644 --- a/src/Storages/NamedCollectionsHelpers.cpp +++ b/src/Storages/NamedCollectionsHelpers.cpp @@ -76,6 +76,8 @@ MutableNamedCollectionPtr tryGetNamedCollectionWithOverrides( if (!collection_name.has_value()) return nullptr; + context->checkAccess(AccessType::USE_NAMED_COLLECTION, *collection_name); + NamedCollectionPtr collection; if (throw_unknown_collection) collection = NamedCollectionFactory::instance().get(*collection_name); @@ -85,8 +87,6 @@ MutableNamedCollectionPtr tryGetNamedCollectionWithOverrides( if (!collection) return nullptr; - context->checkAccess(AccessType::USE_NAMED_COLLECTION, *collection_name); - auto collection_copy = collection->duplicate(); if (asts.size() == 1) diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 75473f3c406..28117d694d6 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -57,6 +57,16 @@ def started_cluster(): ], user_configs=["configs/access.xml", "configs/users.xml"], ) + cluster.add_instance( + "dummy_without_named_collections", + with_minio=True, + main_configs=[ + "configs/defaultS3.xml", + "configs/named_collections.xml", + "configs/schema_cache.xml", + ], + user_configs=["configs/access.xml"], + ) cluster.add_instance( "s3_max_redirects", with_minio=True, @@ -919,7 +929,7 @@ def test_truncate_table(started_cluster): def test_predefined_connection_configuration(started_cluster): bucket = started_cluster.minio_bucket - instance = started_cluster.instances["dummy"] # type: ClickHouseInstance + instance = started_cluster.instances["dummy_without_named_collections"] # type: ClickHouseInstance name = "test_table" instance.query("CREATE USER user") @@ -944,7 +954,7 @@ def test_predefined_connection_configuration(started_cluster): in error ) - instance.query("GRANT USE NAMED COLLECTION ON s3_conf1 TO user", user="admin") + instance.query("GRANT NAMED COLLECTION ON s3_conf1 TO user", user="admin") instance.query( f"CREATE TABLE {name} (id UInt32) ENGINE = S3(s3_conf1, format='CSV')", user="user", @@ -960,8 +970,17 @@ def test_predefined_connection_configuration(started_cluster): assert result == instance.query("SELECT 
number FROM numbers(10)") error = instance.query_and_get_error("SELECT * FROM s3(no_collection)") - assert "There is no named collection `no_collection`" in error + assert ( + "To execute this query it's necessary to have grant USE NAMED COLLECTION ON no_collection" + in error + ) error = instance.query_and_get_error("SELECT * FROM s3(no_collection)", user="user") + assert ( + "To execute this query it's necessary to have grant USE NAMED COLLECTION ON no_collection" + in error + ) + instance = started_cluster.instances["dummy"] # has named collection access + error = instance.query_and_get_error("SELECT * FROM s3(no_collection)") assert "There is no named collection `no_collection`" in error From 4ac090b12212567f7b9d30cb27132f566ae438c4 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 13 Jun 2023 11:01:44 +0000 Subject: [PATCH 0317/1997] Fix tests --- src/Coordination/tests/gtest_coordination.cpp | 22 +++++++++---------- .../StorageSystemZooKeeperConnection.cpp | 4 ++-- tests/config/config.d/keeper_port.xml | 4 ++++ .../test_keeper_four_word_command/test.py | 8 +++---- ...2735_system_zookeeper_connection.reference | 2 +- 5 files changed, 22 insertions(+), 18 deletions(-) diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index 2793b23c572..b29d5bca43d 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -1193,7 +1193,7 @@ TEST_P(CoordinationTest, TestStorageSnapshotSimple) EXPECT_EQ(snapshot.snapshot_meta->get_last_log_idx(), 2); EXPECT_EQ(snapshot.session_id, 7); - EXPECT_EQ(snapshot.snapshot_container_size, 5); + EXPECT_EQ(snapshot.snapshot_container_size, 6); EXPECT_EQ(snapshot.session_and_timeout.size(), 2); auto buf = manager.serializeSnapshotToBuffer(snapshot); @@ -1205,7 +1205,7 @@ TEST_P(CoordinationTest, TestStorageSnapshotSimple) auto [restored_storage, snapshot_meta, _] = manager.deserializeSnapshotFromBuffer(debuf); - EXPECT_EQ(restored_storage->container.size(), 5); + EXPECT_EQ(restored_storage->container.size(), 6); EXPECT_EQ(restored_storage->container.getValue("/").getChildren().size(), 2); EXPECT_EQ(restored_storage->container.getValue("/hello").getChildren().size(), 1); EXPECT_EQ(restored_storage->container.getValue("/hello/somepath").getChildren().size(), 0); @@ -1237,14 +1237,14 @@ TEST_P(CoordinationTest, TestStorageSnapshotMoreWrites) DB::KeeperStorageSnapshot snapshot(&storage, 50); EXPECT_EQ(snapshot.snapshot_meta->get_last_log_idx(), 50); - EXPECT_EQ(snapshot.snapshot_container_size, 53); + EXPECT_EQ(snapshot.snapshot_container_size, 54); for (size_t i = 50; i < 100; ++i) { addNode(storage, "/hello_" + std::to_string(i), "world_" + std::to_string(i)); } - EXPECT_EQ(storage.container.size(), 103); + EXPECT_EQ(storage.container.size(), 104); auto buf = manager.serializeSnapshotToBuffer(snapshot); manager.serializeSnapshotBufferToDisk(*buf, 50); @@ -1254,7 +1254,7 @@ TEST_P(CoordinationTest, TestStorageSnapshotMoreWrites) auto debuf = manager.deserializeSnapshotBufferFromDisk(50); auto [restored_storage, meta, _] = manager.deserializeSnapshotFromBuffer(debuf); - EXPECT_EQ(restored_storage->container.size(), 53); + EXPECT_EQ(restored_storage->container.size(), 54); for (size_t i = 0; i < 50; ++i) { EXPECT_EQ(restored_storage->container.getValue("/hello_" + std::to_string(i)).getData(), "world_" + std::to_string(i)); @@ -1293,7 +1293,7 @@ TEST_P(CoordinationTest, TestStorageSnapshotManySnapshots) auto [restored_storage, meta, _] = 
manager.restoreFromLatestSnapshot(); - EXPECT_EQ(restored_storage->container.size(), 253); + EXPECT_EQ(restored_storage->container.size(), 254); for (size_t i = 0; i < 250; ++i) { @@ -1327,16 +1327,16 @@ TEST_P(CoordinationTest, TestStorageSnapshotMode) if (i % 2 == 0) storage.container.erase("/hello_" + std::to_string(i)); } - EXPECT_EQ(storage.container.size(), 28); - EXPECT_EQ(storage.container.snapshotSizeWithVersion().first, 104); + EXPECT_EQ(storage.container.size(), 29); + EXPECT_EQ(storage.container.snapshotSizeWithVersion().first, 105); EXPECT_EQ(storage.container.snapshotSizeWithVersion().second, 1); auto buf = manager.serializeSnapshotToBuffer(snapshot); manager.serializeSnapshotBufferToDisk(*buf, 50); } EXPECT_TRUE(fs::exists("./snapshots/snapshot_50.bin" + params.extension)); - EXPECT_EQ(storage.container.size(), 28); + EXPECT_EQ(storage.container.size(), 29); storage.clearGarbageAfterSnapshot(); - EXPECT_EQ(storage.container.snapshotSizeWithVersion().first, 28); + EXPECT_EQ(storage.container.snapshotSizeWithVersion().first, 29); for (size_t i = 0; i < 50; ++i) { if (i % 2 != 0) @@ -1865,7 +1865,7 @@ TEST_P(CoordinationTest, TestStorageSnapshotDifferentCompressions) auto [restored_storage, snapshot_meta, _] = new_manager.deserializeSnapshotFromBuffer(debuf); - EXPECT_EQ(restored_storage->container.size(), 5); + EXPECT_EQ(restored_storage->container.size(), 6); EXPECT_EQ(restored_storage->container.getValue("/").getChildren().size(), 2); EXPECT_EQ(restored_storage->container.getValue("/hello").getChildren().size(), 1); EXPECT_EQ(restored_storage->container.getValue("/hello/somepath").getChildren().size(), 0); diff --git a/src/Storages/System/StorageSystemZooKeeperConnection.cpp b/src/Storages/System/StorageSystemZooKeeperConnection.cpp index 99872be6ba0..9a6a592f2c4 100644 --- a/src/Storages/System/StorageSystemZooKeeperConnection.cpp +++ b/src/Storages/System/StorageSystemZooKeeperConnection.cpp @@ -31,7 +31,7 @@ void StorageSystemZooKeeperConnection::fillData(MutableColumns & res_columns, Co res_columns[3]->insert(context->getZooKeeper()->getConnectedZooKeeperIndex()); res_columns[4]->insert(context->getZooKeeperSessionUptime()); res_columns[5]->insert(context->getZooKeeper()->expired()); - res_columns[6]->insert(static_cast(KeeperApiVersion::WITH_MULTI_READ)); + res_columns[6]->insert(0); res_columns[7]->insert(context->getZooKeeper()->getClientID()); for (const auto & elem : context->getAuxiliaryZooKeepers()) @@ -42,7 +42,7 @@ void StorageSystemZooKeeperConnection::fillData(MutableColumns & res_columns, Co res_columns[3]->insert(elem.second->getConnectedZooKeeperIndex()); res_columns[4]->insert(elem.second->getSessionUptime()); res_columns[5]->insert(elem.second->expired()); - res_columns[6]->insert(static_cast(KeeperApiVersion::WITH_MULTI_READ)); + res_columns[6]->insert(0); res_columns[7]->insert(elem.second->getClientID()); } diff --git a/tests/config/config.d/keeper_port.xml b/tests/config/config.d/keeper_port.xml index cffd325e968..7db174c5419 100644 --- a/tests/config/config.d/keeper_port.xml +++ b/tests/config/config.d/keeper_port.xml @@ -28,5 +28,9 @@ 9234 + + + 1 + diff --git a/tests/integration/test_keeper_four_word_command/test.py b/tests/integration/test_keeper_four_word_command/test.py index 2098daea5fe..adc29abb584 100644 --- a/tests/integration/test_keeper_four_word_command/test.py +++ b/tests/integration/test_keeper_four_word_command/test.py @@ -183,8 +183,8 @@ def test_cmd_mntr(started_cluster): # contains: # 10 nodes created by test # 3 nodes created by 
clickhouse "/clickhouse/task_queue/ddl" - # 1 root node, 2 keeper system nodes - assert int(result["zk_znode_count"]) == 13 + # 1 root node, 3 keeper system nodes + assert int(result["zk_znode_count"]) == 14 assert int(result["zk_watch_count"]) == 2 assert int(result["zk_ephemerals_count"]) == 2 assert int(result["zk_approximate_data_size"]) > 0 @@ -329,7 +329,7 @@ def test_cmd_srvr(started_cluster): assert int(result["Connections"]) == 1 assert int(result["Zxid"]) > 10 assert result["Mode"] == "leader" - assert result["Node count"] == "13" + assert result["Node count"] == "14" finally: destroy_zk_client(zk) @@ -369,7 +369,7 @@ def test_cmd_stat(started_cluster): assert int(result["Connections"]) == 1 assert int(result["Zxid"]) >= 10 assert result["Mode"] == "leader" - assert result["Node count"] == "13" + assert result["Node count"] == "14" # filter connection statistics cons = [n for n in data.split("\n") if "=" in n] diff --git a/tests/queries/0_stateless/02735_system_zookeeper_connection.reference b/tests/queries/0_stateless/02735_system_zookeeper_connection.reference index 1deabd88b88..55b3579f0dd 100644 --- a/tests/queries/0_stateless/02735_system_zookeeper_connection.reference +++ b/tests/queries/0_stateless/02735_system_zookeeper_connection.reference @@ -1,2 +1,2 @@ -default ::1 9181 0 0 3 +default ::1 9181 0 0 0 zookeeper2 ::1 9181 0 0 0 From 62b94073a2c79f3f336f62ad359e2789541dbdd7 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 13 Jun 2023 13:32:56 +0200 Subject: [PATCH 0318/1997] Fix black check --- tests/integration/test_storage_s3/test.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 28117d694d6..cec92222d4c 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -929,7 +929,9 @@ def test_truncate_table(started_cluster): def test_predefined_connection_configuration(started_cluster): bucket = started_cluster.minio_bucket - instance = started_cluster.instances["dummy_without_named_collections"] # type: ClickHouseInstance + instance = started_cluster.instances[ + "dummy_without_named_collections" + ] # type: ClickHouseInstance name = "test_table" instance.query("CREATE USER user") @@ -979,7 +981,7 @@ def test_predefined_connection_configuration(started_cluster): "To execute this query it's necessary to have grant USE NAMED COLLECTION ON no_collection" in error ) - instance = started_cluster.instances["dummy"] # has named collection access + instance = started_cluster.instances["dummy"] # has named collection access error = instance.query_and_get_error("SELECT * FROM s3(no_collection)") assert "There is no named collection `no_collection`" in error From 188c613c655a918d618ade00ef7f763b1601d4e5 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 13 Jun 2023 14:30:36 +0200 Subject: [PATCH 0319/1997] Update tests --- .../0_stateless/01271_show_privileges.reference | 14 +++++++------- .../02117_show_create_table_system.reference | 6 +++--- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index c78c1a540f2..13113aeb194 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -39,7 +39,7 @@ ALTER MOVE PARTITION ['ALTER MOVE PART','MOVE PARTITION','MOVE PART'] TABLE ALTE ALTER FETCH PARTITION ['ALTER FETCH 
PART','FETCH PARTITION'] TABLE ALTER TABLE ALTER FREEZE PARTITION ['FREEZE PARTITION','UNFREEZE'] TABLE ALTER TABLE ALTER DATABASE SETTINGS ['ALTER DATABASE SETTING','ALTER MODIFY DATABASE SETTING','MODIFY DATABASE SETTING'] DATABASE ALTER DATABASE -ALTER NAMED COLLECTION [] NAMED_COLLECTION NAMED COLLECTION CONTROL +ALTER NAMED COLLECTION [] NAMED_COLLECTION NAMED COLLECTION ADMIN ALTER TABLE [] \N ALTER ALTER DATABASE [] \N ALTER ALTER VIEW REFRESH ['ALTER LIVE VIEW REFRESH','REFRESH VIEW'] VIEW ALTER VIEW @@ -53,14 +53,14 @@ CREATE DICTIONARY [] DICTIONARY CREATE CREATE TEMPORARY TABLE [] GLOBAL CREATE ARBITRARY TEMPORARY TABLE CREATE ARBITRARY TEMPORARY TABLE [] GLOBAL CREATE CREATE FUNCTION [] GLOBAL CREATE -CREATE NAMED COLLECTION [] NAMED_COLLECTION NAMED COLLECTION CONTROL +CREATE NAMED COLLECTION [] NAMED_COLLECTION NAMED COLLECTION ADMIN CREATE [] \N ALL DROP DATABASE [] DATABASE DROP DROP TABLE [] TABLE DROP DROP VIEW [] VIEW DROP DROP DICTIONARY [] DICTIONARY DROP DROP FUNCTION [] GLOBAL DROP -DROP NAMED COLLECTION [] NAMED_COLLECTION NAMED COLLECTION CONTROL +DROP NAMED COLLECTION [] NAMED_COLLECTION NAMED COLLECTION ADMIN DROP [] \N ALL UNDROP TABLE [] TABLE ALL TRUNCATE ['TRUNCATE TABLE'] TABLE ALL @@ -92,10 +92,10 @@ SHOW QUOTAS ['SHOW CREATE QUOTA'] GLOBAL SHOW ACCESS SHOW SETTINGS PROFILES ['SHOW PROFILES','SHOW CREATE SETTINGS PROFILE','SHOW CREATE PROFILE'] GLOBAL SHOW ACCESS SHOW ACCESS [] \N ACCESS MANAGEMENT ACCESS MANAGEMENT [] \N ALL -SHOW NAMED COLLECTIONS ['SHOW NAMED COLLECTIONS'] NAMED_COLLECTION NAMED COLLECTION CONTROL -SHOW NAMED COLLECTIONS SECRETS ['SHOW NAMED COLLECTIONS SECRETS'] NAMED_COLLECTION NAMED COLLECTION CONTROL -USE NAMED COLLECTION ['USE NAMED COLLECTION'] NAMED_COLLECTION NAMED COLLECTION CONTROL -NAMED COLLECTION CONTROL [] NAMED_COLLECTION ALL +SHOW NAMED COLLECTIONS ['SHOW NAMED COLLECTIONS'] NAMED_COLLECTION NAMED COLLECTION ADMIN +SHOW NAMED COLLECTIONS SECRETS ['SHOW NAMED COLLECTIONS SECRETS'] NAMED_COLLECTION NAMED COLLECTION ADMIN +USE NAMED COLLECTION ['NAMED COLLECTION USAGE','NAMED COLLECTION'] NAMED_COLLECTION NAMED COLLECTION ADMIN +NAMED COLLECTION ADMIN ['NAMED COLLECTION CONTROL'] NAMED_COLLECTION ALL SYSTEM SHUTDOWN ['SYSTEM KILL','SHUTDOWN'] GLOBAL SYSTEM SYSTEM DROP DNS CACHE ['SYSTEM DROP DNS','DROP DNS CACHE','DROP DNS'] GLOBAL SYSTEM DROP CACHE SYSTEM DROP MARK CACHE ['SYSTEM DROP MARK','DROP MARK CACHE','DROP MARKS'] GLOBAL SYSTEM DROP CACHE diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index a6db15d6bbf..0e71a5ed024 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -297,7 +297,7 @@ CREATE TABLE system.grants ( `user_name` Nullable(String), `role_name` Nullable(String), - `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD 
PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'USE NAMED COLLECTION' = 96, 'NAMED COLLECTION CONTROL' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD SYMBOLS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'MEILISEARCH' = 152, 'MYSQL' = 153, 'POSTGRES' = 154, 'SQLITE' = 155, 'ODBC' = 156, 'JDBC' = 157, 'HDFS' = 158, 'S3' = 159, 'HIVE' = 160, 'AZURE' = 161, 'SOURCES' = 162, 'CLUSTER' = 163, 'ALL' = 164, 'NONE' = 165), + 
`access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'USE NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD SYMBOLS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 
133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'MEILISEARCH' = 152, 'MYSQL' = 153, 'POSTGRES' = 154, 'SQLITE' = 155, 'ODBC' = 156, 'JDBC' = 157, 'HDFS' = 158, 'S3' = 159, 'HIVE' = 160, 'AZURE' = 161, 'SOURCES' = 162, 'CLUSTER' = 163, 'ALL' = 164, 'NONE' = 165), `database` Nullable(String), `table` Nullable(String), `column` Nullable(String), @@ -581,10 +581,10 @@ ENGINE = SystemPartsColumns COMMENT 'SYSTEM TABLE is built on the fly.' CREATE TABLE system.privileges ( - `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'USE NAMED COLLECTION' = 96, 'NAMED COLLECTION CONTROL' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP 
SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD SYMBOLS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'MEILISEARCH' = 152, 'MYSQL' = 153, 'POSTGRES' = 154, 'SQLITE' = 155, 'ODBC' = 156, 'JDBC' = 157, 'HDFS' = 158, 'S3' = 159, 'HIVE' = 160, 'AZURE' = 161, 'SOURCES' = 162, 'CLUSTER' = 163, 'ALL' = 164, 'NONE' = 165), + `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 
'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'USE NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD SYMBOLS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'MEILISEARCH' = 152, 'MYSQL' = 153, 'POSTGRES' = 154, 'SQLITE' = 155, 'ODBC' = 156, 'JDBC' = 157, 'HDFS' = 158, 'S3' = 159, 'HIVE' = 160, 'AZURE' = 161, 'SOURCES' = 162, 'CLUSTER' = 163, 'ALL' = 164, 'NONE' = 165), `aliases` Array(String), `level` Nullable(Enum8('GLOBAL' = 0, 'DATABASE' = 1, 'TABLE' = 2, 'DICTIONARY' = 3, 'VIEW' = 4, 'COLUMN' = 5, 'NAMED_COLLECTION' = 6)), - `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 
'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'USE NAMED COLLECTION' = 96, 'NAMED COLLECTION CONTROL' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD SYMBOLS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'MEILISEARCH' = 152, 'MYSQL' = 153, 'POSTGRES' = 154, 'SQLITE' = 155, 'ODBC' = 156, 'JDBC' = 157, 'HDFS' = 158, 'S3' = 159, 'HIVE' = 160, 'AZURE' = 161, 'SOURCES' = 162, 'CLUSTER' = 163, 'ALL' = 164, 'NONE' = 165)) + `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR 
INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'USE NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD SYMBOLS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'MEILISEARCH' = 152, 'MYSQL' = 153, 'POSTGRES' = 154, 'SQLITE' = 155, 'ODBC' = 156, 'JDBC' = 157, 'HDFS' = 158, 'S3' = 159, 'HIVE' = 160, 'AZURE' = 161, 'SOURCES' = 162, 'CLUSTER' = 
163, 'ALL' = 164, 'NONE' = 165)) ) ENGINE = SystemPrivileges COMMENT 'SYSTEM TABLE is built on the fly.' From f4ed10c0a28b52f140f542c7ab0b21e1edf9a0c0 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Tue, 13 Jun 2023 14:44:39 +0200 Subject: [PATCH 0320/1997] Update src/Storages/StorageReplicatedMergeTree.cpp --- src/Storages/StorageReplicatedMergeTree.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index fafb3b124f2..84eae32495d 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -4751,10 +4751,10 @@ void StorageReplicatedMergeTree::read( } else { - header - = InterpreterSelectQuery(modified_query_ast, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); modified_query_ast = ClusterProxy::rewriteSelectQuery(local_context, query_info.query, table_id.database_name, table_id.table_name, /*remote_table_function_ptr*/nullptr); + header + = InterpreterSelectQuery(modified_query_ast, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); } auto cluster = local_context->getCluster(local_context->getSettingsRef().cluster_for_parallel_replicas); From 9e8ca5f5ae8c4bde3e7aab7854a59c2f1d85e472 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Tue, 13 Jun 2023 13:09:39 +0000 Subject: [PATCH 0321/1997] Add integration test check with enabled analyzer --- .github/workflows/pull_request.yml | 210 ++++++++++++++++++ tests/ci/integration_test_check.py | 2 + .../helpers/0_common_enable_analyzer.xml | 7 + tests/integration/helpers/cluster.py | 2 + 4 files changed, 221 insertions(+) create mode 100644 tests/integration/helpers/0_common_enable_analyzer.xml diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index afc08f3e637..9a39b1177cf 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -3861,6 +3861,216 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" + IntegrationTestsAnalyzerAsan0: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan, analyzer) + REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=0 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsAnalyzerAsan1: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan, analyzer) +
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=1 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsAnalyzerAsan2: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan, analyzer) + REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=2 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsAnalyzerAsan3: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan, analyzer) + REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=3 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsAnalyzerAsan4: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan, analyzer) + REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=4 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci"
+ python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsAnalyzerAsan5: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan, analyzer) + REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=5 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" IntegrationTestsTsan0: needs: [BuilderDebTsan] runs-on: [self-hosted, stress-tester] diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index 8ef6244a1c5..523b1cfaab5 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -226,6 +226,8 @@ def main(): download_all_deb_packages(check_name, reports_path, build_path) my_env = get_env_for_runner(build_path, repo_path, result_path, work_path) + if "analyzer" in check_name.lower(): + my_env["USE_NEW_ANALYZER"] = "1" json_path = os.path.join(work_path, "params.json") with open(json_path, "w", encoding="utf-8") as json_params: diff --git a/tests/integration/helpers/0_common_enable_analyzer.xml b/tests/integration/helpers/0_common_enable_analyzer.xml new file mode 100644 index 00000000000..aa374364ef0 --- /dev/null +++ b/tests/integration/helpers/0_common_enable_analyzer.xml @@ -0,0 +1,7 @@ + <clickhouse> + <profiles> + <default> + <allow_experimental_analyzer>1</allow_experimental_analyzer> + </default> + </profiles> + </clickhouse> diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index f57ebf40e54..6d66a539cdc 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -4187,6 +4187,8 @@ class ClickHouseInstance: ) write_embedded_config("0_common_instance_users.xml", users_d_dir) + if os.environ.get('USE_NEW_ANALYZER') is not None: + write_embedded_config("0_common_enable_analyzer.xml", users_d_dir) if len(self.custom_dictionaries_paths): write_embedded_config("0_common_enable_dictionaries.xml", self.config_d_dir) From 5d541332482ae8fc37bacd4f8db340b599779c92 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 13 Jun 2023 13:31:14 +0000 Subject: [PATCH 0322/1997] Automatic style fix --- tests/integration/helpers/cluster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 6d66a539cdc..8c54a37cf60 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -4187,7 +4187,7 @@ class ClickHouseInstance: ) write_embedded_config("0_common_instance_users.xml", users_d_dir) - if os.environ.get('USE_NEW_ANALYZER') is not None: + if os.environ.get("USE_NEW_ANALYZER") is not None: write_embedded_config("0_common_enable_analyzer.xml", users_d_dir) if
len(self.custom_dictionaries_paths): From ed4455e2915968c664a2498f6d9503f358e6109e Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Tue, 13 Jun 2023 15:44:30 +0200 Subject: [PATCH 0323/1997] Update tests/integration/helpers/cluster.py From 7057e0e25fd55f8a9cb9708da223883aaa8fe902 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Tue, 13 Jun 2023 14:46:26 +0000 Subject: [PATCH 0324/1997] fix test --- .../02751_ip_types_aggregate_functions_states.sql.j2 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02751_ip_types_aggregate_functions_states.sql.j2 b/tests/queries/0_stateless/02751_ip_types_aggregate_functions_states.sql.j2 index 708eeab7724..7daff5a690f 100644 --- a/tests/queries/0_stateless/02751_ip_types_aggregate_functions_states.sql.j2 +++ b/tests/queries/0_stateless/02751_ip_types_aggregate_functions_states.sql.j2 @@ -3,8 +3,8 @@ {# this test checks backward compatibility of aggregate functions States against IPv4, IPv6 types #} -{% set ip4_generator = "select number::UInt32::IPv4 ip from numbers(999999999,50) order by ip" %} -{% set ip6_generator = "SELECT toIPv6(IPv6NumToString(toFixedString(reinterpretAsFixedString(number)||reinterpretAsFixedString(number), 16))) AS ip FROM numbers(1010011101, 50) order by ip" %} +{% set ip4_generator = "select num::UInt32::IPv4 ip from (select arrayJoin(range(999999999, number)) as num from numbers(999999999,50)) order by ip" %} +{% set ip6_generator = "SELECT toIPv6(IPv6NumToString(toFixedString(reinterpretAsFixedString(num)||reinterpretAsFixedString(num), 16))) AS ip FROM (select arrayJoin(range(1010011101, number)) as num from numbers(1010011101,50)) order by ip" %} {% set ip_generators = {'ip4': ip4_generator, 'ip6': ip6_generator} %} From 332893344d3cbca205b0d99671cd4c8ba26ec2da Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Tue, 13 Jun 2023 16:50:10 +0200 Subject: [PATCH 0325/1997] Updated lock for accessing azure blob storage iterator --- src/Storages/StorageAzureBlob.cpp | 6 +----- tests/integration/test_storage_azure_blob_storage/test.py | 1 - 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index 3ee176a68b7..b9d59f04001 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -882,6 +882,7 @@ StorageAzureBlobSource::Iterator::Iterator( RelativePathWithMetadata StorageAzureBlobSource::Iterator::next() { + std::lock_guard lock(next_mutex); if (is_finished) return {}; @@ -900,7 +901,6 @@ RelativePathWithMetadata StorageAzureBlobSource::Iterator::next() { bool need_new_batch = false; { - std::lock_guard lock(next_mutex); need_new_batch = !blobs_with_metadata || index >= blobs_with_metadata->size(); } @@ -945,7 +945,6 @@ RelativePathWithMetadata StorageAzureBlobSource::Iterator::next() VirtualColumnUtils::filterBlockWithQuery(query, block, getContext(), filter_ast); const auto & idxs = typeid_cast<const ColumnUInt64 &>(*block.getByName("_idx").column); - std::lock_guard lock(next_mutex); blob_path_with_globs.reset(); blob_path_with_globs.emplace(); for (UInt64 idx : idxs.getData()) @@ -961,7 +960,6 @@ RelativePathWithMetadata StorageAzureBlobSource::Iterator::next() if (outer_blobs) outer_blobs->insert(outer_blobs->end(), new_batch.begin(), new_batch.end()); - std::lock_guard lock(next_mutex); blobs_with_metadata = std::move(new_batch); for (const auto & [_, info] : *blobs_with_metadata) total_size.fetch_add(info.size_bytes, std::memory_order_relaxed); @@ -969,8 +967,6 @@
RelativePathWithMetadata StorageAzureBlobSource::Iterator::next() } size_t current_index = index.fetch_add(1, std::memory_order_relaxed); - - std::lock_guard lock(next_mutex); return (*blobs_with_metadata)[current_index]; } } diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index f9d337b6d86..bb25ac4b029 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -551,7 +551,6 @@ def test_schema_inference_no_globs_tf(cluster): "499500\t2890\t332833500\ttest_schema_inference_no_globs_tf.csv\tcont/test_schema_inference_no_globs_tf.csv" ] - def test_schema_inference_from_globs_tf(cluster): node = cluster.instances["node"] unique_prefix = random.randint(1, 10000) From 3fd9911efe37500094532d07a1f6e0eaddaca6dd Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Tue, 13 Jun 2023 14:58:55 +0000 Subject: [PATCH 0326/1997] fix test reference --- ...es_aggregate_functions_states.reference.j2 | 164 +++++++++--------- 1 file changed, 82 insertions(+), 82 deletions(-) diff --git a/tests/queries/0_stateless/02751_ip_types_aggregate_functions_states.reference.j2 b/tests/queries/0_stateless/02751_ip_types_aggregate_functions_states.reference.j2 index 481dd723b66..90f98cf63fd 100644 --- a/tests/queries/0_stateless/02751_ip_types_aggregate_functions_states.reference.j2 +++ b/tests/queries/0_stateless/02751_ip_types_aggregate_functions_states.reference.j2 @@ -2,18 +2,18 @@ Row 1: ────── minState: 12535288824949910799 -maxState: 18210943739258811465 +maxState: 15790547582231549775 first_valueState: 12535288824949910799 -last_valueState: 18210943739258811465 -topKState: 1594227852744382511 -groupArrayState: 8025417272361615478 -groupUniqArrayState: 919082878249747568 -uniqState: 14828781561416784358 -uniqExactState: 11391659146320471795 -uniqCombinedState: 9631896280254268221 -uniqCombined64State: 5156097536649078816 -uniqHLL12State: 9696624347265201099 -uniqThetaState: 10464560810701154023 +last_valueState: 15790547582231549775 +topKState: 4906125994014190470 +groupArrayState: 9875990674330641453 +groupUniqArrayState: 15788623094139372318 +uniqState: 13857072740564896081 +uniqExactState: 2764760571052675772 +uniqCombinedState: 927481020821507998 +uniqCombined64State: 11588178464185397904 +uniqHLL12State: 592773541433144605 +uniqThetaState: 12573391720108828030 ----- hash / State / ip6 ----- Row 1: ────── @@ -21,31 +21,31 @@ minState: 9428555662807296659 maxState: 18253481702148601156 first_valueState: 9428555662807296659 last_valueState: 18253481702148601156 -topKState: 8045294331733869941 -groupArrayState: 10451014709837753966 -groupUniqArrayState: 1954028114836070615 -uniqState: 14986562136250471284 -uniqExactState: 10032843621916709112 -uniqCombinedState: 6379274083567016598 -uniqCombined64State: 6379274083567016598 -uniqHLL12State: 9181286681186915812 -uniqThetaState: 2415188383468008881 +topKState: 4649040466816645853 +groupArrayState: 15334593495826890008 +groupUniqArrayState: 18179202420787216155 +uniqState: 1113473461736161202 +uniqExactState: 17291302630176581193 +uniqCombinedState: 7689865507370303115 +uniqCombined64State: 7689865507370303115 +uniqHLL12State: 12630756003012135681 +uniqThetaState: 11768246996604802350 ----- finalizeAggregation / State / ip4 ----- Row 1: ────── min: 59.154.201.255 -max: 59.154.202.48 +max: 59.154.202.47 first_value: 59.154.201.255 -last_value: 59.154.202.48 -topK: 
['59.154.202.48','59.154.202.5','59.154.202.26','59.154.202.25','59.154.202.24','59.154.202.23','59.154.202.22','59.154.202.21','59.154.202.27','59.154.202.19'] -groupArray: ['59.154.201.255','59.154.202.0','59.154.202.1','59.154.202.2','59.154.202.3','59.154.202.4','59.154.202.5','59.154.202.6','59.154.202.7','59.154.202.8','59.154.202.9','59.154.202.10','59.154.202.11','59.154.202.12','59.154.202.13','59.154.202.14','59.154.202.15','59.154.202.16','59.154.202.17','59.154.202.18','59.154.202.19','59.154.202.20','59.154.202.21','59.154.202.22','59.154.202.23','59.154.202.24','59.154.202.25','59.154.202.26','59.154.202.27','59.154.202.28','59.154.202.29','59.154.202.30','59.154.202.31','59.154.202.32','59.154.202.33','59.154.202.34','59.154.202.35','59.154.202.36','59.154.202.37','59.154.202.38','59.154.202.39','59.154.202.40','59.154.202.41','59.154.202.42','59.154.202.43','59.154.202.44','59.154.202.45','59.154.202.46','59.154.202.47','59.154.202.48'] -groupUniqArray: ['59.154.202.28','59.154.202.45','59.154.202.35','59.154.202.2','59.154.202.42','59.154.202.1','59.154.202.4','59.154.202.15','59.154.202.22','59.154.202.20','59.154.202.12','59.154.202.3','59.154.202.40','59.154.202.43','59.154.202.26','59.154.202.37','59.154.202.7','59.154.202.36','59.154.202.32','59.154.202.47','59.154.202.17','59.154.202.11','59.154.201.255','59.154.202.0','59.154.202.14','59.154.202.25','59.154.202.6','59.154.202.30','59.154.202.16','59.154.202.21','59.154.202.23','59.154.202.38','59.154.202.44','59.154.202.39','59.154.202.48','59.154.202.41','59.154.202.27','59.154.202.33','59.154.202.19','59.154.202.5','59.154.202.9','59.154.202.18','59.154.202.24','59.154.202.34','59.154.202.46','59.154.202.8','59.154.202.29','59.154.202.10','59.154.202.13','59.154.202.31'] -uniq: 50 -uniqExact: 50 -uniqCombined: 50 -uniqCombined64: 50 -uniqHLL12: 49 -uniqTheta: 50 +last_value: 59.154.202.47 +topK: ['59.154.201.255','59.154.202.0','59.154.202.1','59.154.202.2','59.154.202.3','59.154.202.4','59.154.202.5','59.154.202.6','59.154.202.7','59.154.202.8'] +groupArray: 
['59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.15
4.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.15
4.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.20
2.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.23','59.154.202.23','59.154.202.23','59.154.202.23','59.154.202.23','59.154.202.23','59.154.202.23','59.154.202.23','59.154.202.23','59.154.202.23','59.154.202.23','59.154.202.23','59.154.202.2
3','59.154.202.23','59.154.202.23','59.154.202.23','59.154.202.23','59.154.202.23','59.154.202.23','59.154.202.23','59.154.202.23','59.154.202.23','59.154.202.23','59.154.202.23','59.154.202.23','59.154.202.24','59.154.202.24','59.154.202.24','59.154.202.24','59.154.202.24','59.154.202.24','59.154.202.24','59.154.202.24','59.154.202.24','59.154.202.24','59.154.202.24','59.154.202.24','59.154.202.24','59.154.202.24','59.154.202.24','59.154.202.24','59.154.202.24','59.154.202.24','59.154.202.24','59.154.202.24','59.154.202.24','59.154.202.24','59.154.202.24','59.154.202.24','59.154.202.25','59.154.202.25','59.154.202.25','59.154.202.25','59.154.202.25','59.154.202.25','59.154.202.25','59.154.202.25','59.154.202.25','59.154.202.25','59.154.202.25','59.154.202.25','59.154.202.25','59.154.202.25','59.154.202.25','59.154.202.25','59.154.202.25','59.154.202.25','59.154.202.25','59.154.202.25','59.154.202.25','59.154.202.25','59.154.202.25','59.154.202.26','59.154.202.26','59.154.202.26','59.154.202.26','59.154.202.26','59.154.202.26','59.154.202.26','59.154.202.26','59.154.202.26','59.154.202.26','59.154.202.26','59.154.202.26','59.154.202.26','59.154.202.26','59.154.202.26','59.154.202.26','59.154.202.26','59.154.202.26','59.154.202.26','59.154.202.26','59.154.202.26','59.154.202.26','59.154.202.27','59.154.202.27','59.154.202.27','59.154.202.27','59.154.202.27','59.154.202.27','59.154.202.27','59.154.202.27','59.154.202.27','59.154.202.27','59.154.202.27','59.154.202.27','59.154.202.27','59.154.202.27','59.154.202.27','59.154.202.27','59.154.202.27','59.154.202.27','59.154.202.27','59.154.202.27','59.154.202.27','59.154.202.28','59.154.202.28','59.154.202.28','59.154.202.28','59.154.202.28','59.154.202.28','59.154.202.28','59.154.202.28','59.154.202.28','59.154.202.28','59.154.202.28','59.154.202.28','59.154.202.28','59.154.202.28','59.154.202.28','59.154.202.28','59.154.202.28','59.154.202.28','59.154.202.28','59.154.202.28','59.154.202.29','59.154.202.29','59.154.202.29','59.154.202.29','59.154.202.29','59.154.202.29','59.154.202.29','59.154.202.29','59.154.202.29','59.154.202.29','59.154.202.29','59.154.202.29','59.154.202.29','59.154.202.29','59.154.202.29','59.154.202.29','59.154.202.29','59.154.202.29','59.154.202.29','59.154.202.30','59.154.202.30','59.154.202.30','59.154.202.30','59.154.202.30','59.154.202.30','59.154.202.30','59.154.202.30','59.154.202.30','59.154.202.30','59.154.202.30','59.154.202.30','59.154.202.30','59.154.202.30','59.154.202.30','59.154.202.30','59.154.202.30','59.154.202.30','59.154.202.31','59.154.202.31','59.154.202.31','59.154.202.31','59.154.202.31','59.154.202.31','59.154.202.31','59.154.202.31','59.154.202.31','59.154.202.31','59.154.202.31','59.154.202.31','59.154.202.31','59.154.202.31','59.154.202.31','59.154.202.31','59.154.202.31','59.154.202.32','59.154.202.32','59.154.202.32','59.154.202.32','59.154.202.32','59.154.202.32','59.154.202.32','59.154.202.32','59.154.202.32','59.154.202.32','59.154.202.32','59.154.202.32','59.154.202.32','59.154.202.32','59.154.202.32','59.154.202.32','59.154.202.33','59.154.202.33','59.154.202.33','59.154.202.33','59.154.202.33','59.154.202.33','59.154.202.33','59.154.202.33','59.154.202.33','59.154.202.33','59.154.202.33','59.154.202.33','59.154.202.33','59.154.202.33','59.154.202.33','59.154.202.34','59.154.202.34','59.154.202.34','59.154.202.34','59.154.202.34','59.154.202.34','59.154.202.34','59.154.202.34','59.154.202.34','59.154.202.34','59.154.202.34','59.154.202.34','59.154.202.34','59.154.202.34','59.154.202.35',
'59.154.202.35','59.154.202.35','59.154.202.35','59.154.202.35','59.154.202.35','59.154.202.35','59.154.202.35','59.154.202.35','59.154.202.35','59.154.202.35','59.154.202.35','59.154.202.35','59.154.202.36','59.154.202.36','59.154.202.36','59.154.202.36','59.154.202.36','59.154.202.36','59.154.202.36','59.154.202.36','59.154.202.36','59.154.202.36','59.154.202.36','59.154.202.36','59.154.202.37','59.154.202.37','59.154.202.37','59.154.202.37','59.154.202.37','59.154.202.37','59.154.202.37','59.154.202.37','59.154.202.37','59.154.202.37','59.154.202.37','59.154.202.38','59.154.202.38','59.154.202.38','59.154.202.38','59.154.202.38','59.154.202.38','59.154.202.38','59.154.202.38','59.154.202.38','59.154.202.38','59.154.202.39','59.154.202.39','59.154.202.39','59.154.202.39','59.154.202.39','59.154.202.39','59.154.202.39','59.154.202.39','59.154.202.39','59.154.202.40','59.154.202.40','59.154.202.40','59.154.202.40','59.154.202.40','59.154.202.40','59.154.202.40','59.154.202.40','59.154.202.41','59.154.202.41','59.154.202.41','59.154.202.41','59.154.202.41','59.154.202.41','59.154.202.41','59.154.202.42','59.154.202.42','59.154.202.42','59.154.202.42','59.154.202.42','59.154.202.42','59.154.202.43','59.154.202.43','59.154.202.43','59.154.202.43','59.154.202.43','59.154.202.44','59.154.202.44','59.154.202.44','59.154.202.44','59.154.202.45','59.154.202.45','59.154.202.45','59.154.202.46','59.154.202.46','59.154.202.47'] +groupUniqArray: ['59.154.202.28','59.154.202.45','59.154.202.35','59.154.202.2','59.154.202.42','59.154.202.1','59.154.202.4','59.154.202.15','59.154.202.22','59.154.202.20','59.154.202.12','59.154.202.3','59.154.202.40','59.154.202.43','59.154.202.26','59.154.202.37','59.154.202.7','59.154.202.36','59.154.202.32','59.154.202.47','59.154.202.17','59.154.202.11','59.154.201.255','59.154.202.0','59.154.202.14','59.154.202.25','59.154.202.6','59.154.202.30','59.154.202.16','59.154.202.21','59.154.202.23','59.154.202.38','59.154.202.44','59.154.202.39','59.154.202.41','59.154.202.27','59.154.202.33','59.154.202.19','59.154.202.5','59.154.202.9','59.154.202.18','59.154.202.24','59.154.202.34','59.154.202.46','59.154.202.8','59.154.202.29','59.154.202.10','59.154.202.13','59.154.202.31'] +uniq: 49 +uniqExact: 49 +uniqCombined: 49 +uniqCombined64: 49 +uniqHLL12: 48 +uniqTheta: 49 ----- finalizeAggregation / State / ip6 ----- Row 1: ────── @@ -53,31 +53,31 @@ min: 8c:333c::8c:333c:0:0 max: ff8b:333c::ff8b:333c:0:0 first_value: 8c:333c::8c:333c:0:0 last_value: ff8b:333c::ff8b:333c:0:0 -topK: ['ff8b:333c::ff8b:333c:0:0','68c:333c::68c:333c:0:0','e98b:333c::e98b:333c:0:0','e88b:333c::e88b:333c:0:0','e78b:333c::e78b:333c:0:0','e68b:333c::e68b:333c:0:0','e58b:333c::e58b:333c:0:0','e48b:333c::e48b:333c:0:0','ea8b:333c::ea8b:333c:0:0','e28b:333c::e28b:333c:0:0'] -groupArray: 
['8c:333c::8c:333c:0:0','18c:333c::18c:333c:0:0','28c:333c::28c:333c:0:0','38c:333c::38c:333c:0:0','48c:333c::48c:333c:0:0','58c:333c::58c:333c:0:0','68c:333c::68c:333c:0:0','78c:333c::78c:333c:0:0','88c:333c::88c:333c:0:0','98c:333c::98c:333c:0:0','a8c:333c::a8c:333c:0:0','b8c:333c::b8c:333c:0:0','c8c:333c::c8c:333c:0:0','d8c:333c::d8c:333c:0:0','e8c:333c::e8c:333c:0:0','dd8b:333c::dd8b:333c:0:0','de8b:333c::de8b:333c:0:0','df8b:333c::df8b:333c:0:0','e08b:333c::e08b:333c:0:0','e18b:333c::e18b:333c:0:0','e28b:333c::e28b:333c:0:0','e38b:333c::e38b:333c:0:0','e48b:333c::e48b:333c:0:0','e58b:333c::e58b:333c:0:0','e68b:333c::e68b:333c:0:0','e78b:333c::e78b:333c:0:0','e88b:333c::e88b:333c:0:0','e98b:333c::e98b:333c:0:0','ea8b:333c::ea8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','f08b:333c::f08b:333c:0:0','f18b:333c::f18b:333c:0:0','f28b:333c::f28b:333c:0:0','f38b:333c::f38b:333c:0:0','f48b:333c::f48b:333c:0:0','f58b:333c::f58b:333c:0:0','f68b:333c::f68b:333c:0:0','f78b:333c::f78b:333c:0:0','f88b:333c::f88b:333c:0:0','f98b:333c::f98b:333c:0:0','fa8b:333c::fa8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','fc8b:333c::fc8b:333c:0:0','fd8b:333c::fd8b:333c:0:0','fe8b:333c::fe8b:333c:0:0','ff8b:333c::ff8b:333c:0:0'] -groupUniqArray: ['58c:333c::58c:333c:0:0','f78b:333c::f78b:333c:0:0','f38b:333c::f38b:333c:0:0','18c:333c::18c:333c:0:0','e78b:333c::e78b:333c:0:0','e38b:333c::e38b:333c:0:0','e48b:333c::e48b:333c:0:0','e08b:333c::e08b:333c:0:0','df8b:333c::df8b:333c:0:0','f48b:333c::f48b:333c:0:0','68c:333c::68c:333c:0:0','28c:333c::28c:333c:0:0','f08b:333c::f08b:333c:0:0','fa8b:333c::fa8b:333c:0:0','88c:333c::88c:333c:0:0','c8c:333c::c8c:333c:0:0','fe8b:333c::fe8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','e98b:333c::e98b:333c:0:0','ed8b:333c::ed8b:333c:0:0','b8c:333c::b8c:333c:0:0','f98b:333c::f98b:333c:0:0','fd8b:333c::fd8b:333c:0:0','de8b:333c::de8b:333c:0:0','f58b:333c::f58b:333c:0:0','78c:333c::78c:333c:0:0','38c:333c::38c:333c:0:0','f18b:333c::f18b:333c:0:0','e58b:333c::e58b:333c:0:0','e18b:333c::e18b:333c:0:0','e68b:333c::e68b:333c:0:0','e28b:333c::e28b:333c:0:0','48c:333c::48c:333c:0:0','dd8b:333c::dd8b:333c:0:0','f68b:333c::f68b:333c:0:0','f28b:333c::f28b:333c:0:0','8c:333c::8c:333c:0:0','a8c:333c::a8c:333c:0:0','f88b:333c::f88b:333c:0:0','fc8b:333c::fc8b:333c:0:0','e8c:333c::e8c:333c:0:0','e88b:333c::e88b:333c:0:0','ec8b:333c::ec8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','98c:333c::98c:333c:0:0','d8c:333c::d8c:333c:0:0','ff8b:333c::ff8b:333c:0:0'] -uniq: 50 -uniqExact: 50 -uniqCombined: 50 -uniqCombined64: 50 -uniqHLL12: 50 -uniqTheta: 50 +topK: ['dd8b:333c::dd8b:333c:0:0','de8b:333c::de8b:333c:0:0','df8b:333c::df8b:333c:0:0','e08b:333c::e08b:333c:0:0','e18b:333c::e18b:333c:0:0','e28b:333c::e28b:333c:0:0','e38b:333c::e38b:333c:0:0','e48b:333c::e48b:333c:0:0','e58b:333c::e58b:333c:0:0','e68b:333c::e68b:333c:0:0'] +groupArray: 
['8c:333c::8c:333c:0:0','8c:333c::8c:333c:0:0','8c:333c::8c:333c:0:0','8c:333c::8c:333c:0:0','8c:333c::8c:333c:0:0','8c:333c::8c:333c:0:0','8c:333c::8c:333c:0:0','8c:333c::8c:333c:0:0','8c:333c::8c:333c:0:0','8c:333c::8c:333c:0:0','8c:333c::8c:333c:0:0','8c:333c::8c:333c:0:0','8c:333c::8c:333c:0:0','8c:333c::8c:333c:0:0','18c:333c::18c:333c:0:0','18c:333c::18c:333c:0:0','18c:333c::18c:333c:0:0','18c:333c::18c:333c:0:0','18c:333c::18c:333c:0:0','18c:333c::18c:333c:0:0','18c:333c::18c:333c:0:0','18c:333c::18c:333c:0:0','18c:333c::18c:333c:0:0','18c:333c::18c:333c:0:0','18c:333c::18c:333c:0:0','18c:333c::18c:333c:0:0','18c:333c::18c:333c:0:0','28c:333c::28c:333c:0:0','28c:333c::28c:333c:0:0','28c:333c::28c:333c:0:0','28c:333c::28c:333c:0:0','28c:333c::28c:333c:0:0','28c:333c::28c:333c:0:0','28c:333c::28c:333c:0:0','28c:333c::28c:333c:0:0','28c:333c::28c:333c:0:0','28c:333c::28c:333c:0:0','28c:333c::28c:333c:0:0','28c:333c::28c:333c:0:0','38c:333c::38c:333c:0:0','38c:333c::38c:333c:0:0','38c:333c::38c:333c:0:0','38c:333c::38c:333c:0:0','38c:333c::38c:333c:0:0','38c:333c::38c:333c:0:0','38c:333c::38c:333c:0:0','38c:333c::38c:333c:0:0','38c:333c::38c:333c:0:0','38c:333c::38c:333c:0:0','38c:333c::38c:333c:0:0','48c:333c::48c:333c:0:0','48c:333c::48c:333c:0:0','48c:333c::48c:333c:0:0','48c:333c::48c:333c:0:0','48c:333c::48c:333c:0:0','48c:333c::48c:333c:0:0','48c:333c::48c:333c:0:0','48c:333c::48c:333c:0:0','48c:333c::48c:333c:0:0','48c:333c::48c:333c:0:0','58c:333c::58c:333c:0:0','58c:333c::58c:333c:0:0','58c:333c::58c:333c:0:0','58c:333c::58c:333c:0:0','58c:333c::58c:333c:0:0','58c:333c::58c:333c:0:0','58c:333c::58c:333c:0:0','58c:333c::58c:333c:0:0','58c:333c::58c:333c:0:0','68c:333c::68c:333c:0:0','68c:333c::68c:333c:0:0','68c:333c::68c:333c:0:0','68c:333c::68c:333c:0:0','68c:333c::68c:333c:0:0','68c:333c::68c:333c:0:0','68c:333c::68c:333c:0:0','68c:333c::68c:333c:0:0','78c:333c::78c:333c:0:0','78c:333c::78c:333c:0:0','78c:333c::78c:333c:0:0','78c:333c::78c:333c:0:0','78c:333c::78c:333c:0:0','78c:333c::78c:333c:0:0','78c:333c::78c:333c:0:0','88c:333c::88c:333c:0:0','88c:333c::88c:333c:0:0','88c:333c::88c:333c:0:0','88c:333c::88c:333c:0:0','88c:333c::88c:333c:0:0','88c:333c::88c:333c:0:0','98c:333c::98c:333c:0:0','98c:333c::98c:333c:0:0','98c:333c::98c:333c:0:0','98c:333c::98c:333c:0:0','98c:333c::98c:333c:0:0','a8c:333c::a8c:333c:0:0','a8c:333c::a8c:333c:0:0','a8c:333c::a8c:333c:0:0','a8c:333c::a8c:333c:0:0','b8c:333c::b8c:333c:0:0','b8c:333c::b8c:333c:0:0','b8c:333c::b8c:333c:0:0','c8c:333c::c8c:333c:0:0','c8c:333c::c8c:333c:0:0','d8c:333c::d8c:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::
dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e0
8b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c
:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::
e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea
8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c
:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::
f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f68b:333c::f68b:333c:0:0','f68b:333c::f68b:333c:0:0','f68b:333c::f68b:333c:0:0','f68b:333c::f68b:333c:0:0','f68b:333c::f68b:333c:0:0','f68b:333c::f68b:333c:0:0','f68b:333c::f68b:333c:0:0','f68b:333c::f68b:333c:0:0','f68b:333c::f68b:333c:0:0','f68b:333c::f68b:333c:0:0','f68b:333c::f68b:333c:0:0','f68b:333c::f68b:333c:0:0','f68b:333c::f68b:333c:0:0','f68b:333c::f68b:333c:0:0','f68b:333c::f68b:333c:0:0','f68b:333c::f68b:333c:0:0','f68b:333c::f68b:333c:0:0','f68b:333c::f68b:333c:0:0','f68b:333c::f68b:333c:0:0','f68b:333c::f68b:333c:0:0','f68b:333c::f68b:333c:0:0','f68b:333c::f68b:333c:0:0','f68b:333c::f68b:333c:0:0','f68b:333c::f68b:333c:0:0','f78b:333c::f78b:333c:0:0','f78b:333c::f78b:333c:0:0','f78b:333c::f78b:333c:0:0','f78b:333c::f78b:333c:0:0','f78b:333c::f78b:333c:0:0','f78b:333c::f78b:333c:0:0','f78b:333c::f78b:333c:0:0','f78b:333c::f78b:333c:0:0','f7
8b:333c::f78b:333c:0:0','f78b:333c::f78b:333c:0:0','f78b:333c::f78b:333c:0:0','f78b:333c::f78b:333c:0:0','f78b:333c::f78b:333c:0:0','f78b:333c::f78b:333c:0:0','f78b:333c::f78b:333c:0:0','f78b:333c::f78b:333c:0:0','f78b:333c::f78b:333c:0:0','f78b:333c::f78b:333c:0:0','f78b:333c::f78b:333c:0:0','f78b:333c::f78b:333c:0:0','f78b:333c::f78b:333c:0:0','f78b:333c::f78b:333c:0:0','f78b:333c::f78b:333c:0:0','f88b:333c::f88b:333c:0:0','f88b:333c::f88b:333c:0:0','f88b:333c::f88b:333c:0:0','f88b:333c::f88b:333c:0:0','f88b:333c::f88b:333c:0:0','f88b:333c::f88b:333c:0:0','f88b:333c::f88b:333c:0:0','f88b:333c::f88b:333c:0:0','f88b:333c::f88b:333c:0:0','f88b:333c::f88b:333c:0:0','f88b:333c::f88b:333c:0:0','f88b:333c::f88b:333c:0:0','f88b:333c::f88b:333c:0:0','f88b:333c::f88b:333c:0:0','f88b:333c::f88b:333c:0:0','f88b:333c::f88b:333c:0:0','f88b:333c::f88b:333c:0:0','f88b:333c::f88b:333c:0:0','f88b:333c::f88b:333c:0:0','f88b:333c::f88b:333c:0:0','f88b:333c::f88b:333c:0:0','f88b:333c::f88b:333c:0:0','f98b:333c::f98b:333c:0:0','f98b:333c::f98b:333c:0:0','f98b:333c::f98b:333c:0:0','f98b:333c::f98b:333c:0:0','f98b:333c::f98b:333c:0:0','f98b:333c::f98b:333c:0:0','f98b:333c::f98b:333c:0:0','f98b:333c::f98b:333c:0:0','f98b:333c::f98b:333c:0:0','f98b:333c::f98b:333c:0:0','f98b:333c::f98b:333c:0:0','f98b:333c::f98b:333c:0:0','f98b:333c::f98b:333c:0:0','f98b:333c::f98b:333c:0:0','f98b:333c::f98b:333c:0:0','f98b:333c::f98b:333c:0:0','f98b:333c::f98b:333c:0:0','f98b:333c::f98b:333c:0:0','f98b:333c::f98b:333c:0:0','f98b:333c::f98b:333c:0:0','f98b:333c::f98b:333c:0:0','fa8b:333c::fa8b:333c:0:0','fa8b:333c::fa8b:333c:0:0','fa8b:333c::fa8b:333c:0:0','fa8b:333c::fa8b:333c:0:0','fa8b:333c::fa8b:333c:0:0','fa8b:333c::fa8b:333c:0:0','fa8b:333c::fa8b:333c:0:0','fa8b:333c::fa8b:333c:0:0','fa8b:333c::fa8b:333c:0:0','fa8b:333c::fa8b:333c:0:0','fa8b:333c::fa8b:333c:0:0','fa8b:333c::fa8b:333c:0:0','fa8b:333c::fa8b:333c:0:0','fa8b:333c::fa8b:333c:0:0','fa8b:333c::fa8b:333c:0:0','fa8b:333c::fa8b:333c:0:0','fa8b:333c::fa8b:333c:0:0','fa8b:333c::fa8b:333c:0:0','fa8b:333c::fa8b:333c:0:0','fa8b:333c::fa8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','fc8b:333c::fc8b:333c:0:0','fc8b:333c::fc8b:333c:0:0','fc8b:333c::fc8b:333c:0:0','fc8b:333c::fc8b:333c:0:0','fc8b:333c::fc8b:333c:0:0','fc8b:333c::fc8b:333c:0:0','fc8b:333c::fc8b:333c:0:0','fc8b:333c::fc8b:333c:0:0','fc8b:333c::fc8b:333c:0:0','fc8b:333c::fc8b:333c:0:0','fc8b:333c::fc8b:333c:0:0','fc8b:333c::fc8b:333c:0:0','fc8b:333c::fc8b:333c:0:0','fc8b:333c::fc8b:333c:0:0','fc8b:333c::fc8b:333c:0:0','fc8b:333c::fc8b:333c:0:0','fc8b:333c::fc8b:333c:0:0','fc8b:333c::fc8b:333c:0:0','fd8b:333c::fd8b:333c:0:0','fd8b:333c::fd8b:333c:0:0','fd8b:333c::fd8b:333c:0:0','fd8b:333c::fd8b:333c:0:0','fd8b:333c::fd8b:333c:0:0','fd8b:333c::fd8b:333c:0:0','fd8b:333c::fd8b:333c:0:0','fd8b:333c::fd8b:333c:0:0','fd8b:333c::fd8b:333c:0:0','fd8b:333c::fd8b:333c:0:0','fd8b:333c::fd8b:333c:0:0','fd8b:333c::fd8b:333c:0:0','fd8b:333c::fd8b:333c:0:0','fd8b:333c::fd8b:333c:0:0','fd8b:333c::fd8b:333c:0:0','fd8b:333c::fd8b:333c:0:0','fd8b:333c::fd8b:333c
:0:0','fe8b:333c::fe8b:333c:0:0','fe8b:333c::fe8b:333c:0:0','fe8b:333c::fe8b:333c:0:0','fe8b:333c::fe8b:333c:0:0','fe8b:333c::fe8b:333c:0:0','fe8b:333c::fe8b:333c:0:0','fe8b:333c::fe8b:333c:0:0','fe8b:333c::fe8b:333c:0:0','fe8b:333c::fe8b:333c:0:0','fe8b:333c::fe8b:333c:0:0','fe8b:333c::fe8b:333c:0:0','fe8b:333c::fe8b:333c:0:0','fe8b:333c::fe8b:333c:0:0','fe8b:333c::fe8b:333c:0:0','fe8b:333c::fe8b:333c:0:0','fe8b:333c::fe8b:333c:0:0','ff8b:333c::ff8b:333c:0:0','ff8b:333c::ff8b:333c:0:0','ff8b:333c::ff8b:333c:0:0','ff8b:333c::ff8b:333c:0:0','ff8b:333c::ff8b:333c:0:0','ff8b:333c::ff8b:333c:0:0','ff8b:333c::ff8b:333c:0:0','ff8b:333c::ff8b:333c:0:0','ff8b:333c::ff8b:333c:0:0','ff8b:333c::ff8b:333c:0:0','ff8b:333c::ff8b:333c:0:0','ff8b:333c::ff8b:333c:0:0','ff8b:333c::ff8b:333c:0:0','ff8b:333c::ff8b:333c:0:0','ff8b:333c::ff8b:333c:0:0'] +groupUniqArray: ['58c:333c::58c:333c:0:0','f78b:333c::f78b:333c:0:0','f38b:333c::f38b:333c:0:0','18c:333c::18c:333c:0:0','e78b:333c::e78b:333c:0:0','e38b:333c::e38b:333c:0:0','e48b:333c::e48b:333c:0:0','e08b:333c::e08b:333c:0:0','df8b:333c::df8b:333c:0:0','f48b:333c::f48b:333c:0:0','68c:333c::68c:333c:0:0','28c:333c::28c:333c:0:0','f08b:333c::f08b:333c:0:0','fa8b:333c::fa8b:333c:0:0','88c:333c::88c:333c:0:0','c8c:333c::c8c:333c:0:0','fe8b:333c::fe8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','e98b:333c::e98b:333c:0:0','ed8b:333c::ed8b:333c:0:0','b8c:333c::b8c:333c:0:0','f98b:333c::f98b:333c:0:0','fd8b:333c::fd8b:333c:0:0','de8b:333c::de8b:333c:0:0','f58b:333c::f58b:333c:0:0','78c:333c::78c:333c:0:0','38c:333c::38c:333c:0:0','f18b:333c::f18b:333c:0:0','e58b:333c::e58b:333c:0:0','e18b:333c::e18b:333c:0:0','e68b:333c::e68b:333c:0:0','e28b:333c::e28b:333c:0:0','48c:333c::48c:333c:0:0','dd8b:333c::dd8b:333c:0:0','f68b:333c::f68b:333c:0:0','f28b:333c::f28b:333c:0:0','8c:333c::8c:333c:0:0','a8c:333c::a8c:333c:0:0','f88b:333c::f88b:333c:0:0','fc8b:333c::fc8b:333c:0:0','e88b:333c::e88b:333c:0:0','ec8b:333c::ec8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','98c:333c::98c:333c:0:0','d8c:333c::d8c:333c:0:0','ff8b:333c::ff8b:333c:0:0'] +uniq: 49 +uniqExact: 49 +uniqCombined: 49 +uniqCombined64: 49 +uniqHLL12: 49 +uniqTheta: 49 ----- hash / IfState / ip4 ----- Row 1: ────── minIfState: 12535288824949910799 -maxIfState: 18210943739258811465 +maxIfState: 15790547582231549775 first_valueIfState: 12535288824949910799 -last_valueIfState: 18210943739258811465 -topKIfState: 1594227852744382511 -groupArrayIfState: 8025417272361615478 -groupUniqArrayIfState: 919082878249747568 -uniqIfState: 14828781561416784358 -uniqExactIfState: 11391659146320471795 -uniqCombinedIfState: 9631896280254268221 -uniqCombined64IfState: 5156097536649078816 -uniqHLL12IfState: 9696624347265201099 -uniqThetaIfState: 10464560810701154023 +last_valueIfState: 15790547582231549775 +topKIfState: 4906125994014190470 +groupArrayIfState: 9875990674330641453 +groupUniqArrayIfState: 15788623094139372318 +uniqIfState: 13857072740564896081 +uniqExactIfState: 2764760571052675772 +uniqCombinedIfState: 927481020821507998 +uniqCombined64IfState: 11588178464185397904 +uniqHLL12IfState: 592773541433144605 +uniqThetaIfState: 12573391720108828030 ----- hash / IfState / ip6 ----- Row 1: ────── @@ -85,31 +85,31 @@ minIfState: 9428555662807296659 maxIfState: 18253481702148601156 first_valueIfState: 9428555662807296659 last_valueIfState: 18253481702148601156 -topKIfState: 8045294331733869941 -groupArrayIfState: 10451014709837753966 -groupUniqArrayIfState: 
1954028114836070615 -uniqIfState: 14986562136250471284 -uniqExactIfState: 10032843621916709112 -uniqCombinedIfState: 6379274083567016598 -uniqCombined64IfState: 6379274083567016598 -uniqHLL12IfState: 9181286681186915812 -uniqThetaIfState: 2415188383468008881 +topKIfState: 4649040466816645853 +groupArrayIfState: 15334593495826890008 +groupUniqArrayIfState: 18179202420787216155 +uniqIfState: 1113473461736161202 +uniqExactIfState: 17291302630176581193 +uniqCombinedIfState: 7689865507370303115 +uniqCombined64IfState: 7689865507370303115 +uniqHLL12IfState: 12630756003012135681 +uniqThetaIfState: 11768246996604802350 ----- finalizeAggregation / IfState / ip4 ----- Row 1: ────── min: 59.154.201.255 -max: 59.154.202.48 +max: 59.154.202.47 first_value: 59.154.201.255 -last_value: 59.154.202.48 -topK: ['59.154.202.48','59.154.202.5','59.154.202.26','59.154.202.25','59.154.202.24','59.154.202.23','59.154.202.22','59.154.202.21','59.154.202.27','59.154.202.19'] -groupArray: ['59.154.201.255','59.154.202.0','59.154.202.1','59.154.202.2','59.154.202.3','59.154.202.4','59.154.202.5','59.154.202.6','59.154.202.7','59.154.202.8','59.154.202.9','59.154.202.10','59.154.202.11','59.154.202.12','59.154.202.13','59.154.202.14','59.154.202.15','59.154.202.16','59.154.202.17','59.154.202.18','59.154.202.19','59.154.202.20','59.154.202.21','59.154.202.22','59.154.202.23','59.154.202.24','59.154.202.25','59.154.202.26','59.154.202.27','59.154.202.28','59.154.202.29','59.154.202.30','59.154.202.31','59.154.202.32','59.154.202.33','59.154.202.34','59.154.202.35','59.154.202.36','59.154.202.37','59.154.202.38','59.154.202.39','59.154.202.40','59.154.202.41','59.154.202.42','59.154.202.43','59.154.202.44','59.154.202.45','59.154.202.46','59.154.202.47','59.154.202.48'] -groupUniqArray: ['59.154.202.28','59.154.202.45','59.154.202.35','59.154.202.2','59.154.202.42','59.154.202.1','59.154.202.4','59.154.202.15','59.154.202.22','59.154.202.20','59.154.202.12','59.154.202.3','59.154.202.40','59.154.202.43','59.154.202.26','59.154.202.37','59.154.202.7','59.154.202.36','59.154.202.32','59.154.202.47','59.154.202.17','59.154.202.11','59.154.201.255','59.154.202.0','59.154.202.14','59.154.202.25','59.154.202.6','59.154.202.30','59.154.202.16','59.154.202.21','59.154.202.23','59.154.202.38','59.154.202.44','59.154.202.39','59.154.202.48','59.154.202.41','59.154.202.27','59.154.202.33','59.154.202.19','59.154.202.5','59.154.202.9','59.154.202.18','59.154.202.24','59.154.202.34','59.154.202.46','59.154.202.8','59.154.202.29','59.154.202.10','59.154.202.13','59.154.202.31'] -uniq: 50 -uniqExact: 50 -uniqCombined: 50 -uniqCombined64: 50 -uniqHLL12: 49 -uniqTheta: 50 +last_value: 59.154.202.47 +topK: ['59.154.201.255','59.154.202.0','59.154.202.1','59.154.202.2','59.154.202.3','59.154.202.4','59.154.202.5','59.154.202.6','59.154.202.7','59.154.202.8'] +groupArray: 
['59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.201.255','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.0','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.1','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.2','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.15
4.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.3','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.4','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.5','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.6','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.7','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.8','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.15
4.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.9','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.10','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.11','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.12','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.13','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.14','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.20
2.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.15','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.16','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.17','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.18','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.19','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.20','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.21','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.22','59.154.202.23','59.154.202.23','59.154.202.23','59.154.202.23','59.154.202.23','59.154.202.23','59.154.202.23','59.154.202.23','59.154.202.23','59.154.202.23','59.154.202.23','59.154.202.23','59.154.202.2
3','59.154.202.23','59.154.202.23','59.154.202.23','59.154.202.23','59.154.202.23','59.154.202.23','59.154.202.23','59.154.202.23','59.154.202.23','59.154.202.23','59.154.202.23','59.154.202.23','59.154.202.24','59.154.202.24','59.154.202.24','59.154.202.24','59.154.202.24','59.154.202.24','59.154.202.24','59.154.202.24','59.154.202.24','59.154.202.24','59.154.202.24','59.154.202.24','59.154.202.24','59.154.202.24','59.154.202.24','59.154.202.24','59.154.202.24','59.154.202.24','59.154.202.24','59.154.202.24','59.154.202.24','59.154.202.24','59.154.202.24','59.154.202.24','59.154.202.25','59.154.202.25','59.154.202.25','59.154.202.25','59.154.202.25','59.154.202.25','59.154.202.25','59.154.202.25','59.154.202.25','59.154.202.25','59.154.202.25','59.154.202.25','59.154.202.25','59.154.202.25','59.154.202.25','59.154.202.25','59.154.202.25','59.154.202.25','59.154.202.25','59.154.202.25','59.154.202.25','59.154.202.25','59.154.202.25','59.154.202.26','59.154.202.26','59.154.202.26','59.154.202.26','59.154.202.26','59.154.202.26','59.154.202.26','59.154.202.26','59.154.202.26','59.154.202.26','59.154.202.26','59.154.202.26','59.154.202.26','59.154.202.26','59.154.202.26','59.154.202.26','59.154.202.26','59.154.202.26','59.154.202.26','59.154.202.26','59.154.202.26','59.154.202.26','59.154.202.27','59.154.202.27','59.154.202.27','59.154.202.27','59.154.202.27','59.154.202.27','59.154.202.27','59.154.202.27','59.154.202.27','59.154.202.27','59.154.202.27','59.154.202.27','59.154.202.27','59.154.202.27','59.154.202.27','59.154.202.27','59.154.202.27','59.154.202.27','59.154.202.27','59.154.202.27','59.154.202.27','59.154.202.28','59.154.202.28','59.154.202.28','59.154.202.28','59.154.202.28','59.154.202.28','59.154.202.28','59.154.202.28','59.154.202.28','59.154.202.28','59.154.202.28','59.154.202.28','59.154.202.28','59.154.202.28','59.154.202.28','59.154.202.28','59.154.202.28','59.154.202.28','59.154.202.28','59.154.202.28','59.154.202.29','59.154.202.29','59.154.202.29','59.154.202.29','59.154.202.29','59.154.202.29','59.154.202.29','59.154.202.29','59.154.202.29','59.154.202.29','59.154.202.29','59.154.202.29','59.154.202.29','59.154.202.29','59.154.202.29','59.154.202.29','59.154.202.29','59.154.202.29','59.154.202.29','59.154.202.30','59.154.202.30','59.154.202.30','59.154.202.30','59.154.202.30','59.154.202.30','59.154.202.30','59.154.202.30','59.154.202.30','59.154.202.30','59.154.202.30','59.154.202.30','59.154.202.30','59.154.202.30','59.154.202.30','59.154.202.30','59.154.202.30','59.154.202.30','59.154.202.31','59.154.202.31','59.154.202.31','59.154.202.31','59.154.202.31','59.154.202.31','59.154.202.31','59.154.202.31','59.154.202.31','59.154.202.31','59.154.202.31','59.154.202.31','59.154.202.31','59.154.202.31','59.154.202.31','59.154.202.31','59.154.202.31','59.154.202.32','59.154.202.32','59.154.202.32','59.154.202.32','59.154.202.32','59.154.202.32','59.154.202.32','59.154.202.32','59.154.202.32','59.154.202.32','59.154.202.32','59.154.202.32','59.154.202.32','59.154.202.32','59.154.202.32','59.154.202.32','59.154.202.33','59.154.202.33','59.154.202.33','59.154.202.33','59.154.202.33','59.154.202.33','59.154.202.33','59.154.202.33','59.154.202.33','59.154.202.33','59.154.202.33','59.154.202.33','59.154.202.33','59.154.202.33','59.154.202.33','59.154.202.34','59.154.202.34','59.154.202.34','59.154.202.34','59.154.202.34','59.154.202.34','59.154.202.34','59.154.202.34','59.154.202.34','59.154.202.34','59.154.202.34','59.154.202.34','59.154.202.34','59.154.202.34','59.154.202.35',
'59.154.202.35','59.154.202.35','59.154.202.35','59.154.202.35','59.154.202.35','59.154.202.35','59.154.202.35','59.154.202.35','59.154.202.35','59.154.202.35','59.154.202.35','59.154.202.35','59.154.202.36','59.154.202.36','59.154.202.36','59.154.202.36','59.154.202.36','59.154.202.36','59.154.202.36','59.154.202.36','59.154.202.36','59.154.202.36','59.154.202.36','59.154.202.36','59.154.202.37','59.154.202.37','59.154.202.37','59.154.202.37','59.154.202.37','59.154.202.37','59.154.202.37','59.154.202.37','59.154.202.37','59.154.202.37','59.154.202.37','59.154.202.38','59.154.202.38','59.154.202.38','59.154.202.38','59.154.202.38','59.154.202.38','59.154.202.38','59.154.202.38','59.154.202.38','59.154.202.38','59.154.202.39','59.154.202.39','59.154.202.39','59.154.202.39','59.154.202.39','59.154.202.39','59.154.202.39','59.154.202.39','59.154.202.39','59.154.202.40','59.154.202.40','59.154.202.40','59.154.202.40','59.154.202.40','59.154.202.40','59.154.202.40','59.154.202.40','59.154.202.41','59.154.202.41','59.154.202.41','59.154.202.41','59.154.202.41','59.154.202.41','59.154.202.41','59.154.202.42','59.154.202.42','59.154.202.42','59.154.202.42','59.154.202.42','59.154.202.42','59.154.202.43','59.154.202.43','59.154.202.43','59.154.202.43','59.154.202.43','59.154.202.44','59.154.202.44','59.154.202.44','59.154.202.44','59.154.202.45','59.154.202.45','59.154.202.45','59.154.202.46','59.154.202.46','59.154.202.47'] +groupUniqArray: ['59.154.202.28','59.154.202.45','59.154.202.35','59.154.202.2','59.154.202.42','59.154.202.1','59.154.202.4','59.154.202.15','59.154.202.22','59.154.202.20','59.154.202.12','59.154.202.3','59.154.202.40','59.154.202.43','59.154.202.26','59.154.202.37','59.154.202.7','59.154.202.36','59.154.202.32','59.154.202.47','59.154.202.17','59.154.202.11','59.154.201.255','59.154.202.0','59.154.202.14','59.154.202.25','59.154.202.6','59.154.202.30','59.154.202.16','59.154.202.21','59.154.202.23','59.154.202.38','59.154.202.44','59.154.202.39','59.154.202.41','59.154.202.27','59.154.202.33','59.154.202.19','59.154.202.5','59.154.202.9','59.154.202.18','59.154.202.24','59.154.202.34','59.154.202.46','59.154.202.8','59.154.202.29','59.154.202.10','59.154.202.13','59.154.202.31'] +uniq: 49 +uniqExact: 49 +uniqCombined: 49 +uniqCombined64: 49 +uniqHLL12: 48 +uniqTheta: 49 ----- finalizeAggregation / IfState / ip6 ----- Row 1: ────── @@ -117,20 +117,20 @@ min: 8c:333c::8c:333c:0:0 max: ff8b:333c::ff8b:333c:0:0 first_value: 8c:333c::8c:333c:0:0 last_value: ff8b:333c::ff8b:333c:0:0 -topK: ['ff8b:333c::ff8b:333c:0:0','68c:333c::68c:333c:0:0','e98b:333c::e98b:333c:0:0','e88b:333c::e88b:333c:0:0','e78b:333c::e78b:333c:0:0','e68b:333c::e68b:333c:0:0','e58b:333c::e58b:333c:0:0','e48b:333c::e48b:333c:0:0','ea8b:333c::ea8b:333c:0:0','e28b:333c::e28b:333c:0:0'] -groupArray: 
['8c:333c::8c:333c:0:0','18c:333c::18c:333c:0:0','28c:333c::28c:333c:0:0','38c:333c::38c:333c:0:0','48c:333c::48c:333c:0:0','58c:333c::58c:333c:0:0','68c:333c::68c:333c:0:0','78c:333c::78c:333c:0:0','88c:333c::88c:333c:0:0','98c:333c::98c:333c:0:0','a8c:333c::a8c:333c:0:0','b8c:333c::b8c:333c:0:0','c8c:333c::c8c:333c:0:0','d8c:333c::d8c:333c:0:0','e8c:333c::e8c:333c:0:0','dd8b:333c::dd8b:333c:0:0','de8b:333c::de8b:333c:0:0','df8b:333c::df8b:333c:0:0','e08b:333c::e08b:333c:0:0','e18b:333c::e18b:333c:0:0','e28b:333c::e28b:333c:0:0','e38b:333c::e38b:333c:0:0','e48b:333c::e48b:333c:0:0','e58b:333c::e58b:333c:0:0','e68b:333c::e68b:333c:0:0','e78b:333c::e78b:333c:0:0','e88b:333c::e88b:333c:0:0','e98b:333c::e98b:333c:0:0','ea8b:333c::ea8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','f08b:333c::f08b:333c:0:0','f18b:333c::f18b:333c:0:0','f28b:333c::f28b:333c:0:0','f38b:333c::f38b:333c:0:0','f48b:333c::f48b:333c:0:0','f58b:333c::f58b:333c:0:0','f68b:333c::f68b:333c:0:0','f78b:333c::f78b:333c:0:0','f88b:333c::f88b:333c:0:0','f98b:333c::f98b:333c:0:0','fa8b:333c::fa8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','fc8b:333c::fc8b:333c:0:0','fd8b:333c::fd8b:333c:0:0','fe8b:333c::fe8b:333c:0:0','ff8b:333c::ff8b:333c:0:0'] -groupUniqArray: ['58c:333c::58c:333c:0:0','f78b:333c::f78b:333c:0:0','f38b:333c::f38b:333c:0:0','18c:333c::18c:333c:0:0','e78b:333c::e78b:333c:0:0','e38b:333c::e38b:333c:0:0','e48b:333c::e48b:333c:0:0','e08b:333c::e08b:333c:0:0','df8b:333c::df8b:333c:0:0','f48b:333c::f48b:333c:0:0','68c:333c::68c:333c:0:0','28c:333c::28c:333c:0:0','f08b:333c::f08b:333c:0:0','fa8b:333c::fa8b:333c:0:0','88c:333c::88c:333c:0:0','c8c:333c::c8c:333c:0:0','fe8b:333c::fe8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','e98b:333c::e98b:333c:0:0','ed8b:333c::ed8b:333c:0:0','b8c:333c::b8c:333c:0:0','f98b:333c::f98b:333c:0:0','fd8b:333c::fd8b:333c:0:0','de8b:333c::de8b:333c:0:0','f58b:333c::f58b:333c:0:0','78c:333c::78c:333c:0:0','38c:333c::38c:333c:0:0','f18b:333c::f18b:333c:0:0','e58b:333c::e58b:333c:0:0','e18b:333c::e18b:333c:0:0','e68b:333c::e68b:333c:0:0','e28b:333c::e28b:333c:0:0','48c:333c::48c:333c:0:0','dd8b:333c::dd8b:333c:0:0','f68b:333c::f68b:333c:0:0','f28b:333c::f28b:333c:0:0','8c:333c::8c:333c:0:0','a8c:333c::a8c:333c:0:0','f88b:333c::f88b:333c:0:0','fc8b:333c::fc8b:333c:0:0','e8c:333c::e8c:333c:0:0','e88b:333c::e88b:333c:0:0','ec8b:333c::ec8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','98c:333c::98c:333c:0:0','d8c:333c::d8c:333c:0:0','ff8b:333c::ff8b:333c:0:0'] -uniq: 50 -uniqExact: 50 -uniqCombined: 50 -uniqCombined64: 50 -uniqHLL12: 50 -uniqTheta: 50 +topK: ['dd8b:333c::dd8b:333c:0:0','de8b:333c::de8b:333c:0:0','df8b:333c::df8b:333c:0:0','e08b:333c::e08b:333c:0:0','e18b:333c::e18b:333c:0:0','e28b:333c::e28b:333c:0:0','e38b:333c::e38b:333c:0:0','e48b:333c::e48b:333c:0:0','e58b:333c::e58b:333c:0:0','e68b:333c::e68b:333c:0:0'] +groupArray: 
['8c:333c::8c:333c:0:0','8c:333c::8c:333c:0:0','8c:333c::8c:333c:0:0','8c:333c::8c:333c:0:0','8c:333c::8c:333c:0:0','8c:333c::8c:333c:0:0','8c:333c::8c:333c:0:0','8c:333c::8c:333c:0:0','8c:333c::8c:333c:0:0','8c:333c::8c:333c:0:0','8c:333c::8c:333c:0:0','8c:333c::8c:333c:0:0','8c:333c::8c:333c:0:0','8c:333c::8c:333c:0:0','18c:333c::18c:333c:0:0','18c:333c::18c:333c:0:0','18c:333c::18c:333c:0:0','18c:333c::18c:333c:0:0','18c:333c::18c:333c:0:0','18c:333c::18c:333c:0:0','18c:333c::18c:333c:0:0','18c:333c::18c:333c:0:0','18c:333c::18c:333c:0:0','18c:333c::18c:333c:0:0','18c:333c::18c:333c:0:0','18c:333c::18c:333c:0:0','18c:333c::18c:333c:0:0','28c:333c::28c:333c:0:0','28c:333c::28c:333c:0:0','28c:333c::28c:333c:0:0','28c:333c::28c:333c:0:0','28c:333c::28c:333c:0:0','28c:333c::28c:333c:0:0','28c:333c::28c:333c:0:0','28c:333c::28c:333c:0:0','28c:333c::28c:333c:0:0','28c:333c::28c:333c:0:0','28c:333c::28c:333c:0:0','28c:333c::28c:333c:0:0','38c:333c::38c:333c:0:0','38c:333c::38c:333c:0:0','38c:333c::38c:333c:0:0','38c:333c::38c:333c:0:0','38c:333c::38c:333c:0:0','38c:333c::38c:333c:0:0','38c:333c::38c:333c:0:0','38c:333c::38c:333c:0:0','38c:333c::38c:333c:0:0','38c:333c::38c:333c:0:0','38c:333c::38c:333c:0:0','48c:333c::48c:333c:0:0','48c:333c::48c:333c:0:0','48c:333c::48c:333c:0:0','48c:333c::48c:333c:0:0','48c:333c::48c:333c:0:0','48c:333c::48c:333c:0:0','48c:333c::48c:333c:0:0','48c:333c::48c:333c:0:0','48c:333c::48c:333c:0:0','48c:333c::48c:333c:0:0','58c:333c::58c:333c:0:0','58c:333c::58c:333c:0:0','58c:333c::58c:333c:0:0','58c:333c::58c:333c:0:0','58c:333c::58c:333c:0:0','58c:333c::58c:333c:0:0','58c:333c::58c:333c:0:0','58c:333c::58c:333c:0:0','58c:333c::58c:333c:0:0','68c:333c::68c:333c:0:0','68c:333c::68c:333c:0:0','68c:333c::68c:333c:0:0','68c:333c::68c:333c:0:0','68c:333c::68c:333c:0:0','68c:333c::68c:333c:0:0','68c:333c::68c:333c:0:0','68c:333c::68c:333c:0:0','78c:333c::78c:333c:0:0','78c:333c::78c:333c:0:0','78c:333c::78c:333c:0:0','78c:333c::78c:333c:0:0','78c:333c::78c:333c:0:0','78c:333c::78c:333c:0:0','78c:333c::78c:333c:0:0','88c:333c::88c:333c:0:0','88c:333c::88c:333c:0:0','88c:333c::88c:333c:0:0','88c:333c::88c:333c:0:0','88c:333c::88c:333c:0:0','88c:333c::88c:333c:0:0','98c:333c::98c:333c:0:0','98c:333c::98c:333c:0:0','98c:333c::98c:333c:0:0','98c:333c::98c:333c:0:0','98c:333c::98c:333c:0:0','a8c:333c::a8c:333c:0:0','a8c:333c::a8c:333c:0:0','a8c:333c::a8c:333c:0:0','a8c:333c::a8c:333c:0:0','b8c:333c::b8c:333c:0:0','b8c:333c::b8c:333c:0:0','b8c:333c::b8c:333c:0:0','c8c:333c::c8c:333c:0:0','c8c:333c::c8c:333c:0:0','d8c:333c::d8c:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::
dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','dd8b:333c::dd8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','de8b:333c::de8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','df8b:333c::df8b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e0
8b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e08b:333c::e08b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e18b:333c::e18b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e28b:333c::e28b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c
:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e38b:333c::e38b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e48b:333c::e48b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e58b:333c::e58b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::
e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e68b:333c::e68b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e78b:333c::e78b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e88b:333c::e88b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','e98b:333c::e98b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea
8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ec8b:333c::ec8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c
:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ed8b:333c::ed8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f08b:333c::f08b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f18b:333c::f18b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::
f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f28b:333c::f28b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f38b:333c::f38b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f48b:333c::f48b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f58b:333c::f58b:333c:0:0','f68b:333c::f68b:333c:0:0','f68b:333c::f68b:333c:0:0','f68b:333c::f68b:333c:0:0','f68b:333c::f68b:333c:0:0','f68b:333c::f68b:333c:0:0','f68b:333c::f68b:333c:0:0','f68b:333c::f68b:333c:0:0','f68b:333c::f68b:333c:0:0','f68b:333c::f68b:333c:0:0','f68b:333c::f68b:333c:0:0','f68b:333c::f68b:333c:0:0','f68b:333c::f68b:333c:0:0','f68b:333c::f68b:333c:0:0','f68b:333c::f68b:333c:0:0','f68b:333c::f68b:333c:0:0','f68b:333c::f68b:333c:0:0','f68b:333c::f68b:333c:0:0','f68b:333c::f68b:333c:0:0','f68b:333c::f68b:333c:0:0','f68b:333c::f68b:333c:0:0','f68b:333c::f68b:333c:0:0','f68b:333c::f68b:333c:0:0','f68b:333c::f68b:333c:0:0','f68b:333c::f68b:333c:0:0','f78b:333c::f78b:333c:0:0','f78b:333c::f78b:333c:0:0','f78b:333c::f78b:333c:0:0','f78b:333c::f78b:333c:0:0','f78b:333c::f78b:333c:0:0','f78b:333c::f78b:333c:0:0','f78b:333c::f78b:333c:0:0','f78b:333c::f78b:333c:0:0','f7
8b:333c::f78b:333c:0:0','f78b:333c::f78b:333c:0:0','f78b:333c::f78b:333c:0:0','f78b:333c::f78b:333c:0:0','f78b:333c::f78b:333c:0:0','f78b:333c::f78b:333c:0:0','f78b:333c::f78b:333c:0:0','f78b:333c::f78b:333c:0:0','f78b:333c::f78b:333c:0:0','f78b:333c::f78b:333c:0:0','f78b:333c::f78b:333c:0:0','f78b:333c::f78b:333c:0:0','f78b:333c::f78b:333c:0:0','f78b:333c::f78b:333c:0:0','f78b:333c::f78b:333c:0:0','f88b:333c::f88b:333c:0:0','f88b:333c::f88b:333c:0:0','f88b:333c::f88b:333c:0:0','f88b:333c::f88b:333c:0:0','f88b:333c::f88b:333c:0:0','f88b:333c::f88b:333c:0:0','f88b:333c::f88b:333c:0:0','f88b:333c::f88b:333c:0:0','f88b:333c::f88b:333c:0:0','f88b:333c::f88b:333c:0:0','f88b:333c::f88b:333c:0:0','f88b:333c::f88b:333c:0:0','f88b:333c::f88b:333c:0:0','f88b:333c::f88b:333c:0:0','f88b:333c::f88b:333c:0:0','f88b:333c::f88b:333c:0:0','f88b:333c::f88b:333c:0:0','f88b:333c::f88b:333c:0:0','f88b:333c::f88b:333c:0:0','f88b:333c::f88b:333c:0:0','f88b:333c::f88b:333c:0:0','f88b:333c::f88b:333c:0:0','f98b:333c::f98b:333c:0:0','f98b:333c::f98b:333c:0:0','f98b:333c::f98b:333c:0:0','f98b:333c::f98b:333c:0:0','f98b:333c::f98b:333c:0:0','f98b:333c::f98b:333c:0:0','f98b:333c::f98b:333c:0:0','f98b:333c::f98b:333c:0:0','f98b:333c::f98b:333c:0:0','f98b:333c::f98b:333c:0:0','f98b:333c::f98b:333c:0:0','f98b:333c::f98b:333c:0:0','f98b:333c::f98b:333c:0:0','f98b:333c::f98b:333c:0:0','f98b:333c::f98b:333c:0:0','f98b:333c::f98b:333c:0:0','f98b:333c::f98b:333c:0:0','f98b:333c::f98b:333c:0:0','f98b:333c::f98b:333c:0:0','f98b:333c::f98b:333c:0:0','f98b:333c::f98b:333c:0:0','fa8b:333c::fa8b:333c:0:0','fa8b:333c::fa8b:333c:0:0','fa8b:333c::fa8b:333c:0:0','fa8b:333c::fa8b:333c:0:0','fa8b:333c::fa8b:333c:0:0','fa8b:333c::fa8b:333c:0:0','fa8b:333c::fa8b:333c:0:0','fa8b:333c::fa8b:333c:0:0','fa8b:333c::fa8b:333c:0:0','fa8b:333c::fa8b:333c:0:0','fa8b:333c::fa8b:333c:0:0','fa8b:333c::fa8b:333c:0:0','fa8b:333c::fa8b:333c:0:0','fa8b:333c::fa8b:333c:0:0','fa8b:333c::fa8b:333c:0:0','fa8b:333c::fa8b:333c:0:0','fa8b:333c::fa8b:333c:0:0','fa8b:333c::fa8b:333c:0:0','fa8b:333c::fa8b:333c:0:0','fa8b:333c::fa8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','fc8b:333c::fc8b:333c:0:0','fc8b:333c::fc8b:333c:0:0','fc8b:333c::fc8b:333c:0:0','fc8b:333c::fc8b:333c:0:0','fc8b:333c::fc8b:333c:0:0','fc8b:333c::fc8b:333c:0:0','fc8b:333c::fc8b:333c:0:0','fc8b:333c::fc8b:333c:0:0','fc8b:333c::fc8b:333c:0:0','fc8b:333c::fc8b:333c:0:0','fc8b:333c::fc8b:333c:0:0','fc8b:333c::fc8b:333c:0:0','fc8b:333c::fc8b:333c:0:0','fc8b:333c::fc8b:333c:0:0','fc8b:333c::fc8b:333c:0:0','fc8b:333c::fc8b:333c:0:0','fc8b:333c::fc8b:333c:0:0','fc8b:333c::fc8b:333c:0:0','fd8b:333c::fd8b:333c:0:0','fd8b:333c::fd8b:333c:0:0','fd8b:333c::fd8b:333c:0:0','fd8b:333c::fd8b:333c:0:0','fd8b:333c::fd8b:333c:0:0','fd8b:333c::fd8b:333c:0:0','fd8b:333c::fd8b:333c:0:0','fd8b:333c::fd8b:333c:0:0','fd8b:333c::fd8b:333c:0:0','fd8b:333c::fd8b:333c:0:0','fd8b:333c::fd8b:333c:0:0','fd8b:333c::fd8b:333c:0:0','fd8b:333c::fd8b:333c:0:0','fd8b:333c::fd8b:333c:0:0','fd8b:333c::fd8b:333c:0:0','fd8b:333c::fd8b:333c:0:0','fd8b:333c::fd8b:333c
:0:0','fe8b:333c::fe8b:333c:0:0','fe8b:333c::fe8b:333c:0:0','fe8b:333c::fe8b:333c:0:0','fe8b:333c::fe8b:333c:0:0','fe8b:333c::fe8b:333c:0:0','fe8b:333c::fe8b:333c:0:0','fe8b:333c::fe8b:333c:0:0','fe8b:333c::fe8b:333c:0:0','fe8b:333c::fe8b:333c:0:0','fe8b:333c::fe8b:333c:0:0','fe8b:333c::fe8b:333c:0:0','fe8b:333c::fe8b:333c:0:0','fe8b:333c::fe8b:333c:0:0','fe8b:333c::fe8b:333c:0:0','fe8b:333c::fe8b:333c:0:0','fe8b:333c::fe8b:333c:0:0','ff8b:333c::ff8b:333c:0:0','ff8b:333c::ff8b:333c:0:0','ff8b:333c::ff8b:333c:0:0','ff8b:333c::ff8b:333c:0:0','ff8b:333c::ff8b:333c:0:0','ff8b:333c::ff8b:333c:0:0','ff8b:333c::ff8b:333c:0:0','ff8b:333c::ff8b:333c:0:0','ff8b:333c::ff8b:333c:0:0','ff8b:333c::ff8b:333c:0:0','ff8b:333c::ff8b:333c:0:0','ff8b:333c::ff8b:333c:0:0','ff8b:333c::ff8b:333c:0:0','ff8b:333c::ff8b:333c:0:0','ff8b:333c::ff8b:333c:0:0'] +groupUniqArray: ['58c:333c::58c:333c:0:0','f78b:333c::f78b:333c:0:0','f38b:333c::f38b:333c:0:0','18c:333c::18c:333c:0:0','e78b:333c::e78b:333c:0:0','e38b:333c::e38b:333c:0:0','e48b:333c::e48b:333c:0:0','e08b:333c::e08b:333c:0:0','df8b:333c::df8b:333c:0:0','f48b:333c::f48b:333c:0:0','68c:333c::68c:333c:0:0','28c:333c::28c:333c:0:0','f08b:333c::f08b:333c:0:0','fa8b:333c::fa8b:333c:0:0','88c:333c::88c:333c:0:0','c8c:333c::c8c:333c:0:0','fe8b:333c::fe8b:333c:0:0','ea8b:333c::ea8b:333c:0:0','ee8b:333c::ee8b:333c:0:0','e98b:333c::e98b:333c:0:0','ed8b:333c::ed8b:333c:0:0','b8c:333c::b8c:333c:0:0','f98b:333c::f98b:333c:0:0','fd8b:333c::fd8b:333c:0:0','de8b:333c::de8b:333c:0:0','f58b:333c::f58b:333c:0:0','78c:333c::78c:333c:0:0','38c:333c::38c:333c:0:0','f18b:333c::f18b:333c:0:0','e58b:333c::e58b:333c:0:0','e18b:333c::e18b:333c:0:0','e68b:333c::e68b:333c:0:0','e28b:333c::e28b:333c:0:0','48c:333c::48c:333c:0:0','dd8b:333c::dd8b:333c:0:0','f68b:333c::f68b:333c:0:0','f28b:333c::f28b:333c:0:0','8c:333c::8c:333c:0:0','a8c:333c::a8c:333c:0:0','f88b:333c::f88b:333c:0:0','fc8b:333c::fc8b:333c:0:0','e88b:333c::e88b:333c:0:0','ec8b:333c::ec8b:333c:0:0','eb8b:333c::eb8b:333c:0:0','ef8b:333c::ef8b:333c:0:0','fb8b:333c::fb8b:333c:0:0','98c:333c::98c:333c:0:0','d8c:333c::d8c:333c:0:0','ff8b:333c::ff8b:333c:0:0'] +uniq: 49 +uniqExact: 49 +uniqCombined: 49 +uniqCombined64: 49 +uniqHLL12: 49 +uniqTheta: 49 ----- Arg / hash / State / ip4 ----- Row 1: ────── argMinState: 13774589216353164344 -argMaxState: 9177365218111013695 +argMaxState: 1131101663917664667 ----- Arg / hash / State / ip6 ----- Row 1: ────── @@ -140,7 +140,7 @@ argMaxState: 16598449636475438091 Row 1: ────── argMinState: 59.154.201.255 -argMaxState: 59.154.202.48 +argMaxState: 59.154.202.47 ----- Arg / finalizeAggregation / State / ip6 ----- Row 1: ────── From 478bad32376ff2787e83107c8274f4b743c569ac Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 13 Jun 2023 15:04:11 +0000 Subject: [PATCH 0327/1997] Automatic style fix --- tests/integration/test_storage_azure_blob_storage/test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index bb25ac4b029..f9d337b6d86 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -551,6 +551,7 @@ def test_schema_inference_no_globs_tf(cluster): "499500\t2890\t332833500\ttest_schema_inference_no_globs_tf.csv\tcont/test_schema_inference_no_globs_tf.csv" ] + def test_schema_inference_from_globs_tf(cluster): node = cluster.instances["node"] unique_prefix = random.randint(1, 10000) 
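For context on the .reference hunks above: they update expected results for the uniq* family over contiguous IPv4 and IPv6 ranges. The exact and combined variants all agree on 49 distinct values, while uniqHLL12 reports 48 for the IPv4 block, since it is a fixed-size HyperLogLog sketch and small errors are expected. A minimal standalone sketch of that pattern, reusing the test's own generator style (the offset 999999999 is 59.154.201.255 as an IPv4; the row count of 49 is an assumption matching the visible range up to 59.154.202.47, not a value taken from the patch):

    SELECT
        uniqExact(ip)    AS exact,     -- always exact
        uniq(ip)         AS adaptive,  -- adaptive sampling; exact at this scale
        uniqCombined(ip) AS combined,
        uniqHLL12(ip)    AS hll        -- HyperLogLog sketch; approximate (48 vs. 49 above)
    FROM
    (
        SELECT number::UInt32::IPv4 AS ip
        FROM numbers(999999999, 49)   -- assumed count; yields 59.154.201.255 .. 59.154.202.47
    );
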
From bc4724490239ea34b4924da17ddabbb1f90e2bee Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Tue, 13 Jun 2023 17:06:40 +0200 Subject: [PATCH 0328/1997] Updated tests for CI checks --- tests/integration/test_storage_azure_blob_storage/test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index f9d337b6d86..8ab5d416b03 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -558,6 +558,7 @@ def test_schema_inference_from_globs_tf(cluster): node = cluster.instances["node"] # type: ClickHouseInstance table_format = "column1 UInt32, column2 UInt32, column3 UInt32" max_path = "" + for i in range(10): for j in range(10): path = "{}/{}_{}/{}.csv".format( From 8dde50eb3aeef2bd83a1c7c0ec244dfe1c66bf32 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 13 Jun 2023 15:19:16 +0000 Subject: [PATCH 0329/1997] Automatic style fix --- tests/integration/test_storage_azure_blob_storage/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index 8ab5d416b03..3d9c751be3c 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -558,7 +558,7 @@ def test_schema_inference_from_globs_tf(cluster): node = cluster.instances["node"] # type: ClickHouseInstance table_format = "column1 UInt32, column2 UInt32, column3 UInt32" max_path = "" - + for i in range(10): for j in range(10): path = "{}/{}_{}/{}.csv".format( From 9a5c5c7e931c41f406e316cb20cc7659c40fccc5 Mon Sep 17 00:00:00 2001 From: Yuriy Chernyshov Date: Tue, 13 Jun 2023 19:29:41 +0300 Subject: [PATCH 0330/1997] Update contrib/re2 to 2023-06-02 --- contrib/re2 | 2 +- src/Common/OptimizedRegularExpression.cpp | 12 ++++++------ src/Common/parseGlobs.cpp | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/contrib/re2 b/contrib/re2 index 13ebb377c6a..03da4fc0857 160000 --- a/contrib/re2 +++ b/contrib/re2 @@ -1 +1 @@ -Subproject commit 13ebb377c6ad763ca61d12dd6f88b1126bd0b911 +Subproject commit 03da4fc0857c285e3a26782f6bc8931c4c950df4 diff --git a/src/Common/OptimizedRegularExpression.cpp b/src/Common/OptimizedRegularExpression.cpp index f2fe922ef19..5df9ce76098 100644 --- a/src/Common/OptimizedRegularExpression.cpp +++ b/src/Common/OptimizedRegularExpression.cpp @@ -669,16 +669,16 @@ unsigned OptimizedRegularExpressionImpl::match(const char * subject matches.resize(limit); for (size_t i = 0; i < limit; ++i) { - if (pieces[i] != nullptr) - { - matches[i].offset = pieces[i].data() - subject; - matches[i].length = pieces[i].length(); - } - else + if (pieces[i].empty()) { matches[i].offset = std::string::npos; matches[i].length = 0; } + else + { + matches[i].offset = pieces[i].data() - subject; + matches[i].length = pieces[i].length(); + } } return limit; } diff --git a/src/Common/parseGlobs.cpp b/src/Common/parseGlobs.cpp index f8d331c2b76..07cce38afff 100644 --- a/src/Common/parseGlobs.cpp +++ b/src/Common/parseGlobs.cpp @@ -40,7 +40,7 @@ std::string makeRegexpPatternFromGlobs(const std::string & initial_str_with_glob size_t current_index = 0; while (RE2::FindAndConsume(&input, enum_or_range, &matched)) { - std::string buffer = matched.ToString(); + std::string buffer{matched}; oss_for_replacing << 
escaped_with_globs.substr(current_index, matched.data() - escaped_with_globs.data() - current_index - 1) << '('; if (buffer.find(',') == std::string::npos) From a01056f67c787e069ca173cb63fafbfc5c6e5c96 Mon Sep 17 00:00:00 2001 From: santrancisco Date: Wed, 14 Jun 2023 02:33:48 +1000 Subject: [PATCH 0331/1997] Update orc submodule --- contrib/orc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/orc b/contrib/orc index c5d7755ba0b..f2c191f9653 160000 --- a/contrib/orc +++ b/contrib/orc @@ -1 +1 @@ -Subproject commit c5d7755ba0b9a95631c8daea4d094101f26ec761 +Subproject commit f2c191f9653a5ddbca016e024ca0fb61508f5eeb From 1d8013074de7eb5a35460fef94bc82b8872531c3 Mon Sep 17 00:00:00 2001 From: zvonand Date: Tue, 13 Jun 2023 18:47:02 +0200 Subject: [PATCH 0332/1997] fix style & black --- src/Storages/HDFS/StorageHDFS.cpp | 6 ++++-- src/Storages/StorageFile.cpp | 6 ++++-- tests/integration/test_storage_hdfs/test.py | 13 ++++++++++--- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 7e836c028a0..2503b0e60f2 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -154,12 +154,14 @@ namespace const String prefix_without_globs = path_for_ls + for_match.substr(1, end_of_path_without_globs); /// ends with '/' size_t slashes_in_glob = 0; - const size_t next_slash_after_glob_pos = [&](){ + const size_t next_slash_after_glob_pos = [&]() + { if (!has_glob) return suffix_with_globs.find('/', 1); size_t in_curly = 0; - for (std::string::const_iterator it = ++suffix_with_globs.begin(); it != suffix_with_globs.end(); it++) { + for (std::string::const_iterator it = ++suffix_with_globs.begin(); it != suffix_with_globs.end(); it++) + { if (*it == '{') ++in_curly; else if (*it == '/') diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 93228cf4d39..dc7cfe0de29 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -170,12 +170,14 @@ void listFilesWithRegexpMatchingImpl( /// slashes_in_glob counter is a upper-bound estimate of recursion depth /// needed to process complex cases when `/` is included into glob, e.g. 
/pa{th1/a,th2/b}.csv size_t slashes_in_glob = 0; - const size_t next_slash_after_glob_pos = [&](){ + const size_t next_slash_after_glob_pos = [&]() + { if (!has_glob) return suffix_with_globs.find('/', 1); size_t in_curly = 0; - for (std::string::const_iterator it = ++suffix_with_globs.begin(); it != suffix_with_globs.end(); it++) { + for (std::string::const_iterator it = ++suffix_with_globs.begin(); it != suffix_with_globs.end(); it++) + { if (*it == '{') ++in_curly; else if (*it == '/') diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index dde3dd257b1..789e8e11035 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -88,10 +88,17 @@ def test_read_write_storage_with_globs(started_cluster): def test_storage_with_multidirectory_glob(started_cluster): hdfs_api = started_cluster.hdfs_api for i in ["1", "2"]: - hdfs_api.write_data(f"/multiglob/p{i}/path{i}/postfix/data{i}", f"File{i}\t{i}{i}\n") - assert hdfs_api.read_data(f"/multiglob/p{i}/path{i}/postfix/data{i}") == f"File{i}\t{i}{i}\n" + hdfs_api.write_data( + f"/multiglob/p{i}/path{i}/postfix/data{i}", f"File{i}\t{i}{i}\n" + ) + assert ( + hdfs_api.read_data(f"/multiglob/p{i}/path{i}/postfix/data{i}") + == f"File{i}\t{i}{i}\n" + ) - r = node1.query("SELECT * FROM hdfs('hdfs://hdfs1:9000/multiglob/{p1/path1,p2/path2}/postfix/data{1,2}', TSV)") + r = node1.query( + "SELECT * FROM hdfs('hdfs://hdfs1:9000/multiglob/{p1/path1,p2/path2}/postfix/data{1,2}', TSV)" + ) assert (r == f"File1\t11\nFile2\t22\n") or (r == f"File2\t22\nFile1\t11\n") From 906db0318dee9d08a8af603ab0400143578e4f3d Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 13 Jun 2023 19:45:43 +0000 Subject: [PATCH 0333/1997] Remove AST-based optimization duplicate_order_by_and_distinct It was replaced by plan-level optimizations quite some time ago: - query_plan_remove_redundant_sorting - query_plan_remove_redundant_distinct --- src/Core/Settings.h | 2 +- src/Interpreters/TreeOptimizer.cpp | 53 -------- ..._duplicate_order_by_and_distinct.reference | 58 --------- .../01305_duplicate_order_by_and_distinct.sql | 123 ------------------ ...t_optimize_for_distributed_table.reference | 4 - ...istinct_optimize_for_distributed_table.sql | 46 ------- 6 files changed, 1 insertion(+), 285 deletions(-) delete mode 100644 tests/queries/0_stateless/01305_duplicate_order_by_and_distinct.reference delete mode 100644 tests/queries/0_stateless/01305_duplicate_order_by_and_distinct.sql delete mode 100644 tests/queries/0_stateless/01306_disable_duplicate_order_by_and_distinct_optimize_for_distributed_table.reference delete mode 100644 tests/queries/0_stateless/01306_disable_duplicate_order_by_and_distinct_optimize_for_distributed_table.sql diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 3a23127e2fd..c53bed2007a 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -526,7 +526,6 @@ class IColumn; M(Bool, convert_query_to_cnf, false, "Convert SELECT query to CNF", 0) \ M(Bool, optimize_or_like_chain, false, "Optimize multiple OR LIKE into multiMatchAny. 
This optimization should not be enabled by default, because it defies index analysis in some cases.", 0) \ M(Bool, optimize_arithmetic_operations_in_aggregate_functions, true, "Move arithmetic operations out of aggregation functions", 0) \ - M(Bool, optimize_duplicate_order_by_and_distinct, false, "Remove duplicate ORDER BY and DISTINCT if it's possible", 0) \ M(Bool, optimize_redundant_functions_in_order_by, true, "Remove functions from ORDER BY if its argument is also in ORDER BY", 0) \ M(Bool, optimize_if_chain_to_multiif, false, "Replace if(cond1, then1, if(cond2, ...)) chains to multiIf. Currently it's not beneficial for numeric types.", 0) \ M(Bool, optimize_multiif_to_if, true, "Replace 'multiIf' with only one condition to 'if'.", 0) \ @@ -818,6 +817,7 @@ class IColumn; MAKE_OBSOLETE(M, Seconds, drain_timeout, 3) \ MAKE_OBSOLETE(M, UInt64, backup_threads, 16) \ MAKE_OBSOLETE(M, UInt64, restore_threads, 16) \ + MAKE_OBSOLETE(M, Bool, optimize_duplicate_order_by_and_distinct, false) \ /** The section above is for obsolete settings. Do not add anything there. */ diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index c38b3c79026..b6b45c664f9 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -288,13 +288,6 @@ void optimizeDuplicatesInOrderBy(const ASTSelectQuery * select_query) elems = std::move(unique_elems); } -/// Optimize duplicate ORDER BY -void optimizeDuplicateOrderBy(ASTPtr & query, ContextPtr context) -{ - DuplicateOrderByVisitor::Data order_by_data{context}; - DuplicateOrderByVisitor(order_by_data).visit(query); -} - /// Return simple subselect (without UNIONs or JOINs or SETTINGS) if any const ASTSelectQuery * getSimpleSubselect(const ASTSelectQuery & select) { @@ -378,41 +371,6 @@ std::unordered_set getDistinctNames(const ASTSelectQuery & select) return names; } -/// Remove DISTINCT from query if columns are known as DISTINCT from subquery -void optimizeDuplicateDistinct(ASTSelectQuery & select) -{ - if (!select.select() || select.select()->children.empty()) - return; - - const ASTSelectQuery * subselect = getSimpleSubselect(select); - if (!subselect) - return; - - std::unordered_set distinct_names = getDistinctNames(*subselect); - std::unordered_set selected_names; - - /// Check source column names from select list (ignore aliases and table names) - for (const auto & id : select.select()->children) - { - const auto * identifier = id->as(); - if (!identifier) - return; - - const String & name = identifier->shortName(); - if (!distinct_names.contains(name)) - return; /// Not a distinct column, keep DISTINCT for it. - - selected_names.emplace(name); - } - - /// select columns list != distinct columns list - /// SELECT DISTINCT a FROM (SELECT DISTINCT a, b FROM ...)) -- cannot remove DISTINCT - if (selected_names.size() != distinct_names.size()) - return; - - select.distinct = false; -} - /// Replace monotonous functions in ORDER BY if they don't participate in GROUP BY expression, /// has a single argument and not an aggregate functions. void optimizeMonotonousFunctionsInOrderBy(ASTSelectQuery * select_query, ContextPtr context, @@ -811,17 +769,6 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result, && !select_query->group_by_with_cube) optimizeAggregateFunctionsOfGroupByKeys(select_query, query); - /// Remove duplicate ORDER BY and DISTINCT from subqueries. 
- if (settings.optimize_duplicate_order_by_and_distinct) - { - optimizeDuplicateOrderBy(query, context); - - /// DISTINCT has special meaning in Distributed query with enabled distributed_group_by_no_merge - /// TODO: disable Distributed/remote() tables only - if (!settings.distributed_group_by_no_merge) - optimizeDuplicateDistinct(*select_query); - } - /// Remove functions from ORDER BY if its argument is also in ORDER BY if (settings.optimize_redundant_functions_in_order_by) optimizeRedundantFunctionsInOrderBy(select_query, context); diff --git a/tests/queries/0_stateless/01305_duplicate_order_by_and_distinct.reference b/tests/queries/0_stateless/01305_duplicate_order_by_and_distinct.reference deleted file mode 100644 index 10f8bbfd392..00000000000 --- a/tests/queries/0_stateless/01305_duplicate_order_by_and_distinct.reference +++ /dev/null @@ -1,58 +0,0 @@ -SELECT number -FROM -( - SELECT number - FROM - ( - SELECT DISTINCT number - FROM numbers(3) - ) -) -ORDER BY number ASC -0 -1 -2 -SELECT DISTINCT number -FROM -( - SELECT DISTINCT number - FROM - ( - SELECT DISTINCT number - FROM numbers(3) - ORDER BY number ASC - ) - ORDER BY number ASC -) -ORDER BY number ASC -0 -1 -2 -SELECT number -FROM -( - SELECT number - FROM - ( - SELECT DISTINCT number % 2 AS number - FROM numbers(3) - ) -) -ORDER BY number ASC -0 -1 -SELECT DISTINCT number -FROM -( - SELECT DISTINCT number - FROM - ( - SELECT DISTINCT number % 2 AS number - FROM numbers(3) - ORDER BY number ASC - ) - ORDER BY number ASC -) -ORDER BY number ASC -0 -1 diff --git a/tests/queries/0_stateless/01305_duplicate_order_by_and_distinct.sql b/tests/queries/0_stateless/01305_duplicate_order_by_and_distinct.sql deleted file mode 100644 index 3b13b208eb5..00000000000 --- a/tests/queries/0_stateless/01305_duplicate_order_by_and_distinct.sql +++ /dev/null @@ -1,123 +0,0 @@ -set optimize_duplicate_order_by_and_distinct = 1; - -EXPLAIN SYNTAX SELECT DISTINCT * -FROM -( - SELECT DISTINCT * - FROM - ( - SELECT DISTINCT * - FROM numbers(3) - ORDER BY number - ) - ORDER BY number -) -ORDER BY number; - -SELECT DISTINCT * -FROM -( - SELECT DISTINCT * - FROM - ( - SELECT DISTINCT * - FROM numbers(3) - ORDER BY number - ) - ORDER BY number -) -ORDER BY number; - -set optimize_duplicate_order_by_and_distinct = 0; - -EXPLAIN SYNTAX SELECT DISTINCT * -FROM -( - SELECT DISTINCT * - FROM - ( - SELECT DISTINCT * - FROM numbers(3) - ORDER BY number - ) - ORDER BY number -) -ORDER BY number; - -SELECT DISTINCT * -FROM -( - SELECT DISTINCT * - FROM - ( - SELECT DISTINCT * - FROM numbers(3) - ORDER BY number - ) - ORDER BY number -) -ORDER BY number; - -set optimize_duplicate_order_by_and_distinct = 1; - -EXPLAIN SYNTAX SELECT DISTINCT * -FROM -( - SELECT DISTINCT * - FROM - ( - SELECT DISTINCT number % 2 - AS number - FROM numbers(3) - ORDER BY number - ) - ORDER BY number -) -ORDER BY number; - -SELECT DISTINCT * -FROM -( - SELECT DISTINCT * - FROM - ( - SELECT DISTINCT number % 2 - AS number - FROM numbers(3) - ORDER BY number - ) - ORDER BY number -) -ORDER BY number; - -set optimize_duplicate_order_by_and_distinct = 0; - -EXPLAIN SYNTAX SELECT DISTINCT * -FROM -( - SELECT DISTINCT * - FROM - ( - SELECT DISTINCT number % 2 - AS number - FROM numbers(3) - ORDER BY number - ) - ORDER BY number -) -ORDER BY number; - -SELECT DISTINCT * -FROM -( - SELECT DISTINCT * - FROM - ( - SELECT DISTINCT number % 2 - AS number - FROM numbers(3) - ORDER BY number - ) - ORDER BY number -) -ORDER BY number; diff --git 
a/tests/queries/0_stateless/01306_disable_duplicate_order_by_and_distinct_optimize_for_distributed_table.reference b/tests/queries/0_stateless/01306_disable_duplicate_order_by_and_distinct_optimize_for_distributed_table.reference deleted file mode 100644 index 44e0be8e356..00000000000 --- a/tests/queries/0_stateless/01306_disable_duplicate_order_by_and_distinct_optimize_for_distributed_table.reference +++ /dev/null @@ -1,4 +0,0 @@ -0 -0 -0 -0 diff --git a/tests/queries/0_stateless/01306_disable_duplicate_order_by_and_distinct_optimize_for_distributed_table.sql b/tests/queries/0_stateless/01306_disable_duplicate_order_by_and_distinct_optimize_for_distributed_table.sql deleted file mode 100644 index 8ef1273c855..00000000000 --- a/tests/queries/0_stateless/01306_disable_duplicate_order_by_and_distinct_optimize_for_distributed_table.sql +++ /dev/null @@ -1,46 +0,0 @@ --- Tags: distributed - -set query_plan_remove_redundant_distinct = 1; -set optimize_duplicate_order_by_and_distinct = 0; -SET distributed_group_by_no_merge = 0; - -SELECT DISTINCT number -FROM -( - SELECT DISTINCT number - FROM remote('127.0.0.{1,2}', system.numbers) - LIMIT 1 - SETTINGS distributed_group_by_no_merge = 1 -); - -SET distributed_group_by_no_merge = 1; - -SELECT DISTINCT number -FROM -( - SELECT DISTINCT number - FROM remote('127.0.0.{1,2}', system.numbers) - LIMIT 1 -); - -set optimize_duplicate_order_by_and_distinct = 0; -SET distributed_group_by_no_merge = 0; - -SELECT DISTINCT number -FROM -( - SELECT DISTINCT number - FROM remote('127.0.0.{1,2}', system.numbers) - LIMIT 1 - SETTINGS distributed_group_by_no_merge = 1 -); - -SET distributed_group_by_no_merge = 1; -set optimize_duplicate_order_by_and_distinct = 0; -SELECT DISTINCT number -FROM -( - SELECT DISTINCT number - FROM remote('127.0.0.{1,2}', system.numbers) - LIMIT 1 -); From e7d2cc4a1a44e9c52d16ede99612e28342735c15 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Tue, 13 Jun 2023 22:11:23 +0000 Subject: [PATCH 0334/1997] add map functions tests --- ...es_aggregate_functions_states.reference.j2 | 48 ++++++++++++++ ...ip_types_aggregate_functions_states.sql.j2 | 64 ++++++++++++++++--- 2 files changed, 102 insertions(+), 10 deletions(-) diff --git a/tests/queries/0_stateless/02751_ip_types_aggregate_functions_states.reference.j2 b/tests/queries/0_stateless/02751_ip_types_aggregate_functions_states.reference.j2 index 90f98cf63fd..03b0e065151 100644 --- a/tests/queries/0_stateless/02751_ip_types_aggregate_functions_states.reference.j2 +++ b/tests/queries/0_stateless/02751_ip_types_aggregate_functions_states.reference.j2 @@ -170,3 +170,51 @@ Row 1: any: dd8b:333c::dd8b:333c:0:0 anyHeavy: dd8b:333c::dd8b:333c:0:0 anyLast: dd8b:333c::dd8b:333c:0:0 +----- Map/Map hash / State / ip4 ----- +Row 1: +────── +sumMapState: 9327034461443333306 +minMapState: 17403430892851901033 +maxMapState: 17403430892851901033 +----- Map/Map hash / State / ip6 ----- +Row 1: +────── +sumMapState: 5204122423200337352 +minMapState: 14534921151579960284 +maxMapState: 14534921151579960284 +----- Map/Map finalizeAggregation / State / ip4 ----- +Row 1: +────── +sumMap: {'59.154.201.255':1} +minMap: {'59.154.201.255':1} +maxMap: {'59.154.201.255':1} +----- Map/Map finalizeAggregation / State / ip6 ----- +Row 1: +────── +sumMap: {'dd8b:333c::dd8b:333c:0:0':1} +minMap: {'dd8b:333c::dd8b:333c:0:0':1} +maxMap: {'dd8b:333c::dd8b:333c:0:0':1} +----- Map/Array hash / State / ip4 ----- +Row 1: +────── +sumMapState: 9327034461443333306 +minMapState: 9327034461443333306 +maxMapState: 
9327034461443333306 +----- Map/Array hash / State / ip6 ----- +Row 1: +────── +sumMapState: 15555709096566410627 +minMapState: 15555709096566410627 +maxMapState: 15555709096566410627 +----- Map/Array finalizeAggregation / State / ip4 ----- +Row 1: +────── +sumMap: (['59.154.201.255'],[1]) +minMap: (['59.154.201.255'],[1]) +maxMap: (['59.154.201.255'],[1]) +----- Map/Array finalizeAggregation / State / ip6 ----- +Row 1: +────── +sumMap: (['dd8b:333c::dd8b:333c:0:0'],[1]) +minMap: (['dd8b:333c::dd8b:333c:0:0'],[1]) +maxMap: (['dd8b:333c::dd8b:333c:0:0'],[1]) diff --git a/tests/queries/0_stateless/02751_ip_types_aggregate_functions_states.sql.j2 b/tests/queries/0_stateless/02751_ip_types_aggregate_functions_states.sql.j2 index 7daff5a690f..7d030d4be2d 100644 --- a/tests/queries/0_stateless/02751_ip_types_aggregate_functions_states.sql.j2 +++ b/tests/queries/0_stateless/02751_ip_types_aggregate_functions_states.sql.j2 @@ -1,5 +1,4 @@ --- Tags: no-fasttest --- no-fasttest because uniqTheta +-- Tags: no-parallel, no-fasttest {# this test checks backward compatibility of aggregate functions States against IPv4, IPv6 types #} @@ -22,7 +21,6 @@ from ( {{ ip_generator }} ) format Vertical; {% endfor -%} - {% for generator_name, ip_generator in ip_generators.items() %} select '----- finalizeAggregation / State / {{ generator_name }} -----'; @@ -35,7 +33,6 @@ from ( {{ ip_generator }} ) format Vertical; {% endfor -%} - {% for generator_name, ip_generator in ip_generators.items() %} select '----- hash / IfState / {{ generator_name }} -----'; @@ -48,7 +45,6 @@ from ( {{ ip_generator }} ) format Vertical; {% endfor -%} - {% for generator_name, ip_generator in ip_generators.items() %} select '----- finalizeAggregation / IfState / {{ generator_name }} -----'; @@ -61,8 +57,6 @@ from ( {{ ip_generator }} ) format Vertical; {% endfor -%} - - {% set agg_func_list = [ "argMin", "argMax" ] %} {% for generator_name, ip_generator in ip_generators.items() %} @@ -77,8 +71,6 @@ from ( {{ ip_generator }} ) format Vertical; {% endfor -%} - - {% for generator_name, ip_generator in ip_generators.items() %} select '----- Arg / finalizeAggregation / State / {{ generator_name }} -----'; @@ -91,6 +83,8 @@ from ( {{ ip_generator }} ) format Vertical; {% endfor -%} + + {# let's test functions with not deterministic result against 1 row, to make it deterministic #} {% set ip4_generator = "select number::UInt32::IPv4 ip from numbers(999999999,1) order by ip" %} {% set ip6_generator = "SELECT toIPv6(IPv6NumToString(toFixedString(reinterpretAsFixedString(number)||reinterpretAsFixedString(number), 16))) AS ip FROM numbers(1010011101, 1) order by ip" %} @@ -112,7 +106,6 @@ from ( {{ ip_generator }} ) format Vertical; {% endfor -%} - {% for generator_name, ip_generator in ip_generators.items() %} select '----- finalizeAggregation / State / {{ generator_name }} -----'; @@ -124,3 +117,54 @@ from ( {{ ip_generator }} ) format Vertical; {% endfor -%} + + +{% set agg_func_list = [ "sumMap", "minMap", "maxMap" ] %} + +{% for generator_name, ip_generator in ip_generators.items() %} + +select '----- Map/Map hash / State / {{ generator_name }} -----'; +select + {% for func in agg_func_list -%} + cityHash64(hex( {{ func }}State(map(ip, 1::Int64)) )) AS {{ func }}State{{ "," if not loop.last }} + {% endfor -%} +from ( {{ ip_generator }} ) format Vertical; + +{% endfor -%} + + + +{% for generator_name, ip_generator in ip_generators.items() %} + +select '----- Map/Map finalizeAggregation / State / {{ generator_name }} -----'; +select + {% for 
func in agg_func_list -%} + finalizeAggregation( {{ func }}State(map(ip, 1::Int64)) ) AS {{ func }}{{ "," if not loop.last }} + {% endfor -%} +from ( {{ ip_generator }} ) format Vertical; + +{% endfor -%} + + +{% for generator_name, ip_generator in ip_generators.items() %} + +select '----- Map/Array hash / State / {{ generator_name }} -----'; +select + {% for func in agg_func_list -%} + cityHash64(hex( {{ func }}State([ip], [1::Int64]) )) AS {{ func }}State{{ "," if not loop.last }} + {% endfor -%} +from ( {{ ip_generator }} ) format Vertical; + +{% endfor -%} + + +{% for generator_name, ip_generator in ip_generators.items() %} + +select '----- Map/Array finalizeAggregation / State / {{ generator_name }} -----'; +select + {% for func in agg_func_list -%} + finalizeAggregation( {{ func }}State([ip], [1::Int64]) ) AS {{ func }}{{ "," if not loop.last }} + {% endfor -%} +from ( {{ ip_generator }} ) format Vertical; + +{% endfor -%} From 9652b38a6cd51f8c0f5a65cc70108d126589c793 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Tue, 13 Jun 2023 23:10:03 +0000 Subject: [PATCH 0335/1997] Fix CHECK_NAME --- .github/workflows/pull_request.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 9a39b1177cf..59beddac8d5 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -3870,7 +3870,7 @@ jobs: cat >> "$GITHUB_ENV" << 'EOF' TEMP_PATH=${{runner.temp}}/integration_tests_asan REPORTS_PATH=${{runner.temp}}/reports_dir - CHECK_NAME=Integration tests (asan) + CHECK_NAME=Integration tests (asan, analyzer) REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse RUN_BY_HASH_NUM=0 RUN_BY_HASH_TOTAL=6 @@ -3905,7 +3905,7 @@ jobs: cat >> "$GITHUB_ENV" << 'EOF' TEMP_PATH=${{runner.temp}}/integration_tests_asan REPORTS_PATH=${{runner.temp}}/reports_dir - CHECK_NAME=Integration tests (asan) + CHECK_NAME=Integration tests (asan, analyzer) REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse RUN_BY_HASH_NUM=1 RUN_BY_HASH_TOTAL=6 @@ -3940,7 +3940,7 @@ jobs: cat >> "$GITHUB_ENV" << 'EOF' TEMP_PATH=${{runner.temp}}/integration_tests_asan REPORTS_PATH=${{runner.temp}}/reports_dir - CHECK_NAME=Integration tests (asan) + CHECK_NAME=Integration tests (asan, analyzer) REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse RUN_BY_HASH_NUM=2 RUN_BY_HASH_TOTAL=6 @@ -3975,7 +3975,7 @@ jobs: cat >> "$GITHUB_ENV" << 'EOF' TEMP_PATH=${{runner.temp}}/integration_tests_asan REPORTS_PATH=${{runner.temp}}/reports_dir - CHECK_NAME=Integration tests (asan) + CHECK_NAME=Integration tests (asan, analyzer) REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse RUN_BY_HASH_NUM=3 RUN_BY_HASH_TOTAL=6 @@ -4010,7 +4010,7 @@ jobs: cat >> "$GITHUB_ENV" << 'EOF' TEMP_PATH=${{runner.temp}}/integration_tests_asan REPORTS_PATH=${{runner.temp}}/reports_dir - CHECK_NAME=Integration tests (asan) + CHECK_NAME=Integration tests (asan, analyzer) REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse RUN_BY_HASH_NUM=4 RUN_BY_HASH_TOTAL=6 @@ -4045,7 +4045,7 @@ jobs: cat >> "$GITHUB_ENV" << 'EOF' TEMP_PATH=${{runner.temp}}/integration_tests_asan REPORTS_PATH=${{runner.temp}}/reports_dir - CHECK_NAME=Integration tests (asan) + CHECK_NAME=Integration tests (asan, analyzer) REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse RUN_BY_HASH_NUM=5 RUN_BY_HASH_TOTAL=6 From 29b9cba75c18e23f9ee2eb589e5a69e7f46a5054 Mon Sep 17 00:00:00 2001 From: santrancisco Date: Wed, 14 Jun 2023 11:31:09 
+1000 Subject: [PATCH 0336/1997] Update CMakeLists.txt with help from Nikita --- contrib/arrow-cmake/CMakeLists.txt | 68 +++++++++++++++++++++++------- 1 file changed, 52 insertions(+), 16 deletions(-) diff --git a/contrib/arrow-cmake/CMakeLists.txt b/contrib/arrow-cmake/CMakeLists.txt index 16198887075..5fe942d1cd0 100644 --- a/contrib/arrow-cmake/CMakeLists.txt +++ b/contrib/arrow-cmake/CMakeLists.txt @@ -116,43 +116,79 @@ configure_file("${ORC_SOURCE_SRC_DIR}/Adaptor.hh.in" "${ORC_BUILD_INCLUDE_DIR}/A # ARROW_ORC + adapters/orc/CMakefiles set(ORC_SRCS "${CMAKE_CURRENT_BINARY_DIR}/orc_proto.pb.h" - "${ORC_SOURCE_SRC_DIR}/sargs/ExpressionTree.cc" - "${ORC_SOURCE_SRC_DIR}/sargs/Literal.cc" - "${ORC_SOURCE_SRC_DIR}/sargs/PredicateLeaf.cc" - "${ORC_SOURCE_SRC_DIR}/sargs/SargsApplier.cc" - "${ORC_SOURCE_SRC_DIR}/sargs/SearchArgument.cc" - "${ORC_SOURCE_SRC_DIR}/sargs/TruthValue.cc" - "${ORC_SOURCE_SRC_DIR}/Exceptions.cc" - "${ORC_SOURCE_SRC_DIR}/OrcFile.cc" - "${ORC_SOURCE_SRC_DIR}/Reader.cc" + "${ORC_ADDITION_SOURCE_DIR}/orc_proto.pb.cc" + "${ORC_SOURCE_SRC_DIR}/Adaptor.cc" + "${ORC_SOURCE_SRC_DIR}/Adaptor.hh.in" + "${ORC_SOURCE_SRC_DIR}/BlockBuffer.cc" + "${ORC_SOURCE_SRC_DIR}/BlockBuffer.hh" + "${ORC_SOURCE_SRC_DIR}/BloomFilter.cc" + "${ORC_SOURCE_SRC_DIR}/BloomFilter.hh" + "${ORC_SOURCE_SRC_DIR}/Bpacking.hh" + "${ORC_SOURCE_SRC_DIR}/BpackingDefault.cc" + "${ORC_SOURCE_SRC_DIR}/BpackingDefault.hh" "${ORC_SOURCE_SRC_DIR}/ByteRLE.cc" + "${ORC_SOURCE_SRC_DIR}/ByteRLE.hh" + "${ORC_SOURCE_SRC_DIR}/CMakeLists.txt" "${ORC_SOURCE_SRC_DIR}/ColumnPrinter.cc" "${ORC_SOURCE_SRC_DIR}/ColumnReader.cc" + "${ORC_SOURCE_SRC_DIR}/ColumnReader.hh" "${ORC_SOURCE_SRC_DIR}/ColumnWriter.cc" + "${ORC_SOURCE_SRC_DIR}/ColumnWriter.hh" "${ORC_SOURCE_SRC_DIR}/Common.cc" "${ORC_SOURCE_SRC_DIR}/Compression.cc" + "${ORC_SOURCE_SRC_DIR}/Compression.hh" + "${ORC_SOURCE_SRC_DIR}/ConvertColumnReader.cc" + "${ORC_SOURCE_SRC_DIR}/ConvertColumnReader.hh" + "${ORC_SOURCE_SRC_DIR}/CpuInfoUtil.cc" + "${ORC_SOURCE_SRC_DIR}/CpuInfoUtil.hh" + "${ORC_SOURCE_SRC_DIR}/Dispatch.hh" + "${ORC_SOURCE_SRC_DIR}/Exceptions.cc" "${ORC_SOURCE_SRC_DIR}/Int128.cc" "${ORC_SOURCE_SRC_DIR}/LzoDecompressor.cc" + "${ORC_SOURCE_SRC_DIR}/LzoDecompressor.hh" "${ORC_SOURCE_SRC_DIR}/MemoryPool.cc" + "${ORC_SOURCE_SRC_DIR}/Murmur3.cc" + "${ORC_SOURCE_SRC_DIR}/Murmur3.hh" + "${ORC_SOURCE_SRC_DIR}/Options.hh" + "${ORC_SOURCE_SRC_DIR}/OrcFile.cc" "${ORC_SOURCE_SRC_DIR}/RLE.cc" + "${ORC_SOURCE_SRC_DIR}/RLE.hh" + "${ORC_SOURCE_SRC_DIR}/RLEV2Util.cc" + "${ORC_SOURCE_SRC_DIR}/RLEV2Util.hh" "${ORC_SOURCE_SRC_DIR}/RLEv1.cc" + "${ORC_SOURCE_SRC_DIR}/RLEv1.hh" + "${ORC_SOURCE_SRC_DIR}/RLEv2.hh" + "${ORC_SOURCE_SRC_DIR}/Reader.cc" + "${ORC_SOURCE_SRC_DIR}/Reader.hh" "${ORC_SOURCE_SRC_DIR}/RleDecoderV2.cc" "${ORC_SOURCE_SRC_DIR}/RleEncoderV2.cc" - "${ORC_SOURCE_SRC_DIR}/RLEV2Util.cc" + "${ORC_SOURCE_SRC_DIR}/SchemaEvolution.cc" + "${ORC_SOURCE_SRC_DIR}/SchemaEvolution.hh" "${ORC_SOURCE_SRC_DIR}/Statistics.cc" + "${ORC_SOURCE_SRC_DIR}/Statistics.hh" "${ORC_SOURCE_SRC_DIR}/StripeStream.cc" + "${ORC_SOURCE_SRC_DIR}/StripeStream.hh" "${ORC_SOURCE_SRC_DIR}/Timezone.cc" + "${ORC_SOURCE_SRC_DIR}/Timezone.hh" "${ORC_SOURCE_SRC_DIR}/TypeImpl.cc" + "${ORC_SOURCE_SRC_DIR}/TypeImpl.hh" + "${ORC_SOURCE_SRC_DIR}/Utils.hh" "${ORC_SOURCE_SRC_DIR}/Vector.cc" "${ORC_SOURCE_SRC_DIR}/Writer.cc" - "${ORC_SOURCE_SRC_DIR}/Adaptor.cc" - "${ORC_SOURCE_SRC_DIR}/BloomFilter.cc" - "${ORC_SOURCE_SRC_DIR}/Murmur3.cc" - "${ORC_SOURCE_SRC_DIR}/BlockBuffer.cc" - 
"${ORC_SOURCE_SRC_DIR}/wrap/orc-proto-wrapper.cc" "${ORC_SOURCE_SRC_DIR}/io/InputStream.cc" + "${ORC_SOURCE_SRC_DIR}/io/InputStream.hh" "${ORC_SOURCE_SRC_DIR}/io/OutputStream.cc" - "${ORC_ADDITION_SOURCE_DIR}/orc_proto.pb.cc" + "${ORC_SOURCE_SRC_DIR}/io/OutputStream.hh" + "${ORC_SOURCE_SRC_DIR}/sargs/ExpressionTree.cc" + "${ORC_SOURCE_SRC_DIR}/sargs/ExpressionTree.hh" + "${ORC_SOURCE_SRC_DIR}/sargs/Literal.cc" + "${ORC_SOURCE_SRC_DIR}/sargs/PredicateLeaf.cc" + "${ORC_SOURCE_SRC_DIR}/sargs/PredicateLeaf.hh" + "${ORC_SOURCE_SRC_DIR}/sargs/SargsApplier.cc" + "${ORC_SOURCE_SRC_DIR}/sargs/SargsApplier.hh" + "${ORC_SOURCE_SRC_DIR}/sargs/SearchArgument.cc" + "${ORC_SOURCE_SRC_DIR}/sargs/SearchArgument.hh" + "${ORC_SOURCE_SRC_DIR}/sargs/TruthValue.cc" ) add_library(_orc ${ORC_SRCS}) From 6ffdfb8b6b8656dfb2ef004349a3cad82dd03e1f Mon Sep 17 00:00:00 2001 From: santrancisco Date: Wed, 14 Jun 2023 13:29:05 +1000 Subject: [PATCH 0337/1997] test removing CpuInfoUtil.cc and see if build breaks :p --- contrib/arrow-cmake/CMakeLists.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/contrib/arrow-cmake/CMakeLists.txt b/contrib/arrow-cmake/CMakeLists.txt index 5fe942d1cd0..01e9fc5fca9 100644 --- a/contrib/arrow-cmake/CMakeLists.txt +++ b/contrib/arrow-cmake/CMakeLists.txt @@ -139,8 +139,6 @@ set(ORC_SRCS "${ORC_SOURCE_SRC_DIR}/Compression.hh" "${ORC_SOURCE_SRC_DIR}/ConvertColumnReader.cc" "${ORC_SOURCE_SRC_DIR}/ConvertColumnReader.hh" - "${ORC_SOURCE_SRC_DIR}/CpuInfoUtil.cc" - "${ORC_SOURCE_SRC_DIR}/CpuInfoUtil.hh" "${ORC_SOURCE_SRC_DIR}/Dispatch.hh" "${ORC_SOURCE_SRC_DIR}/Exceptions.cc" "${ORC_SOURCE_SRC_DIR}/Int128.cc" From 0a1d0c4abd0f8ece5af6f3a3d5ccc5207dfff0f2 Mon Sep 17 00:00:00 2001 From: Manas Alekar Date: Tue, 13 Jun 2023 23:36:14 -0700 Subject: [PATCH 0338/1997] Fix one stateless test. --- src/Client/ClientBase.cpp | 5 ++--- tests/queries/0_stateless/02346_into_outfile_and_stdout.sh | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index fc108b8f57d..b746d46148c 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -596,14 +596,13 @@ try range.second); } - std::error_code ec; - if (std::filesystem::is_regular_file(out_file, ec)) + if (fs::exists(out_file)) { if (!query_with_output->is_outfile_append && !query_with_output->is_outfile_truncate) { throw Exception( ErrorCodes::CANNOT_OPEN_FILE, - "File {} exists, consider using 'INTO OUTFILE ... APPEND' or 'INTO OUTFILE ... TRUNCATE' if appropriate.", + "File {} exists, consider using APPEND or TRUNCATE if appropriate.", out_file); } } diff --git a/tests/queries/0_stateless/02346_into_outfile_and_stdout.sh b/tests/queries/0_stateless/02346_into_outfile_and_stdout.sh index 021dc9125d4..8ec086c97ef 100755 --- a/tests/queries/0_stateless/02346_into_outfile_and_stdout.sh +++ b/tests/queries/0_stateless/02346_into_outfile_and_stdout.sh @@ -66,7 +66,7 @@ performBadQuery "bad_query_incorrect_usage" "SELECT 1, 2, 3 INTO OUTFILE AND STD performBadQuery "bad_query_no_into_outfile" "SELECT 1, 2, 3 AND STDOUT'" "SYNTAX_ERROR" -performFileExists "bad_query_file_exists" "SELECT 1, 2, 3 INTO OUTFILE '${CLICKHOUSE_TMP}/test_into_outfile_and_stdout_bad_query_file_exists.out' AND STDOUT" "File exists. 
(CANNOT_OPEN_FILE)" +performFileExists "bad_query_file_exists" "SELECT 1, 2, 3 INTO OUTFILE '${CLICKHOUSE_TMP}/test_into_outfile_and_stdout_bad_query_file_exists.out' AND STDOUT" "File ${CLICKHOUSE_TMP}/test_into_outfile_and_stdout_bad_query_file_exists exists, consider using APPEND or TRUNCATE if appropriate. (CANNOT_OPEN_FILE)" performCompression "compression" "SELECT * FROM (SELECT 'Hello, World! From clickhouse.') INTO OUTFILE '${CLICKHOUSE_TMP}/test_into_outfile_and_stdout_compression.gz' AND STDOUT COMPRESSION 'GZ' FORMAT TabSeparated" From 197c76107eaa6ecd7ad00391d86854d7e8b3fcc8 Mon Sep 17 00:00:00 2001 From: santrancisco Date: Wed, 14 Jun 2023 18:38:09 +1000 Subject: [PATCH 0339/1997] Re-added CpuInfoUtil to CMakeList.txt --- contrib/arrow-cmake/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/contrib/arrow-cmake/CMakeLists.txt b/contrib/arrow-cmake/CMakeLists.txt index 01e9fc5fca9..5fe942d1cd0 100644 --- a/contrib/arrow-cmake/CMakeLists.txt +++ b/contrib/arrow-cmake/CMakeLists.txt @@ -139,6 +139,8 @@ set(ORC_SRCS "${ORC_SOURCE_SRC_DIR}/Compression.hh" "${ORC_SOURCE_SRC_DIR}/ConvertColumnReader.cc" "${ORC_SOURCE_SRC_DIR}/ConvertColumnReader.hh" + "${ORC_SOURCE_SRC_DIR}/CpuInfoUtil.cc" + "${ORC_SOURCE_SRC_DIR}/CpuInfoUtil.hh" "${ORC_SOURCE_SRC_DIR}/Dispatch.hh" "${ORC_SOURCE_SRC_DIR}/Exceptions.cc" "${ORC_SOURCE_SRC_DIR}/Int128.cc" From 1663905acdafcce577d58852a0835fdb760750e8 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 14 Jun 2023 08:48:16 +0000 Subject: [PATCH 0340/1997] Hold context in pipeline resources. --- src/Processors/QueryPlan/ReadFromMergeTree.cpp | 1 + src/QueryPipeline/QueryPipelineBuilder.h | 1 + 2 files changed, 2 insertions(+) diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 8f610eb4380..bbea1d38fb1 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1884,6 +1884,7 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons processors.emplace_back(processor); pipeline.init(std::move(pipe)); + pipeline.addContext(context); // Attach QueryIdHolder if needed if (query_id_holder) pipeline.setQueryIdHolder(std::move(query_id_holder)); diff --git a/src/QueryPipeline/QueryPipelineBuilder.h b/src/QueryPipeline/QueryPipelineBuilder.h index da8443a7e33..134de59520d 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.h +++ b/src/QueryPipeline/QueryPipelineBuilder.h @@ -185,6 +185,7 @@ public: void addResources(QueryPlanResourceHolder resources_) { resources = std::move(resources_); } void setQueryIdHolder(std::shared_ptr query_id_holder) { resources.query_id_holders.emplace_back(std::move(query_id_holder)); } + void addContext(ContextPtr context) { resources.interpreter_context.emplace_back(std::move(context)); } /// Convert query pipeline to pipe. 
static Pipe getPipe(QueryPipelineBuilder pipeline, QueryPlanResourceHolder & resources); From 918b8c4585025e8a357115945cce501c05d31be1 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Wed, 14 Jun 2023 10:51:59 +0200 Subject: [PATCH 0341/1997] Updated filename in test --- tests/integration/test_storage_azure_blob_storage/test.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index 3d9c751be3c..e99ae72eb8b 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -583,13 +583,13 @@ def test_partition_by_tf(cluster): table_format = "column1 UInt32, column2 UInt32, column3 UInt32" partition_by = "column3" values = "(1, 2, 3), (3, 2, 1), (78, 43, 45)" - filename = "test_tf_{_partition_id}.csv" + filename = "test_partition_tf_{_partition_id}.csv" azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', '{filename}', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', 'auto', '{table_format}') PARTITION BY {partition_by} VALUES {values}", ) - assert "1,2,3\n" == get_azure_file_content("test_tf_3.csv") - assert "3,2,1\n" == get_azure_file_content("test_tf_1.csv") - assert "78,43,45\n" == get_azure_file_content("test_tf_45.csv") + assert "1,2,3\n" == get_azure_file_content("test_partition_tf_3.csv") + assert "3,2,1\n" == get_azure_file_content("test_partition_tf_1.csv") + assert "78,43,45\n" == get_azure_file_content("test_partition_tf_45.csv") From c98a194b571e8c39504afc829fa91492f4dcbe2d Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Wed, 14 Jun 2023 11:00:11 +0200 Subject: [PATCH 0342/1997] Updated unique names for test to avoid same names by random numbers --- tests/integration/test_storage_azure_blob_storage/test.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index e99ae72eb8b..e2077f8face 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -300,10 +300,10 @@ def test_put_get_with_globs(cluster): azure_query( node, - f"CREATE TABLE test_{i}_{j} ({table_format}) Engine = AzureBlobStorage(azure_conf2, container='cont', blob_path='{path}', format='CSV')", + f"CREATE TABLE test_put_{i}_{j} ({table_format}) Engine = AzureBlobStorage(azure_conf2, container='cont', blob_path='{path}', format='CSV')", ) - query = f"insert into test_{i}_{j} VALUES {values}" + query = f"insert into test_put_{i}_{j} VALUES {values}" azure_query(node, query) azure_query( @@ -332,9 +332,9 @@ def test_azure_glob_scheherazade(cluster): unique_num = random.randint(1, 10000) azure_query( node, - f"CREATE TABLE test_{i}_{unique_num} ({table_format}) Engine = AzureBlobStorage(azure_conf2, container='cont', blob_path='{path}', format='CSV')", + f"CREATE TABLE test_scheherazade_{i}_{unique_num} ({table_format}) Engine = AzureBlobStorage(azure_conf2, container='cont', blob_path='{path}', format='CSV')", ) - query = f"insert into test_{i}_{unique_num} VALUES {values}" + query = f"insert into test_scheherazade_{i}_{unique_num} VALUES {values}" azure_query(node, query) jobs.append( From f6bad2c064efeb997755be8b9f313a3859a6f81f Mon Sep 17 00:00:00 2001
From: robot-clickhouse Date: Wed, 14 Jun 2023 09:13:38 +0000 Subject: [PATCH 0343/1997] Automatic style fix --- tests/integration/test_storage_azure_blob_storage/test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index e2077f8face..0002ccbf483 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -334,7 +334,9 @@ def test_azure_glob_scheherazade(cluster): node, f"CREATE TABLE test_scheherazade_{i}_{unique_num} ({table_format}) Engine = AzureBlobStorage(azure_conf2, container='cont', blob_path='{path}', format='CSV')", ) - query = f"insert into test_scheherazade_{i}_{unique_num} VALUES {values}" + query = ( + f"insert into test_scheherazade_{i}_{unique_num} VALUES {values}" + ) azure_query(node, query) jobs.append( From f9f1e870c8468b829b4aa449c8a9b3736b733056 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 14 Jun 2023 10:09:01 +0000 Subject: [PATCH 0344/1997] Fix build --- contrib/azure-cmake/CMakeLists.txt | 2 +- programs/keeper/CMakeLists.txt | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/contrib/azure-cmake/CMakeLists.txt b/contrib/azure-cmake/CMakeLists.txt index 887122e7653..a1dd4664e79 100644 --- a/contrib/azure-cmake/CMakeLists.txt +++ b/contrib/azure-cmake/CMakeLists.txt @@ -1,6 +1,6 @@ option (ENABLE_AZURE_BLOB_STORAGE "Enable Azure blob storage" ${ENABLE_LIBRARIES}) -if (NOT ENABLE_AZURE_BLOB_STORAGE OR BUILD_STANDALONE_KEEPER OR OS_FREEBSD OR (NOT ARCH_AMD64)) +if (NOT ENABLE_AZURE_BLOB_STORAGE OR OS_FREEBSD OR (NOT ARCH_AMD64)) message(STATUS "Not using Azure blob storage") return() endif() diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index 90f4f870df6..a3a8a769bff 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -148,7 +148,6 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/WriteBufferFromTemporaryFile.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/WriteBufferWithFinalizeCallback.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/AsynchronousBoundedReadBuffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/getThreadPoolReader.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ThreadPoolRemoteFSReader.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ThreadPoolReader.cpp From ed318d10353101c76a4493ccd9fa6c239868abd3 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Wed, 14 Jun 2023 10:35:36 +0000 Subject: [PATCH 0345/1997] Add input_format_csv_ignore_extra_columns setting (prototype) --- src/Core/Settings.h | 1 + src/Formats/FormatFactory.cpp | 1 + src/Formats/FormatSettings.h | 1 + src/Processors/Formats/Impl/CSVRowInputFormat.cpp | 15 ++++++++++++++- tests/queries/0_stateless/00301_csv.reference | 4 ++++ tests/queries/0_stateless/00301_csv.sh | 10 ++++++++++ 6 files changed, 31 insertions(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index bc879b9bdf6..d38f7767252 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -835,6 +835,7 @@ class IColumn; M(Bool, input_format_import_nested_json, false, "Map nested JSON data to nested tables (it works for JSONEachRow format).", 0) \ M(Bool, input_format_defaults_for_omitted_fields, true, "For input data calculate default expressions for omitted fields (it 
works for JSONEachRow, -WithNames, -WithNamesAndTypes formats).", IMPORTANT) \ M(Bool, input_format_csv_empty_as_default, true, "Treat empty fields in CSV input as default values.", 0) \ + M(Bool, input_format_csv_ignore_extra_columns, false, "Ignore extra columns in CSV input (if file has more columns than expected).", 0) \ M(Bool, input_format_tsv_empty_as_default, false, "Treat empty fields in TSV input as default values.", 0) \ M(Bool, input_format_tsv_enum_as_number, false, "Treat inserted enum values in TSV formats as enum indices.", 0) \ M(Bool, input_format_null_as_default, true, "Initialize null fields with default values if the data type of this field is not nullable and it is supported by the input format", 0) \ diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index c235afae57e..0218d268c51 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -63,6 +63,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.csv.delimiter = settings.format_csv_delimiter; format_settings.csv.tuple_delimiter = settings.format_csv_delimiter; format_settings.csv.empty_as_default = settings.input_format_csv_empty_as_default; + format_settings.csv.ignore_extra_columns = settings.input_format_csv_ignore_extra_columns; format_settings.csv.enum_as_number = settings.input_format_csv_enum_as_number; format_settings.csv.null_representation = settings.format_csv_null_representation; format_settings.csv.arrays_as_nested_csv = settings.input_format_csv_arrays_as_nested_csv; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 787c1a64759..3bc53140fe5 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -128,6 +128,7 @@ struct FormatSettings bool allow_single_quotes = true; bool allow_double_quotes = true; bool empty_as_default = false; + bool ignore_extra_columns = false; bool crlf_end_of_line = false; bool enum_as_number = false; bool arrays_as_nested_csv = false; diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index ae75240e0ee..0cc5889b732 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -302,14 +302,27 @@ bool CSVFormatReader::readField( return false; } + auto skip_all = [&]() + { + if (!is_last_file_column || !format_settings.csv.ignore_extra_columns) + { + return; + } + /// The line has more fields than the table has columns: discard everything up to the end of the line. + buf->position() = find_first_symbols<'\n'>(buf->position(), buf->buffer().end()); + }; if (format_settings.null_as_default && !isNullableOrLowCardinalityNullable(type)) { /// If value is null but type is not nullable then use default value instead. - return SerializationNullable::deserializeTextCSVImpl(column, *buf, format_settings, serialization); + bool res = SerializationNullable::deserializeTextCSVImpl(column, *buf, format_settings, serialization); + skip_all(); + return res; } /// Read the column normally.
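/// (If this was the last column of the table and input_format_csv_ignore_extra_columns is enabled, skip_all() below then discards any extra fields up to the end of the line, so a row like "Hello world, 4, 2016-01-04, 2016-01-14, 2016-01-15" still fits a three-column table, which is exactly what the new 00301_csv case exercises.)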
serialization->deserializeTextCSV(column, *buf, format_settings); + + skip_all(); return true; } diff --git a/tests/queries/0_stateless/00301_csv.reference b/tests/queries/0_stateless/00301_csv.reference index 9863da4b640..61279f3b84a 100644 --- a/tests/queries/0_stateless/00301_csv.reference +++ b/tests/queries/0_stateless/00301_csv.reference @@ -11,3 +11,7 @@ default-eof 1 2019-06-19 2016-01-01 01:02:03 NUL 2016-01-02 01:02:03 Nhello \N \N +Hello world 1 2016-01-01 +Hello world 2 2016-01-02 +Hello world 3 2016-01-03 +Hello world 4 2016-01-04 diff --git a/tests/queries/0_stateless/00301_csv.sh b/tests/queries/0_stateless/00301_csv.sh index b2618343dc0..e99c39a0f6f 100755 --- a/tests/queries/0_stateless/00301_csv.sh +++ b/tests/queries/0_stateless/00301_csv.sh @@ -37,3 +37,13 @@ echo 'NULL, NULL $CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY s NULLS LAST"; $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; + + +$CLICKHOUSE_CLIENT --query="CREATE TABLE csv (s String, n UInt64 DEFAULT 1, d Date DEFAULT '2019-06-19') ENGINE = Memory"; + +echo 'Hello world, 1, 2016-01-01 +Hello world, 2 ,2016-01-02, +Hello world, 3 ,2016-01-03, 2016-01-13 +Hello world, 4 ,2016-01-04, 2016-01-14, 2016-01-15' | $CLICKHOUSE_CLIENT --input_format_csv_empty_as_default=1 --input_format_csv_ignore_extra_columns=1 --query="INSERT INTO csv FORMAT CSV"; +$CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY s, n"; +$CLICKHOUSE_CLIENT --query="DROP TABLE csv"; \ No newline at end of file From 2b40734900f121f60ad50e37c2c6fa2f9376e3d5 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 12 Jun 2023 14:29:16 +0200 Subject: [PATCH 0346/1997] use const-size tasks in prefetch pool --- .../MergeTree/MergeTreePrefetchedReadPool.cpp | 107 +++++++----------- 1 file changed, 43 insertions(+), 64 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp index 63a205a1a61..f0dd2123ca4 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp @@ -1,18 +1,18 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include -#include -#include namespace ProfileEvents @@ -296,31 +296,12 @@ MergeTreeReadTaskPtr MergeTreePrefetchedReadPool::getTask(size_t thread) return task; } -size_t MergeTreePrefetchedReadPool::getApproxSizeOfGranule(const IMergeTreeDataPart & part) const +size_t getApproximateSizeOfGranule(const IMergeTreeDataPart & part, const Names & columns_to_read) { - const auto & columns = part.getColumns(); - auto all_columns_are_fixed_size = columns.end() == std::find_if( - columns.begin(), columns.end(), - [](const auto & col){ return col.type->haveMaximumSizeOfValue() == false; }); - - if (all_columns_are_fixed_size) - { - size_t approx_size = 0; - for (const auto & col : columns) - approx_size += col.type->getMaximumSizeOfValueInMemory() * fixed_index_granularity; - - if (!index_granularity_bytes) - return approx_size; - - return std::min(index_granularity_bytes, approx_size); - } - - const size_t approx_size = static_cast(std::round(static_cast(part.getBytesOnDisk()) / part.getMarksCount())); - - if (!index_granularity_bytes) - return approx_size; - - return std::min(index_granularity_bytes, approx_size); + ColumnSize columns_size{}; + for (const auto & col_name : columns_to_read) + 
columns_size.add(part.getColumnSize(col_name)); + return columns_size.data_compressed / part.getMarksCount(); } MergeTreePrefetchedReadPool::PartsInfos MergeTreePrefetchedReadPool::getPartsInfos( @@ -347,7 +328,7 @@ MergeTreePrefetchedReadPool::PartsInfos MergeTreePrefetchedReadPool::getPartsInf for (const auto & range : part.ranges) part_info->sum_marks += range.end - range.begin; - part_info->approx_size_of_mark = getApproxSizeOfGranule(*part_info->data_part); + part_info->approx_size_of_mark = getApproximateSizeOfGranule(*part_info->data_part, column_names); const auto task_columns = getReadTaskColumns( part_reader_info, @@ -357,7 +338,7 @@ MergeTreePrefetchedReadPool::PartsInfos MergeTreePrefetchedReadPool::getPartsInf prewhere_info, actions_settings, reader_settings, - /*with_subcolumns=*/ true); + /* with_subcolumns */ true); part_info->size_predictor = !predict_block_size_bytes ? nullptr @@ -421,10 +402,6 @@ MergeTreePrefetchedReadPool::ThreadsTasks MergeTreePrefetchedReadPool::createThr } size_t min_prefetch_step_marks = 0; - if (settings.filesystem_prefetches_limit && settings.filesystem_prefetches_limit < sum_marks) - { - min_prefetch_step_marks = static_cast(std::round(static_cast(sum_marks) / settings.filesystem_prefetches_limit)); - } for (const auto & part : parts_infos) { @@ -437,12 +414,6 @@ MergeTreePrefetchedReadPool::ThreadsTasks MergeTreePrefetchedReadPool::createThr part->prefetch_step_marks = std::max( 1, static_cast(std::round(static_cast(settings.filesystem_prefetch_step_bytes) / part->approx_size_of_mark))); } - else - { - /// Experimentally derived ratio. - part->prefetch_step_marks = static_cast( - std::round(std::pow(std::max(1, static_cast(std::round(sum_marks / 1000))), double(1.5)))); - } /// This limit is important to avoid spikes of slow aws getObject requests when parallelizing within one file. /// (The default is taken from here https://docs.aws.amazon.com/whitepapers/latest/s3-optimizing-performance-best-practices/use-byte-range-fetches.html). @@ -450,13 +421,13 @@ MergeTreePrefetchedReadPool::ThreadsTasks MergeTreePrefetchedReadPool::createThr && settings.filesystem_prefetch_min_bytes_for_single_read_task && part->approx_size_of_mark < settings.filesystem_prefetch_min_bytes_for_single_read_task) { - - const size_t new_min_prefetch_step_marks = static_cast( + const size_t min_prefetch_step_marks_by_total_cols = static_cast( std::ceil(static_cast(settings.filesystem_prefetch_min_bytes_for_single_read_task) / part->approx_size_of_mark)); + /// At least one task to start working on it right now and another one to prefetch in the meantime. 
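+ /// A rough illustration with made-up numbers: for sum_marks = 2400 and threads = 8 the cap is 2400 / 8 / 2 = 150 marks, so each thread can own at least two tasks even if the byte-based estimate above asked for a larger step.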
+ const size_t new_min_prefetch_step_marks = std::min(min_prefetch_step_marks_by_total_cols, sum_marks / threads / 2); if (min_prefetch_step_marks < new_min_prefetch_step_marks) { - LOG_TEST( - log, "Increasing min prefetch step from {} to {}", min_prefetch_step_marks, new_min_prefetch_step_marks); + LOG_DEBUG(log, "Increasing min prefetch step from {} to {}", min_prefetch_step_marks, new_min_prefetch_step_marks); min_prefetch_step_marks = new_min_prefetch_step_marks; } @@ -464,25 +435,33 @@ MergeTreePrefetchedReadPool::ThreadsTasks MergeTreePrefetchedReadPool::createThr if (part->prefetch_step_marks < min_prefetch_step_marks) { - LOG_TEST( - log, "Increasing prefetch step from {} to {} because of the prefetches limit {}", - part->prefetch_step_marks, min_prefetch_step_marks, settings.filesystem_prefetches_limit); + LOG_DEBUG(log, "Increasing prefetch step from {} to {}", part->prefetch_step_marks, min_prefetch_step_marks); part->prefetch_step_marks = min_prefetch_step_marks; } - LOG_TEST(log, - "Part: {}, sum_marks: {}, approx mark size: {}, prefetch_step_bytes: {}, prefetch_step_marks: {}, (ranges: {})", - part->data_part->name, part->sum_marks, part->approx_size_of_mark, - settings.filesystem_prefetch_step_bytes, part->prefetch_step_marks, toString(part->ranges)); + LOG_DEBUG( + log, + "Part: {}, sum_marks: {}, approx mark size: {}, prefetch_step_bytes: {}, prefetch_step_marks: {}, (ranges: {})", + part->data_part->name, + part->sum_marks, + part->approx_size_of_mark, + settings.filesystem_prefetch_step_bytes, + part->prefetch_step_marks, + toString(part->ranges)); } const size_t min_marks_per_thread = (sum_marks - 1) / threads + 1; LOG_DEBUG( log, - "Sum marks: {}, threads: {}, min_marks_per_thread: {}, result prefetch step marks: {}, prefetches limit: {}, total_size_approx: {}", - sum_marks, threads, min_marks_per_thread, settings.filesystem_prefetch_step_bytes, settings.filesystem_prefetches_limit, total_size_approx); + "Sum marks: {}, threads: {}, min_marks_per_thread: {}, min prefetch step marks: {}, prefetches limit: {}, total_size_approx: {}", + sum_marks, + threads, + min_marks_per_thread, + min_prefetch_step_marks, + settings.filesystem_prefetches_limit, + total_size_approx); size_t allowed_memory_usage = settings.filesystem_prefetch_max_memory_usage; if (!allowed_memory_usage) @@ -492,6 +471,7 @@ MergeTreePrefetchedReadPool::ThreadsTasks MergeTreePrefetchedReadPool::createThr : std::nullopt; ThreadsTasks result_threads_tasks; + size_t total_tasks = 0; for (size_t i = 0, part_idx = 0; i < threads && part_idx < parts_infos.size(); ++i) { auto need_marks = min_marks_per_thread; @@ -606,12 +586,11 @@ MergeTreePrefetchedReadPool::ThreadsTasks MergeTreePrefetchedReadPool::createThr ++priority.value; result_threads_tasks[i].push_back(std::move(read_task)); + ++total_tasks; } } - LOG_TEST( - log, "Result tasks {} for {} threads: {}", - result_threads_tasks.size(), threads, dumpTasks(result_threads_tasks)); + LOG_TEST(log, "Result tasks {} for {} threads: {}", total_tasks, threads, dumpTasks(result_threads_tasks)); return result_threads_tasks; } From e88fc3989534986e78561a967a9263eda7548d3f Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 12 Jun 2023 14:32:09 +0200 Subject: [PATCH 0347/1997] cosmetics --- .../IO/AsynchronousBoundedReadBuffer.cpp | 23 +++++++++++-------- .../IO/CachedOnDiskReadBufferFromFile.cpp | 4 ++++ 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/src/Disks/IO/AsynchronousBoundedReadBuffer.cpp b/src/Disks/IO/AsynchronousBoundedReadBuffer.cpp 
index f9bd68222ae..6651658e156 100644 --- a/src/Disks/IO/AsynchronousBoundedReadBuffer.cpp +++ b/src/Disks/IO/AsynchronousBoundedReadBuffer.cpp @@ -33,6 +33,15 @@ namespace ProfileEvents extern const Event RemoteFSBuffers; } +namespace +{ +size_t chooseBufferSizeForRemoteReading(const DB::ReadSettings & settings, size_t file_size) +{ + /// Buffers used for prefetch or pre-download better to have enough size, but not bigger than the whole file. + return std::min(std::max(settings.prefetch_buffer_size, DBMS_DEFAULT_BUFFER_SIZE), file_size); +} +} + namespace DB { @@ -42,23 +51,17 @@ namespace ErrorCodes extern const int ARGUMENT_OUT_OF_BOUND; } -static size_t chooseBufferSize(const ReadSettings & settings, size_t file_size) -{ - /// Buffers used for prefetch or pre-download better to have enough size, but not bigger than the whole file. - return std::min(std::max(settings.prefetch_buffer_size, DBMS_DEFAULT_BUFFER_SIZE), file_size); -} - AsynchronousBoundedReadBuffer::AsynchronousBoundedReadBuffer( ImplPtr impl_, IAsynchronousReader & reader_, const ReadSettings & settings_, AsyncReadCountersPtr async_read_counters_, FilesystemReadPrefetchesLogPtr prefetches_log_) - : ReadBufferFromFileBase(chooseBufferSize(settings_, impl_->getFileSize()), nullptr, 0) + : ReadBufferFromFileBase(chooseBufferSizeForRemoteReading(settings_, impl_->getFileSize()), nullptr, 0) , impl(std::move(impl_)) , read_settings(settings_) , reader(reader_) - , prefetch_buffer(chooseBufferSize(settings_, impl->getFileSize())) + , prefetch_buffer(chooseBufferSizeForRemoteReading(read_settings, impl->getFileSize())) , query_id(CurrentThread::isInitialized() && CurrentThread::get().getQueryContext() != nullptr ? CurrentThread::getQueryId() : "") , current_reader_id(getRandomASCIIString(8)) , log(&Poco::Logger::get("AsynchronousBoundedReadBuffer")) @@ -111,7 +114,7 @@ void AsynchronousBoundedReadBuffer::prefetch(Priority priority) last_prefetch_info.submit_time = std::chrono::system_clock::now(); last_prefetch_info.priority = priority; - chassert(prefetch_buffer.size() == chooseBufferSize(read_settings, impl->getFileSize())); + chassert(prefetch_buffer.size() == chooseBufferSizeForRemoteReading(read_settings, impl->getFileSize())); prefetch_future = asyncReadInto(prefetch_buffer.data(), prefetch_buffer.size(), priority); ProfileEvents::increment(ProfileEvents::RemoteFSPrefetches); } @@ -190,7 +193,7 @@ bool AsynchronousBoundedReadBuffer::nextImpl() { ProfileEventTimeIncrement watch(ProfileEvents::SynchronousRemoteReadWaitMicroseconds); - chassert(memory.size() == chooseBufferSize(read_settings, impl->getFileSize())); + chassert(memory.size() == chooseBufferSizeForRemoteReading(read_settings, impl->getFileSize())); std::tie(size, offset) = impl->readInto(memory.data(), memory.size(), file_offset_of_buffer_end, bytes_to_ignore); ProfileEvents::increment(ProfileEvents::RemoteFSUnprefetchedReads); diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp index 6317aba20e9..bfde6d0984c 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp @@ -1085,6 +1085,10 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep() first_offset, file_segments->toString()); + /// Release buffer a little bit earlier. 
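+ /// (Once the requested range is fully read there will be no further nextImpl() calls, so keeping the implementation buffer alive would only pin its memory, and possibly a connection, until destruction.)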
+ if (read_until_position == file_offset_of_buffer_end) + implementation_buffer.reset(); + return result; } From 1d33043fe673d5ebc86b68fbbdb563c1cbcdbb0f Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 12 Jun 2023 16:18:47 +0200 Subject: [PATCH 0348/1997] changes around buffer sizes --- .../IO/AsynchronousBoundedReadBuffer.cpp | 9 ---- src/Disks/IO/ReadBufferFromRemoteFSGather.cpp | 41 ++++++++++++++----- src/Disks/IO/ReadBufferFromRemoteFSGather.h | 1 + 3 files changed, 31 insertions(+), 20 deletions(-) diff --git a/src/Disks/IO/AsynchronousBoundedReadBuffer.cpp b/src/Disks/IO/AsynchronousBoundedReadBuffer.cpp index 6651658e156..86ee541dcbd 100644 --- a/src/Disks/IO/AsynchronousBoundedReadBuffer.cpp +++ b/src/Disks/IO/AsynchronousBoundedReadBuffer.cpp @@ -33,15 +33,6 @@ namespace ProfileEvents extern const Event RemoteFSBuffers; } -namespace -{ -size_t chooseBufferSizeForRemoteReading(const DB::ReadSettings & settings, size_t file_size) -{ - /// Buffers used for prefetch or pre-download better to have enough size, but not bigger than the whole file. - return std::min(std::max(settings.prefetch_buffer_size, DBMS_DEFAULT_BUFFER_SIZE), file_size); -} -} - namespace DB { diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp index eb9c509e459..537c0cf1be7 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp @@ -2,14 +2,27 @@ #include +#include #include #include -#include +#include #include -#include -#include #include +#include +#include +using namespace DB; + + +namespace +{ +bool withCache(const ReadSettings & settings) +{ + return settings.remote_fs_cache && settings.enable_filesystem_cache + && (!CurrentThread::getQueryId().empty() || settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache + || !settings.avoid_readthrough_cache_outside_query_context); +} +} namespace DB { @@ -18,29 +31,35 @@ namespace ErrorCodes extern const int CANNOT_SEEK_THROUGH_FILE; } +size_t chooseBufferSizeForRemoteReading(const DB::ReadSettings & settings, size_t file_size) +{ + /// Only when cache is used we could download bigger portions of FileSegments than what we actually gonna read within particular task. + if (!withCache(settings)) + return settings.remote_fs_buffer_size; + + /// Buffers used for prefetch and pre-download better to have enough size, but not bigger than the whole file. + return std::min(std::max(settings.remote_fs_buffer_size, DBMS_DEFAULT_BUFFER_SIZE), file_size); +} + ReadBufferFromRemoteFSGather::ReadBufferFromRemoteFSGather( ReadBufferCreator && read_buffer_creator_, const StoredObjects & blobs_to_read_, const ReadSettings & settings_, std::shared_ptr cache_log_, bool use_external_buffer_) - : ReadBufferFromFileBase(use_external_buffer_ ? 0 : settings_.remote_fs_buffer_size, nullptr, 0) + : ReadBufferFromFileBase( + use_external_buffer_ ? 0 : chooseBufferSizeForRemoteReading(settings_, getTotalSize(blobs_to_read_)), nullptr, 0) , settings(settings_) , blobs_to_read(blobs_to_read_) , read_buffer_creator(std::move(read_buffer_creator_)) , cache_log(settings.enable_filesystem_cache_log ? cache_log_ : nullptr) - , query_id(CurrentThread::isInitialized() && CurrentThread::get().getQueryContext() != nullptr ? 
CurrentThread::getQueryId() : "") + , query_id(CurrentThread::getQueryId()) , use_external_buffer(use_external_buffer_) + , with_cache(withCache(settings)) , log(&Poco::Logger::get("ReadBufferFromRemoteFSGather")) { if (!blobs_to_read.empty()) current_object = blobs_to_read.front(); - - with_cache = settings.remote_fs_cache - && settings.enable_filesystem_cache - && (!query_id.empty() - || settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache - || !settings.avoid_readthrough_cache_outside_query_context); } SeekableReadBufferPtr ReadBufferFromRemoteFSGather::createImplementationBuffer(const StoredObject & object) diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.h b/src/Disks/IO/ReadBufferFromRemoteFSGather.h index 272ed2b3ac1..9bf55ab69ce 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.h +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.h @@ -86,4 +86,5 @@ private: Poco::Logger * log; }; +size_t chooseBufferSizeForRemoteReading(const DB::ReadSettings & settings, size_t file_size); } From 1dddcc94726bfca062da2af1b9880df5fa5e4268 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 12 Jun 2023 16:19:05 +0200 Subject: [PATCH 0349/1997] use connection pool --- src/Common/PoolBase.h | 88 +++++++++++++------- src/Disks/ObjectStorages/S3/diskSettings.cpp | 3 + src/IO/HTTPCommon.cpp | 59 ++++++++++--- src/IO/HTTPCommon.h | 16 +++- src/IO/ReadBufferFromS3.cpp | 50 ++++++++++- src/IO/ReadBufferFromS3.h | 6 +- src/IO/S3/PocoHTTPClient.cpp | 40 +++++++-- src/IO/S3/PocoHTTPClient.h | 25 ++++++ src/IO/S3/SessionAwareIOStream.h | 4 + 9 files changed, 239 insertions(+), 52 deletions(-) diff --git a/src/Common/PoolBase.h b/src/Common/PoolBase.h index 8cabb472d8f..5575b56f299 100644 --- a/src/Common/PoolBase.h +++ b/src/Common/PoolBase.h @@ -1,9 +1,11 @@ #pragma once -#include #include -#include +#include +#include +#include #include +#include #include #include @@ -15,14 +17,6 @@ namespace ProfileEvents extern const Event ConnectionPoolIsFullMicroseconds; } -namespace DB -{ - namespace ErrorCodes - { - extern const int LOGICAL_ERROR; - } -} - /** A class from which you can inherit and get a pool of something. Used for database connection pools. * Descendant class must provide a method for creating a new object to place in the pool. */ @@ -35,6 +29,22 @@ public: using ObjectPtr = std::shared_ptr; using Ptr = std::shared_ptr>; + enum class BehaviourOnLimit + { + /** + * Default behaviour - when limit on pool size is reached, callers will wait until object will be returned back in pool. + */ + Wait, + + /** + * If no free objects in pool - allocate a new object, but not store it in pool. + * This behaviour is needed when we simply don't want to waste time waiting or if we cannot guarantee that query could be processed using fixed amount of connections. + * For example, when we read from table on s3, one GetObject request corresponds to the whole FileSystemCache segment. This segments are shared between different + * reading tasks, so in general case connection could be taken from pool by one task and returned back by another one. And these tasks are processed completely independently. + */ + AllocateNewBypassingPool, + }; + private: /** The object with the flag, whether it is currently used. 
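(Under BehaviourOnLimit::AllocateNewBypassingPool an Entry may bypass this wrapper entirely and hold a bare ObjectPtr instead; see the std::variant in Entry::data below.)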
*/ @@ -89,37 +99,53 @@ public: Object & operator*() && = delete; const Object & operator*() const && = delete; - Object * operator->() & { return &*data->data.object; } - const Object * operator->() const & { return &*data->data.object; } - Object & operator*() & { return *data->data.object; } - const Object & operator*() const & { return *data->data.object; } + Object * operator->() & { return castToObjectPtr(); } + const Object * operator->() const & { return castToObjectPtr(); } + Object & operator*() & { return *castToObjectPtr(); } + const Object & operator*() const & { return *castToObjectPtr(); } /** * Expire an object to make it reallocated later. */ void expire() { - data->data.is_expired = true; + if (data.index() == 1) + std::get<1>(data)->data.is_expired = true; } - bool isNull() const { return data == nullptr; } - - PoolBase * getPool() const - { - if (!data) - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Attempt to get pool from uninitialized entry"); - return &data->data.pool; - } + bool isNull() const { return data.index() == 0 ? !std::get<0>(data) : !std::get<1>(data); } private: - std::shared_ptr data; + /** + * Plain object will be stored instead of PoolEntryHelper if fallback was made in get() (see BehaviourOnLimit::AllocateNewBypassingPool). + */ + std::variant> data; - explicit Entry(PooledObject & object) : data(std::make_shared(object)) {} + explicit Entry(ObjectPtr && object) : data(std::move(object)) { } + + explicit Entry(PooledObject & object) : data(std::make_shared(object)) { } + + auto castToObjectPtr() const + { + return std::visit( + [](const auto & ptr) + { + using T = std::decay_t; + if constexpr (std::is_same_v) + return ptr.get(); + else + return ptr->data.object.get(); + }, + data); + } }; virtual ~PoolBase() = default; - /** Allocates the object. Wait for free object in pool for 'timeout'. With 'timeout' < 0, the timeout is infinite. */ + /** Allocates the object. + * If 'behaviour_on_limit' is Wait - wait for free object in pool for 'timeout'. With 'timeout' < 0, the timeout is infinite. + * If 'behaviour_on_limit' is AllocateNewBypassingPool and there is no free object - a new object will be created but not stored in the pool. + */ Entry get(Poco::Timespan::TimeDiff timeout) { std::unique_lock lock(mutex); @@ -150,6 +176,9 @@ public: return Entry(*items.back()); } + if (behaviour_on_limit == BehaviourOnLimit::AllocateNewBypassingPool) + return Entry(allocObject()); + Stopwatch blocked; if (timeout < 0) { @@ -184,6 +213,8 @@ private: /** The maximum size of the pool. */ unsigned max_items; + BehaviourOnLimit behaviour_on_limit; + /** Pool. 
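Never grows beyond max_items; objects allocated past the limit under AllocateNewBypassingPool are not stored here and are destroyed together with their Entry.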
*/ Objects items; @@ -192,11 +223,10 @@ private: std::condition_variable available; protected: - Poco::Logger * log; - PoolBase(unsigned max_items_, Poco::Logger * log_) - : max_items(max_items_), log(log_) + PoolBase(unsigned max_items_, Poco::Logger * log_, BehaviourOnLimit behaviour_on_limit_ = BehaviourOnLimit::Wait) + : max_items(max_items_), behaviour_on_limit(behaviour_on_limit_), log(log_) { items.reserve(max_items); } diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index 409eb2a3dc3..fe57fb24bbd 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -132,6 +132,9 @@ std::unique_ptr getClient( client_configuration.requestTimeoutMs = config.getUInt(config_prefix + ".request_timeout_ms", 30000); client_configuration.maxConnections = config.getUInt(config_prefix + ".max_connections", 100); client_configuration.endpointOverride = uri.endpoint; + client_configuration.http_keep_alive_timeout_ms = config.getUInt(config_prefix + ".http_keep_alive_timeout_ms", 10000); + client_configuration.http_connection_pool_size = config.getUInt(config_prefix + ".http_connection_pool_size", 1000); + client_configuration.wait_on_pool_size_limit = false; auto proxy_config = getProxyConfiguration(config_prefix, config); if (proxy_config) diff --git a/src/IO/HTTPCommon.cpp b/src/IO/HTTPCommon.cpp index 3ec9b3d0a83..f3e2064c8bf 100644 --- a/src/IO/HTTPCommon.cpp +++ b/src/IO/HTTPCommon.cpp @@ -131,8 +131,12 @@ namespace UInt16 proxy_port_, bool proxy_https_, size_t max_pool_size_, - bool resolve_host_ = true) - : Base(static_cast(max_pool_size_), &Poco::Logger::get("HTTPSessionPool")) + bool resolve_host_, + bool wait_on_pool_size_limit) + : Base( + static_cast(max_pool_size_), + &Poco::Logger::get("HTTPSessionPool"), + wait_on_pool_size_limit ? 
BehaviourOnLimit::Wait : BehaviourOnLimit::AllocateNewBypassingPool) , host(host_) , port(port_) , https(https_) @@ -155,11 +159,12 @@ namespace String proxy_host; UInt16 proxy_port; bool is_proxy_https; + bool wait_on_pool_size_limit; bool operator ==(const Key & rhs) const { - return std::tie(target_host, target_port, is_target_https, proxy_host, proxy_port, is_proxy_https) - == std::tie(rhs.target_host, rhs.target_port, rhs.is_target_https, rhs.proxy_host, rhs.proxy_port, rhs.is_proxy_https); + return std::tie(target_host, target_port, is_target_https, proxy_host, proxy_port, is_proxy_https, wait_on_pool_size_limit) + == std::tie(rhs.target_host, rhs.target_port, rhs.is_target_https, rhs.proxy_host, rhs.proxy_port, rhs.is_proxy_https, rhs.wait_on_pool_size_limit); } }; @@ -178,6 +183,7 @@ namespace s.update(k.proxy_host); s.update(k.proxy_port); s.update(k.is_proxy_https); + s.update(k.wait_on_pool_size_limit); return s.get64(); } }; @@ -218,14 +224,14 @@ namespace const Poco::URI & proxy_uri, const ConnectionTimeouts & timeouts, size_t max_connections_per_endpoint, - bool resolve_host = true) + bool resolve_host, + bool wait_on_pool_size_limit) { - std::lock_guard lock(mutex); + std::unique_lock lock(mutex); const std::string & host = uri.getHost(); UInt16 port = uri.getPort(); bool https = isHTTPS(uri); - String proxy_host; UInt16 proxy_port = 0; bool proxy_https = false; @@ -236,11 +242,27 @@ namespace proxy_https = isHTTPS(proxy_uri); } - HTTPSessionPool::Key key{host, port, https, proxy_host, proxy_port, proxy_https}; + HTTPSessionPool::Key key{host, port, https, proxy_host, proxy_port, proxy_https, wait_on_pool_size_limit}; auto pool_ptr = endpoints_pool.find(key); if (pool_ptr == endpoints_pool.end()) std::tie(pool_ptr, std::ignore) = endpoints_pool.emplace( - key, std::make_shared(host, port, https, proxy_host, proxy_port, proxy_https, max_connections_per_endpoint, resolve_host)); + key, + std::make_shared( + host, + port, + https, + proxy_host, + proxy_port, + proxy_https, + max_connections_per_endpoint, + resolve_host, + wait_on_pool_size_limit)); + + /// Some routines held session objects until the end of its lifetime. Also this routines may create another sessions in this time frame. + /// If some other session holds `lock` because it waits on another lock inside `pool_ptr->second->get` it isn't possible to create any + /// new session and thus finish routine, return session to the pool and unlock the thread waiting inside `pool_ptr->second->get`. + /// To avoid such a deadlock we unlock `lock` before entering `pool_ptr->second->get`. 
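+ /// (Dropping the lock is fine for the map itself: entries are only ever added to endpoints_pool, never erased, so the pool object obtained above stays alive while we wait.)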
+ lock.unlock(); auto retry_timeout = timeouts.connection_timeout.totalMicroseconds(); auto session = pool_ptr->second->get(retry_timeout); @@ -295,14 +317,25 @@ HTTPSessionPtr makeHTTPSession(const Poco::URI & uri, const ConnectionTimeouts & } -PooledHTTPSessionPtr makePooledHTTPSession(const Poco::URI & uri, const ConnectionTimeouts & timeouts, size_t per_endpoint_pool_size, bool resolve_host) +PooledHTTPSessionPtr makePooledHTTPSession( + const Poco::URI & uri, + const ConnectionTimeouts & timeouts, + size_t per_endpoint_pool_size, + bool resolve_host, + bool wait_on_pool_size_limit) { - return makePooledHTTPSession(uri, {}, timeouts, per_endpoint_pool_size, resolve_host); + return makePooledHTTPSession(uri, {}, timeouts, per_endpoint_pool_size, resolve_host, wait_on_pool_size_limit); } -PooledHTTPSessionPtr makePooledHTTPSession(const Poco::URI & uri, const Poco::URI & proxy_uri, const ConnectionTimeouts & timeouts, size_t per_endpoint_pool_size, bool resolve_host) +PooledHTTPSessionPtr makePooledHTTPSession( + const Poco::URI & uri, + const Poco::URI & proxy_uri, + const ConnectionTimeouts & timeouts, + size_t per_endpoint_pool_size, + bool resolve_host, + bool wait_on_pool_size_limit) { - return HTTPSessionPool::instance().getSession(uri, proxy_uri, timeouts, per_endpoint_pool_size, resolve_host); + return HTTPSessionPool::instance().getSession(uri, proxy_uri, timeouts, per_endpoint_pool_size, resolve_host, wait_on_pool_size_limit); } bool isRedirect(const Poco::Net::HTTPResponse::HTTPStatus status) { return status == Poco::Net::HTTPResponse::HTTP_MOVED_PERMANENTLY || status == Poco::Net::HTTPResponse::HTTP_FOUND || status == Poco::Net::HTTPResponse::HTTP_SEE_OTHER || status == Poco::Net::HTTPResponse::HTTP_TEMPORARY_REDIRECT; } diff --git a/src/IO/HTTPCommon.h b/src/IO/HTTPCommon.h index 3616a33c1c7..db8fc2a2a40 100644 --- a/src/IO/HTTPCommon.h +++ b/src/IO/HTTPCommon.h @@ -61,8 +61,20 @@ void setResponseDefaultHeaders(HTTPServerResponse & response, size_t keep_alive_ HTTPSessionPtr makeHTTPSession(const Poco::URI & uri, const ConnectionTimeouts & timeouts, bool resolve_host = true); /// Like the previous method, creates a session, but takes it from the pool; overloads without and with a proxy URI.
-PooledHTTPSessionPtr makePooledHTTPSession(const Poco::URI & uri, const ConnectionTimeouts & timeouts, size_t per_endpoint_pool_size, bool resolve_host = true); -PooledHTTPSessionPtr makePooledHTTPSession(const Poco::URI & uri, const Poco::URI & proxy_uri, const ConnectionTimeouts & timeouts, size_t per_endpoint_pool_size, bool resolve_host = true); +PooledHTTPSessionPtr makePooledHTTPSession( + const Poco::URI & uri, + const ConnectionTimeouts & timeouts, + size_t per_endpoint_pool_size, + bool resolve_host = true, + bool wait_on_pool_size_limit = true); + +PooledHTTPSessionPtr makePooledHTTPSession( + const Poco::URI & uri, + const Poco::URI & proxy_uri, + const ConnectionTimeouts & timeouts, + size_t per_endpoint_pool_size, + bool resolve_host = true, + bool wait_on_pool_size_limit = true); bool isRedirect(Poco::Net::HTTPResponse::HTTPStatus status); diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp index d1cb1ec9ab0..364253ba746 100644 --- a/src/IO/ReadBufferFromS3.cpp +++ b/src/IO/ReadBufferFromS3.cpp @@ -31,6 +31,23 @@ namespace ProfileEvents extern const Event RemoteReadThrottlerSleepMicroseconds; } +namespace +{ +void resetSession(Aws::S3::Model::GetObjectResult & read_result) +{ + if (auto * session_aware_stream = dynamic_cast *>(&read_result.GetBody())) + { + auto & session + = static_cast(*static_cast(session_aware_stream->getSession())); + session.reset(); + } + else if (!dynamic_cast *>(&read_result.GetBody())) + { + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Session of unexpected type encountered"); + } +} +} + namespace DB { namespace ErrorCodes @@ -74,7 +91,10 @@ bool ReadBufferFromS3::nextImpl() if (read_until_position) { if (read_until_position == offset) + { + read_all_range_successfully = true; return false; + } if (read_until_position < offset) throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to read beyond right offset ({} > {})", offset, read_until_position - 1); @@ -154,7 +174,10 @@ bool ReadBufferFromS3::nextImpl() } if (!next_result) + { + read_all_range_successfully = true; return false; + } BufferBase::set(impl->buffer().begin(), impl->buffer().size(), impl->offset()); @@ -240,6 +263,8 @@ off_t ReadBufferFromS3::seek(off_t offset_, int whence) if (offset_ == getPosition() && whence == SEEK_SET) return offset_; + read_all_range_successfully = false; + if (impl && restricted_seek) { throw Exception( @@ -312,6 +337,8 @@ void ReadBufferFromS3::setReadUntilPosition(size_t position) { if (position != static_cast(read_until_position)) { + read_all_range_successfully = false; + if (impl) { if (!atEndOfRequestedRangeGuess()) @@ -328,6 +355,8 @@ void ReadBufferFromS3::setReadUntilEnd() { if (read_until_position) { + read_all_range_successfully = false; + read_until_position = 0; if (impl) { @@ -351,8 +380,27 @@ bool ReadBufferFromS3::atEndOfRequestedRangeGuess() return false; } +ReadBufferFromS3::~ReadBufferFromS3() +{ + try + { + if (!read_all_range_successfully && read_result) + /// When we abandon a session with an ongoing GetObject request while another request is trying to delete the same object, the delete + /// operation will hang until the GetObject session's idle timeout expires. So we have to call `reset()` on the GetObject session immediately. + resetSession(*read_result); + } + catch (...)
+ { + tryLogCurrentException(log); + } +} + std::unique_ptr ReadBufferFromS3::initialize() { + if (!read_all_range_successfully && read_result) + resetSession(*read_result); + read_all_range_successfully = false; + /** * If remote_filesystem_read_method = 'threadpool', then for MergeTree family tables * exact byte ranges to read are always passed here. @@ -363,7 +411,7 @@ std::unique_ptr ReadBufferFromS3::initialize() read_result = sendRequest(offset, read_until_position ? std::make_optional(read_until_position - 1) : std::nullopt); size_t buffer_size = use_external_buffer ? 0 : read_settings.remote_fs_buffer_size; - return std::make_unique(read_result.GetBody(), buffer_size); + return std::make_unique(read_result->GetBody(), buffer_size); } Aws::S3::Model::GetObjectResult ReadBufferFromS3::sendRequest(size_t range_begin, std::optional range_end_incl) const diff --git a/src/IO/ReadBufferFromS3.h b/src/IO/ReadBufferFromS3.h index 0f665861a1e..11299aa2c2a 100644 --- a/src/IO/ReadBufferFromS3.h +++ b/src/IO/ReadBufferFromS3.h @@ -41,7 +41,7 @@ private: std::atomic offset = 0; std::atomic read_until_position = 0; - Aws::S3::Model::GetObjectResult read_result; + std::optional read_result; std::unique_ptr impl; Poco::Logger * log = &Poco::Logger::get("ReadBufferFromS3"); @@ -60,6 +60,8 @@ public: bool restricted_seek_ = false, std::optional file_size = std::nullopt); + ~ReadBufferFromS3() override; + bool nextImpl() override; off_t seek(off_t off, int whence) override; @@ -100,6 +102,8 @@ private: /// There is different seek policy for disk seek and for non-disk seek /// (non-disk seek is applied for seekable input formats: orc, arrow, parquet). bool restricted_seek; + + bool read_all_range_successfully = false; }; } diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index bfda7149343..754b1bfd5b8 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -1,3 +1,4 @@ +#include #include "Common/DNSResolver.h" #include "config.h" @@ -138,8 +139,9 @@ PocoHTTPClient::PocoHTTPClient(const PocoHTTPClientConfiguration & client_config , timeouts(ConnectionTimeouts( Poco::Timespan(client_configuration.connectTimeoutMs * 1000), /// connection timeout. Poco::Timespan(client_configuration.requestTimeoutMs * 1000), /// send timeout. - Poco::Timespan(client_configuration.requestTimeoutMs * 1000) /// receive timeout. - )) + Poco::Timespan(client_configuration.requestTimeoutMs * 1000), /// receive timeout. + Poco::Timespan(client_configuration.enableTcpKeepAlive ? 
client_configuration.tcpKeepAliveIntervalMs * 1000 : 0), + Poco::Timespan(client_configuration.http_keep_alive_timeout_ms * 1000))) /// flag indicating whether keep-alive is enabled is set to each session upon creation , remote_host_filter(client_configuration.remote_host_filter) , s3_max_redirects(client_configuration.s3_max_redirects) , enable_s3_requests_logging(client_configuration.enable_s3_requests_logging) @@ -147,6 +149,8 @@ PocoHTTPClient::PocoHTTPClient(const PocoHTTPClientConfiguration & client_config , get_request_throttler(client_configuration.get_request_throttler) , put_request_throttler(client_configuration.put_request_throttler) , extra_headers(client_configuration.extra_headers) + , http_connection_pool_size(client_configuration.http_connection_pool_size) + , wait_on_pool_size_limit(client_configuration.wait_on_pool_size_limit) { } @@ -254,9 +258,26 @@ void PocoHTTPClient::addMetric(const Aws::Http::HttpRequest & request, S3MetricT void PocoHTTPClient::makeRequestInternal( Aws::Http::HttpRequest & request, std::shared_ptr & response, + Aws::Utils::RateLimits::RateLimiterInterface * readLimiter , + Aws::Utils::RateLimits::RateLimiterInterface * writeLimiter) const +{ + const auto request_configuration = per_request_configuration(request); + if (http_connection_pool_size && request_configuration.proxy_host.empty()) + makeRequestInternalImpl(request, request_configuration, response, readLimiter, writeLimiter); + else + makeRequestInternalImpl(request, request_configuration, response, readLimiter, writeLimiter); +} + +template +void PocoHTTPClient::makeRequestInternalImpl( + Aws::Http::HttpRequest & request, + const ClientConfigurationPerRequest & request_configuration, + std::shared_ptr & response, Aws::Utils::RateLimits::RateLimiterInterface *, Aws::Utils::RateLimits::RateLimiterInterface *) const { + using SessionPtr = std::conditional_t; + Poco::Logger * log = &Poco::Logger::get("AWSClient"); auto uri = request.GetUri().GetURIString(); @@ -303,8 +324,7 @@ void PocoHTTPClient::makeRequestInternal( for (unsigned int attempt = 0; attempt <= s3_max_redirects; ++attempt) { Poco::URI target_uri(uri); - HTTPSessionPtr session; - auto request_configuration = per_request_configuration(request); + SessionPtr session; if (!request_configuration.proxy_host.empty()) { @@ -313,7 +333,11 @@ void PocoHTTPClient::makeRequestInternal( /// Reverse proxy can replace host header with resolved ip address instead of host name. /// This can lead to request signature difference on S3 side. 
- session = makeHTTPSession(target_uri, timeouts, /* resolve_host = */ false); + if constexpr (pooled) + session = makePooledHTTPSession( + target_uri, timeouts, http_connection_pool_size, /* resolve_host = */ true, wait_on_pool_size_limit); + else + session = makeHTTPSession(target_uri, timeouts, /* resolve_host = */ false); bool use_tunnel = request_configuration.proxy_scheme == Aws::Http::Scheme::HTTP && target_uri.getScheme() == "https"; session->setProxy( @@ -325,7 +349,11 @@ void PocoHTTPClient::makeRequestInternal( } else { - session = makeHTTPSession(target_uri, timeouts, /* resolve_host = */ true); + if constexpr (pooled) + session = makePooledHTTPSession( + target_uri, timeouts, http_connection_pool_size, /* resolve_host = */ true, wait_on_pool_size_limit); + else + session = makeHTTPSession(target_uri, timeouts, /* resolve_host = */ false); } /// In case of error this address will be written to logs diff --git a/src/IO/S3/PocoHTTPClient.h b/src/IO/S3/PocoHTTPClient.h index 762178a9365..92d3d5c5747 100644 --- a/src/IO/S3/PocoHTTPClient.h +++ b/src/IO/S3/PocoHTTPClient.h @@ -53,6 +53,13 @@ struct PocoHTTPClientConfiguration : public Aws::Client::ClientConfiguration ThrottlerPtr put_request_throttler; HTTPHeaderEntries extra_headers; + /// Not a client parameter in terms of HTTP and we won't send it to the server. Used internally to determine when connections have to be re-established. + uint32_t http_keep_alive_timeout_ms = 0; + /// Zero means pooling will not be used. + size_t http_connection_pool_size = 0; + /// See PoolBase::BehaviourOnLimit + bool wait_on_pool_size_limit = true; + void updateSchemeAndRegion(); std::function error_report; @@ -90,6 +97,12 @@ public: ); } + void SetResponseBody(Aws::IStream & incoming_stream, PooledHTTPSessionPtr & session_) /// NOLINT + { + body_stream = Aws::Utils::Stream::ResponseStream( + Aws::New>("http result streambuf", session_, incoming_stream.rdbuf())); + } + void SetResponseBody(std::string & response_body) /// NOLINT { auto stream = Aws::New("http result buf", response_body); // STYLE_CHECK_ALLOW_STD_STRING_STREAM @@ -149,6 +162,15 @@ private: EnumSize, }; + template + void makeRequestInternalImpl( + Aws::Http::HttpRequest & request, + const ClientConfigurationPerRequest & per_request_configuration, + std::shared_ptr & response, + Aws::Utils::RateLimits::RateLimiterInterface * readLimiter, + Aws::Utils::RateLimits::RateLimiterInterface * writeLimiter) const; + +protected: static S3MetricKind getMetricKind(const Aws::Http::HttpRequest & request); void addMetric(const Aws::Http::HttpRequest & request, S3MetricType type, ProfileEvents::Count amount = 1) const; @@ -170,6 +192,9 @@ private: ThrottlerPtr put_request_throttler; const HTTPHeaderEntries extra_headers; + + size_t http_connection_pool_size = 0; + bool wait_on_pool_size_limit = true; }; } diff --git a/src/IO/S3/SessionAwareIOStream.h b/src/IO/S3/SessionAwareIOStream.h index 1640accb6fa..f7e42f99f51 100644 --- a/src/IO/S3/SessionAwareIOStream.h +++ b/src/IO/S3/SessionAwareIOStream.h @@ -18,6 +18,10 @@ public: { } + Session & getSession() { return session; } + + const Session & getSession() const { return session; } + private: /// The Poco HTTP session is the holder of the response stream.
Session session; From c8cbc9f8ce36fa49a0785c7f9792c6cf154e06da Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 12 Jun 2023 16:19:14 +0200 Subject: [PATCH 0350/1997] fix test --- tests/integration/test_merge_tree_s3/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_merge_tree_s3/test.py b/tests/integration/test_merge_tree_s3/test.py index 2ccd517923a..22805eb6e94 100644 --- a/tests/integration/test_merge_tree_s3/test.py +++ b/tests/integration/test_merge_tree_s3/test.py @@ -923,7 +923,7 @@ def test_merge_canceled_by_s3_errors_when_move(cluster, broken_s3, node_name): @pytest.mark.parametrize("node_name", ["node"]) @pytest.mark.parametrize( - "in_flight_memory", [(10, 245918115), (5, 156786752), (1, 106426187)] + "in_flight_memory", [(10, 288044299), (5, 193557290), (1, 128348733)] ) def test_s3_engine_heavy_write_check_mem( cluster, broken_s3, node_name, in_flight_memory From a4e982442f4a3d6b3007b432f8e0b6211e9aa4e7 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Wed, 14 Jun 2023 11:13:59 +0000 Subject: [PATCH 0351/1997] Update documentation --- docs/en/operations/configuration-files.md | 36 +++++++++++++++++++++++ docs/ru/operations/configuration-files.md | 36 +++++++++++++++++++++++ 2 files changed, 72 insertions(+) diff --git a/docs/en/operations/configuration-files.md b/docs/en/operations/configuration-files.md index b3583e156ad..b5d52acca49 100644 --- a/docs/en/operations/configuration-files.md +++ b/docs/en/operations/configuration-files.md @@ -54,6 +54,42 @@ XML substitution example: Substitutions can also be performed from ZooKeeper. To do this, specify the attribute `from_zk = "/path/to/node"`. The element value is replaced with the contents of the node at `/path/to/node` in ZooKeeper. You can also put an entire XML subtree on the ZooKeeper node and it will be fully inserted into the source element. +## Decryption {#decryption} + +Elements with text nodes may be encrypted with [encryption codecs](../../sql-reference/statements/create/table.md#encryption-codecs). In this case the `<encryption_codecs>` section should be included in the configuration file and each element node with encrypted text should have the `encryption_codec` attribute with the name of the codec. + +Example: + +```xml +<clickhouse> + <encryption_codecs> + <aes_128_gcm_siv> + <key_hex>00112233445566778899aabbccddeeff</key_hex> + </aes_128_gcm_siv> + </encryption_codecs> + <interserver_http_credentials> + <user>admin</user> + <password encryption_codec="aes_128_gcm_siv">961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85</password> + </interserver_http_credentials> +</clickhouse> +``` + +To get the encrypted value, the `encrypt_decrypt` example application may be used. + +Example: + +``` bash +./encrypt_decrypt /etc/clickhouse-server/config.xml -e AES_128_GCM_SIV abcd +``` + +``` text +961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85 +``` + +:::note +Decryption is executed after the creation of the preprocessed configuration file. This means that elements with the `encryption_codec` attribute in the preprocessed configuration file remain encrypted, while the values of the corresponding parameters in the server's memory are decrypted. +::: + ## User Settings {#user-settings} The `config.xml` file can specify a separate config with user settings, profiles, and quotas. The relative path to this config is set in the `users_config` element. By default, it is `users.xml`. If `users_config` is omitted, the user settings, profiles, and quotas are specified directly in `config.xml`.
diff --git a/docs/ru/operations/configuration-files.md b/docs/ru/operations/configuration-files.md index 2b824ce91bd..96512fbbe23 100644 --- a/docs/ru/operations/configuration-files.md +++ b/docs/ru/operations/configuration-files.md @@ -85,6 +85,42 @@ $ cat /etc/clickhouse-server/users.d/alice.xml Сервер следит за изменениями конфигурационных файлов, а также файлов и ZooKeeper-узлов, которые были использованы при выполнении подстановок и переопределений, и перезагружает настройки пользователей и кластеров на лету. То есть, можно изменять кластера, пользователей и их настройки без перезапуска сервера. +## Расшифровка {#decryption} + +Элементы с текстовыми узлами могут быть зашифрованы с помощью [кодеков шифрования](../../sql-reference/statements/create/table.md#encryption-codecs). В этом случае секция `<encryption_codecs>` должна быть включена в конфигурационный файл, и каждый элемент с зашифрованным текстом должен иметь атрибут `encryption_codec` с именем кодека. + +Пример: + +```xml +<clickhouse> + <encryption_codecs> + <aes_128_gcm_siv> + <key_hex>00112233445566778899aabbccddeeff</key_hex> + </aes_128_gcm_siv> + </encryption_codecs> + <interserver_http_credentials> + <user>admin</user> + <password encryption_codec="aes_128_gcm_siv">961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85</password> + </interserver_http_credentials> +</clickhouse> +``` + +Чтобы получить зашифрованное значение, может быть использовано приложение-пример `encrypt_decrypt`. + +Пример: + +``` bash +./encrypt_decrypt /etc/clickhouse-server/config.xml -e AES_128_GCM_SIV abcd +``` + +``` text +961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85 +``` + +:::note +Расшифровка выполняется после создания предобработанного конфигурационного файла. Это означает, что элементы с атрибутом `encryption_codec` в предобработанном конфигурационном файле остаются зашифрованными, а значения соответствующих параметров в памяти сервера расшифрованы. +::: + ## Примеры записи конфигурации на YAML {#example} Здесь можно рассмотреть пример реальной конфигурации записанной на YAML: [config.yaml.example](https://github.com/ClickHouse/ClickHouse/blob/master/programs/server/config.yaml.example).
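The two documentation commits above describe config-value encryption only at the level of the bundled `encrypt_decrypt` tool. As a rough illustration of the primitive behind the `aes_128_gcm_siv` codec, the sketch below encrypts a value with AES-128-GCM-SIV and hex-encodes it. This sketch is not part of any commit: the `cryptography` package and the all-zero 12-byte nonce are assumptions, and its output will not byte-match `encrypt_decrypt`, whose documented value (`961F0000...`) appears to carry a small codec header in front of the ciphertext — use the bundled tool for real configs.

```python
# Illustrative sketch only: AES-128-GCM-SIV encryption of a config value.
# Assumptions: the `cryptography` package (>= 42, with GCM-SIV support) is
# installed, the key is the key_hex from the docs example, and a 12-byte
# zero nonce is used. The real encrypt_decrypt tool wraps its ciphertext
# in a codec frame, so its output differs from this one.
from cryptography.hazmat.primitives.ciphers.aead import AESGCMSIV

key = bytes.fromhex("00112233445566778899aabbccddeeff")
ciphertext = AESGCMSIV(key).encrypt(b"\x00" * 12, b"abcd", None)  # value "abcd", no AAD
print(ciphertext.hex().upper())
```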
From b5d4ad583f3741f87843f51c56ccc41b91833523 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Wed, 14 Jun 2023 11:35:55 +0000 Subject: [PATCH 0352/1997] Small code style improvements --- src/Common/Config/ConfigProcessor.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index df25a9a3825..9548bf33b7b 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -26,9 +26,9 @@ #include #include #include +#include #include #include -#include #include #define PREPROCESSED_SUFFIX "-preprocessed" @@ -194,7 +194,7 @@ std::string ConfigProcessor::encryptValue(const std::string & codec_name, const DB::Memory<> memory; memory.resize(codec.getCompressedReserveSize(static_cast(value.size()))); auto bytes_written = codec.compress(value.data(), static_cast(value.size()), memory.data()); - std::string encrypted_value = std::string(memory.data(), bytes_written); + auto encrypted_value = std::string(memory.data(), bytes_written); std::string hex_value; boost::algorithm::hex(encrypted_value.begin(), encrypted_value.end(), std::back_inserter(hex_value)); return hex_value; @@ -224,7 +224,7 @@ std::string ConfigProcessor::decryptValue(const std::string & codec_name, const void ConfigProcessor::decryptRecursive(Poco::XML::Node * config_root) { - for (Node * node = config_root->firstChild(); node;) + for (Node * node = config_root->firstChild(); node; node = node->nextSibling()) { if (node->nodeType() == Node::ELEMENT_NODE) { @@ -244,7 +244,6 @@ void ConfigProcessor::decryptRecursive(Poco::XML::Node * config_root) } decryptRecursive(node); } - node = node->nextSibling(); } } From 8073e0bad1600746f4682f3ca41076bf15e71f50 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 14 Jun 2023 13:45:53 +0200 Subject: [PATCH 0353/1997] Fix tests --- .../test_mysql_database_engine/configs/user.xml | 10 ++++++++++ tests/integration/test_s3_cluster/configs/users.xml | 9 +++++++++ tests/integration/test_s3_cluster/test.py | 1 + .../test_storage_delta/configs/users.d/users.xml | 9 +++++++++ tests/integration/test_storage_delta/test.py | 1 + tests/integration/test_storage_hudi/test.py | 1 + .../test_storage_iceberg/configs/users.d/users.xml | 9 +++++++++ tests/integration/test_storage_iceberg/test.py | 1 + tests/integration/test_storage_kafka/configs/users.xml | 7 +++++++ tests/integration/test_storage_postgresql/test.py | 2 +- .../test_storage_rabbitmq/configs/users.xml | 7 +++++++ 11 files changed, 56 insertions(+), 1 deletion(-) create mode 100644 tests/integration/test_mysql_database_engine/configs/user.xml create mode 100644 tests/integration/test_s3_cluster/configs/users.xml create mode 100644 tests/integration/test_storage_delta/configs/users.d/users.xml create mode 100644 tests/integration/test_storage_iceberg/configs/users.d/users.xml diff --git a/tests/integration/test_mysql_database_engine/configs/user.xml b/tests/integration/test_mysql_database_engine/configs/user.xml new file mode 100644 index 00000000000..775c63350b0 --- /dev/null +++ b/tests/integration/test_mysql_database_engine/configs/user.xml @@ -0,0 +1,10 @@ + + + + + default + default + 1 + + + diff --git a/tests/integration/test_s3_cluster/configs/users.xml b/tests/integration/test_s3_cluster/configs/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_s3_cluster/configs/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git 
a/tests/integration/test_s3_cluster/test.py b/tests/integration/test_s3_cluster/test.py index 41f19cdd12d..3b8fd80060f 100644 --- a/tests/integration/test_s3_cluster/test.py +++ b/tests/integration/test_s3_cluster/test.py @@ -68,6 +68,7 @@ def started_cluster(): cluster.add_instance( "s0_0_0", main_configs=["configs/cluster.xml", "configs/named_collections.xml"], + user_configs=["configs/users.xml"], macros={"replica": "node1", "shard": "shard1"}, with_minio=True, with_zookeeper=True, diff --git a/tests/integration/test_storage_delta/configs/users.d/users.xml b/tests/integration/test_storage_delta/configs/users.d/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_storage_delta/configs/users.d/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_storage_delta/test.py b/tests/integration/test_storage_delta/test.py index 9477b66dab8..0cd1208edfa 100644 --- a/tests/integration/test_storage_delta/test.py +++ b/tests/integration/test_storage_delta/test.py @@ -53,6 +53,7 @@ def started_cluster(): cluster.add_instance( "node1", main_configs=["configs/config.d/named_collections.xml"], + user_configs=["configs/users.d/users.xml"], with_minio=True, ) diff --git a/tests/integration/test_storage_hudi/test.py b/tests/integration/test_storage_hudi/test.py index de9cde43609..3dbbcb7a06e 100644 --- a/tests/integration/test_storage_hudi/test.py +++ b/tests/integration/test_storage_hudi/test.py @@ -51,6 +51,7 @@ def started_cluster(): cluster.add_instance( "node1", main_configs=["configs/config.d/named_collections.xml"], + user_configs=["configs/users.d/users.xml"], with_minio=True, ) diff --git a/tests/integration/test_storage_iceberg/configs/users.d/users.xml b/tests/integration/test_storage_iceberg/configs/users.d/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_storage_iceberg/configs/users.d/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_storage_iceberg/test.py b/tests/integration/test_storage_iceberg/test.py index b3b2f160740..c22b8cda9b5 100644 --- a/tests/integration/test_storage_iceberg/test.py +++ b/tests/integration/test_storage_iceberg/test.py @@ -53,6 +53,7 @@ def started_cluster(): cluster.add_instance( "node1", main_configs=["configs/config.d/named_collections.xml"], + user_configs=["configs/users.d/users.xml"], with_minio=True, ) diff --git a/tests/integration/test_storage_kafka/configs/users.xml b/tests/integration/test_storage_kafka/configs/users.xml index 992464a0ac2..3168de649f8 100644 --- a/tests/integration/test_storage_kafka/configs/users.xml +++ b/tests/integration/test_storage_kafka/configs/users.xml @@ -6,4 +6,11 @@ 0 + + + + default + 1 + + diff --git a/tests/integration/test_storage_postgresql/test.py b/tests/integration/test_storage_postgresql/test.py index 0c8fc597b5c..49bec6cbe5e 100644 --- a/tests/integration/test_storage_postgresql/test.py +++ b/tests/integration/test_storage_postgresql/test.py @@ -15,7 +15,7 @@ node1 = cluster.add_instance( node2 = cluster.add_instance( "node2", main_configs=["configs/named_collections.xml"], - user_configs=["configs/settings.xml"], + user_configs=["configs/settings.xml", "configs/users.xml"], with_postgres_cluster=True, ) diff --git a/tests/integration/test_storage_rabbitmq/configs/users.xml b/tests/integration/test_storage_rabbitmq/configs/users.xml index 2cef0a6de3c..e42fefa905b 100644 --- a/tests/integration/test_storage_rabbitmq/configs/users.xml +++ 
b/tests/integration/test_storage_rabbitmq/configs/users.xml @@ -4,4 +4,11 @@ 1 + + + + default + 1 + + From f55623aa2d23fda63f2b19720f4035568a4595a4 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Wed, 14 Jun 2023 11:46:43 +0000 Subject: [PATCH 0354/1997] Use anonymous namespace for getEncryptionMethod() --- src/Common/Config/ConfigProcessor.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index 9548bf33b7b..17abc3d161d 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -47,6 +47,9 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } +namespace +{ + /// Get method for string name. Throw exception for wrong name EncryptionMethod getEncryptionMethod(const std::string & name) { @@ -58,6 +61,8 @@ EncryptionMethod getEncryptionMethod(const std::string & name) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption Method. Got {}", name); } +} + /// For cutting preprocessed path to this base static std::string main_config_path; From 739dc6739f085e097cb39dbabf6e24588722d8cb Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 14 Jun 2023 13:31:02 +0200 Subject: [PATCH 0355/1997] Progress --- src/Interpreters/Cache/FileCache.cpp | 22 +++- src/Interpreters/Cache/FileCache.h | 5 +- src/Interpreters/Cache/FileCacheSettings.cpp | 8 +- src/Interpreters/Cache/FileCache_fwd.h | 3 +- src/Interpreters/Cache/FileSegment.cpp | 67 +++++----- src/Interpreters/Cache/Metadata.cpp | 128 ++++++++++++++++++- src/Interpreters/Cache/Metadata.h | 13 ++ 7 files changed, 201 insertions(+), 45 deletions(-) diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index e6bc8b1f79b..65f8ecf7e89 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include @@ -49,9 +50,9 @@ FileCache::FileCache(const FileCacheSettings & settings) , allow_persistent_files(settings.do_not_evict_index_and_mark_files) , bypass_cache_threshold(settings.enable_bypass_cache_with_threashold ? 
settings.bypass_cache_threashold : 0) , delayed_cleanup_interval_ms(settings.delayed_cleanup_interval_ms) + , boundary_alignment(settings.boundary_alignment) , log(&Poco::Logger::get("FileCache")) , metadata(settings.base_path) - , boundary_alignment(settings.boundary_alignment) { main_priority = std::make_unique(settings.max_size, settings.max_elements); @@ -124,7 +125,12 @@ void FileCache::initialize() is_initialized = true; - cleanup_task = Context::getGlobalContextInstance()->getSchedulePool().createTask("FileCacheCleanup", [this]{ cleanupThreadFunc(); }); + size_t num_threads=2; + for (size_t i = 0; i < num_threads; ++i) + download_threads.emplace_back([this] { metadata.downloadThreadFunc(); }); + + auto & schedule_pool = Context::getGlobalContextInstance()->getSchedulePool(); + cleanup_task = schedule_pool.createTask("FileCacheCleanup", [this]{ cleanupThreadFunc(); }); cleanup_task->activate(); cleanup_task->scheduleAfter(delayed_cleanup_interval_ms); } @@ -412,7 +418,12 @@ FileSegmentsHolderPtr FileCache::set( } FileSegmentsHolderPtr -FileCache::getOrSet(const Key & key, size_t offset, size_t size, size_t file_size, const CreateFileSegmentSettings & settings) +FileCache::getOrSet( + const Key & key, + size_t offset, + size_t size, + size_t file_size, + const CreateFileSegmentSettings & settings) { assertInitialized(); @@ -979,6 +990,10 @@ void FileCache::deactivateBackgroundOperations() { if (cleanup_task) cleanup_task->deactivate(); + + metadata.cancelDownload(); + for (auto & thread : download_threads) + thread.join(); } void FileCache::cleanup() @@ -1017,6 +1032,7 @@ FileSegmentsHolderPtr FileCache::getSnapshot() { for (const auto & [_, file_segment_metadata] : locked_key) file_segments.push_back(FileSegment::getSnapshot(file_segment_metadata->file_segment)); + return true; }); return std::make_unique(std::move(file_segments), /* complete_on_dtor */false); } diff --git a/src/Interpreters/Cache/FileCache.h b/src/Interpreters/Cache/FileCache.h index 71fc1722844..ed693b475ec 100644 --- a/src/Interpreters/Cache/FileCache.h +++ b/src/Interpreters/Cache/FileCache.h @@ -134,6 +134,7 @@ private: const bool allow_persistent_files; const size_t bypass_cache_threshold = 0; const size_t delayed_cleanup_interval_ms; + const size_t boundary_alignment; Poco::Logger * log; @@ -178,9 +179,9 @@ private: */ BackgroundSchedulePool::TaskHolder cleanup_task; - void assertInitialized() const; + std::vector download_threads; - size_t boundary_alignment; + void assertInitialized() const; void assertCacheCorrectness(); diff --git a/src/Interpreters/Cache/FileCacheSettings.cpp b/src/Interpreters/Cache/FileCacheSettings.cpp index 1fe51bf5f3e..d41aa8ffcb9 100644 --- a/src/Interpreters/Cache/FileCacheSettings.cpp +++ b/src/Interpreters/Cache/FileCacheSettings.cpp @@ -31,10 +31,9 @@ void FileCacheSettings::loadFromConfig(const Poco::Util::AbstractConfiguration & throw Exception(ErrorCodes::BAD_ARGUMENTS, "Disk Cache requires non-empty `path` field (cache base path) in config"); max_elements = config.getUInt64(config_prefix + ".max_elements", FILECACHE_DEFAULT_MAX_ELEMENTS); + if (config.has(config_prefix + ".max_file_segment_size")) max_file_segment_size = parseWithSizeSuffix(config.getString(config_prefix + ".max_file_segment_size")); - else - max_file_segment_size = FILECACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE; cache_on_write_operations = config.getUInt64(config_prefix + ".cache_on_write_operations", false); enable_filesystem_query_cache_limit = config.getUInt64(config_prefix + 
".enable_filesystem_query_cache_limit", false); @@ -44,12 +43,11 @@ void FileCacheSettings::loadFromConfig(const Poco::Util::AbstractConfiguration & if (config.has(config_prefix + ".bypass_cache_threashold")) bypass_cache_threashold = parseWithSizeSuffix(config.getString(config_prefix + ".bypass_cache_threashold")); - else - bypass_cache_threashold = FILECACHE_BYPASS_THRESHOLD; do_not_evict_index_and_mark_files = config.getUInt64(config_prefix + ".do_not_evict_index_and_mark_files", true); - boundary_alignment = config.getUInt64(config_prefix + ".boundary_alignment", DBMS_DEFAULT_BUFFER_SIZE); + if (config.has(config_prefix + ".boundary_alignment")) + boundary_alignment = parseWithSizeSuffix(config.getString(config_prefix + ".boundary_alignment")); delayed_cleanup_interval_ms = config.getUInt64(config_prefix + ".delayed_cleanup_interval_ms", FILECACHE_DELAYED_CLEANUP_INTERVAL_MS); } diff --git a/src/Interpreters/Cache/FileCache_fwd.h b/src/Interpreters/Cache/FileCache_fwd.h index 01f518d0c4e..c791d08ed4f 100644 --- a/src/Interpreters/Cache/FileCache_fwd.h +++ b/src/Interpreters/Cache/FileCache_fwd.h @@ -4,7 +4,8 @@ namespace DB { -static constexpr int FILECACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE = 8 * 1024 * 1024; +static constexpr int FILECACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE = 32 * 1024 * 1024; /// 32Mi +static constexpr int FILECACHE_DEFAULT_MIN_FILE_SEGMENT_SIZE = 4 * 1024 * 1024; /// 4Mi static constexpr int FILECACHE_DEFAULT_MAX_ELEMENTS = 10000000; static constexpr int FILECACHE_DEFAULT_HITS_THRESHOLD = 0; static constexpr size_t FILECACHE_BYPASS_THRESHOLD = 256 * 1024 * 1024; diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index 7b82c58080c..f95379ba07f 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -214,8 +214,10 @@ void FileSegment::resetDownloadingStateUnlocked(const FileSegmentGuard::Lock & l /// range().size() can equal 0 in case of write-though cache. 
if (!is_unbound && current_downloaded_size != 0 && current_downloaded_size == range().size()) setDownloadedUnlocked(lock); - else + else if (current_downloaded_size) setDownloadState(State::PARTIALLY_DOWNLOADED, lock); + else + setDownloadState(State::EMPTY, lock); } void FileSegment::resetDownloader() @@ -280,22 +282,9 @@ void FileSegment::resetRemoteFileReader() FileSegment::RemoteFileReaderPtr FileSegment::extractRemoteFileReader() { - auto locked_key = lockKeyMetadata(false); - if (!locked_key) - { - assert(isDetached()); + if (download_state == State::PARTIALLY_DOWNLOADED_NO_CONTINUATION) return std::move(remote_file_reader); - } - - auto segment_lock = segment_guard.lock(); - - assert(download_state != State::DETACHED); - - bool is_last_holder = locked_key->isLastOwnerOfFileSegment(offset()); - if (!downloader_id.empty() || !is_last_holder) - return nullptr; - - return std::move(remote_file_reader); + return nullptr; } void FileSegment::setRemoteFileReader(RemoteFileReaderPtr remote_file_reader_) @@ -607,22 +596,17 @@ void FileSegment::complete() resetDownloaderUnlocked(segment_lock); } - if (is_downloader || is_last_holder) + auto remove_from_cache = [&, this]() { - if (cache_writer) - { - cache_writer->finalize(); - cache_writer.reset(); - } - remote_file_reader.reset(); - } + LOG_TEST(log, "Remove file segment {} (nothing downloaded)", range().toString()); + locked_key->removeFileSegment(offset(), segment_lock); + setDetachedState(segment_lock); + }; if (segment_kind == FileSegmentKind::Temporary && is_last_holder) { LOG_TEST(log, "Removing temporary file segment: {}", getInfoForLogUnlocked(segment_lock)); - detach(segment_lock, *locked_key); - setDownloadState(State::DETACHED, segment_lock); - locked_key->removeFileSegment(offset(), segment_lock); + remove_from_cache(); return; } @@ -633,6 +617,7 @@ void FileSegment::complete() chassert(current_downloaded_size == range().size()); chassert(current_downloaded_size == fs::file_size(getPathInLocalCache())); chassert(!cache_writer); + chassert(!remote_file_reader); break; } case State::DOWNLOADING: @@ -640,8 +625,24 @@ void FileSegment::complete() chassert(!is_last_holder); break; } - case State::EMPTY: case State::PARTIALLY_DOWNLOADED: + { + if (is_last_holder) + { + LOG_TEST( + log, "Submitted file segment for background download " + "(having {}/{})", downloaded_size, range().size()); + + locked_key->addToDownloadQueue(offset(), segment_lock); /// Finish download in background. + } + break; + } + case State::EMPTY: + { + if (is_last_holder) + remove_from_cache(); + break; + } case State::PARTIALLY_DOWNLOADED_NO_CONTINUATION: { chassert(current_downloaded_size != range().size()); @@ -650,10 +651,9 @@ void FileSegment::complete() { if (current_downloaded_size == 0) { - LOG_TEST(log, "Remove file segment {} (nothing downloaded)", range().toString()); - locked_key->removeFileSegment(offset(), segment_lock); + remove_from_cache(); } - else + else if (download_state == State::PARTIALLY_DOWNLOADED_NO_CONTINUATION) { LOG_TEST(log, "Resize file segment {} to downloaded: {}", range().toString(), current_downloaded_size); @@ -672,9 +672,8 @@ void FileSegment::complete() /// We mark current file segment with state DETACHED, even though the data is still in cache /// (but a separate file segment) because is_last_holder is satisfied, so it does not matter. 
+ setDetachedState(segment_lock); } - setDetachedState(segment_lock); } break; } @@ -844,6 +843,8 @@ void FileSegment::setDetachedState(const FileSegmentGuard::Lock & lock) setDownloadState(State::DETACHED, lock); key_metadata.reset(); cache = nullptr; + cache_writer.reset(); + remote_file_reader.reset(); } void FileSegment::detach(const FileSegmentGuard::Lock & lock, const LockedKey &) diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index c9a23d1c785..2a3803eb3d7 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -46,10 +46,12 @@ KeyMetadata::KeyMetadata( const Key & key_, const std::string & key_path_, CleanupQueue & cleanup_queue_, + DownloadQueue & download_queue_, bool created_base_directory_) : key(key_) , key_path(key_path_) , cleanup_queue(cleanup_queue_) + , download_queue(download_queue_) , created_base_directory(created_base_directory_) { if (created_base_directory) @@ -123,6 +125,7 @@ private: CacheMetadata::CacheMetadata(const std::string & path_) : path(path_) , cleanup_queue(std::make_unique()) + , download_queue(std::make_unique()) , log(&Poco::Logger::get("CacheMetadata")) { } @@ -175,7 +178,7 @@ LockedKeyPtr CacheMetadata::lockKeyMetadata( it = emplace( key, std::make_shared( - key, getPathForKey(key), *cleanup_queue, is_initial_load)).first; + key, getPathForKey(key), *cleanup_queue, *download_queue, is_initial_load)).first; } key_metadata = it->second; @@ -293,6 +296,121 @@ void CacheMetadata::doCleanup() } } +class DownloadQueue +{ +friend struct CacheMetadata; +public: + void add(std::weak_ptr file_segment) + { + { + std::lock_guard lock(mutex); + queue.push(file_segment); + } + cv.notify_one(); + } + +private: + void cancel() + { + std::lock_guard lock(mutex); + cancelled = true; + } + + std::mutex mutex; + std::condition_variable cv; + std::queue> queue; + bool cancelled = false; +}; + +void CacheMetadata::downloadThreadFunc() +{ + std::optional> memory; + while (true) + { + std::weak_ptr file_segment_weak; + { + std::unique_lock lock(download_queue->mutex); + + if (download_queue->cancelled) + return; + + if (download_queue->queue.empty()) + { + download_queue->cv.wait(lock); + continue; + } + + file_segment_weak = download_queue->queue.front(); + download_queue->queue.pop(); + } + + FileSegmentsHolderPtr holder; + { + auto file_segment = file_segment_weak.lock(); + if (!file_segment + || file_segment->state() != FileSegment::State::PARTIALLY_DOWNLOADED) + continue; + + auto lock = lockKeyMetadata(file_segment->key(), KeyNotFoundPolicy::RETURN_NULL); + if (!lock) + continue; + + holder = std::make_unique(FileSegments{file_segment}); + } + + auto & file_segment = holder->front(); + chassert(file_segment.assertCorrectness()); + + if (file_segment.getOrSetDownloader() != FileSegment::getCallerId()) + continue; + + LOG_TRACE(log, "Downloading file segment: {}", file_segment.getInfoForLog()); + + auto reader = file_segment.getRemoteFileReader(); + + /// If remote_fs_read_method == 'threadpool', + /// the reader itself does not allocate the buffer, but uses the buffer passed to it. + /// So we will need to allocate a buffer here as well.
+ if (reader->buffer().begin() == nullptr) + { + if (!memory) + memory.emplace(DBMS_DEFAULT_BUFFER_SIZE); + reader->set(memory->data(), memory->size()); + } + + size_t offset = file_segment.getCurrentWriteOffset(false); + while (!reader->eof()) + { + auto size = reader->available(); + + if (!file_segment.reserve(size)) + return; + + try + { + file_segment.write(reader->position(), size, offset); + offset += size; + } + catch (ErrnoException & e) + { + int code = e.getErrno(); + if (code == /* No space left on device */28 || code == /* Quota exceeded */122) + { + LOG_INFO(log, "Insert into cache is skipped due to insufficient disk space. ({})", e.displayText()); + continue; + } + throw; + } + } + } +} + +void CacheMetadata::cancelDownload() +{ + download_queue->cancel(); + download_queue->cv.notify_all(); +} + LockedKey::LockedKey(std::shared_ptr key_metadata_) : key_metadata(key_metadata_) , lock(key_metadata->guard.lock()) @@ -426,6 +544,14 @@ void LockedKey::shrinkFileSegmentToDownloadedSize( chassert(file_segment->assertCorrectnessUnlocked(segment_lock)); } +void LockedKey::addToDownloadQueue(size_t offset, const FileSegmentGuard::Lock &) +{ + auto it = key_metadata->find(offset); + if (it == key_metadata->end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "There is not offset {}", offset); + key_metadata->download_queue.add(it->second->file_segment); +} + std::shared_ptr LockedKey::getByOffset(size_t offset) const { auto it = key_metadata->find(offset); diff --git a/src/Interpreters/Cache/Metadata.h b/src/Interpreters/Cache/Metadata.h index 3fd6176f201..f96243c3f1f 100644 --- a/src/Interpreters/Cache/Metadata.h +++ b/src/Interpreters/Cache/Metadata.h @@ -8,8 +8,12 @@ namespace DB { + class CleanupQueue; using CleanupQueuePtr = std::shared_ptr; +class DownloadQueue; +using DownloadQueuePtr = std::shared_ptr; +using FileSegmentsHolderPtr = std::unique_ptr; struct FileSegmentMetadata : private boost::noncopyable @@ -44,6 +48,7 @@ struct KeyMetadata : public std::map, const Key & key_, const std::string & key_path_, CleanupQueue & cleanup_queue_, + DownloadQueue & download_queue_, bool created_base_directory_ = false); enum class KeyState @@ -69,6 +74,7 @@ private: KeyState key_state = KeyState::ACTIVE; KeyGuard guard; CleanupQueue & cleanup_queue; + DownloadQueue & download_queue; std::atomic created_base_directory = false; }; @@ -109,10 +115,15 @@ public: void doCleanup(); + void downloadThreadFunc(); + + void cancelDownload(); + private: const std::string path; /// Cache base path CacheMetadataGuard guard; const CleanupQueuePtr cleanup_queue; + const DownloadQueuePtr download_queue; Poco::Logger * log; }; @@ -159,6 +170,8 @@ struct LockedKey : private boost::noncopyable void shrinkFileSegmentToDownloadedSize(size_t offset, const FileSegmentGuard::Lock &); + void addToDownloadQueue(size_t offset, const FileSegmentGuard::Lock &); + bool isLastOwnerOfFileSegment(size_t offset) const; void removeFromCleanupQueue(); From 4096c082b9f7bd49664e6341e4862c1aed3d7a73 Mon Sep 17 00:00:00 2001 From: Yuriy Chernyshov Date: Wed, 14 Jun 2023 15:09:17 +0300 Subject: [PATCH 0356/1997] Cleanup RE2_SOURCES --- contrib/re2-cmake/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/contrib/re2-cmake/CMakeLists.txt b/contrib/re2-cmake/CMakeLists.txt index 19939c11ebf..a081f92bc94 100644 --- a/contrib/re2-cmake/CMakeLists.txt +++ b/contrib/re2-cmake/CMakeLists.txt @@ -28,7 +28,6 @@ set(RE2_SOURCES ${SRC_DIR}/re2/regexp.cc ${SRC_DIR}/re2/set.cc ${SRC_DIR}/re2/simplify.cc - 
${SRC_DIR}/re2/stringpiece.cc ${SRC_DIR}/re2/tostring.cc ${SRC_DIR}/re2/unicode_casefold.cc ${SRC_DIR}/re2/unicode_groups.cc From 7d59af1f776f9868f5b45c0c78e9e5a49d118dd3 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 14 Jun 2023 13:19:03 +0200 Subject: [PATCH 0357/1997] test compressed write to S3 --- src/IO/BrotliWriteBuffer.cpp | 5 +--- src/IO/BrotliWriteBuffer.h | 1 + src/IO/Bzip2WriteBuffer.cpp | 5 +--- src/IO/LZMADeflatingWriteBuffer.cpp | 5 ---- src/IO/LZMADeflatingWriteBuffer.h | 2 -- src/IO/Lz4DeflatingWriteBuffer.cpp | 15 +++++----- src/IO/Lz4DeflatingWriteBuffer.h | 2 -- src/IO/ZlibDeflatingWriteBuffer.cpp | 12 -------- src/IO/ZlibDeflatingWriteBuffer.h | 2 -- src/IO/ZstdDeflatingWriteBuffer.cpp | 6 ---- src/IO/ZstdDeflatingWriteBuffer.h | 2 -- .../test_checking_s3_blobs_paranoid/test.py | 28 ++++++++++++------- 12 files changed, 29 insertions(+), 56 deletions(-) diff --git a/src/IO/BrotliWriteBuffer.cpp b/src/IO/BrotliWriteBuffer.cpp index 47426d62a6e..a19c6770dad 100644 --- a/src/IO/BrotliWriteBuffer.cpp +++ b/src/IO/BrotliWriteBuffer.cpp @@ -42,10 +42,7 @@ BrotliWriteBuffer::BrotliWriteBuffer(std::unique_ptr out_, int comp BrotliEncoderSetParameter(brotli->state, BROTLI_PARAM_LGWIN, 24); } -BrotliWriteBuffer::~BrotliWriteBuffer() -{ - finalize(); -} +BrotliWriteBuffer::~BrotliWriteBuffer() = default; void BrotliWriteBuffer::nextImpl() { diff --git a/src/IO/BrotliWriteBuffer.h b/src/IO/BrotliWriteBuffer.h index e03fa1507ba..8cbc78bd9e7 100644 --- a/src/IO/BrotliWriteBuffer.h +++ b/src/IO/BrotliWriteBuffer.h @@ -27,6 +27,7 @@ private: class BrotliStateWrapper; std::unique_ptr brotli; + size_t in_available; const uint8_t * in_data; diff --git a/src/IO/Bzip2WriteBuffer.cpp b/src/IO/Bzip2WriteBuffer.cpp index 4b6bed70d35..b84cbdd1e41 100644 --- a/src/IO/Bzip2WriteBuffer.cpp +++ b/src/IO/Bzip2WriteBuffer.cpp @@ -45,10 +45,7 @@ Bzip2WriteBuffer::Bzip2WriteBuffer(std::unique_ptr out_, int compre { } -Bzip2WriteBuffer::~Bzip2WriteBuffer() -{ - finalize(); -} +Bzip2WriteBuffer::~Bzip2WriteBuffer() = default; void Bzip2WriteBuffer::nextImpl() { diff --git a/src/IO/LZMADeflatingWriteBuffer.cpp b/src/IO/LZMADeflatingWriteBuffer.cpp index 30e247b1016..c534a247bc3 100644 --- a/src/IO/LZMADeflatingWriteBuffer.cpp +++ b/src/IO/LZMADeflatingWriteBuffer.cpp @@ -44,11 +44,6 @@ LZMADeflatingWriteBuffer::LZMADeflatingWriteBuffer( LZMA_VERSION_STRING); } -LZMADeflatingWriteBuffer::~LZMADeflatingWriteBuffer() -{ - finalize(); -} - void LZMADeflatingWriteBuffer::nextImpl() { if (!offset()) diff --git a/src/IO/LZMADeflatingWriteBuffer.h b/src/IO/LZMADeflatingWriteBuffer.h index 2e135455e00..5a0864d6071 100644 --- a/src/IO/LZMADeflatingWriteBuffer.h +++ b/src/IO/LZMADeflatingWriteBuffer.h @@ -21,8 +21,6 @@ public: char * existing_memory = nullptr, size_t alignment = 0); - ~LZMADeflatingWriteBuffer() override; - private: void nextImpl() override; diff --git a/src/IO/Lz4DeflatingWriteBuffer.cpp b/src/IO/Lz4DeflatingWriteBuffer.cpp index c3a1b8282c3..32241cb3b1a 100644 --- a/src/IO/Lz4DeflatingWriteBuffer.cpp +++ b/src/IO/Lz4DeflatingWriteBuffer.cpp @@ -40,11 +40,6 @@ Lz4DeflatingWriteBuffer::Lz4DeflatingWriteBuffer( LZ4F_VERSION); } -Lz4DeflatingWriteBuffer::~Lz4DeflatingWriteBuffer() -{ - finalize(); -} - void Lz4DeflatingWriteBuffer::nextImpl() { if (!offset()) @@ -107,8 +102,14 @@ void Lz4DeflatingWriteBuffer::nextImpl() if (LZ4F_isError(compressed_size)) throw Exception( ErrorCodes::LZ4_ENCODER_FAILED, - "LZ4 failed to encode stream. 
LZ4F version: {}", - LZ4F_VERSION); + "LZ4 failed to encode stream. LZ4F version: {}, CodeName: {}," + " in_capacity: {}, out_capacity: {}, cur_buffer_size: {}, min_compressed_block_size: {}", + LZ4F_VERSION, + LZ4F_getErrorName(compressed_size), + in_capacity, + out_capacity, + cur_buffer_size, + min_compressed_block_size); in_capacity -= cur_buffer_size; in_data = reinterpret_cast(working_buffer.end() - in_capacity); diff --git a/src/IO/Lz4DeflatingWriteBuffer.h b/src/IO/Lz4DeflatingWriteBuffer.h index 68873b5f8ee..a6440f8854f 100644 --- a/src/IO/Lz4DeflatingWriteBuffer.h +++ b/src/IO/Lz4DeflatingWriteBuffer.h @@ -21,8 +21,6 @@ public: char * existing_memory = nullptr, size_t alignment = 0); - ~Lz4DeflatingWriteBuffer() override; - private: void nextImpl() override; diff --git a/src/IO/ZlibDeflatingWriteBuffer.cpp b/src/IO/ZlibDeflatingWriteBuffer.cpp index 43bb0405555..d433bdbecd2 100644 --- a/src/IO/ZlibDeflatingWriteBuffer.cpp +++ b/src/IO/ZlibDeflatingWriteBuffer.cpp @@ -72,18 +72,6 @@ void ZlibDeflatingWriteBuffer::nextImpl() } } -ZlibDeflatingWriteBuffer::~ZlibDeflatingWriteBuffer() -{ - try - { - finalize(); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } -} - void ZlibDeflatingWriteBuffer::finalizeBefore() { next(); diff --git a/src/IO/ZlibDeflatingWriteBuffer.h b/src/IO/ZlibDeflatingWriteBuffer.h index 58e709b54e6..05d6e528a23 100644 --- a/src/IO/ZlibDeflatingWriteBuffer.h +++ b/src/IO/ZlibDeflatingWriteBuffer.h @@ -24,8 +24,6 @@ public: char * existing_memory = nullptr, size_t alignment = 0); - ~ZlibDeflatingWriteBuffer() override; - private: void nextImpl() override; diff --git a/src/IO/ZstdDeflatingWriteBuffer.cpp b/src/IO/ZstdDeflatingWriteBuffer.cpp index c6d2ffc39f9..097647cbafc 100644 --- a/src/IO/ZstdDeflatingWriteBuffer.cpp +++ b/src/IO/ZstdDeflatingWriteBuffer.cpp @@ -30,12 +30,6 @@ ZstdDeflatingWriteBuffer::ZstdDeflatingWriteBuffer( output = {nullptr, 0, 0}; } - -ZstdDeflatingWriteBuffer::~ZstdDeflatingWriteBuffer() -{ - finalize(); -} - void ZstdDeflatingWriteBuffer::nextImpl() { if (!offset()) diff --git a/src/IO/ZstdDeflatingWriteBuffer.h b/src/IO/ZstdDeflatingWriteBuffer.h index ba83c18d354..11e34e2890f 100644 --- a/src/IO/ZstdDeflatingWriteBuffer.h +++ b/src/IO/ZstdDeflatingWriteBuffer.h @@ -21,8 +21,6 @@ public: char * existing_memory = nullptr, size_t alignment = 0); - ~ZstdDeflatingWriteBuffer() override; - void sync() override { out->sync(); diff --git a/tests/integration/test_checking_s3_blobs_paranoid/test.py b/tests/integration/test_checking_s3_blobs_paranoid/test.py index c0f184815c9..244ca8a2c81 100644 --- a/tests/integration/test_checking_s3_blobs_paranoid/test.py +++ b/tests/integration/test_checking_s3_blobs_paranoid/test.py @@ -84,19 +84,23 @@ def get_counters(node, query_id, log_type="ExceptionWhileProcessing"): ] -def test_upload_s3_fail_create_multi_part_upload(cluster, broken_s3): +# Add "lz4" compression method in the list after https://github.com/ClickHouse/ClickHouse/issues/50975 is fixed +@pytest.mark.parametrize( + "compression", ["none", "gzip", "br", "xz", "zstd", "bz2", "deflate"] +) +def test_upload_s3_fail_create_multi_part_upload(cluster, broken_s3, compression): node = cluster.instances["node"] broken_s3.setup_error_at_create_multi_part_upload() - insert_query_id = "INSERT_INTO_TABLE_FUNCTION_FAIL_CREATE_MPU" + insert_query_id = f"INSERT_INTO_TABLE_FUNCTION_FAIL_CREATE_MPU_{compression}" error = node.query_and_get_error( - """ + f""" INSERT INTO TABLE FUNCTION s3( 
'http://resolver:8083/root/data/test_upload_s3_fail_create_multi_part_upload', 'minio', 'minio123', - 'CSV', auto, 'none' + 'CSV', auto, '{compression}' ) SELECT * @@ -111,7 +115,6 @@ def test_upload_s3_fail_create_multi_part_upload(cluster, broken_s3): assert "Code: 499" in error, error assert "mock s3 injected error" in error, error - assert "DB::WriteBufferFromS3::createMultipartUpload()" in error, error count_create_multi_part_uploads, count_upload_parts, count_s3_errors = get_counters( node, insert_query_id @@ -121,20 +124,26 @@ def test_upload_s3_fail_create_multi_part_upload(cluster, broken_s3): assert count_s3_errors == 1 -def test_upload_s3_fail_upload_part_when_multi_part_upload(cluster, broken_s3): +# Add "lz4" compression method in the list after https://github.com/ClickHouse/ClickHouse/issues/50975 is fixed +@pytest.mark.parametrize( + "compression", ["none", "gzip", "br", "xz", "zstd", "bz2", "deflate"] +) +def test_upload_s3_fail_upload_part_when_multi_part_upload( + cluster, broken_s3, compression +): node = cluster.instances["node"] broken_s3.setup_fake_multpartuploads() broken_s3.setup_error_at_part_upload(count=1, after=2) - insert_query_id = "INSERT_INTO_TABLE_FUNCTION_FAIL_UPLOAD_PART" + insert_query_id = f"INSERT_INTO_TABLE_FUNCTION_FAIL_UPLOAD_PART_{compression}" error = node.query_and_get_error( - """ + f""" INSERT INTO TABLE FUNCTION s3( 'http://resolver:8083/root/data/test_upload_s3_fail_upload_part_when_multi_part_upload', 'minio', 'minio123', - 'CSV', auto, 'none' + 'CSV', auto, '{compression}' ) SELECT * @@ -149,7 +158,6 @@ def test_upload_s3_fail_upload_part_when_multi_part_upload(cluster, broken_s3): assert "Code: 499" in error, error assert "mock s3 injected error" in error, error - assert "DB::WriteBufferFromS3::writePart" in error, error count_create_multi_part_uploads, count_upload_parts, count_s3_errors = get_counters( node, insert_query_id From 011d666073968b1a8cbbd867513e4e8adec1362b Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Wed, 14 Jun 2023 14:55:34 +0200 Subject: [PATCH 0358/1997] Fixed typo in tests --- tests/integration/test_storage_azure_blob_storage/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index 0002ccbf483..0de325ccd14 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -594,4 +594,4 @@ def test_partition_by_tf(cluster): assert "1,2,3\n" == get_azure_file_content("test_partition_tf_3.csv") assert "3,2,1\n" == get_azure_file_content("test_partition_tf_1.csv") - assert "78,43,45\n" == get_azure_file_content("test_partition_tfs_45.csv") + assert "78,43,45\n" == get_azure_file_content("test_partition_tf_45.csv") From afcc3aca363ff6cee0cb7f2417b711e08854d96c Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 14 Jun 2023 13:14:59 +0000 Subject: [PATCH 0359/1997] Update ci_config.py --- tests/ci/ci_config.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index d829115cfe1..36bca9d741d 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -325,6 +325,9 @@ CI_CONFIG = { "Integration tests (asan)": { "required_build": "package_asan", }, + "Integration tests (asan, analyzer)": { + "required_build": "package_asan", + }, "Integration tests (tsan)": { "required_build": "package_tsan", }, From fe8172fbd9c58fadc5c0523c69e5adce05887dd2 Mon Sep 17 
00:00:00 2001 From: Dmitry Novik Date: Wed, 14 Jun 2023 13:17:04 +0000 Subject: [PATCH 0360/1997] Review fixes --- tests/ci/integration_test_check.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index 523b1cfaab5..0d483c08456 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -71,7 +71,7 @@ def get_json_params_dict( } -def get_env_for_runner(build_path, repo_path, result_path, work_path): +def get_env_for_runner(check_name, build_path, repo_path, result_path, work_path): binary_path = os.path.join(build_path, "clickhouse") odbc_bridge_path = os.path.join(build_path, "clickhouse-odbc-bridge") library_bridge_path = os.path.join(build_path, "clickhouse-library-bridge") @@ -88,6 +88,9 @@ def get_env_for_runner(build_path, repo_path, result_path, work_path): my_env["CLICKHOUSE_TESTS_JSON_PARAMS_PATH"] = os.path.join(work_path, "params.json") my_env["CLICKHOUSE_TESTS_RUNNER_RESTART_DOCKER"] = "0" + if "analyzer" in check_name.lower(): + my_env["USE_NEW_ANALYZER"] = "1" + return my_env @@ -225,9 +228,7 @@ def main(): else: download_all_deb_packages(check_name, reports_path, build_path) - my_env = get_env_for_runner(build_path, repo_path, result_path, work_path) - if "analyzer" in check_name.lower(): - my_env["USE_NEW_ANALYZER"] = "1" + my_env = get_env_for_runner(check_name, build_path, repo_path, result_path, work_path) json_path = os.path.join(work_path, "params.json") with open(json_path, "w", encoding="utf-8") as json_params: From 14dfebba49543378b80716cffb5aaea7dcc7fbf7 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Wed, 14 Jun 2023 13:35:11 +0000 Subject: [PATCH 0361/1997] Fix links in MD --- docs/en/operations/configuration-files.md | 2 +- docs/ru/operations/configuration-files.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/operations/configuration-files.md b/docs/en/operations/configuration-files.md index b5d52acca49..71d5885058a 100644 --- a/docs/en/operations/configuration-files.md +++ b/docs/en/operations/configuration-files.md @@ -56,7 +56,7 @@ Substitutions can also be performed from ZooKeeper. To do this, specify the attr ## Decryption {#decryption} -Elements with text nodes may be encrypted with [encryption codecs](../../sql-reference/statements/create/table.md#encryption-codecs). In this case the `<encryption_codecs>` section should be included in the configuration file and each element node with encrypted text should have the `encryption_codec` attribute with the name of the codec. +Elements with text nodes may be encrypted with [encryption codecs](../sql-reference/statements/create/table.md#encryption-codecs). In this case the `<encryption_codecs>` section should be included in the configuration file and each element node with encrypted text should have the `encryption_codec` attribute with the name of the codec. Example: diff --git a/docs/ru/operations/configuration-files.md b/docs/ru/operations/configuration-files.md index 96512fbbe23..df50d900919 100644 --- a/docs/ru/operations/configuration-files.md +++ b/docs/ru/operations/configuration-files.md @@ -87,7 +87,7 @@ $ cat /etc/clickhouse-server/users.d/alice.xml ## Расшифровка {#decryption} -Элементы с текстовыми узлами могут быть зашифрованы с помощью [кодеков шифрования](../../sql-reference/statements/create/table.md#encryption-codecs). В этом случае секция `<encryption_codecs>` должна быть включена в конфигурационный файл, и каждый элемент с зашифрованным текстом должен иметь атрибут `encryption_codec` с именем кодека.
+Элементы с текстовыми узлами могут быть зашифрованы с помощью [кодеков шифрования](../sql-reference/statements/create/table.md#create-query-encryption-codecs). В этом случае секция `` должна быть включена в конфигурационный файл и каждый элемент с зашифрованным текстом должен иметь аттрибут `encryption_codec` с именем кодека.
 
 Пример:

From 1230519bec047857d7fb9b1edd6baec1a7be8e6a Mon Sep 17 00:00:00 2001
From: robot-clickhouse
Date: Wed, 14 Jun 2023 13:38:44 +0000
Subject: [PATCH 0362/1997] Automatic style fix

---
 tests/ci/integration_test_check.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py
index 0d483c08456..843bbc8b3ee 100644
--- a/tests/ci/integration_test_check.py
+++ b/tests/ci/integration_test_check.py
@@ -228,7 +228,9 @@ def main():
     else:
         download_all_deb_packages(check_name, reports_path, build_path)
 
-    my_env = get_env_for_runner(check_name, build_path, repo_path, result_path, work_path)
+    my_env = get_env_for_runner(
+        check_name, build_path, repo_path, result_path, work_path
+    )
 
     json_path = os.path.join(work_path, "params.json")
     with open(json_path, "w", encoding="utf-8") as json_params:

From 7f8162e3465318bb9847956edffcd67a4fdb2020 Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com>
Date: Wed, 14 Jun 2023 10:42:20 -0400
Subject: [PATCH 0363/1997] add note regarding -MapState

---
 docs/en/sql-reference/aggregate-functions/combinators.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/docs/en/sql-reference/aggregate-functions/combinators.md b/docs/en/sql-reference/aggregate-functions/combinators.md
index fd693430064..a395b350a55 100644
--- a/docs/en/sql-reference/aggregate-functions/combinators.md
+++ b/docs/en/sql-reference/aggregate-functions/combinators.md
@@ -97,6 +97,10 @@ Result:
 
 If you apply this combinator, the aggregate function does not return the resulting value (such as the number of unique values for the [uniq](../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) function), but an intermediate state of the aggregation (for `uniq`, this is the hash table for calculating the number of unique values). This is an `AggregateFunction(...)` that can be used for further processing or stored in a table to finish aggregating later.
 
+:::note
+Please note that -MapState is not an invariant for the same data, because the order of data in the intermediate state can change, though this does not affect ingestion of this data.
+:::
+
 To work with these states, use:
 
 - [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md) table engine.
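The note above can be checked with a short script — a minimal sketch, assuming a local server reachable through `clickhouse-client` (the `sumMap` queries are illustrative and not part of either patch): feeding the same rows in different orders may produce byte-wise different intermediate states, but merging them yields identical results.

```python
import subprocess

def ch(query: str) -> str:
    # Runs one query through clickhouse-client against a local server.
    result = subprocess.run(
        ["clickhouse-client", "--query", query],
        capture_output=True, text=True, check=True,
    )
    return result.stdout.strip()

# Build partial sumMap states over the same rows in two input orders,
# then merge them: the merged result is the same either way.
ordered = ch(
    "SELECT sumMapMerge(s) FROM ("
    " SELECT sumMapState([number % 3], [number]) AS s"
    " FROM numbers(10) GROUP BY number % 2)"
)
shuffled = ch(
    "SELECT sumMapMerge(s) FROM ("
    " SELECT sumMapState([number % 3], [number]) AS s"
    " FROM (SELECT number FROM numbers(10) ORDER BY rand()) GROUP BY number % 2)"
)
assert ordered == shuffled  # ([0,1,2],[18,12,15]) in both cases
```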
From 0131ca8768efd5840f330b2ef11acc72c35a36a2 Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com>
Date: Wed, 14 Jun 2023 10:49:20 -0400
Subject: [PATCH 0364/1997] add note regarding -MapState

---
 docs/ru/sql-reference/aggregate-functions/combinators.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/docs/ru/sql-reference/aggregate-functions/combinators.md b/docs/ru/sql-reference/aggregate-functions/combinators.md
index 3a7ff571f99..99d5f11442c 100644
--- a/docs/ru/sql-reference/aggregate-functions/combinators.md
+++ b/docs/ru/sql-reference/aggregate-functions/combinators.md
@@ -66,6 +66,10 @@ WITH anySimpleState(number) AS c SELECT toTypeName(c), c FROM numbers(1);
 
 В случае применения этого комбинатора, агрегатная функция возвращает не готовое значение (например, в случае функции [uniq](reference/uniq.md#agg_function-uniq) — количество уникальных значений), а промежуточное состояние агрегации (например, в случае функции `uniq` — хэш-таблицу для расчёта количества уникальных значений), которое имеет тип `AggregateFunction(...)` и может использоваться для дальнейшей обработки или может быть сохранено в таблицу для последующей доагрегации.
 
+:::note
+Промежуточное состояние для -MapState не является инвариантом для одних и тех же исходных данных, т.к. порядок данных может меняться. Это не влияет, тем не менее, на загрузку таких данных.
+:::
+
 Для работы с промежуточными состояниями предназначены:
 
 - Движок таблиц [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md).

From 3d64cf4423b9fb4b935786eca392875d3b66c17c Mon Sep 17 00:00:00 2001
From: Roman Vasin
Date: Wed, 14 Jun 2023 15:40:32 +0000
Subject: [PATCH 0365/1997] Add dbms in cmake

---
 src/Common/Config/CMakeLists.txt      | 2 ++
 utils/config-processor/CMakeLists.txt | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/Common/Config/CMakeLists.txt b/src/Common/Config/CMakeLists.txt
index ec7bdd10196..fdcba5d4a4b 100644
--- a/src/Common/Config/CMakeLists.txt
+++ b/src/Common/Config/CMakeLists.txt
@@ -15,6 +15,7 @@ target_link_libraries(clickhouse_common_config
         Poco::XML
     PRIVATE
         string_utils
+        dbms
 )
 
 add_library(clickhouse_common_config_no_zookeeper_log ${SRCS})
@@ -25,6 +26,7 @@ target_link_libraries(clickhouse_common_config_no_zookeeper_log
         Poco::XML
     PRIVATE
         string_utils
+        dbms
 )
 
 if (TARGET ch_contrib::yaml_cpp)
diff --git a/utils/config-processor/CMakeLists.txt b/utils/config-processor/CMakeLists.txt
index 80c3535ef4e..53b6163ba87 100644
--- a/utils/config-processor/CMakeLists.txt
+++ b/utils/config-processor/CMakeLists.txt
@@ -1,2 +1,2 @@
 clickhouse_add_executable (config-processor config-processor.cpp)
-target_link_libraries(config-processor PRIVATE dbms)
+target_link_libraries(config-processor PRIVATE clickhouse_common_config_no_zookeeper_log)

From 45328a41e81be49e7755a091d68433e2e232ff11 Mon Sep 17 00:00:00 2001
From: tpanetti
Date: Wed, 14 Jun 2023 09:31:21 -0700
Subject: [PATCH 0366/1997] Fix test for MySQL Compatible Types to use clickhouse_client rather than clickhouse_local

---
 .../02775_show_columns_mysql_compatibility.sh | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.sh b/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.sh
index e324926e2e7..9aa199e8913 100755
--- a/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.sh
+++ 
b/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.sh @@ -14,12 +14,12 @@ PORT=9004 # First run the clickhouse test to create the ClickHouse Tables echo "Drop tables if they exist" -${CLICKHOUSE_LOCAL} --query "DROP TABLE IF EXISTS tab" -${CLICKHOUSE_LOCAL} --query "DROP TABLE IF EXISTS database_123456789abcde" -${CLICKHOUSE_LOCAL} --query "DROP TABLE IF EXISTS database_123456789abcde.tab" +${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS tab" +${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS database_123456789abcdef" +${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS database_123456789abcdef.tab" echo "Create tab table " -${CLICKHOUSE_LOCAL} -n -q " +${CLICKHOUSE_CLIENT} -n -q " SET allow_suspicious_low_cardinality_types=1; SET allow_experimental_object_type=1; CREATE TABLE tab @@ -70,13 +70,13 @@ ${CLICKHOUSE_LOCAL} -n -q " echo "Create pseudo-random database name" -${CLICKHOUSE_LOCAL} --query "CREATE DATABASE database_123456789abcde;" +${CLICKHOUSE_CLIENT} --query "CREATE DATABASE database_123456789abcdef;" echo "Create tab duplicate table" -${CLICKHOUSE_LOCAL} -n -q " +${CLICKHOUSE_CLIENT} -n -q " SET allow_suspicious_low_cardinality_types=1; SET allow_experimental_object_type =1; - CREATE TABLE database_123456789abcde.tab + CREATE TABLE database_123456789abcdef.tab ( uint8 UInt8, uint16 UInt16, @@ -138,9 +138,9 @@ SHOW COLUMNS FROM tab NOT ILIKE '%INT%'; SHOW COLUMNS FROM tab WHERE field LIKE '%int%'; SHOW COLUMNS FROM tab LIMIT 1; SHOW COLUMNS FROM tab; -SHOW COLUMNS FROM tab FROM database_123456789abcde; -SHOW COLUMNS FROM database_123456789abcde.tab; -DROP DATABASE database_123456789abcde; +SHOW COLUMNS FROM tab FROM database_123456789abcdef; +SHOW COLUMNS FROM database_123456789abcdef.tab; +DROP DATABASE database_123456789abcdef; DROP TABLE tab; EOT From 69594862c9cd9b598164cf2f8afd9161f5d3f445 Mon Sep 17 00:00:00 2001 From: tpanetti Date: Wed, 14 Jun 2023 09:34:56 -0700 Subject: [PATCH 0367/1997] Fix test for MySQL Compatible types (drop database properly) --- .../0_stateless/02775_show_columns_mysql_compatibility.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.sh b/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.sh index 9aa199e8913..89881a4cf60 100755 --- a/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.sh +++ b/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.sh @@ -15,7 +15,7 @@ PORT=9004 echo "Drop tables if they exist" ${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS tab" -${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS database_123456789abcdef" +${CLICKHOUSE_CLIENT} --query "DROP DATABASE IF EXISTS database_123456789abcdef" ${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS database_123456789abcdef.tab" echo "Create tab table " From a91fc3ddb33865d2db8170ff96e636de293b323a Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Wed, 14 Jun 2023 16:44:31 +0000 Subject: [PATCH 0368/1997] Add docs/ add more cases in test --- docs/en/interfaces/formats.md | 3 +- .../operations/settings/settings-formats.md | 5 +++ docs/ru/interfaces/formats.md | 4 +- docs/ru/operations/settings/settings.md | 8 +++- src/Core/Settings.h | 2 +- src/Formats/FormatFactory.cpp | 2 +- src/Formats/FormatSettings.h | 2 +- .../Formats/Impl/CSVRowInputFormat.cpp | 39 +++++++++---------- .../RowInputFormatWithNamesAndTypes.cpp | 4 ++ tests/queries/0_stateless/00301_csv.reference | 10 +++-- tests/queries/0_stateless/00301_csv.sh | 13 ++++--- 11 files 
changed, 56 insertions(+), 36 deletions(-)

diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md
index 324930e248f..950692deb77 100644
--- a/docs/en/interfaces/formats.md
+++ b/docs/en/interfaces/formats.md
@@ -470,6 +470,7 @@ The CSV format supports the output of totals and extremes the same way as `TabSe
 - [input_format_csv_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_csv_detect_header) - automatically detect header with names and types in CSV format. Default value - `true`.
 - [input_format_csv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_csv_skip_trailing_empty_lines) - skip trailing empty lines at the end of data. Default value - `false`.
 - [input_format_csv_trim_whitespaces](/docs/en/operations/settings/settings-formats.md/#input_format_csv_trim_whitespaces) - trim spaces and tabs in non-quoted CSV strings. Default value - `true`.
+- [input_format_csv_ignore_extra_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_ignore_extra_columns) - ignore extra columns in CSV input. Default value - `false`.
 
 ## CSVWithNames {#csvwithnames}
 
@@ -2062,7 +2063,7 @@ Special format for reading Parquet file metadata (https://parquet.apache.org/doc
    - logical_type - column logical type
    - compression - compression used for this column
    - total_uncompressed_size - total uncompressed bytes size of the column, calculated as the sum of total_uncompressed_size of the column from all row groups
-   - total_compressed_size - total compressed bytes size of the column, calculated as the sum of total_compressed_size of the column from all row groups 
+   - total_compressed_size - total compressed bytes size of the column, calculated as the sum of total_compressed_size of the column from all row groups
    - space_saved - percent of space saved by compression, calculated as (1 - total_compressed_size/total_uncompressed_size).
    - encodings - the list of encodings used for this column
  - row_groups - the list of row groups metadata with the next structure:
diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md
index 26501f3f3f6..e721c9408e3 100644
--- a/docs/en/operations/settings/settings-formats.md
+++ b/docs/en/operations/settings/settings-formats.md
@@ -931,6 +931,11 @@ Result
 ```text
 " string "
 ```
+### input_format_csv_ignore_extra_columns {#input_format_csv_ignore_extra_columns}
+
+Ignore extra columns in CSV input.
+
+Disabled by default.
 
 ## Values format settings {#values-format-settings}
 
diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md
index 48a6132170a..8488f4ce55a 100644
--- a/docs/ru/interfaces/formats.md
+++ b/docs/ru/interfaces/formats.md
@@ -401,8 +401,8 @@ $ clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FOR
 - [output_format_csv_crlf_end_of_line](../operations/settings/settings.md#output_format_csv_crlf_end_of_line) - если установлено значение true, конец строки в формате вывода CSV будет `\r\n` вместо `\n`. Значение по умолчанию - `false`.
 - [input_format_csv_skip_first_lines](../operations/settings/settings.md#input_format_csv_skip_first_lines) - пропустить указанное количество строк в начале данных. Значение по умолчанию - `0`.
 - [input_format_csv_detect_header](../operations/settings/settings.md#input_format_csv_detect_header) - обнаружить заголовок с именами и типами в формате CSV. Значение по умолчанию - `true`.
-- [input_format_csv_trim_whitespaces](../operations/settings/settings.md#input_format_csv_trim_whitespaces) - удалить пробелы и символы табуляции из строк без кавычек.
-Значение по умолчанию - `true`.
+- [input_format_csv_trim_whitespaces](../operations/settings/settings.md#input_format_csv_trim_whitespaces) - удалить пробелы и символы табуляции из строк без кавычек. Значение по умолчанию - `true`.
+- [input_format_csv_ignore_extra_columns](../operations/settings/settings.md/#input_format_csv_ignore_extra_columns) - игнорировать дополнительные столбцы. Значение по умолчанию - `false`.
 
 ## CSVWithNames {#csvwithnames}
 
diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index e3da8302fc8..33d9300f8e1 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -1686,7 +1686,7 @@ SELECT * FROM table_with_enum_column_for_csv_insert;
 ## input_format_csv_detect_header {#input_format_csv_detect_header}
 
 Обнаружить заголовок с именами и типами в формате CSV.
- 
+
 Значение по умолчанию - `true`.
 
 ## input_format_csv_skip_first_lines {#input_format_csv_skip_first_lines}
@@ -1727,6 +1727,12 @@ echo ' string ' | ./clickhouse local -q "select * from table FORMAT CSV" --in
 " string "
 ```
 
+## input_format_csv_ignore_extra_columns {#input_format_csv_ignore_extra_columns}
+
+Игнорировать дополнительные столбцы.
+
+Выключено по умолчанию.
+
 ## output_format_tsv_crlf_end_of_line {#settings-output-format-tsv-crlf-end-of-line}
 
 Использовать в качестве разделителя строк для TSV формата CRLF (DOC/Windows стиль) вместо LF (Unix стиль).
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index d38f7767252..9582419b98c 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -835,7 +835,6 @@ class IColumn;
     M(Bool, input_format_import_nested_json, false, "Map nested JSON data to nested tables (it works for JSONEachRow format).", 0) \
     M(Bool, input_format_defaults_for_omitted_fields, true, "For input data calculate default expressions for omitted fields (it works for JSONEachRow, -WithNames, -WithNamesAndTypes formats).", IMPORTANT) \
     M(Bool, input_format_csv_empty_as_default, true, "Treat empty fields in CSV input as default values.", 0) \
-    M(Bool, input_format_csv_ignore_extra_columns, false, "", 0) \
     M(Bool, input_format_tsv_empty_as_default, false, "Treat empty fields in TSV input as default values.", 0) \
     M(Bool, input_format_tsv_enum_as_number, false, "Treat inserted enum values in TSV formats as enum indices.", 0) \
     M(Bool, input_format_null_as_default, true, "Initialize null fields with default values if the data type of this field is not nullable and it is supported by the input format", 0) \
@@ -1001,6 +1000,7 @@ class IColumn;
     M(Bool, regexp_dict_allow_hyperscan, true, "Allow regexp_tree dictionary using Hyperscan library.", 0) \
     \
     M(Bool, dictionary_use_async_executor, false, "Execute a pipeline for reading from a dictionary with several threads. It's supported only by DIRECT dictionary with CLICKHOUSE source.", 0) \
+    M(Bool, input_format_csv_ignore_extra_columns, false, "Ignore extra columns in CSV input", 0) \
 
 // End of FORMAT_FACTORY_SETTINGS
 // Please add settings non-related to formats into the COMMON_SETTINGS above.
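The intended end-to-end behavior of the new setting can be sketched as follows — a minimal sketch, assuming a running server and a throwaway `Memory` table (the table name and data are hypothetical, mirroring the test changed further below):

```python
import subprocess

def ch(*args: str, data: str = "") -> str:
    # Thin wrapper over clickhouse-client; `data` is passed on stdin.
    result = subprocess.run(
        ["clickhouse-client", *args],
        input=data, capture_output=True, text=True, check=True,
    )
    return result.stdout

ch("--query", "CREATE TABLE IF NOT EXISTS csv (s String, n UInt64, d String) ENGINE = Memory")

# Each row carries more CSV fields than the table has columns; with
# input_format_csv_ignore_extra_columns=1 the trailing fields are skipped
# instead of failing the whole INSERT.
rows = "Hello,1,String1,extra\nHello,2,String2,extra,more\n"
ch("--input_format_csv_ignore_extra_columns=1",
   "--query", "INSERT INTO csv FORMAT CSV",
   data=rows)

print(ch("--query", "SELECT * FROM csv ORDER BY n"))
```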
diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 0218d268c51..f29b55f7e73 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -63,7 +63,6 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.csv.delimiter = settings.format_csv_delimiter; format_settings.csv.tuple_delimiter = settings.format_csv_delimiter; format_settings.csv.empty_as_default = settings.input_format_csv_empty_as_default; - format_settings.csv.ignore_extra_columns = settings.input_format_csv_ignore_extra_columns; format_settings.csv.enum_as_number = settings.input_format_csv_enum_as_number; format_settings.csv.null_representation = settings.format_csv_null_representation; format_settings.csv.arrays_as_nested_csv = settings.input_format_csv_arrays_as_nested_csv; @@ -72,6 +71,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.csv.try_detect_header = settings.input_format_csv_detect_header; format_settings.csv.skip_trailing_empty_lines = settings.input_format_csv_skip_trailing_empty_lines; format_settings.csv.trim_whitespaces = settings.input_format_csv_trim_whitespaces; + format_settings.csv.ignore_extra_columns = settings.input_format_csv_ignore_extra_columns; format_settings.hive_text.fields_delimiter = settings.input_format_hive_text_fields_delimiter; format_settings.hive_text.collection_items_delimiter = settings.input_format_hive_text_collection_items_delimiter; format_settings.hive_text.map_keys_delimiter = settings.input_format_hive_text_map_keys_delimiter; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 3bc53140fe5..38148bda373 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -128,7 +128,6 @@ struct FormatSettings bool allow_single_quotes = true; bool allow_double_quotes = true; bool empty_as_default = false; - bool ignore_extra_columns = false; bool crlf_end_of_line = false; bool enum_as_number = false; bool arrays_as_nested_csv = false; @@ -140,6 +139,7 @@ struct FormatSettings bool try_detect_header = true; bool skip_trailing_empty_lines = false; bool trim_whitespaces = true; + bool ignore_extra_columns = false; } csv; struct HiveText diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index 0cc5889b732..8aaf8fd3e2f 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -288,6 +288,8 @@ bool CSVFormatReader::readField( const bool at_delimiter = !buf->eof() && *buf->position() == format_settings.csv.delimiter; const bool at_last_column_line_end = is_last_file_column && (buf->eof() || *buf->position() == '\n' || *buf->position() == '\r'); + bool res = false; + /// Note: Tuples are serialized in CSV as separate columns, but with empty_as_default or null_as_default /// only one empty or NULL column will be expected if (format_settings.csv.empty_as_default && (at_delimiter || at_last_column_line_end)) @@ -299,31 +301,28 @@ bool CSVFormatReader::readField( /// they do not contain empty unquoted fields, so this check /// works for tuples as well. column.insertDefault(); - return false; } - - auto skip_all = [&]() - { - if (!is_last_file_column || !format_settings.csv.ignore_extra_columns) - { - return; - } - //std::cout << "skip !!!" 
<< std::endl; - buf->position() = find_first_symbols<'\n'>(buf->position(), buf->buffer().end()); - }; - if (format_settings.null_as_default && !isNullableOrLowCardinalityNullable(type)) + else if (format_settings.null_as_default && !isNullableOrLowCardinalityNullable(type)) { /// If value is null but type is not nullable then use default value instead. - bool res = SerializationNullable::deserializeTextCSVImpl(column, *buf, format_settings, serialization); - skip_all(); - return res; + res = SerializationNullable::deserializeTextCSVImpl(column, *buf, format_settings, serialization); + } + else + { + /// Read the column normally. + serialization->deserializeTextCSV(column, *buf, format_settings); + res = true; } - /// Read the column normally. - serialization->deserializeTextCSV(column, *buf, format_settings); - - skip_all(); - return true; + if (is_last_file_column && format_settings.csv.ignore_extra_columns) + { + while (checkChar(format_settings.csv.delimiter, *buf)) + { + skipField(); + skipWhitespacesAndTabs(*buf); + } + } + return res; } void CSVFormatReader::skipPrefixBeforeHeader() diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp index eaedbbb4a1e..24bf1d0d595 100644 --- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp +++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp @@ -212,8 +212,12 @@ bool RowInputFormatWithNamesAndTypes::readRow(MutableColumns & columns, RowReadE format_reader->skipRowStartDelimiter(); ext.read_columns.resize(data_types.size()); + //std::cout << "col size " << column_mapping->column_indexes_for_input_fields.size() << std::endl; for (size_t file_column = 0; file_column < column_mapping->column_indexes_for_input_fields.size(); ++file_column) { + // std::cout << " file_column " << file_column << column_mapping->names_of_columns[file_column] << std::endl; + + const auto & column_index = column_mapping->column_indexes_for_input_fields[file_column]; const bool is_last_file_column = file_column + 1 == column_mapping->column_indexes_for_input_fields.size(); if (column_index) diff --git a/tests/queries/0_stateless/00301_csv.reference b/tests/queries/0_stateless/00301_csv.reference index 61279f3b84a..3dbe3116bea 100644 --- a/tests/queries/0_stateless/00301_csv.reference +++ b/tests/queries/0_stateless/00301_csv.reference @@ -11,7 +11,9 @@ default-eof 1 2019-06-19 2016-01-01 01:02:03 NUL 2016-01-02 01:02:03 Nhello \N \N -Hello world 1 2016-01-01 -Hello world 2 2016-01-02 -Hello world 3 2016-01-03 -Hello world 4 2016-01-04 +Hello 1 String1 +Hello 2 String2 +Hello 3 String3 +Hello 4 String4 +Hello 5 String5 +Hello 6 String6 diff --git a/tests/queries/0_stateless/00301_csv.sh b/tests/queries/0_stateless/00301_csv.sh index e99c39a0f6f..fafe75f6f63 100755 --- a/tests/queries/0_stateless/00301_csv.sh +++ b/tests/queries/0_stateless/00301_csv.sh @@ -39,11 +39,14 @@ $CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY s NULLS LAST"; $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; -$CLICKHOUSE_CLIENT --query="CREATE TABLE csv (s String, n UInt64 DEFAULT 1, d Date DEFAULT '2019-06-19') ENGINE = Memory"; +$CLICKHOUSE_CLIENT --query="CREATE TABLE csv (s String, n UInt64 DEFAULT 3, d String DEFAULT 'String4') ENGINE = Memory"; -echo 'Hello world, 1, 2016-01-01 -Hello world, 2 ,2016-01-02, -Hello world, 3 ,2016-01-03, 2016-01-13 -Hello world, 4 ,2016-01-04, 2016-01-14, 2016-01-15' | $CLICKHOUSE_CLIENT --input_format_csv_empty_as_default=1 
--input_format_csv_ignore_extra_columns=1 --query="INSERT INTO csv FORMAT CSV"; +echo 'Hello, 1, String1 +Hello, 2, String2, +Hello, 3, String3, 2016-01-13 +Hello, 4, , 2016-01-14 +Hello, 5, String5, 2016-01-15, 2016-01-16 +Hello, 6, String6, "line with a +break"' | $CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=1 --input_format_csv_empty_as_default=1 --input_format_csv_ignore_extra_columns=1 --query="INSERT INTO csv FORMAT CSV"; $CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY s, n"; $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; \ No newline at end of file From 96fb7f04cbc94c625229afd2958cea056c563920 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 14 Jun 2023 15:11:43 +0200 Subject: [PATCH 0369/1997] Finish download of partially downloaded file segments in the background instead of resizing --- src/Common/CurrentMetrics.cpp | 1 + src/Interpreters/Cache/FileCache.cpp | 38 ++--- src/Interpreters/Cache/FileCache.h | 1 + src/Interpreters/Cache/FileCacheSettings.cpp | 3 + src/Interpreters/Cache/FileCacheSettings.h | 3 +- src/Interpreters/Cache/FileCache_fwd.h | 1 + src/Interpreters/Cache/FileSegment.cpp | 20 ++- src/Interpreters/Cache/Metadata.cpp | 154 ++++++++++++------ src/Interpreters/Cache/Metadata.h | 2 + ...02789_filesystem_cache_alignment.reference | 7 + .../02789_filesystem_cache_alignment.sh | 125 ++++++++++++++ 11 files changed, 264 insertions(+), 91 deletions(-) create mode 100644 tests/queries/0_stateless/02789_filesystem_cache_alignment.reference create mode 100755 tests/queries/0_stateless/02789_filesystem_cache_alignment.sh diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 61725d079bf..b479eac3021 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -188,6 +188,7 @@ M(CacheDetachedFileSegments, "Number of existing detached cache file segments") \ M(FilesystemCacheSize, "Filesystem cache size in bytes") \ M(FilesystemCacheElements, "Filesystem cache elements (file segments)") \ + M(FilesystemCacheDownloadQueueElements, "Filesystem cache elements in download queue") \ M(AsyncInsertCacheSize, "Number of async insert hash id in cache") \ M(S3Requests, "S3 requests") \ M(KeeperAliveConnections, "Number of alive connections") \ diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index 65f8ecf7e89..c090d3accd4 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -51,6 +51,7 @@ FileCache::FileCache(const FileCacheSettings & settings) , bypass_cache_threshold(settings.enable_bypass_cache_with_threashold ? 
settings.bypass_cache_threashold : 0) , delayed_cleanup_interval_ms(settings.delayed_cleanup_interval_ms) , boundary_alignment(settings.boundary_alignment) + , background_download_threads(settings.background_download_threads) , log(&Poco::Logger::get("FileCache")) , metadata(settings.base_path) { @@ -125,12 +126,10 @@ void FileCache::initialize() is_initialized = true; - size_t num_threads=2; - for (size_t i = 0; i < num_threads; ++i) + for (size_t i = 0; i < background_download_threads; ++i) download_threads.emplace_back([this] { metadata.downloadThreadFunc(); }); - auto & schedule_pool = Context::getGlobalContextInstance()->getSchedulePool(); - cleanup_task = schedule_pool.createTask("FileCacheCleanup", [this]{ cleanupThreadFunc(); }); + cleanup_task = Context::getGlobalContextInstance()->getSchedulePool().createTask("FileCacheCleanup", [this]{ cleanupThreadFunc(); }); cleanup_task->activate(); cleanup_task->scheduleAfter(delayed_cleanup_interval_ms); } @@ -644,27 +643,14 @@ bool FileCache::tryReserve(FileSegment & file_segment, const size_t size) if (releasable) { - auto segment = segment_metadata->file_segment; - if (segment->state() == FileSegment::State::DOWNLOADED) - { - const auto & key = segment->key(); + const auto & key = segment_metadata->file_segment->key(); + auto it = to_delete.find(key); + if (it == to_delete.end()) + it = to_delete.emplace(key, locked_key.getKeyMetadata()).first; + it->second.add(segment_metadata); - auto it = to_delete.find(key); - if (it == to_delete.end()) - it = to_delete.emplace(key, locked_key.getKeyMetadata()).first; - it->second.add(segment_metadata); - - freeable_space += segment_metadata->size(); - freeable_count += 1; - - return PriorityIterationResult::CONTINUE; - } - - ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictedFileSegments); - ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictedBytes, segment->getDownloadedSize(false)); - - locked_key.removeFileSegment(segment->offset(), segment->lock()); - return PriorityIterationResult::REMOVE_AND_CONTINUE; + freeable_space += segment_metadata->size(); + freeable_count += 1; } return PriorityIterationResult::CONTINUE; }; @@ -1005,10 +991,6 @@ void FileCache::cleanupThreadFunc() { try { -#ifdef ABORT_ON_LOGICAL_ERROR - assertCacheCorrectness(); -#endif - cleanup(); } catch (...) 
diff --git a/src/Interpreters/Cache/FileCache.h b/src/Interpreters/Cache/FileCache.h index ed693b475ec..931cecf607b 100644 --- a/src/Interpreters/Cache/FileCache.h +++ b/src/Interpreters/Cache/FileCache.h @@ -135,6 +135,7 @@ private: const size_t bypass_cache_threshold = 0; const size_t delayed_cleanup_interval_ms; const size_t boundary_alignment; + const size_t background_download_threads; Poco::Logger * log; diff --git a/src/Interpreters/Cache/FileCacheSettings.cpp b/src/Interpreters/Cache/FileCacheSettings.cpp index d41aa8ffcb9..bc6e641c869 100644 --- a/src/Interpreters/Cache/FileCacheSettings.cpp +++ b/src/Interpreters/Cache/FileCacheSettings.cpp @@ -49,6 +49,9 @@ void FileCacheSettings::loadFromConfig(const Poco::Util::AbstractConfiguration & if (config.has(config_prefix + ".boundary_alignment")) boundary_alignment = parseWithSizeSuffix(config.getString(config_prefix + ".boundary_alignment")); + if (config.has(config_prefix + ".background_download_threads")) + background_download_threads = config.getUInt(config_prefix + ".background_download_threads"); + delayed_cleanup_interval_ms = config.getUInt64(config_prefix + ".delayed_cleanup_interval_ms", FILECACHE_DELAYED_CLEANUP_INTERVAL_MS); } diff --git a/src/Interpreters/Cache/FileCacheSettings.h b/src/Interpreters/Cache/FileCacheSettings.h index eeb2a02c131..fcc5c02c52e 100644 --- a/src/Interpreters/Cache/FileCacheSettings.h +++ b/src/Interpreters/Cache/FileCacheSettings.h @@ -28,7 +28,8 @@ struct FileCacheSettings size_t bypass_cache_threashold = FILECACHE_BYPASS_THRESHOLD; size_t delayed_cleanup_interval_ms = FILECACHE_DELAYED_CLEANUP_INTERVAL_MS; - size_t boundary_alignment = DBMS_DEFAULT_BUFFER_SIZE; + size_t boundary_alignment = FILECACHE_DEFAULT_MIN_FILE_SEGMENT_SIZE; + size_t background_download_threads = FILECACHE_DEFAULT_BACKGROUND_DOWNLOAD_THREADS; void loadFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix); }; diff --git a/src/Interpreters/Cache/FileCache_fwd.h b/src/Interpreters/Cache/FileCache_fwd.h index c791d08ed4f..902a6ff42d0 100644 --- a/src/Interpreters/Cache/FileCache_fwd.h +++ b/src/Interpreters/Cache/FileCache_fwd.h @@ -6,6 +6,7 @@ namespace DB static constexpr int FILECACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE = 32 * 1024 * 1024; /// 32Mi static constexpr int FILECACHE_DEFAULT_MIN_FILE_SEGMENT_SIZE = 4 * 1024 * 1024; /// 4Mi +static constexpr int FILECACHE_DEFAULT_BACKGROUND_DOWNLOAD_THREADS = 2; static constexpr int FILECACHE_DEFAULT_MAX_ELEMENTS = 10000000; static constexpr int FILECACHE_DEFAULT_HITS_THRESHOLD = 0; static constexpr size_t FILECACHE_BYPASS_THRESHOLD = 256 * 1024 * 1024; diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index f95379ba07f..5dd35a720c1 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -282,8 +282,11 @@ void FileSegment::resetRemoteFileReader() FileSegment::RemoteFileReaderPtr FileSegment::extractRemoteFileReader() { - if (download_state == State::PARTIALLY_DOWNLOADED_NO_CONTINUATION) + if (isCompleted(false) + || download_state == State::PARTIALLY_DOWNLOADED_NO_CONTINUATION) + { return std::move(remote_file_reader); + } return nullptr; } @@ -598,7 +601,6 @@ void FileSegment::complete() auto remove_from_cache = [&, this]() { - LOG_TEST(log, "Remove file segment {} (nothing downloaded)", range().toString()); locked_key->removeFileSegment(offset(), segment_lock); setDetachedState(segment_lock); }; @@ -625,6 +627,12 @@ void FileSegment::complete() 
chassert(!is_last_holder); break; } + case State::EMPTY: + { + if (is_last_holder) + remove_from_cache(); + break; + } case State::PARTIALLY_DOWNLOADED: { if (is_last_holder) @@ -637,12 +645,6 @@ void FileSegment::complete() } break; } - case State::EMPTY: - { - if (is_last_holder) - remove_from_cache(); - break; - } case State::PARTIALLY_DOWNLOADED_NO_CONTINUATION: { chassert(current_downloaded_size != range().size()); @@ -653,7 +655,7 @@ void FileSegment::complete() { remove_from_cache(); } - else if (download_state == State::PARTIALLY_DOWNLOADED_NO_CONTINUATION) + else { LOG_TEST(log, "Resize file segment {} to downloaded: {}", range().toString(), current_downloaded_size); diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index 2a3803eb3d7..c3f428c3e08 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -1,11 +1,17 @@ #include #include #include +#include "Common/Exception.h" #include #include namespace fs = std::filesystem; +namespace CurrentMetrics +{ + extern const Metric FilesystemCacheDownloadQueueElements; +} + namespace DB { @@ -256,6 +262,7 @@ void CacheMetadata::doCleanup() continue; } + chassert(it->second->empty()); locked_metadata->markAsRemoved(); erase(it); LOG_DEBUG(log, "Key {} is removed from metadata", cleanup_key); @@ -306,6 +313,8 @@ public: std::lock_guard lock(mutex); queue.push(file_segment); } + + CurrentMetrics::add(CurrentMetrics::FilesystemCacheDownloadQueueElements); cv.notify_one(); } @@ -324,10 +333,10 @@ private: void CacheMetadata::downloadThreadFunc() { - std::optional> memory; while (true) { std::weak_ptr file_segment_weak; + { std::unique_lock lock(download_queue->mutex); @@ -344,67 +353,106 @@ void CacheMetadata::downloadThreadFunc() download_queue->queue.pop(); } + CurrentMetrics::sub(CurrentMetrics::FilesystemCacheDownloadQueueElements); + FileSegmentsHolderPtr holder; + try { - auto file_segment = file_segment_weak.lock(); - if (!file_segment - || file_segment->state() != FileSegment::State::PARTIALLY_DOWNLOADED) - continue; - - auto lock = lockKeyMetadata(file_segment->key(), KeyNotFoundPolicy::RETURN_NULL); - if (!lock) - continue; - - holder = std::make_unique(FileSegments{file_segment}); - } - - auto & file_segment = holder->front(); - chassert(file_segment.assertCorrectness()); - - if (file_segment.getOrSetDownloader() != FileSegment::getCallerId()) - continue; - - LOG_TRACE(log, "Downloading file segment: {}", file_segment.getInfoForLog()); - - auto reader = file_segment.getRemoteFileReader(); - - /// If remote_fs_read_method == 'threadpool', - /// reader iteself does not allocate the buffer, but uses the buffer passed to it. - /// So will need to allocate a buffer here as well. - if (reader->buffer().begin() == nullptr) - { - if (!memory) - memory.emplace(DBMS_DEFAULT_BUFFER_SIZE); - reader->set(memory->data(), memory->size()); - } - - size_t offset = file_segment.getCurrentWriteOffset(false); - while (!reader->eof()) - { - auto size = reader->available(); - - if (!file_segment.reserve(size)) - return; - - try { - file_segment.write(reader->position(), size, offset); - offset += size; - } - catch (ErrnoException & e) - { - int code = e.getErrno(); - if (code == /* No space left on device */28 || code == /* Quota exceeded */122) - { - LOG_INFO(log, "Insert into cache is skipped due to insufficient disk space. 
({})", e.displayText()); + auto file_segment = file_segment_weak.lock(); + if (!file_segment + || file_segment->state() != FileSegment::State::PARTIALLY_DOWNLOADED) continue; - } - throw; + + auto lock = lockKeyMetadata(file_segment->key(), KeyNotFoundPolicy::RETURN_NULL); + if (!lock) + continue; + + holder = std::make_unique(FileSegments{file_segment}); + } + + downloadImpl(holder->front()); + } + catch (...) + { + if (holder) + { + const auto & file_segment = holder->front(); + LOG_ERROR( + log, "Error during background download of {}:{} ({}): {}", + file_segment.key(), file_segment.offset(), + file_segment.getInfoForLog(), getCurrentExceptionMessage(true)); + } + else + { + tryLogCurrentException(__PRETTY_FUNCTION__); + chassert(false); } } } } +void CacheMetadata::downloadImpl(FileSegment & file_segment) +{ + chassert(file_segment.assertCorrectness()); + + if (file_segment.getOrSetDownloader() != FileSegment::getCallerId()) + return; + + LOG_TEST( + log, "Downloading {} bytes for file segment {}", + file_segment.range().size() - file_segment.getDownloadedSize(false), file_segment.getInfoForLog()); + + auto reader = file_segment.getRemoteFileReader(); + + /// If remote_fs_read_method == 'threadpool', + /// reader iteself bever owns/allocates the buffer. + std::optional> memory; + if (reader->internalBuffer().empty()) + { + memory.emplace(DBMS_DEFAULT_BUFFER_SIZE); + reader->set(memory->data(), memory->size()); + } + + size_t offset = file_segment.getCurrentWriteOffset(false); + if (offset != static_cast(reader->getPosition())) + reader->seek(offset, SEEK_SET); + + while (!reader->eof()) + { + auto size = reader->available(); + + if (!file_segment.reserve(size)) + { + LOG_TEST( + log, "Failed to reserve space during background download " + "for {}:{} (downloaded size: {}/{})", + file_segment.key(), file_segment.offset(), + file_segment.getDownloadedSize(false), file_segment.range().size()); + return; + } + + try + { + file_segment.write(reader->position(), size, offset); + offset += size; + reader->position() += size; + } + catch (ErrnoException & e) + { + int code = e.getErrno(); + if (code == /* No space left on device */28 || code == /* Quota exceeded */122) + { + LOG_INFO(log, "Insert into cache is skipped due to insufficient disk space. 
({})", e.displayText());
+                return;
+            }
+            throw;
+        }
+    }
+
+    LOG_TEST(log, "Downloaded file segment: {}", file_segment.getInfoForLog());
+}
+
 void CacheMetadata::cancelDownload()
 {
     download_queue->cancel();
diff --git a/src/Interpreters/Cache/Metadata.h b/src/Interpreters/Cache/Metadata.h
index f96243c3f1f..7e8fdef6641 100644
--- a/src/Interpreters/Cache/Metadata.h
+++ b/src/Interpreters/Cache/Metadata.h
@@ -125,6 +125,8 @@ private:
     const CleanupQueuePtr cleanup_queue;
     const DownloadQueuePtr download_queue;
     Poco::Logger * log;
+
+    void downloadImpl(FileSegment & file_segment);
 };
 
 
diff --git a/tests/queries/0_stateless/02789_filesystem_cache_alignment.reference b/tests/queries/0_stateless/02789_filesystem_cache_alignment.reference
new file mode 100644
index 00000000000..70e1fca6a65
--- /dev/null
+++ b/tests/queries/0_stateless/02789_filesystem_cache_alignment.reference
@@ -0,0 +1,7 @@
+0
+OK
+OK
+0
+0
+OK
+OK
diff --git a/tests/queries/0_stateless/02789_filesystem_cache_alignment.sh b/tests/queries/0_stateless/02789_filesystem_cache_alignment.sh
new file mode 100755
index 00000000000..a8bb4440a4c
--- /dev/null
+++ b/tests/queries/0_stateless/02789_filesystem_cache_alignment.sh
@@ -0,0 +1,125 @@
+#!/usr/bin/env bash
+# Tags: no-fasttest, no-parallel
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+$CLICKHOUSE_CLIENT -nm -q "
+DROP TABLE IF EXISTS test;
+CREATE TABLE test (a Int32, b String)
+ENGINE = MergeTree()
+ORDER BY a
+SETTINGS disk = disk(type = cache,
+                     max_size = '1Gi',
+                     max_file_segment_size = '40Mi',
+                     boundary_alignment = '20Mi',
+                     path = '$CLICKHOUSE_TEST_UNIQUE_NAME',
+                     disk = 's3disk');
+
+INSERT INTO test SELECT number, randomString(100) FROM numbers(1000000);
+"
+
+QUERY_ID=$RANDOM
+$CLICKHOUSE_CLIENT --query_id "$QUERY_ID" -nm -q "
+SET enable_filesystem_cache_log = 1;
+SYSTEM DROP FILESYSTEM CACHE;
+SELECT * FROM test WHERE NOT ignore() LIMIT 1 FORMAT Null;
+SYSTEM FLUSH LOGS;
+"
+
+query="
+SELECT cache_path, file_size,
+       tupleElement(file_segment_range, 2) - tupleElement(file_segment_range, 1) + 1 as file_segment_size,
+       formatReadableSize(file_size) as formatted_file_size,
+       formatReadableSize(file_segment_size) as formatted_file_segment_size,
+       tupleElement(file_segment_range, 2) as end_offset
+FROM (
+    SELECT arrayJoin(cache_paths) AS cache_path,
+           local_path,
+           remote_path,
+           size as file_size
+    FROM system.remote_data_paths
+    WHERE endsWith(local_path, '.bin')
+) AS data_paths
+INNER JOIN system.filesystem_cache_log AS cache_log
+ON data_paths.remote_path = cache_log.source_file_path
+WHERE query_id = '$QUERY_ID' "
+
+# File segments cannot be less than 20Mi,
+# except for last file segment in a file or if file size is less.
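+# (Illustrative arithmetic, assuming the settings above: with
+#  boundary_alignment = 20Mi a read covering bytes [30Mi, 45Mi) is widened
+#  to the aligned range [20Mi, 60Mi), which max_file_segment_size = 40Mi
+#  then splits into segments of at most 40Mi; only a segment that ends at
+#  EOF may come out shorter.)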
+$CLICKHOUSE_CLIENT -nm -q " +SELECT count() FROM ($query) +WHERE file_segment_size < file_size +AND end_offset + 1 != file_size +AND file_segment_size < 20 * 1024 * 1024; +" + +all=$($CLICKHOUSE_CLIENT -nm -q " +SELECT count() FROM ($query) +WHERE file_segment_size < file_size AND end_offset + 1 != file_size; +") +#echo $all + +if [ "$all" -gt "10" ]; then + echo "OK" +else + echo "FAIL" +fi + +count=$($CLICKHOUSE_CLIENT -nm -q " +SELECT count() FROM ($query) +WHERE file_segment_size < file_size +AND end_offset + 1 != file_size +AND formatted_file_segment_size in ('20.00 MiB', '40.00 MiB') +") + +if [ "$count" = "$all" ]; then + echo "OK" +else + echo "FAIL" +fi + +query2=" +SELECT * +FROM (SELECT * FROM ($query)) AS cache_log +INNER JOIN system.filesystem_cache AS cache +ON cache_log.cache_path = cache.cache_path " + +$CLICKHOUSE_CLIENT -nm -q " +SELECT count() FROM ($query2) +WHERE file_segment_range_begin - file_segment_range_end + 1 < file_size +AND file_segment_range_end + 1 != file_size +AND downloaded_size < 20 * 1024 * 1024; +" + +$CLICKHOUSE_CLIENT -nm -q " +SELECT count() FROM ($query2) +WHERE file_segment_range_begin - file_segment_range_end + 1 < file_size +AND file_segment_range_end + 1 != file_size +AND formatReadableSize(downloaded_size) not in ('20.00 MiB', '40.00 MiB'); +" + +all=$($CLICKHOUSE_CLIENT -nm -q " +SELECT count() FROM ($query2) +WHERE file_segment_size < file_size AND file_segment_range_end + 1 != file_size; +") + +if [ "$all" -gt "10" ]; then + echo "OK" +else + echo "FAIL" +fi + +count2=$($CLICKHOUSE_CLIENT -nm -q " +SELECT count() FROM ($query2) +WHERE file_segment_range_begin - file_segment_range_end + 1 < file_size +AND file_segment_range_end + 1 != file_size +AND formatReadableSize(downloaded_size) in ('20.00 MiB', '40.00 MiB'); +") + +if [ "$count2" = "$all" ]; then + echo "OK" +else + echo "FAIL" +fi From d0d39c8f6142cf2b954b3125eca4491f82718524 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 14 Jun 2023 18:10:30 +0000 Subject: [PATCH 0370/1997] Trying to fix a test. 
--- src/Interpreters/ActionsVisitor.cpp | 29 ++++++- .../InterpreterSelectQueryAnalyzer.h | 2 + src/Interpreters/interpretSubquery.cpp | 85 ++++++++++++------- src/Interpreters/interpretSubquery.h | 5 ++ src/Planner/CollectSets.cpp | 1 + 5 files changed, 87 insertions(+), 35 deletions(-) diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 59bbc74ca3a..34c3dab8926 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -3,7 +3,8 @@ #include #include #include -#include "Parsers/queryToString.h" +#include +#include #include #include @@ -55,6 +56,7 @@ #include #include #include +#include #include #include @@ -1394,7 +1396,18 @@ FutureSetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool if (no_subqueries) return {}; //std::cerr << queryToString(right_in_operand) << std::endl; - auto set_key = PreparedSetKey::forSubquery(right_in_operand->getTreeHash()); + PreparedSetKey set_key; + if (data.getContext()->getSettingsRef().allow_experimental_analyzer) + { + InterpreterSelectQueryAnalyzer interpreter(right_in_operand, data.getContext(), SelectQueryOptions().analyze(true).subquery()); + auto query_tree = interpreter.getQueryTree(); + if (auto * query_node = query_tree->as()) + query_node->setIsSubquery(true); + // std::cerr << "============== " << interpreter.getQueryTree()->dumpTree() << std::endl; + set_key = PreparedSetKey::forSubquery(interpreter.getQueryTree()->getTreeHash()); + } + else + set_key = PreparedSetKey::forSubquery(right_in_operand->getTreeHash()); // std::cerr << set_key.toString() << std::endl; // std::cerr << data.prepared_sets->getSets().size() << std::endl; @@ -1446,8 +1459,16 @@ FutureSetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool * Also it doesn't make sense if it is GLOBAL IN or ordinary IN. 
*/ { - auto interpreter = interpretSubquery(right_in_operand, data.getContext(), data.subquery_depth, {}); - subquery_for_set.createSource(*interpreter); + if (data.getContext()->getSettingsRef().allow_experimental_analyzer) + { + auto interpreter = interpretSubquery(right_in_operand, data.getContext(), data.subquery_depth); + subquery_for_set.source = std::make_unique(std::move(*interpreter).extractQueryPlan()); + } + else + { + auto interpreter = interpretSubquery(right_in_operand, data.getContext(), data.subquery_depth, {}); + subquery_for_set.createSource(*interpreter); + } } return data.prepared_sets->addFromSubquery(set_key, std::move(subquery_for_set), data.getContext()->getSettingsRef(), std::move(external_table_set)); diff --git a/src/Interpreters/InterpreterSelectQueryAnalyzer.h b/src/Interpreters/InterpreterSelectQueryAnalyzer.h index 1e0ac737536..4434fabe746 100644 --- a/src/Interpreters/InterpreterSelectQueryAnalyzer.h +++ b/src/Interpreters/InterpreterSelectQueryAnalyzer.h @@ -69,6 +69,8 @@ public: const Planner & getPlanner() const { return planner; } Planner & getPlanner() { return planner; } + const QueryTreeNodePtr & getQueryTree() const { return query_tree; } + private: ASTPtr query; ContextMutablePtr context; diff --git a/src/Interpreters/interpretSubquery.cpp b/src/Interpreters/interpretSubquery.cpp index 5f00be07fa5..b757127a14d 100644 --- a/src/Interpreters/interpretSubquery.cpp +++ b/src/Interpreters/interpretSubquery.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -21,37 +22,10 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -std::shared_ptr interpretSubquery( - const ASTPtr & table_expression, ContextPtr context, size_t subquery_depth, const Names & required_source_columns) +static ASTPtr buildQueryAST(const ASTPtr & table_expression, const ContextPtr & context, SelectQueryOptions & subquery_options); + +static ContextPtr getSubqueryContext(const ContextPtr & context) { - auto subquery_options = SelectQueryOptions(QueryProcessingStage::Complete, subquery_depth); - return interpretSubquery(table_expression, context, required_source_columns, subquery_options); -} - -std::shared_ptr interpretSubquery( - const ASTPtr & table_expression, ContextPtr context, const Names & required_source_columns, const SelectQueryOptions & options) -{ - if (auto * expr = table_expression->as()) - { - ASTPtr table; - if (expr->subquery) - table = expr->subquery; - else if (expr->table_function) - table = expr->table_function; - else if (expr->database_and_table_name) - table = expr->database_and_table_name; - - return interpretSubquery(table, context, required_source_columns, options); - } - - /// Subquery or table name. The name of the table is similar to the subquery `SELECT * FROM t`. - const auto * subquery = table_expression->as(); - const auto * function = table_expression->as(); - const auto * table = table_expression->as(); - - if (!subquery && !table && !function) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Table expression is undefined, Method: ExpressionAnalyzer::interpretSubquery."); - /** The subquery in the IN / JOIN section does not have any restrictions on the maximum size of the result. * Because the result of this query is not the result of the entire query. 
* Constraints work instead @@ -67,7 +41,56 @@ std::shared_ptr interpretSubquery( subquery_settings.extremes = false; subquery_context->setSettings(subquery_settings); + return subquery_context; +} + +std::shared_ptr interpretSubquery( + const ASTPtr & table_expression, ContextPtr context, size_t subquery_depth, const Names & required_source_columns) +{ + auto subquery_options = SelectQueryOptions(QueryProcessingStage::Complete, subquery_depth); + return interpretSubquery(table_expression, context, required_source_columns, subquery_options); +} + +std::shared_ptr interpretSubquery( + const ASTPtr & table_expression, ContextPtr context, const Names & required_source_columns, const SelectQueryOptions & options) +{ auto subquery_options = options.subquery(); + auto query = buildQueryAST(table_expression, context, subquery_options); + auto subquery_context = getSubqueryContext(context); + return std::make_shared(query, subquery_context, subquery_options, required_source_columns); +} + +std::shared_ptr interpretSubquery( + const ASTPtr & table_expression, ContextPtr context, size_t subquery_depth) +{ + auto subquery_options = SelectQueryOptions(QueryProcessingStage::Complete, subquery_depth).subquery(); + auto query = buildQueryAST(table_expression, context, subquery_options); + auto subquery_context = getSubqueryContext(context); + return std::make_shared(query, subquery_context, subquery_options); +} + +static ASTPtr buildQueryAST(const ASTPtr & table_expression, const ContextPtr & context, SelectQueryOptions & subquery_options) +{ + if (auto * expr = table_expression->as()) + { + ASTPtr table; + if (expr->subquery) + table = expr->subquery; + else if (expr->table_function) + table = expr->table_function; + else if (expr->database_and_table_name) + table = expr->database_and_table_name; + + return buildQueryAST(table, context, subquery_options); + } + + /// Subquery or table name. The name of the table is similar to the subquery `SELECT * FROM t`. 
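+    /// Illustrative cases (assuming the checks below): for `x IN (SELECT id FROM t)`
+    /// the right operand arrives here as a subquery, while for `x IN t` it is a
+    /// table identifier that gets wrapped into an equivalent `SELECT * FROM t`.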
+ const auto * subquery = table_expression->as(); + const auto * function = table_expression->as(); + const auto * table = table_expression->as(); + + if (!subquery && !table && !function) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Table expression is undefined, Method: ExpressionAnalyzer::interpretSubquery."); ASTPtr query; if (table || function) @@ -112,7 +135,7 @@ std::shared_ptr interpretSubquery( subquery_options.removeDuplicates(); } - return std::make_shared(query, subquery_context, subquery_options, required_source_columns); + return query; } } diff --git a/src/Interpreters/interpretSubquery.h b/src/Interpreters/interpretSubquery.h index 3836d1f7664..02d2003ea5f 100644 --- a/src/Interpreters/interpretSubquery.h +++ b/src/Interpreters/interpretSubquery.h @@ -6,6 +6,11 @@ namespace DB { +class InterpreterSelectQueryAnalyzer; + +std::shared_ptr interpretSubquery( + const ASTPtr & table_expression, ContextPtr context, size_t subquery_depth); + std::shared_ptr interpretSubquery( const ASTPtr & table_expression, ContextPtr context, size_t subquery_depth, const Names & required_source_columns); diff --git a/src/Planner/CollectSets.cpp b/src/Planner/CollectSets.cpp index 68ad1ab78d3..90f3f074761 100644 --- a/src/Planner/CollectSets.cpp +++ b/src/Planner/CollectSets.cpp @@ -93,6 +93,7 @@ public: in_second_argument_node_type == QueryTreeNodeType::UNION || in_second_argument_node_type == QueryTreeNodeType::TABLE) { + // std::cerr << "======2======= " << in_second_argument->dumpTree() << std::endl; auto set_key = PreparedSetKey::forSubquery(in_second_argument->getTreeHash()); if (sets.getFuture(set_key)) return; From 87904976edc57ba13e011ae01e000602f653f776 Mon Sep 17 00:00:00 2001 From: tpanetti Date: Wed, 14 Jun 2023 13:04:20 -0700 Subject: [PATCH 0371/1997] Fix test for MySQL Compatible Types (suppress password warning) --- .../0_stateless/02775_show_columns_mysql_compatibility.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.sh b/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.sh index 89881a4cf60..c86519b6f5d 100755 --- a/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.sh +++ b/tests/queries/0_stateless/02775_show_columns_mysql_compatibility.sh @@ -146,7 +146,7 @@ EOT # Now run the MySQL test script on the ClickHouse DB echo "Run MySQL test" -${MYSQL_CLIENT} --user="$USER" --password="$PASSWORD" --host="$HOST" --port="$PORT" < $TEMP_FILE +MYSQL_PWD=$PASSWORD ${MYSQL_CLIENT} --user="$USER" --host="$HOST" --port="$PORT" < $TEMP_FILE # Clean up the temp file rm $TEMP_FILE From 0aa363f5254455ea3a351c9fcc9adc7c99701c53 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 14 Jun 2023 23:13:09 +0200 Subject: [PATCH 0372/1997] Fix style check --- src/Interpreters/Cache/Metadata.cpp | 2 +- tests/queries/0_stateless/02789_filesystem_cache_alignment.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index c3f428c3e08..b0009c6abc1 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -406,7 +406,7 @@ void CacheMetadata::downloadImpl(FileSegment & file_segment) auto reader = file_segment.getRemoteFileReader(); /// If remote_fs_read_method == 'threadpool', - /// reader iteself bever owns/allocates the buffer. + /// reader itself never owns/allocates the buffer. 
std::optional> memory; if (reader->internalBuffer().empty()) { diff --git a/tests/queries/0_stateless/02789_filesystem_cache_alignment.sh b/tests/queries/0_stateless/02789_filesystem_cache_alignment.sh index a8bb4440a4c..ca459153c03 100755 --- a/tests/queries/0_stateless/02789_filesystem_cache_alignment.sh +++ b/tests/queries/0_stateless/02789_filesystem_cache_alignment.sh @@ -15,7 +15,7 @@ SETTINGS disk = disk(type = cache, max_file_segment_size = '40Mi', boundary_alignment = '20Mi', path = '$CLICKHOUSE_TEST_UNIQUE_NAME', - disk = 's3disk'); + disk = 's3_disk'); INSERT INTO test SELECT number, randomString(100) FROM numbers(1000000); " From 8c610275c2e271cd3a506d207fd9cef938fcd258 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 14 Jun 2023 23:30:21 +0200 Subject: [PATCH 0373/1997] Review fixes --- src/Interpreters/Cache/FileCache.cpp | 1 - src/Interpreters/Cache/FileCacheSettings.h | 2 +- src/Interpreters/Cache/FileCache_fwd.h | 2 +- src/Interpreters/Cache/Metadata.cpp | 17 ++++++++++------- src/Interpreters/Cache/Metadata.h | 2 +- 5 files changed, 13 insertions(+), 11 deletions(-) diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index c090d3accd4..3b5ae9acb1b 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -1014,7 +1014,6 @@ FileSegmentsHolderPtr FileCache::getSnapshot() { for (const auto & [_, file_segment_metadata] : locked_key) file_segments.push_back(FileSegment::getSnapshot(file_segment_metadata->file_segment)); - return true; }); return std::make_unique(std::move(file_segments), /* complete_on_dtor */false); } diff --git a/src/Interpreters/Cache/FileCacheSettings.h b/src/Interpreters/Cache/FileCacheSettings.h index fcc5c02c52e..6820aff61e3 100644 --- a/src/Interpreters/Cache/FileCacheSettings.h +++ b/src/Interpreters/Cache/FileCacheSettings.h @@ -28,7 +28,7 @@ struct FileCacheSettings size_t bypass_cache_threashold = FILECACHE_BYPASS_THRESHOLD; size_t delayed_cleanup_interval_ms = FILECACHE_DELAYED_CLEANUP_INTERVAL_MS; - size_t boundary_alignment = FILECACHE_DEFAULT_MIN_FILE_SEGMENT_SIZE; + size_t boundary_alignment = FILECACHE_DEFAULT_FILE_SEGMENT_ALIGNMENT; size_t background_download_threads = FILECACHE_DEFAULT_BACKGROUND_DOWNLOAD_THREADS; void loadFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix); diff --git a/src/Interpreters/Cache/FileCache_fwd.h b/src/Interpreters/Cache/FileCache_fwd.h index 902a6ff42d0..4d4a1c1429c 100644 --- a/src/Interpreters/Cache/FileCache_fwd.h +++ b/src/Interpreters/Cache/FileCache_fwd.h @@ -5,7 +5,7 @@ namespace DB { static constexpr int FILECACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE = 32 * 1024 * 1024; /// 32Mi -static constexpr int FILECACHE_DEFAULT_MIN_FILE_SEGMENT_SIZE = 4 * 1024 * 1024; /// 4Mi +static constexpr int FILECACHE_DEFAULT_FILE_SEGMENT_ALIGNMENT = 4 * 1024 * 1024; /// 4Mi static constexpr int FILECACHE_DEFAULT_BACKGROUND_DOWNLOAD_THREADS = 2; static constexpr int FILECACHE_DEFAULT_MAX_ELEMENTS = 10000000; static constexpr int FILECACHE_DEFAULT_HITS_THRESHOLD = 0; diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index b0009c6abc1..f94b8ae4db2 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -321,8 +321,11 @@ public: private: void cancel() { - std::lock_guard lock(mutex); - cancelled = true; + { + std::lock_guard lock(mutex); + cancelled = true; + } + cv.notify_all(); } std::mutex mutex; @@ -333,6 +336,7 @@ private: void 
CacheMetadata::downloadThreadFunc() { + std::optional> memory; while (true) { std::weak_ptr file_segment_weak; @@ -371,7 +375,7 @@ void CacheMetadata::downloadThreadFunc() holder = std::make_unique(FileSegments{file_segment}); } - downloadImpl(holder->front()); + downloadImpl(holder->front(), memory); } catch (...) { @@ -392,7 +396,7 @@ void CacheMetadata::downloadThreadFunc() } } -void CacheMetadata::downloadImpl(FileSegment & file_segment) +void CacheMetadata::downloadImpl(FileSegment & file_segment, std::optional> & memory) { chassert(file_segment.assertCorrectness()); @@ -407,10 +411,10 @@ void CacheMetadata::downloadImpl(FileSegment & file_segment) /// If remote_fs_read_method == 'threadpool', /// reader itself never owns/allocates the buffer. - std::optional> memory; if (reader->internalBuffer().empty()) { - memory.emplace(DBMS_DEFAULT_BUFFER_SIZE); + if (!memory) + memory.emplace(DBMS_DEFAULT_BUFFER_SIZE); reader->set(memory->data(), memory->size()); } @@ -456,7 +460,6 @@ void CacheMetadata::downloadImpl(FileSegment & file_segment) void CacheMetadata::cancelDownload() { download_queue->cancel(); - download_queue->cv.notify_all(); } LockedKey::LockedKey(std::shared_ptr key_metadata_) diff --git a/src/Interpreters/Cache/Metadata.h b/src/Interpreters/Cache/Metadata.h index 7e8fdef6641..f9f7dfccaa5 100644 --- a/src/Interpreters/Cache/Metadata.h +++ b/src/Interpreters/Cache/Metadata.h @@ -126,7 +126,7 @@ private: const DownloadQueuePtr download_queue; Poco::Logger * log; - void downloadImpl(FileSegment & file_segment); + void downloadImpl(FileSegment & file_segment, std::optional> & memory); }; From 0023bf69cbaf2ec756bca883e3bb6e9338e574de Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 15 Jun 2023 12:22:50 +0200 Subject: [PATCH 0374/1997] Fix --- src/Interpreters/Cache/FileSegment.cpp | 18 ++++++++++--- src/Interpreters/Cache/Metadata.cpp | 3 +++ .../InterpreterDescribeCacheQuery.cpp | 27 ++++++++++++------- .../02344_describe_cache.reference | 4 +-- 4 files changed, 37 insertions(+), 15 deletions(-) diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index 5dd35a720c1..4cc5e4307ad 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -635,13 +635,23 @@ void FileSegment::complete() } case State::PARTIALLY_DOWNLOADED: { + chassert(current_downloaded_size > 0); + if (is_last_holder) { - LOG_TEST( - log, "Submitted file segment for background download " - "(having {}/{})", downloaded_size, range().size()); + if (remote_file_reader) + { + LOG_TEST( + log, "Submitting file segment for background download " + "(having {}/{})", downloaded_size, range().size()); - locked_key->addToDownloadQueue(offset(), segment_lock); /// Finish download in background. + locked_key->addToDownloadQueue(offset(), segment_lock); /// Finish download in background. + } + else + { + locked_key->shrinkFileSegmentToDownloadedSize(offset(), segment_lock); + setDetachedState(segment_lock); /// See comment below. 
+ } } break; } diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index f94b8ae4db2..fd590315ab1 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -403,6 +403,9 @@ void CacheMetadata::downloadImpl(FileSegment & file_segment, std::optional(), "max_size"}, ColumnWithTypeAndName{std::make_shared(), "max_elements"}, ColumnWithTypeAndName{std::make_shared(), "max_file_segment_size"}, + ColumnWithTypeAndName{std::make_shared(), "boundary_alignment"}, ColumnWithTypeAndName{std::make_shared>(), "cache_on_write_operations"}, ColumnWithTypeAndName{std::make_shared>(), "cache_hits_threshold"}, ColumnWithTypeAndName{std::make_shared(), "current_size"}, ColumnWithTypeAndName{std::make_shared(), "current_elements"}, ColumnWithTypeAndName{std::make_shared(), "path"}, ColumnWithTypeAndName{std::make_shared>(), "do_not_evict_index_and_mark_files"}, + ColumnWithTypeAndName{std::make_shared>(), "delayed_cleanup_interval_ms"}, + ColumnWithTypeAndName{std::make_shared>(), "background_download_threads"}, + ColumnWithTypeAndName{std::make_shared>(), "enable_bypass_cache_with_threshold"}, }; return Block(columns); } @@ -41,15 +45,20 @@ BlockIO InterpreterDescribeCacheQuery::execute() const auto & settings = cache_data.settings; const auto & cache = cache_data.cache; - res_columns[0]->insert(settings.max_size); - res_columns[1]->insert(settings.max_elements); - res_columns[2]->insert(settings.max_file_segment_size); - res_columns[3]->insert(settings.cache_on_write_operations); - res_columns[4]->insert(settings.cache_hits_threshold); - res_columns[5]->insert(cache->getUsedCacheSize()); - res_columns[6]->insert(cache->getFileSegmentsNum()); - res_columns[7]->insert(cache->getBasePath()); - res_columns[8]->insert(settings.do_not_evict_index_and_mark_files); + size_t i = 0; + res_columns[i++]->insert(settings.max_size); + res_columns[i++]->insert(settings.max_elements); + res_columns[i++]->insert(settings.max_file_segment_size); + res_columns[i++]->insert(settings.boundary_alignment); + res_columns[i++]->insert(settings.cache_on_write_operations); + res_columns[i++]->insert(settings.cache_hits_threshold); + res_columns[i++]->insert(cache->getUsedCacheSize()); + res_columns[i++]->insert(cache->getFileSegmentsNum()); + res_columns[i++]->insert(cache->getBasePath()); + res_columns[i++]->insert(settings.do_not_evict_index_and_mark_files); + res_columns[i++]->insert(settings.delayed_cleanup_interval_ms); + res_columns[i++]->insert(settings.background_download_threads); + res_columns[i++]->insert(settings.enable_bypass_cache_with_threashold); BlockIO res; size_t num_rows = res_columns[0]->size(); diff --git a/tests/queries/0_stateless/02344_describe_cache.reference b/tests/queries/0_stateless/02344_describe_cache.reference index d8a2ffab1fa..7496f754937 100644 --- a/tests/queries/0_stateless/02344_describe_cache.reference +++ b/tests/queries/0_stateless/02344_describe_cache.reference @@ -1,2 +1,2 @@ -134217728 10000000 8388608 1 0 0 0 /var/lib/clickhouse/caches/s3_cache/ 0 -134217728 10000000 104857600 0 0 0 0 /var/lib/clickhouse/caches/s3_cache_2/ 0 +134217728 10000000 33554432 4194304 1 0 0 0 /var/lib/clickhouse/caches/s3_cache/ 0 0 60000 2 0 +134217728 10000000 104857600 4194304 0 0 0 0 /var/lib/clickhouse/caches/s3_cache_2/ 0 0 60000 2 0 From 3b4dba3d681cb2ef75e31740e801d8813ce4586f Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 15 Jun 2023 12:33:24 +0200 Subject: [PATCH 0375/1997] Fix tests --- 
.../test_mask_sensitive_info/configs/users.xml | 9 +++++++++ tests/integration/test_mask_sensitive_info/test.py | 1 + tests/integration/test_redirect_url_storage/test.py | 2 +- tests/integration/test_s3_cluster/test.py | 2 ++ .../test_storage_azure_blob_storage/configs/users.xml | 9 +++++++++ .../integration/test_storage_azure_blob_storage/test.py | 2 +- tests/integration/test_storage_dict/configs/users.xml | 9 +++++++++ .../test_storage_hudi/configs/users.d/users.xml | 9 +++++++++ .../test_storage_meilisearch/configs/users.xml | 9 +++++++++ tests/integration/test_storage_meilisearch/test.py | 2 +- tests/integration/test_storage_url/configs/users.xml | 9 +++++++++ tests/integration/test_storage_url/test.py | 1 + 12 files changed, 61 insertions(+), 3 deletions(-) create mode 100644 tests/integration/test_mask_sensitive_info/configs/users.xml create mode 100644 tests/integration/test_storage_azure_blob_storage/configs/users.xml create mode 100644 tests/integration/test_storage_dict/configs/users.xml create mode 100644 tests/integration/test_storage_hudi/configs/users.d/users.xml create mode 100644 tests/integration/test_storage_meilisearch/configs/users.xml create mode 100644 tests/integration/test_storage_url/configs/users.xml diff --git a/tests/integration/test_mask_sensitive_info/configs/users.xml b/tests/integration/test_mask_sensitive_info/configs/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_mask_sensitive_info/configs/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_mask_sensitive_info/test.py b/tests/integration/test_mask_sensitive_info/test.py index 2131a76b5be..004491af4ac 100644 --- a/tests/integration/test_mask_sensitive_info/test.py +++ b/tests/integration/test_mask_sensitive_info/test.py @@ -9,6 +9,7 @@ node = cluster.add_instance( main_configs=[ "configs/named_collections.xml", ], + user_configs=["configs/users.xml"], with_zookeeper=True, ) diff --git a/tests/integration/test_redirect_url_storage/test.py b/tests/integration/test_redirect_url_storage/test.py index 225a34c9109..17a9a03008e 100644 --- a/tests/integration/test_redirect_url_storage/test.py +++ b/tests/integration/test_redirect_url_storage/test.py @@ -9,7 +9,7 @@ cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( "node1", main_configs=["configs/named_collections.xml"], - user_configs=["configs/user.xml"], + user_configs=["configs/users.xml"], with_zookeeper=False, with_hdfs=True, ) diff --git a/tests/integration/test_s3_cluster/test.py b/tests/integration/test_s3_cluster/test.py index 3b8fd80060f..673ca318c92 100644 --- a/tests/integration/test_s3_cluster/test.py +++ b/tests/integration/test_s3_cluster/test.py @@ -76,12 +76,14 @@ def started_cluster(): cluster.add_instance( "s0_0_1", main_configs=["configs/cluster.xml", "configs/named_collections.xml"], + user_configs=["configs/users.xml"], macros={"replica": "replica2", "shard": "shard1"}, with_zookeeper=True, ) cluster.add_instance( "s0_1_0", main_configs=["configs/cluster.xml", "configs/named_collections.xml"], + user_configs=["configs/users.xml"], macros={"replica": "replica1", "shard": "shard2"}, with_zookeeper=True, ) diff --git a/tests/integration/test_storage_azure_blob_storage/configs/users.xml b/tests/integration/test_storage_azure_blob_storage/configs/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_storage_azure_blob_storage/configs/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + 
+ diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index f9d337b6d86..54b3de8cd9b 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -25,7 +25,7 @@ def cluster(): cluster.add_instance( "node", main_configs=["configs/named_collections.xml"], - user_configs=["configs/disable_profilers.xml"], + user_configs=["configs/disable_profilers.xml", "configs/users.xml"], with_azurite=True, ) cluster.start() diff --git a/tests/integration/test_storage_dict/configs/users.xml b/tests/integration/test_storage_dict/configs/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_storage_dict/configs/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_storage_hudi/configs/users.d/users.xml b/tests/integration/test_storage_hudi/configs/users.d/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_storage_hudi/configs/users.d/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_storage_meilisearch/configs/users.xml b/tests/integration/test_storage_meilisearch/configs/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_storage_meilisearch/configs/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_storage_meilisearch/test.py b/tests/integration/test_storage_meilisearch/test.py index ddcd7154154..3724bb18d34 100644 --- a/tests/integration/test_storage_meilisearch/test.py +++ b/tests/integration/test_storage_meilisearch/test.py @@ -16,7 +16,7 @@ def started_cluster(request): try: cluster = ClickHouseCluster(__file__) node = cluster.add_instance( - "meili", main_configs=["configs/named_collection.xml"], with_meili=True + "meili", main_configs=["configs/named_collection.xml"], user_configs=["configs/users.xml"], with_meili=True ) cluster.start() yield cluster diff --git a/tests/integration/test_storage_url/configs/users.xml b/tests/integration/test_storage_url/configs/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_storage_url/configs/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_storage_url/test.py b/tests/integration/test_storage_url/test.py index f360ec105ec..7f359078967 100644 --- a/tests/integration/test_storage_url/test.py +++ b/tests/integration/test_storage_url/test.py @@ -6,6 +6,7 @@ cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( "node1", main_configs=["configs/conf.xml", "configs/named_collections.xml"], + user_configs=["configs/users.xml"], with_nginx=True, ) From 05811d3dd8dda58defa9e6a7360ee17fdcc5c085 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 15 Jun 2023 12:35:13 +0200 Subject: [PATCH 0376/1997] Rename --- src/Access/Common/AccessType.h | 2 +- src/Storages/NamedCollectionsHelpers.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index 709a519e712..0b66a1b9578 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -142,7 +142,7 @@ enum class AccessType M(ACCESS_MANAGEMENT, "", GROUP, ALL) \ M(SHOW_NAMED_COLLECTIONS, "SHOW NAMED COLLECTIONS", NAMED_COLLECTION, NAMED_COLLECTION_ADMIN) \ M(SHOW_NAMED_COLLECTIONS_SECRETS, "SHOW NAMED 
COLLECTIONS SECRETS", NAMED_COLLECTION, NAMED_COLLECTION_ADMIN) \ - M(USE_NAMED_COLLECTION, "NAMED COLLECTION USAGE, NAMED COLLECTION", NAMED_COLLECTION, NAMED_COLLECTION_ADMIN) \ + M(NAMED_COLLECTION, "NAMED COLLECTION USAGE, USE NAMED COLLECTION", NAMED_COLLECTION, NAMED_COLLECTION_ADMIN) \ M(NAMED_COLLECTION_ADMIN, "NAMED COLLECTION CONTROL", NAMED_COLLECTION, ALL) \ \ M(SYSTEM_SHUTDOWN, "SYSTEM KILL, SHUTDOWN", GLOBAL, SYSTEM) \ diff --git a/src/Storages/NamedCollectionsHelpers.cpp b/src/Storages/NamedCollectionsHelpers.cpp index 29d47e131a6..f301cca92a1 100644 --- a/src/Storages/NamedCollectionsHelpers.cpp +++ b/src/Storages/NamedCollectionsHelpers.cpp @@ -76,7 +76,7 @@ MutableNamedCollectionPtr tryGetNamedCollectionWithOverrides( if (!collection_name.has_value()) return nullptr; - context->checkAccess(AccessType::USE_NAMED_COLLECTION, *collection_name); + context->checkAccess(AccessType::NAMED_COLLECTION, *collection_name); NamedCollectionPtr collection; if (throw_unknown_collection) @@ -121,7 +121,7 @@ MutableNamedCollectionPtr tryGetNamedCollectionWithOverrides( if (collection_name.empty()) return nullptr; - context->checkAccess(AccessType::USE_NAMED_COLLECTION, collection_name); + context->checkAccess(AccessType::NAMED_COLLECTION, collection_name); const auto & collection = NamedCollectionFactory::instance().get(collection_name); auto collection_copy = collection->duplicate(); From 02cf8a1bade3898196d7a760cb75ae10af3f2ecf Mon Sep 17 00:00:00 2001 From: Andrey Zvonov <32552679+zvonand@users.noreply.github.com> Date: Thu, 15 Jun 2023 14:09:38 +0300 Subject: [PATCH 0377/1997] Update comment StorageFile.cpp --- src/Storages/StorageFile.cpp | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 41ddd6b5780..4fbea89baa2 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -100,7 +100,14 @@ void listFilesWithRegexpMatchingImpl( std::vector & result, bool recursive = false); -/// When `{...}` has any `/`s, it must be processed in a different way +/* + * When `{...}` has any `/`s, it must be processed in a different way: + * Basically, a path with globs is processed by listFilesWithRegexpMatching. In case it detects multi-dir glob {.../..., .../...}, + * listFilesWithFoldedRegexpMatching is in charge from now on. + * It works a bit different: it still recursively goes through subdirectories, but does not match every directory to glob. + * Instead, it goes many levels down (until the approximate max_depth is reached) and compares this multi-dir path to a glob. + * StorageHDFS.cpp has the same logic. +*/ void listFilesWithFoldedRegexpMatchingImpl(const std::string & path_for_ls, const std::string & processed_suffix, const std::string & suffix_with_globs, @@ -111,14 +118,6 @@ void listFilesWithFoldedRegexpMatchingImpl(const std::string & path_for_ls, const size_t next_slash_after_glob_pos, std::vector & result) { - /* - * When `{...}` has any `/`s, it must be processed in a different way: - * Basically, a path with globs is processed by LSWithRegexpMatching. In case it detects multi-dir glob {.../..., .../...}, - * LSWithFoldedRegexpMatching is in charge from now on. - * It works a bit different: it still recursively goes through subdirectories, but does not match every directory to glob. - * Instead, it goes many levels down (until the approximate max_depth is reached) and compares this multi-dir path to a glob. - * StorageHDFS.cpp has the same logic. 
- */ if (!max_depth) return; From 806176d88e0b4237c16e23aed27179ed93aa17c1 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Thu, 15 Jun 2023 11:23:08 +0000 Subject: [PATCH 0378/1997] Add input_format_csv_missing_as_default setting and tests --- docs/en/interfaces/formats.md | 3 ++- .../operations/settings/settings-formats.md | 8 +++++++- docs/ru/interfaces/formats.md | 3 ++- docs/ru/operations/settings/settings.md | 8 +++++++- src/Core/Settings.h | 3 ++- src/Dictionaries/CacheDictionary.cpp | 2 +- src/Formats/FormatFactory.cpp | 1 + src/Formats/FormatSettings.h | 1 + .../Formats/Impl/CSVRowInputFormat.cpp | 18 +++++++++++++++++- .../Formats/Impl/CSVRowInputFormat.h | 1 + .../RowInputFormatWithNamesAndTypes.cpp | 4 ---- tests/queries/0_stateless/00301_csv.reference | 10 ++++++++++ tests/queries/0_stateless/00301_csv.sh | 19 +++++++++++++++++-- 13 files changed, 68 insertions(+), 13 deletions(-) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 950692deb77..e0b0fcfabd5 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -470,7 +470,8 @@ The CSV format supports the output of totals and extremes the same way as `TabSe - [input_format_csv_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_csv_detect_header) - automatically detect header with names and types in CSV format. Default value - `true`. - [input_format_csv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_csv_skip_trailing_empty_lines) - skip trailing empty lines at the end of data. Default value - `false`. - [input_format_csv_trim_whitespaces](/docs/en/operations/settings/settings-formats.md/#input_format_csv_trim_whitespaces) - trim spaces and tabs in non-quoted CSV strings. Default value - `true`. -- [input_format_csv_ignore_extra_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_ignore_extra_columns) - ignore extra colums in CSV input. Default value - `false`. +- [input_format_csv_ignore_extra_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_ignore_extra_columns) - ignore extra columns in CSV input (if your file has more columns than expected). Default value - `false`. +- [input_format_csv_missing_as_default](/docs/en/operations/settings/settings-formats.md/#input_format_csv_missing_as_default) - treat missing fields in CSV input as default values. Default value - `false`. ## CSVWithNames {#csvwithnames} diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index e721c9408e3..6d9a1fb5160 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -933,7 +933,13 @@ Result ``` ### input_format_csv_ignore_extra_columns {#input_format_csv_ignore_extra_columns} -Ignore extra colums in CSV input. +Ignore extra columns in CSV input (if your file has more columns than expected). + +Disabled by default. + +### input_format_csv_missing_as_default {#input_format_csv_missing_as_default} + +Treat missing fields in CSV input as default values. Disabled by default. 
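A minimal smoke test of the two settings just documented, sketched with `clickhouse-local` (the schema and literal rows are invented for illustration; only the two setting names come from this patch):

```bash
# Expected schema: s String, n UInt64. The first row carries two extra
# trailing fields, the second row stops after its first field.
echo 'Hello,1,extra1,extra2
World' | clickhouse-local \
    --structure="s String, n UInt64" \
    --input-format="CSV" \
    --input_format_csv_ignore_extra_columns=1 \
    --input_format_csv_missing_as_default=1 \
    --query="SELECT * FROM table"
# Expected output (TSV): the extra fields are skipped and the missing
# field becomes the type default:
#   Hello   1
#   World   0
```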
diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md index 8488f4ce55a..7e3bb3f7d26 100644 --- a/docs/ru/interfaces/formats.md +++ b/docs/ru/interfaces/formats.md @@ -402,7 +402,8 @@ $ clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FOR - [input_format_csv_skip_first_lines](../operations/settings/settings.md#input_format_csv_skip_first_lines) - пропустить указанное количество строк в начале данных. Значение по умолчанию - `0`. - [input_format_csv_detect_header](../operations/settings/settings.md#input_format_csv_detect_header) - обнаружить заголовок с именами и типами в формате CSV. Значение по умолчанию - `true`. - [input_format_csv_trim_whitespaces](../operations/settings/settings.md#input_format_csv_trim_whitespaces) - удалить пробелы и символы табуляции из строк без кавычек. Значение по умолчанию - `true`. -- [input_format_csv_ignore_extra_columns](../operations/settings/settings.md/#input_format_csv_ignore_extra_columns) - игнорировать дополнительные столбцы. Значение по умолчанию - `false`. +- [input_format_csv_ignore_extra_columns](../operations/settings/settings.md/#input_format_csv_ignore_extra_columns) - игнорировать дополнительные столбцы (если файл содержит больше столбцов чем ожидается). Значение по умолчанию - `false`. +- [input_format_csv_missing_as_default](../operations/settings/settings.md/#input_format_csv_missing_as_default) - рассматривать отсутствующие поля в CSV в качестве значений по умолчанию. Значение по умолчанию - `false`. ## CSVWithNames {#csvwithnames} diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 33d9300f8e1..61cfc332585 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -1729,7 +1729,13 @@ echo ' string ' | ./clickhouse local -q "select * from table FORMAT CSV" --in ## input_format_csv_ignore_extra_columns {#input_format_csv_ignore_extra_columns} -Игнорировать дополнительные столбцы. +Игнорировать дополнительные столбцы (если файл содержит больше столбцов чем ожидается). + +Выключено по умолчанию. + +## input_format_csv_missing_as_default {#input_format_csv_missing_as_default} + +Рассматривать отсутствующие поля в CSV в качестве значений по умолчанию. Выключено по умолчанию. diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 9582419b98c..ce7c28996e8 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1000,7 +1000,8 @@ class IColumn; M(Bool, regexp_dict_allow_hyperscan, true, "Allow regexp_tree dictionary using Hyperscan library.", 0) \ \ M(Bool, dictionary_use_async_executor, false, "Execute a pipeline for reading from a dictionary with several threads. It's supported only by DIRECT dictionary with CLICKHOUSE source.", 0) \ - M(Bool, input_format_csv_ignore_extra_columns, false, "Ignore extra colums in CSV input", 0) \ + M(Bool, input_format_csv_ignore_extra_columns, false, "Ignore extra columns in CSV input (if your file has more columns than expected)", 0) \ + M(Bool, input_format_csv_missing_as_default, false, "Treat missing fields in CSV input as default values", 0) \ // End of FORMAT_FACTORY_SETTINGS // Please add settings non-related to formats into the COMMON_SETTINGS above. 
diff --git a/src/Dictionaries/CacheDictionary.cpp b/src/Dictionaries/CacheDictionary.cpp index c5c88a9f142..359f7c17436 100644 --- a/src/Dictionaries/CacheDictionary.cpp +++ b/src/Dictionaries/CacheDictionary.cpp @@ -138,7 +138,7 @@ Columns CacheDictionary::getColumns( const Columns & default_values_columns) const { /** - * Flow of getColumsImpl + * Flow of getColumnsImpl * 1. Get fetch result from storage * 2. If all keys are found in storage and not expired * 2.1. If storage returns fetched columns in order of keys then result is returned to client. diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index f29b55f7e73..102b5d7eec0 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -72,6 +72,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.csv.skip_trailing_empty_lines = settings.input_format_csv_skip_trailing_empty_lines; format_settings.csv.trim_whitespaces = settings.input_format_csv_trim_whitespaces; format_settings.csv.ignore_extra_columns = settings.input_format_csv_ignore_extra_columns; + format_settings.csv.missing_as_default = settings.input_format_csv_missing_as_default; format_settings.hive_text.fields_delimiter = settings.input_format_hive_text_fields_delimiter; format_settings.hive_text.collection_items_delimiter = settings.input_format_hive_text_collection_items_delimiter; format_settings.hive_text.map_keys_delimiter = settings.input_format_hive_text_map_keys_delimiter; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 38148bda373..2b52d88184c 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -140,6 +140,7 @@ struct FormatSettings bool skip_trailing_empty_lines = false; bool trim_whitespaces = true; bool ignore_extra_columns = false; + bool missing_as_default = false; } csv; struct HiveText diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index 8aaf8fd3e2f..dcc057baef2 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -147,7 +147,18 @@ CSVFormatReader::CSVFormatReader(PeekableReadBuffer & buf_, const FormatSettings void CSVFormatReader::skipFieldDelimiter() { skipWhitespacesAndTabs(*buf); - assertChar(format_settings.csv.delimiter, *buf); + + bool res = checkChar(format_settings.csv.delimiter, *buf); + if (!res && !format_settings.csv.missing_as_default) + { + char err[2] = {format_settings.csv.delimiter, '\0'}; + throwAtAssertionFailed(err, *buf); + } + + if (!res && format_settings.csv.missing_as_default) + { + current_row_has_missing_fields = true; + } } template @@ -187,6 +198,7 @@ void CSVFormatReader::skipRowEndDelimiter() return; skipEndOfLine(*buf); + current_row_has_missing_fields = false; } void CSVFormatReader::skipHeaderRow() @@ -302,6 +314,10 @@ bool CSVFormatReader::readField( /// works for tuples as well. column.insertDefault(); } + else if (current_row_has_missing_fields) + { + column.insertDefault(); + } else if (format_settings.null_as_default && !isNullableOrLowCardinalityNullable(type)) { /// If value is null but type is not nullable then use default value instead. 
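The reader changes above are what let short rows through: a failed delimiter check now marks the row as having missing fields instead of throwing, and `readField` then falls back to `insertDefault()` for the remaining columns. A hypothetical before/after probe (row contents invented):

```bash
# Default behavior: a row with missing trailing fields is a parse error.
echo 'Hello,1' | clickhouse-local \
    --structure="s String, n UInt64, d Date" --input-format="CSV" \
    --query="SELECT * FROM table" 2>&1 | head -n 1

# With the new setting, the absent field takes its type default
# (0 for UInt64, 1970-01-01 for Date).
echo 'Hello,1' | clickhouse-local \
    --structure="s String, n UInt64, d Date" --input-format="CSV" \
    --input_format_csv_missing_as_default=1 \
    --query="SELECT * FROM table"
```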
diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.h b/src/Processors/Formats/Impl/CSVRowInputFormat.h index 0c8099a216c..3958c66bbc6 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.h +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.h @@ -89,6 +89,7 @@ public: protected: PeekableReadBuffer * buf; + bool current_row_has_missing_fields = false; }; class CSVSchemaReader : public FormatWithNamesAndTypesSchemaReader diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp index 24bf1d0d595..eaedbbb4a1e 100644 --- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp +++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp @@ -212,12 +212,8 @@ bool RowInputFormatWithNamesAndTypes::readRow(MutableColumns & columns, RowReadE format_reader->skipRowStartDelimiter(); ext.read_columns.resize(data_types.size()); - //std::cout << "col size " << column_mapping->column_indexes_for_input_fields.size() << std::endl; for (size_t file_column = 0; file_column < column_mapping->column_indexes_for_input_fields.size(); ++file_column) { - // std::cout << " file_column " << file_column << column_mapping->names_of_columns[file_column] << std::endl; - - const auto & column_index = column_mapping->column_indexes_for_input_fields[file_column]; const bool is_last_file_column = file_column + 1 == column_mapping->column_indexes_for_input_fields.size(); if (column_index) diff --git a/tests/queries/0_stateless/00301_csv.reference b/tests/queries/0_stateless/00301_csv.reference index 3dbe3116bea..fa85fd924e1 100644 --- a/tests/queries/0_stateless/00301_csv.reference +++ b/tests/queries/0_stateless/00301_csv.reference @@ -1,19 +1,29 @@ +=== Test input_format_csv_empty_as_default Hello, world 123 2016-01-01 Hello, "world" 456 2016-01-02 Hello "world" 789 2016-01-03 Hello\n world 100 2016-01-04 default 1 2019-06-19 default-eof 1 2019-06-19 +=== Test datetime 2016-01-01 01:02:03 1 2016-01-02 01:02:03 2 2017-08-15 13:15:01 3 1970-01-02 05:46:39 4 +=== Test nullable datetime 2016-01-01 01:02:03 NUL 2016-01-02 01:02:03 Nhello \N \N +=== Test input_format_csv_ignore_extra_columns Hello 1 String1 Hello 2 String2 Hello 3 String3 Hello 4 String4 Hello 5 String5 Hello 6 String6 +=== Test input_format_csv_missing_as_default +Hello 0 33 \N 55 Default +Hello 0 33 \N 55 Default +Hello 1 2 \N 55 Default +Hello 1 2 3 4 String +Hello 1 2 3 4 String diff --git a/tests/queries/0_stateless/00301_csv.sh b/tests/queries/0_stateless/00301_csv.sh index fafe75f6f63..887a75b0ded 100755 --- a/tests/queries/0_stateless/00301_csv.sh +++ b/tests/queries/0_stateless/00301_csv.sh @@ -4,6 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh +echo === Test input_format_csv_empty_as_default $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS csv"; $CLICKHOUSE_CLIENT --query="CREATE TABLE csv (s String, n UInt64 DEFAULT 1, d Date DEFAULT '2019-06-19') ENGINE = Memory"; @@ -18,6 +19,7 @@ Hello "world", 789 ,2016-01-03 $CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY d, s"; $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; +echo === Test datetime $CLICKHOUSE_CLIENT --query="CREATE TABLE csv (t DateTime('Asia/Istanbul'), s String) ENGINE = Memory"; echo '"2016-01-01 01:02:03","1" @@ -28,7 +30,7 @@ echo '"2016-01-01 01:02:03","1" $CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY s"; $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; - +echo === Test nullable datetime $CLICKHOUSE_CLIENT --query="CREATE TABLE csv (t Nullable(DateTime('Asia/Istanbul')), s Nullable(String)) ENGINE = Memory"; echo 'NULL, NULL @@ -39,6 +41,7 @@ $CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY s NULLS LAST"; $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; +echo === Test input_format_csv_ignore_extra_columns $CLICKHOUSE_CLIENT --query="CREATE TABLE csv (s String, n UInt64 DEFAULT 3, d String DEFAULT 'String4') ENGINE = Memory"; echo 'Hello, 1, String1 @@ -49,4 +52,16 @@ Hello, 5, String5, 2016-01-15, 2016-01-16 Hello, 6, String6, "line with a break"' | $CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=1 --input_format_csv_empty_as_default=1 --input_format_csv_ignore_extra_columns=1 --query="INSERT INTO csv FORMAT CSV"; $CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY s, n"; -$CLICKHOUSE_CLIENT --query="DROP TABLE csv"; \ No newline at end of file +$CLICKHOUSE_CLIENT --query="DROP TABLE csv"; + + +echo === Test input_format_csv_missing_as_default +$CLICKHOUSE_CLIENT --query="CREATE TABLE csv (f1 String, f2 UInt64, f3 UInt64 Default 33, f4 Nullable(UInt64), f5 Nullable(UInt64) Default 55, f6 String DEFAULT 'Default') ENGINE = Memory"; + +echo 'Hello +Hello, +Hello, 1, 2 +Hello, 1, 2, 3, 4, String +Hello, 1, 2, 3, 4, String,'| $CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=1 --input_format_csv_empty_as_default=1 --input_format_csv_missing_as_default=1 --query="INSERT INTO csv FORMAT CSV"; +$CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY f1, f2, f3, f4 NULLS FIRST, f5, f6"; +$CLICKHOUSE_CLIENT --query="DROP TABLE csv"; From b8a4a784debcfb4e739b2368bc5611885ffa2a51 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 15 Jun 2023 14:22:13 +0200 Subject: [PATCH 0379/1997] Fix unit test, add check --- src/Interpreters/Cache/Metadata.cpp | 8 ++++++++ src/Interpreters/tests/gtest_lru_file_cache.cpp | 4 ++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index fd590315ab1..987361c0537 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -412,6 +412,14 @@ void CacheMetadata::downloadImpl(FileSegment & file_segment, std::optionalinternalBuffer().empty()) diff --git a/src/Interpreters/tests/gtest_lru_file_cache.cpp b/src/Interpreters/tests/gtest_lru_file_cache.cpp index 8c8e715ce92..d8baf1c0b45 100644 --- a/src/Interpreters/tests/gtest_lru_file_cache.cpp +++ b/src/Interpreters/tests/gtest_lru_file_cache.cpp @@ -533,8 +533,8 @@ TEST_F(FileCacheTest, get) cv.notify_one(); file_segment2.wait(file_segment2.range().left); - ASSERT_TRUE(file_segment2.state() == DB::FileSegment::State::PARTIALLY_DOWNLOADED); - ASSERT_TRUE(file_segment2.getOrSetDownloader() == 
DB::FileSegment::getCallerId()); + ASSERT_EQ(file_segment2.state(), DB::FileSegment::State::EMPTY); + ASSERT_EQ(file_segment2.getOrSetDownloader(), DB::FileSegment::getCallerId()); download(file_segment2); }); From 5cceae1e1fa97126a7b1223927354d9b535e184b Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 15 Jun 2023 14:25:10 +0200 Subject: [PATCH 0380/1997] Fix --- tests/integration/test_storage_meilisearch/test.py | 5 ++++- tests/queries/0_stateless/01271_show_privileges.reference | 2 +- .../0_stateless/02117_show_create_table_system.reference | 6 +++--- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/tests/integration/test_storage_meilisearch/test.py b/tests/integration/test_storage_meilisearch/test.py index 3724bb18d34..b6acee18981 100644 --- a/tests/integration/test_storage_meilisearch/test.py +++ b/tests/integration/test_storage_meilisearch/test.py @@ -16,7 +16,10 @@ def started_cluster(request): try: cluster = ClickHouseCluster(__file__) node = cluster.add_instance( - "meili", main_configs=["configs/named_collection.xml"], user_configs=["configs/users.xml"], with_meili=True + "meili", + main_configs=["configs/named_collection.xml"], + user_configs=["configs/users.xml"], + with_meili=True, ) cluster.start() yield cluster diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index 925e0921759..b1ce5ab71d5 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -94,7 +94,7 @@ SHOW ACCESS [] \N ACCESS MANAGEMENT ACCESS MANAGEMENT [] \N ALL SHOW NAMED COLLECTIONS ['SHOW NAMED COLLECTIONS'] NAMED_COLLECTION NAMED COLLECTION ADMIN SHOW NAMED COLLECTIONS SECRETS ['SHOW NAMED COLLECTIONS SECRETS'] NAMED_COLLECTION NAMED COLLECTION ADMIN -USE NAMED COLLECTION ['NAMED COLLECTION USAGE','NAMED COLLECTION'] NAMED_COLLECTION NAMED COLLECTION ADMIN +NAMED COLLECTION ['NAMED COLLECTION USAGE','USE NAMED COLLECTION'] NAMED_COLLECTION NAMED COLLECTION ADMIN NAMED COLLECTION ADMIN ['NAMED COLLECTION CONTROL'] NAMED_COLLECTION ALL SYSTEM SHUTDOWN ['SYSTEM KILL','SHUTDOWN'] GLOBAL SYSTEM SYSTEM DROP DNS CACHE ['SYSTEM DROP DNS','DROP DNS CACHE','DROP DNS'] GLOBAL SYSTEM DROP CACHE diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 143fb24a637..72c1027e7b1 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -297,7 +297,7 @@ CREATE TABLE system.grants ( `user_name` Nullable(String), `role_name` Nullable(String), - `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP 
CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'USE NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD SYMBOLS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'REDIS' = 152, 'MEILISEARCH' = 153, 'MYSQL' = 154, 'POSTGRES' = 155, 'SQLITE' = 156, 'ODBC' = 157, 'JDBC' = 158, 'HDFS' = 159, 'S3' = 160, 'HIVE' = 161, 'AZURE' = 162, 'SOURCES' = 163, 'CLUSTER' = 164, 'ALL' = 165, 'NONE' = 166), + `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 
'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD SYMBOLS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 
140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'REDIS' = 152, 'MEILISEARCH' = 153, 'MYSQL' = 154, 'POSTGRES' = 155, 'SQLITE' = 156, 'ODBC' = 157, 'JDBC' = 158, 'HDFS' = 159, 'S3' = 160, 'HIVE' = 161, 'AZURE' = 162, 'SOURCES' = 163, 'CLUSTER' = 164, 'ALL' = 165, 'NONE' = 166), `database` Nullable(String), `table` Nullable(String), `column` Nullable(String), @@ -581,10 +581,10 @@ ENGINE = SystemPartsColumns COMMENT 'SYSTEM TABLE is built on the fly.' CREATE TABLE system.privileges ( - `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'USE NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD 
SYMBOLS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'REDIS' = 152, 'MEILISEARCH' = 153, 'MYSQL' = 154, 'POSTGRES' = 155, 'SQLITE' = 156, 'ODBC' = 157, 'JDBC' = 158, 'HDFS' = 159, 'S3' = 160, 'HIVE' = 161, 'AZURE' = 162, 'SOURCES' = 163, 'CLUSTER' = 164, 'ALL' = 165, 'NONE' = 166), + `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 
'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD SYMBOLS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'REDIS' = 152, 'MEILISEARCH' = 153, 'MYSQL' = 154, 'POSTGRES' = 155, 'SQLITE' = 156, 'ODBC' = 157, 'JDBC' = 158, 'HDFS' = 159, 'S3' = 160, 'HIVE' = 161, 'AZURE' = 162, 'SOURCES' = 163, 'CLUSTER' = 164, 'ALL' = 165, 'NONE' = 166), `aliases` Array(String), `level` Nullable(Enum8('GLOBAL' = 0, 'DATABASE' = 1, 'TABLE' = 2, 'DICTIONARY' = 3, 'VIEW' = 4, 'COLUMN' = 5, 'NAMED_COLLECTION' = 6)), - `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 
53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'USE NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD SYMBOLS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'REDIS' = 152, 'MEILISEARCH' = 153, 'MYSQL' = 154, 'POSTGRES' = 155, 'SQLITE' = 156, 'ODBC' = 157, 'JDBC' = 158, 'HDFS' = 159, 'S3' = 160, 'HIVE' = 161, 'AZURE' = 162, 'SOURCES' = 163, 'CLUSTER' = 164, 'ALL' = 165, 'NONE' = 166)) + `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 
28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD SYMBOLS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'REDIS' = 152, 'MEILISEARCH' = 153, 'MYSQL' = 154, 'POSTGRES' = 155, 'SQLITE' = 156, 'ODBC' = 157, 'JDBC' = 158, 'HDFS' = 159, 'S3' = 160, 'HIVE' = 161, 'AZURE' = 162, 'SOURCES' = 163, 'CLUSTER' = 164, 'ALL' = 165, 'NONE' = 166)) ) ENGINE = SystemPrivileges COMMENT 'SYSTEM TABLE is built on the fly.' 
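After this rename the privilege is granted under its new name, and per the `AccessType.h` change above the old spellings remain usable as aliases. A sketch (the collection and user names are hypothetical):

```bash
clickhouse-client --query="GRANT NAMED COLLECTION ON mycollection TO test_user"
# Old spelling, still accepted as an alias of the same privilege:
clickhouse-client --query="GRANT USE NAMED COLLECTION ON mycollection TO test_user"
```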
From 0eeee11dc46d462412ad671a7d59006fba59c403 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Thu, 15 Jun 2023 12:36:18 +0000 Subject: [PATCH 0381/1997] Style fix, add comment --- .../Formats/Impl/CSVRowInputFormat.cpp | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index dcc057baef2..7cd812bc5b0 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -149,15 +149,17 @@ void CSVFormatReader::skipFieldDelimiter() skipWhitespacesAndTabs(*buf); bool res = checkChar(format_settings.csv.delimiter, *buf); - if (!res && !format_settings.csv.missing_as_default) + if (!res) { - char err[2] = {format_settings.csv.delimiter, '\0'}; - throwAtAssertionFailed(err, *buf); - } - - if (!res && format_settings.csv.missing_as_default) - { - current_row_has_missing_fields = true; + if (!format_settings.csv.missing_as_default) + { + char err[2] = {format_settings.csv.delimiter, '\0'}; + throwAtAssertionFailed(err, *buf); + } + else + { + current_row_has_missing_fields = true; + } } } @@ -332,6 +334,7 @@ bool CSVFormatReader::readField( if (is_last_file_column && format_settings.csv.ignore_extra_columns) { + // Skip all fields to next line. while (checkChar(format_settings.csv.delimiter, *buf)) { skipField(); From 9a25958be831d6068597703cef97238ab76ee325 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Thu, 15 Jun 2023 13:49:49 +0000 Subject: [PATCH 0382/1997] Add HTTP header filtering --- programs/server/Server.cpp | 2 + programs/server/config.xml | 8 +++ src/Common/HTTPHeaderFilter.cpp | 56 +++++++++++++++++++ src/Common/HTTPHeaderFilter.h | 27 +++++++++ src/Interpreters/Context.cpp | 16 +++++- src/Interpreters/Context.h | 5 ++ src/Storages/StorageS3.cpp | 1 + src/Storages/StorageS3Cluster.cpp | 2 + src/Storages/StorageURL.cpp | 1 + src/Storages/StorageURLCluster.cpp | 1 + tests/config/config.d/forbidden_headers.xml | 6 ++ tests/config/install.sh | 1 + .../02752_forbidden_headers.reference | 0 .../0_stateless/02752_forbidden_headers.sql | 18 ++++++ 14 files changed, 142 insertions(+), 2 deletions(-) create mode 100644 src/Common/HTTPHeaderFilter.cpp create mode 100644 src/Common/HTTPHeaderFilter.h create mode 100644 tests/config/config.d/forbidden_headers.xml create mode 100644 tests/queries/0_stateless/02752_forbidden_headers.reference create mode 100644 tests/queries/0_stateless/02752_forbidden_headers.sql diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index d0fc8aca5e8..6490eb6e3f5 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -887,6 +887,7 @@ try #endif global_context->setRemoteHostFilter(config()); + global_context->setHTTPHeaderFilter(config()); std::string path_str = getCanonicalPath(config().getString("path", DBMS_DEFAULT_PATH)); fs::path path = path_str; @@ -1184,6 +1185,7 @@ try } global_context->setRemoteHostFilter(*config); + global_context->setHTTPHeaderFilter(*config); global_context->setMaxTableSizeToDrop(server_settings_.max_table_size_to_drop); global_context->setMaxPartitionSizeToDrop(server_settings_.max_partition_size_to_drop); diff --git a/programs/server/config.xml b/programs/server/config.xml index d18b4cb2ac9..f5013104630 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -1006,6 +1006,14 @@ --> + + + password_sha256_hex --- docs/en/operations/named-collections.md | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/docs/en/operations/named-collections.md b/docs/en/operations/named-collections.md index a521a369721..02f52b6f8bf 100644 --- a/docs/en/operations/named-collections.md +++ b/docs/en/operations/named-collections.md @@ -50,7 +50,7 @@ To manage named collections with DDL a user must have the `named_control_collect ``` :::tip -In the above example the `passowrd_sha256_hex` value is the hexadecimal representation of the SHA256 hash of the password. This configuration for the user `default` has the attribute `replace=true` as in the default configuration has a plain text `password` set, and it is not possible to have both plain text and sha256 hex passwords set for a user. +In the above example the `password_sha256_hex` value is the hexadecimal representation of the SHA256 hash of the password. This configuration for the user `default` has the attribute `replace=true` as in the default configuration has a plain text `password` set, and it is not possible to have both plain text and sha256 hex passwords set for a user. ::: ## Storing named collections in configuration files From 5c38231a146db290cc5a989418de4cf678293652 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Wed, 21 Jun 2023 15:18:25 +0200 Subject: [PATCH 0597/1997] One more profile event for fs cache (#51223) --- src/Common/ProfileEvents.cpp | 1 + src/Interpreters/Cache/FileCache.cpp | 56 ++++++++++++++++------------ 2 files changed, 33 insertions(+), 24 deletions(-) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index ffd7f8ca696..0838e0366df 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -391,6 +391,7 @@ The server successfully detected this situation and will download merged part fr M(FilesystemCacheLockMetadataMicroseconds, "Lock filesystem cache metadata time") \ M(FilesystemCacheLockCacheMicroseconds, "Lock filesystem cache time") \ M(FilesystemCacheReserveMicroseconds, "Filesystem cache space reservation time") \ + M(FilesystemCacheEvictMicroseconds, "Filesystem cache eviction time") \ M(FilesystemCacheGetOrSetMicroseconds, "Filesystem cache getOrSet() time") \ M(FilesystemCacheGetMicroseconds, "Filesystem cache get() time") \ M(FileSegmentWaitMicroseconds, "Wait on DOWNLOADING state") \ diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index ec9220af955..3d1b85dfbde 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -24,6 +24,7 @@ namespace ProfileEvents extern const Event FilesystemCacheEvictedFileSegments; extern const Event FilesystemCacheLockCacheMicroseconds; extern const Event FilesystemCacheReserveMicroseconds; + extern const Event FilesystemCacheEvictMicroseconds; extern const Event FilesystemCacheGetOrSetMicroseconds; extern const Event FilesystemCacheGetMicroseconds; } @@ -612,10 +613,6 @@ bool FileCache::tryReserve(FileSegment & file_segment, const size_t size) file_segment.key(), file_segment.offset()); } - /// A file_segment_metadata acquires a LRUQueue iterator on first successful space reservation attempt. 
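[Note on the ProfileEvents hunk above: registering the new `FilesystemCacheEvictMicroseconds` counter is a one-line change because the event table is an X-macro — a single `M(name, description)` list that gets expanded several times. The list name `APPLY_FOR_EVENTS` and the expansion sites below are assumptions for illustration, not copied from the ClickHouse sources; a toy rendition of the pattern:]

```cpp
// Toy X-macro event registry. One list, expanded twice: once into an enum,
// once into a parallel array of descriptions, so they can never go out of sync.
#include <cstddef>

#define APPLY_FOR_EVENTS(M) \
    M(FilesystemCacheReserveMicroseconds, "Filesystem cache space reservation time") \
    M(FilesystemCacheEvictMicroseconds, "Filesystem cache eviction time")

#define M_ENUM(NAME, DOC) NAME,
enum Event : size_t { APPLY_FOR_EVENTS(M_ENUM) END };  // one enumerator per event
#undef M_ENUM

#define M_DOC(NAME, DOC) DOC,
constexpr const char * event_docs[] = { APPLY_FOR_EVENTS(M_DOC) };  // same list, now as docs
#undef M_DOC

static_assert(END == 2 && sizeof(event_docs) / sizeof(event_docs[0]) == END);
```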
- auto queue_iterator = file_segment.getQueueIterator(); - chassert(!queue_iterator || file_segment.getReservedSize() > 0); - struct EvictionCandidates { explicit EvictionCandidates(KeyMetadataPtr key_metadata_) : key_metadata(key_metadata_) {} @@ -732,37 +729,48 @@ bool FileCache::tryReserve(FileSegment & file_segment, const size_t size) if (!file_segment.getKeyMetadata()->createBaseDirectory()) return false; - for (auto & [current_key, deletion_info] : to_delete) + if (!to_delete.empty()) { - auto locked_key = deletion_info.key_metadata->tryLock(); - if (!locked_key) - continue; /// key could become invalid after we released the key lock above, just skip it. + LOG_DEBUG( + log, "Will evict {} file segments (while reserving {} bytes for {}:{})", + to_delete.size(), size, file_segment.key(), file_segment.offset()); - /// delete from vector in reverse order just for efficiency - auto & candidates = deletion_info.candidates; - while (!candidates.empty()) + ProfileEventTimeIncrement evict_watch(ProfileEvents::FilesystemCacheEvictMicroseconds); + + for (auto & [current_key, deletion_info] : to_delete) { - auto & candidate = candidates.back(); - chassert(candidate->releasable()); + auto locked_key = deletion_info.key_metadata->tryLock(); + if (!locked_key) + continue; /// key could become invalid after we released the key lock above, just skip it. - const auto * segment = candidate->file_segment.get(); + /// delete from vector in reverse order just for efficiency + auto & candidates = deletion_info.candidates; + while (!candidates.empty()) + { + auto & candidate = candidates.back(); + chassert(candidate->releasable()); - ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictedFileSegments); - ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictedBytes, segment->range().size()); + const auto * segment = candidate->file_segment.get(); - locked_key->removeFileSegment(segment->offset(), segment->lock()); - segment->getQueueIterator()->remove(cache_lock); + ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictedFileSegments); + ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictedBytes, segment->range().size()); - if (query_context) - query_context->remove(current_key, segment->offset(), cache_lock); + locked_key->removeFileSegment(segment->offset(), segment->lock()); + segment->getQueueIterator()->remove(cache_lock); - candidates.pop_back(); + if (query_context) + query_context->remove(current_key, segment->offset(), cache_lock); + + candidates.pop_back(); + } } } - /// queue_iteratir is std::nullopt here if no space has been reserved yet, a file_segment_metadata - /// acquires queue iterator on first successful space reservation attempt. - /// If queue iterator already exists, we need to update the size after each space reservation. + /// A file_segment_metadata acquires a LRUQueue iterator on first successful space reservation attempt, + /// e.g. queue_iteratir is std::nullopt here if no space has been reserved yet. 
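[The `ProfileEventTimeIncrement<Microseconds> evict_watch(...)` line in the hunk above is an RAII timer: elapsed time is added to the counter when the object leaves scope, so every exit path of the eviction block is measured. A minimal hypothetical equivalent — the real class lives in ClickHouse's profiling headers and is not reproduced here:]

```cpp
// Sketch of a scoped microseconds counter, assuming a plain atomic as the sink.
#include <atomic>
#include <chrono>
#include <cstdint>

std::atomic<uint64_t> filesystem_cache_evict_microseconds{0};

struct ScopedMicrosecondsIncrement
{
    explicit ScopedMicrosecondsIncrement(std::atomic<uint64_t> & counter_) : counter(counter_) {}
    ~ScopedMicrosecondsIncrement()
    {
        auto elapsed = std::chrono::steady_clock::now() - start;
        counter += std::chrono::duration_cast<std::chrono::microseconds>(elapsed).count();
    }

    std::atomic<uint64_t> & counter;
    std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
};

void evictCandidates()
{
    ScopedMicrosecondsIncrement watch(filesystem_cache_evict_microseconds);  // charged on any return path
    /* ... walk the eviction candidates, as in the hunk above ... */
}
```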
+ auto queue_iterator = file_segment.getQueueIterator(); + chassert(!queue_iterator || file_segment.getReservedSize() > 0); + if (queue_iterator) { queue_iterator->updateSize(size); From 3157f29a462bff69affc9c880ee7c98f16d9826e Mon Sep 17 00:00:00 2001 From: serxa Date: Wed, 21 Jun 2023 13:25:10 +0000 Subject: [PATCH 0598/1997] introduce settings enum field with auto-generated values list --- src/Core/SettingsEnums.cpp | 61 +++++++++----------------------------- src/Core/SettingsFields.h | 41 ++++++++++++++++++------- 2 files changed, 44 insertions(+), 58 deletions(-) diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index a291a23c140..612f8689480 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -1,8 +1,10 @@ #include +#include namespace DB { + namespace ErrorCodes { extern const int UNKNOWN_LOAD_BALANCING; @@ -15,7 +17,6 @@ namespace ErrorCodes extern const int UNKNOWN_UNION; } - IMPLEMENT_SETTING_ENUM(LoadBalancing, ErrorCodes::UNKNOWN_LOAD_BALANCING, {{"random", LoadBalancing::RANDOM}, {"nearest_hostname", LoadBalancing::NEAREST_HOSTNAME}, @@ -54,7 +55,7 @@ IMPLEMENT_SETTING_ENUM(OverflowMode, ErrorCodes::UNKNOWN_OVERFLOW_MODE, {"break", OverflowMode::BREAK}}) -IMPLEMENT_SETTING_ENUM_WITH_RENAME(OverflowModeGroupBy, ErrorCodes::UNKNOWN_OVERFLOW_MODE, +IMPLEMENT_SETTING_ENUM(OverflowModeGroupBy, ErrorCodes::UNKNOWN_OVERFLOW_MODE, {{"throw", OverflowMode::THROW}, {"break", OverflowMode::BREAK}, {"any", OverflowMode::ANY}}) @@ -67,51 +68,26 @@ IMPLEMENT_SETTING_ENUM(DistributedProductMode, ErrorCodes::UNKNOWN_DISTRIBUTED_P {"allow", DistributedProductMode::ALLOW}}) -IMPLEMENT_SETTING_ENUM_WITH_RENAME(DateTimeInputFormat, ErrorCodes::BAD_ARGUMENTS, +IMPLEMENT_SETTING_ENUM(DateTimeInputFormat, ErrorCodes::BAD_ARGUMENTS, {{"basic", FormatSettings::DateTimeInputFormat::Basic}, {"best_effort", FormatSettings::DateTimeInputFormat::BestEffort}, {"best_effort_us", FormatSettings::DateTimeInputFormat::BestEffortUS}}) -IMPLEMENT_SETTING_ENUM_WITH_RENAME(DateTimeOutputFormat, ErrorCodes::BAD_ARGUMENTS, +IMPLEMENT_SETTING_ENUM(DateTimeOutputFormat, ErrorCodes::BAD_ARGUMENTS, {{"simple", FormatSettings::DateTimeOutputFormat::Simple}, {"iso", FormatSettings::DateTimeOutputFormat::ISO}, {"unix_timestamp", FormatSettings::DateTimeOutputFormat::UnixTimestamp}}) -IMPLEMENT_SETTING_ENUM(LogsLevel, ErrorCodes::BAD_ARGUMENTS, - {{"none", LogsLevel::none}, - {"fatal", LogsLevel::fatal}, - {"error", LogsLevel::error}, - {"warning", LogsLevel::warning}, - {"information", LogsLevel::information}, - {"debug", LogsLevel::debug}, - {"trace", LogsLevel::trace}, - {"test", LogsLevel::test}}) +IMPLEMENT_SETTING_AUTO_ENUM(LogsLevel, ErrorCodes::BAD_ARGUMENTS) -IMPLEMENT_SETTING_ENUM_WITH_RENAME(LogQueriesType, ErrorCodes::BAD_ARGUMENTS, - {{"QUERY_START", QUERY_START}, - {"QUERY_FINISH", QUERY_FINISH}, - {"EXCEPTION_BEFORE_START", EXCEPTION_BEFORE_START}, - {"EXCEPTION_WHILE_PROCESSING", EXCEPTION_WHILE_PROCESSING}}) +IMPLEMENT_SETTING_AUTO_ENUM(LogQueriesType, ErrorCodes::BAD_ARGUMENTS) +IMPLEMENT_SETTING_AUTO_ENUM(DefaultDatabaseEngine, ErrorCodes::BAD_ARGUMENTS) -IMPLEMENT_SETTING_ENUM_WITH_RENAME(DefaultDatabaseEngine, ErrorCodes::BAD_ARGUMENTS, - {{"Ordinary", DefaultDatabaseEngine::Ordinary}, - {"Atomic", DefaultDatabaseEngine::Atomic}}) +IMPLEMENT_SETTING_AUTO_ENUM(DefaultTableEngine, ErrorCodes::BAD_ARGUMENTS) -IMPLEMENT_SETTING_ENUM_WITH_RENAME(DefaultTableEngine, ErrorCodes::BAD_ARGUMENTS, - {{"None", DefaultTableEngine::None}, - {"Log", DefaultTableEngine::Log}, - 
{"StripeLog", DefaultTableEngine::StripeLog}, - {"MergeTree", DefaultTableEngine::MergeTree}, - {"ReplacingMergeTree", DefaultTableEngine::ReplacingMergeTree}, - {"ReplicatedMergeTree", DefaultTableEngine::ReplicatedMergeTree}, - {"ReplicatedReplacingMergeTree", DefaultTableEngine::ReplicatedReplacingMergeTree}, - {"Memory", DefaultTableEngine::Memory}}) - -IMPLEMENT_SETTING_ENUM(CleanDeletedRows, ErrorCodes::BAD_ARGUMENTS, - {{"Never", CleanDeletedRows::Never}, - {"Always", CleanDeletedRows::Always}}) +IMPLEMENT_SETTING_AUTO_ENUM(CleanDeletedRows, ErrorCodes::BAD_ARGUMENTS) IMPLEMENT_SETTING_MULTI_ENUM(MySQLDataTypesSupport, ErrorCodes::UNKNOWN_MYSQL_DATATYPES_SUPPORT_LEVEL, {{"decimal", MySQLDataTypesSupport::DECIMAL}, @@ -149,14 +125,7 @@ IMPLEMENT_SETTING_ENUM(CapnProtoEnumComparingMode, ErrorCodes::BAD_ARGUMENTS, {"by_values", FormatSettings::CapnProtoEnumComparingMode::BY_VALUES}, {"by_names_case_insensitive", FormatSettings::CapnProtoEnumComparingMode::BY_NAMES_CASE_INSENSITIVE}}) -IMPLEMENT_SETTING_ENUM(EscapingRule, ErrorCodes::BAD_ARGUMENTS, - {{"None", FormatSettings::EscapingRule::None}, - {"Escaped", FormatSettings::EscapingRule::Escaped}, - {"Quoted", FormatSettings::EscapingRule::Quoted}, - {"CSV", FormatSettings::EscapingRule::CSV}, - {"JSON", FormatSettings::EscapingRule::JSON}, - {"XML", FormatSettings::EscapingRule::XML}, - {"Raw", FormatSettings::EscapingRule::Raw}}) +IMPLEMENT_SETTING_AUTO_ENUM(EscapingRule, ErrorCodes::BAD_ARGUMENTS) IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation, ErrorCodes::BAD_ARGUMENTS, {{"bin", FormatSettings::MsgPackUUIDRepresentation::BIN}, @@ -166,17 +135,15 @@ IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation, ErrorCodes::BAD_ARGUMENTS, IMPLEMENT_SETTING_ENUM(Dialect, ErrorCodes::BAD_ARGUMENTS, {{"clickhouse", Dialect::clickhouse}, {"kusto", Dialect::kusto}}) + // FIXME: do not add 'kusto_auto' to the list. Maybe remove it from code completely? IMPLEMENT_SETTING_ENUM(ParallelReplicasCustomKeyFilterType, ErrorCodes::BAD_ARGUMENTS, {{"default", ParallelReplicasCustomKeyFilterType::DEFAULT}, {"range", ParallelReplicasCustomKeyFilterType::RANGE}}) -IMPLEMENT_SETTING_ENUM(LocalFSReadMethod, ErrorCodes::BAD_ARGUMENTS, - {{"mmap", LocalFSReadMethod::mmap}, - {"pread", LocalFSReadMethod::pread}, - {"read", LocalFSReadMethod::read}}) +IMPLEMENT_SETTING_AUTO_ENUM(LocalFSReadMethod, ErrorCodes::BAD_ARGUMENTS) -IMPLEMENT_SETTING_ENUM_WITH_RENAME(ParquetVersion, ErrorCodes::BAD_ARGUMENTS, +IMPLEMENT_SETTING_ENUM(ParquetVersion, ErrorCodes::BAD_ARGUMENTS, {{"1.0", FormatSettings::ParquetVersion::V1_0}, {"2.4", FormatSettings::ParquetVersion::V2_4}, {"2.6", FormatSettings::ParquetVersion::V2_6}, diff --git a/src/Core/SettingsFields.h b/src/Core/SettingsFields.h index 2cd55e6b4c5..d02932c8000 100644 --- a/src/Core/SettingsFields.h +++ b/src/Core/SettingsFields.h @@ -10,7 +10,6 @@ #include #include - namespace DB { namespace ErrorCodes @@ -371,19 +370,25 @@ void SettingFieldEnum::readBinary(ReadBuffer & in) *this = Traits::fromString(SettingFieldEnumHelpers::readBinary(in)); } +template +constexpr auto getEnumValues() +{ + std::array, magic_enum::enum_count()> enum_values{}; + for (auto value : magic_enum::enum_values()) + enum_values[magic_enum::enum_integer(value)] = std::pair{magic_enum::enum_name(value), value}; + return enum_values; +} + /// NOLINTNEXTLINE #define DECLARE_SETTING_ENUM(ENUM_TYPE) \ DECLARE_SETTING_ENUM_WITH_RENAME(ENUM_TYPE, ENUM_TYPE) -/// NOLINTNEXTLINE -#define IMPLEMENT_SETTING_ENUM(ENUM_TYPE, ERROR_CODE_FOR_UNEXPECTED_NAME, ...) 
\ - IMPLEMENT_SETTING_ENUM_WITH_RENAME(ENUM_TYPE, ERROR_CODE_FOR_UNEXPECTED_NAME, __VA_ARGS__) - /// NOLINTNEXTLINE #define DECLARE_SETTING_ENUM_WITH_RENAME(NEW_NAME, ENUM_TYPE) \ struct SettingField##NEW_NAME##Traits \ { \ using EnumType = ENUM_TYPE; \ + using EnumValuePairs = std::pair[]; \ static const String & toString(EnumType value); \ static EnumType fromString(std::string_view str); \ }; \ @@ -391,13 +396,20 @@ void SettingFieldEnum::readBinary(ReadBuffer & in) using SettingField##NEW_NAME = SettingFieldEnum; /// NOLINTNEXTLINE -#define IMPLEMENT_SETTING_ENUM_WITH_RENAME(NEW_NAME, ERROR_CODE_FOR_UNEXPECTED_NAME, ...) \ +#define IMPLEMENT_SETTING_ENUM(NEW_NAME, ERROR_CODE_FOR_UNEXPECTED_NAME, ...) \ + IMPLEMENT_SETTING_ENUM_IMPL(NEW_NAME, ERROR_CODE_FOR_UNEXPECTED_NAME, EnumValuePairs, __VA_ARGS__) + +/// NOLINTNEXTLINE +#define IMPLEMENT_SETTING_AUTO_ENUM(NEW_NAME, ERROR_CODE_FOR_UNEXPECTED_NAME) \ + IMPLEMENT_SETTING_ENUM_IMPL(NEW_NAME, ERROR_CODE_FOR_UNEXPECTED_NAME, , getEnumValues()) + +/// NOLINTNEXTLINE +#define IMPLEMENT_SETTING_ENUM_IMPL(NEW_NAME, ERROR_CODE_FOR_UNEXPECTED_NAME, PAIRS_TYPE, ...) \ const String & SettingField##NEW_NAME##Traits::toString(typename SettingField##NEW_NAME::EnumType value) \ { \ static const std::unordered_map map = [] { \ std::unordered_map res; \ - constexpr std::pair pairs[] = __VA_ARGS__; \ - for (const auto & [name, val] : pairs) \ + for (const auto & [name, val] : PAIRS_TYPE __VA_ARGS__) \ res.emplace(val, name); \ return res; \ }(); \ @@ -413,8 +425,7 @@ void SettingFieldEnum::readBinary(ReadBuffer & in) { \ static const std::unordered_map map = [] { \ std::unordered_map res; \ - constexpr std::pair pairs[] = __VA_ARGS__; \ - for (const auto & [name, val] : pairs) \ + for (const auto & [name, val] : PAIRS_TYPE __VA_ARGS__) \ res.emplace(name, val); \ return res; \ }(); \ @@ -527,6 +538,7 @@ void SettingFieldMultiEnum::readBinary(ReadBuffer & in) struct SettingField##NEW_NAME##Traits \ { \ using EnumType = ENUM_TYPE; \ + using EnumValuePairs = std::pair[]; \ static size_t getEnumSize(); \ static const String & toString(EnumType value); \ static EnumType fromString(std::string_view str); \ @@ -540,11 +552,18 @@ void SettingFieldMultiEnum::readBinary(ReadBuffer & in) /// NOLINTNEXTLINE #define IMPLEMENT_SETTING_MULTI_ENUM_WITH_RENAME(NEW_NAME, ERROR_CODE_FOR_UNEXPECTED_NAME, ...) \ - IMPLEMENT_SETTING_ENUM_WITH_RENAME(NEW_NAME, ERROR_CODE_FOR_UNEXPECTED_NAME, __VA_ARGS__)\ + IMPLEMENT_SETTING_ENUM(NEW_NAME, ERROR_CODE_FOR_UNEXPECTED_NAME, __VA_ARGS__)\ size_t SettingField##NEW_NAME##Traits::getEnumSize() {\ return std::initializer_list> __VA_ARGS__ .size();\ } +/// NOLINTNEXTLINE +#define IMPLEMENT_SETTING_MULTI_AUTO_ENUM(NEW_NAME, ERROR_CODE_FOR_UNEXPECTED_NAME) \ + IMPLEMENT_SETTING_AUTO_ENUM(NEW_NAME, ERROR_CODE_FOR_UNEXPECTED_NAME)\ + size_t SettingField##NEW_NAME##Traits::getEnumSize() {\ + return getEnumValues().size();\ + } + /// Can keep a value of any type. Used for user-defined settings. 
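[Patch 0598's `getEnumValues()` above relies on the magic_enum library the hunk already includes: enumerator names and values are reflected at compile time, so `IMPLEMENT_SETTING_AUTO_ENUM` no longer needs the hand-written name lists that were deleted from SettingsEnums.cpp. A self-contained illustration of what the reflection yields:]

```cpp
#include <iostream>
#include <magic_enum.hpp>  // header-only reflection library used by the patch

enum class LogsLevel { none, fatal, error, warning, information, debug, trace, test };

int main()
{
    static_assert(magic_enum::enum_count<LogsLevel>() == 8);
    for (auto value : magic_enum::enum_values<LogsLevel>())
        std::cout << magic_enum::enum_name(value) << " = "
                  << magic_enum::enum_integer(value) << '\n';  // none = 0 ... test = 7
}
```

[The generated name/value pairs then feed the same `toString`/`fromString` lookup maps that the explicit `IMPLEMENT_SETTING_ENUM` lists produce, including the configured error code for unknown names.]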
struct SettingFieldCustom { From 8bd53cad7849816a6bd6591eddebd9ba19fa7272 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Wed, 21 Jun 2023 14:01:05 +0000 Subject: [PATCH 0599/1997] Add quotes to test --- tests/queries/0_stateless/00301_csv.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/00301_csv.sh b/tests/queries/0_stateless/00301_csv.sh index c598be44261..dc354433af9 100755 --- a/tests/queries/0_stateless/00301_csv.sh +++ b/tests/queries/0_stateless/00301_csv.sh @@ -44,12 +44,12 @@ $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; echo === Test input_format_csv_ignore_extra_columns $CLICKHOUSE_CLIENT --query="CREATE TABLE csv (s String, n UInt64 DEFAULT 3, d String DEFAULT 'String4') ENGINE = Memory"; -echo 'Hello, 1, String1 -Hello, 2, String2, -Hello, 3, String3, 2016-01-13 -Hello, 4, , 2016-01-14 -Hello, 5, String5, 2016-01-15, 2016-01-16 -Hello, 6, String6, "line with a +echo '"Hello", 1, "String1" +"Hello", 2, "String2", +"Hello", 3, "String3", "2016-01-13" +"Hello", 4, , "2016-01-14" +"Hello", 5, "String5", "2016-01-15", "2016-01-16" +"Hello", 6, "String6", "line with a break"' | $CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=1 --input_format_csv_empty_as_default=1 --input_format_csv_ignore_extra_columns=1 --query="INSERT INTO csv FORMAT CSV"; $CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY s, n"; $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; From 6ebeb93a1a9a0a8e25d98867b0e4753d84f9a569 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Wed, 21 Jun 2023 16:05:48 +0200 Subject: [PATCH 0600/1997] Drop session if we fail to get API version --- src/Common/ZooKeeper/ZooKeeperImpl.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index 7f23ac00efe..14b31c6a411 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -1107,17 +1107,20 @@ void ZooKeeper::initApiVersion() get(keeper_api_version_path, std::move(callback), {}); if (future.wait_for(std::chrono::milliseconds(args.operation_timeout_ms)) != std::future_status::ready) { - LOG_TRACE(log, "Failed to get API version: timeout"); - return; + throw Exception(Error::ZOPERATIONTIMEOUT, "Failed to get API version: timeout"); } auto response = future.get(); - if (response.error != Coordination::Error::ZOK) + if (response.error == Coordination::Error::ZNONODE) { - LOG_TRACE(log, "Failed to get API version"); + LOG_TRACE(log, "API version not found, assuming {}", keeper_api_version); return; } + else if (response.error != Coordination::Error::ZOK) + { + throw Exception(response.error, "Failed to get API version"); + } uint8_t keeper_version{0}; DB::ReadBufferFromOwnString buf(response.data); From 76e3d1e7b62340f88475045630061bed12ca08e3 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 21 Jun 2023 17:10:20 +0300 Subject: [PATCH 0601/1997] Revert "Fix a crash in s3 and s3Cluster functions" --- src/TableFunctions/TableFunctionS3.cpp | 6 +++--- tests/queries/0_stateless/02772_s3_crash.reference | 0 tests/queries/0_stateless/02772_s3_crash.sql | 5 ----- 3 files changed, 3 insertions(+), 8 deletions(-) delete mode 100644 tests/queries/0_stateless/02772_s3_crash.reference delete mode 100644 tests/queries/0_stateless/02772_s3_crash.sql diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index 1d84fa8a265..c8cc0cddd30 
100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -41,13 +41,13 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context } else { + if (args.empty() || args.size() > 6) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "The signature of table function {} shall be the following:\n{}", getName(), getSignature()); + auto * header_it = StorageURL::collectHeaders(args, configuration.headers_from_ast, context); if (header_it != args.end()) args.erase(header_it); - if (args.empty() || args.size() > 6) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "The signature of table function {} shall be the following:\n{}", getName(), getSignature()); - for (auto & arg : args) arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); diff --git a/tests/queries/0_stateless/02772_s3_crash.reference b/tests/queries/0_stateless/02772_s3_crash.reference deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/queries/0_stateless/02772_s3_crash.sql b/tests/queries/0_stateless/02772_s3_crash.sql deleted file mode 100644 index 5cad83def63..00000000000 --- a/tests/queries/0_stateless/02772_s3_crash.sql +++ /dev/null @@ -1,5 +0,0 @@ --- Tags: no-fasttest --- Tag no-fasttest: Depends on AWS - -SELECT * FROM s3(headers('random_header' = 'value')); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT * FROM s3Cluster('test_cluster_two_shards_localhost', headers('random_header' = 'value')); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } From b723f9e7bf9746aea4bafee642817b83e49d67d8 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 21 Jun 2023 16:45:52 +0200 Subject: [PATCH 0602/1997] Fix sometimes not correct current_elements_num --- src/Interpreters/Cache/LRUFileCachePriority.cpp | 16 ++++++++++++---- src/Interpreters/Cache/LRUFileCachePriority.h | 3 ++- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/Interpreters/Cache/LRUFileCachePriority.cpp b/src/Interpreters/Cache/LRUFileCachePriority.cpp index 3c36962a0e5..ebdf402908a 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.cpp +++ b/src/Interpreters/Cache/LRUFileCachePriority.cpp @@ -49,6 +49,7 @@ IFileCachePriority::Iterator LRUFileCachePriority::add( auto iter = queue.insert(queue.end(), Entry(key, offset, size, key_metadata)); current_size += size; + ++current_elements_num; CurrentMetrics::add(CurrentMetrics::FilesystemCacheSize, size); CurrentMetrics::add(CurrentMetrics::FilesystemCacheElements); @@ -63,12 +64,13 @@ IFileCachePriority::Iterator LRUFileCachePriority::add( void LRUFileCachePriority::removeAll(const CacheGuard::Lock &) { CurrentMetrics::sub(CurrentMetrics::FilesystemCacheSize, current_size); - CurrentMetrics::sub(CurrentMetrics::FilesystemCacheElements, queue.size()); + CurrentMetrics::sub(CurrentMetrics::FilesystemCacheElements, current_elements_num); LOG_TEST(log, "Removed all entries from LRU queue"); queue.clear(); current_size = 0; + current_elements_num = 0; } void LRUFileCachePriority::pop(const CacheGuard::Lock &) @@ -78,10 +80,15 @@ void LRUFileCachePriority::pop(const CacheGuard::Lock &) LRUFileCachePriority::LRUQueueIterator LRUFileCachePriority::remove(LRUQueueIterator it) { - current_size -= it->size; + /// If size is 0, entry is annuled, current_elements_num was already updated. 
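[Patch 0602, whose hunks continue below, keeps a separate `current_elements_num` instead of reporting `queue.size()`: an entry can be invalidated in place (its size set to 0) and only physically erased later, so the container size over-counts live entries. A toy model of the accounting, under that assumption:]

```cpp
#include <cassert>
#include <cstddef>
#include <list>

struct Entry { size_t size = 0; };

struct LRUQueueSketch
{
    std::list<Entry> queue;
    size_t current_elements_num = 0;  // counts only non-invalidated entries

    void add(size_t size) { queue.push_back({size}); ++current_elements_num; }

    void invalidate(Entry & entry) { entry.size = 0; --current_elements_num; }

    void erase(std::list<Entry>::iterator it)
    {
        if (it->size)  // an invalidated entry was already discounted
            --current_elements_num;
        queue.erase(it);
    }
};

int main()
{
    LRUQueueSketch q;
    q.add(100);
    q.add(200);
    q.invalidate(q.queue.front());
    assert(q.queue.size() == 2 && q.current_elements_num == 1);  // list size over-counts
    q.erase(q.queue.begin());                                    // no double decrement
    assert(q.current_elements_num == 1);
}
```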
+ if (it->size) + { + current_size -= it->size; + --current_elements_num; - CurrentMetrics::sub(CurrentMetrics::FilesystemCacheSize, it->size); - CurrentMetrics::sub(CurrentMetrics::FilesystemCacheElements); + CurrentMetrics::sub(CurrentMetrics::FilesystemCacheSize, it->size); + CurrentMetrics::sub(CurrentMetrics::FilesystemCacheElements); + } LOG_TEST( log, "Removed entry from LRU queue, key: {}, offset: {}, size: {}", @@ -157,6 +164,7 @@ void LRUFileCachePriority::LRUFileCacheIterator::annul() { updateSize(-queue_iter->size); chassert(queue_iter->size == 0); + --cache_priority->current_elements_num; } void LRUFileCachePriority::LRUFileCacheIterator::updateSize(int64_t size) diff --git a/src/Interpreters/Cache/LRUFileCachePriority.h b/src/Interpreters/Cache/LRUFileCachePriority.h index a80e4647f2d..208d05f9ed8 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.h +++ b/src/Interpreters/Cache/LRUFileCachePriority.h @@ -22,7 +22,7 @@ public: size_t getSize(const CacheGuard::Lock &) const override { return current_size; } - size_t getElementsCount(const CacheGuard::Lock &) const override { return queue.size(); } + size_t getElementsCount(const CacheGuard::Lock &) const override { return current_elements_num; } Iterator add(KeyMetadataPtr key_metadata, size_t offset, size_t size, const CacheGuard::Lock &) override; @@ -37,6 +37,7 @@ private: Poco::Logger * log = &Poco::Logger::get("LRUFileCachePriority"); std::atomic current_size = 0; + std::atomic current_elements_num = 0; LRUQueueIterator remove(LRUQueueIterator it); }; From 3224e49757a4d6258f1601915b8ecdffebb79a72 Mon Sep 17 00:00:00 2001 From: liuneng <1398775315@qq.com> Date: Wed, 21 Jun 2023 22:46:41 +0800 Subject: [PATCH 0603/1997] fix --- src/DataTypes/Native.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/DataTypes/Native.cpp b/src/DataTypes/Native.cpp index 9af50a180ea..fd3716c2291 100644 --- a/src/DataTypes/Native.cpp +++ b/src/DataTypes/Native.cpp @@ -125,9 +125,7 @@ llvm::Value * nativeCast(llvm::IRBuilderBase & b, const DataTypePtr & from_type, { auto * to_native_type = toNativeType(b, to_type); auto * inner = nativeCast(b, from_type, value, removeNullable(to_type)); - auto * res_ptr = b.CreateAlloca(to_native_type); - auto * res_value = b.CreateLoad(to_native_type, res_ptr); - return b.CreateInsertValue(res_value, inner, {0}); + return b.CreateInsertValue(llvm::Constant::getNullValue(to_native_type), inner, {0}); } else { From 89f19fc912b1dc20b5c19f2601a4bd0de6caa878 Mon Sep 17 00:00:00 2001 From: serxa Date: Wed, 21 Jun 2023 14:47:47 +0000 Subject: [PATCH 0604/1997] fix values indexing --- src/Core/SettingsFields.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Core/SettingsFields.h b/src/Core/SettingsFields.h index d02932c8000..686638d497f 100644 --- a/src/Core/SettingsFields.h +++ b/src/Core/SettingsFields.h @@ -374,8 +374,9 @@ template constexpr auto getEnumValues() { std::array, magic_enum::enum_count()> enum_values{}; + size_t index = 0; for (auto value : magic_enum::enum_values()) - enum_values[magic_enum::enum_integer(value)] = std::pair{magic_enum::enum_name(value), value}; + enum_values[index++] = std::pair{magic_enum::enum_name(value), value}; return enum_values; } From c5ade1c8fcf6ad4bd0ecfd5b2fc76eef312ed018 Mon Sep 17 00:00:00 2001 From: serxa Date: Wed, 21 Jun 2023 15:16:18 +0000 Subject: [PATCH 0605/1997] fix flaky `AsyncLoader` destructor --- src/Common/AsyncLoader.cpp | 16 ++++++++++++++-- src/Common/AsyncLoader.h | 1 + 2 files changed, 15 
insertions(+), 2 deletions(-) diff --git a/src/Common/AsyncLoader.cpp b/src/Common/AsyncLoader.cpp index 86edcdc8f3d..4d885fae2e8 100644 --- a/src/Common/AsyncLoader.cpp +++ b/src/Common/AsyncLoader.cpp @@ -200,9 +200,11 @@ void AsyncLoader::start() void AsyncLoader::wait() { - // Because job can create new jobs in other pools we have to recheck in cycle + // Because job can create new jobs in other pools we have to recheck in cycle. + // Also wait for all workers to finish to avoid races on `pool.workers`, + // which can be decrease even after all jobs are already finished. std::unique_lock lock{mutex}; - while (!scheduled_jobs.empty()) + while (!scheduled_jobs.empty() && hasWorker(lock)) { lock.unlock(); for (auto & p : pools) @@ -719,4 +721,14 @@ void AsyncLoader::worker(Pool & pool) } } +bool AsyncLoader::hasWorker(std::unique_lock &) const +{ + for (const Pool & pool : pools) + { + if (pool.workers > 0) + return true; + } + return false; +} + } diff --git a/src/Common/AsyncLoader.h b/src/Common/AsyncLoader.h index 322a4482e4e..77905319f00 100644 --- a/src/Common/AsyncLoader.h +++ b/src/Common/AsyncLoader.h @@ -445,6 +445,7 @@ private: void updateCurrentPriorityAndSpawn(std::unique_lock &); void spawn(Pool & pool, std::unique_lock &); void worker(Pool & pool); + bool hasWorker(std::unique_lock &) const; // Logging const bool log_failures; // Worker should log all exceptions caught from job functions. From 1f901f4ec498078cbb7ae83d7c93275fffbdcedf Mon Sep 17 00:00:00 2001 From: serxa Date: Wed, 21 Jun 2023 15:18:49 +0000 Subject: [PATCH 0606/1997] fix --- src/Common/AsyncLoader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/AsyncLoader.cpp b/src/Common/AsyncLoader.cpp index 4d885fae2e8..43900c3afd5 100644 --- a/src/Common/AsyncLoader.cpp +++ b/src/Common/AsyncLoader.cpp @@ -204,7 +204,7 @@ void AsyncLoader::wait() // Also wait for all workers to finish to avoid races on `pool.workers`, // which can be decrease even after all jobs are already finished. std::unique_lock lock{mutex}; - while (!scheduled_jobs.empty() && hasWorker(lock)) + while (!scheduled_jobs.empty() || hasWorker(lock)) { lock.unlock(); for (auto & p : pools) From 804466f12958949c244d38ef5089ba85315ef252 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Wed, 21 Jun 2023 11:20:17 -0400 Subject: [PATCH 0607/1997] fix server settings nav --- .../server-configuration-parameters/index.md | 16 ---------------- .../server-configuration-parameters/settings.md | 8 ++++++++ 2 files changed, 8 insertions(+), 16 deletions(-) delete mode 100644 docs/en/operations/server-configuration-parameters/index.md diff --git a/docs/en/operations/server-configuration-parameters/index.md b/docs/en/operations/server-configuration-parameters/index.md deleted file mode 100644 index d4b941c0819..00000000000 --- a/docs/en/operations/server-configuration-parameters/index.md +++ /dev/null @@ -1,16 +0,0 @@ ---- -slug: /en/operations/server-configuration-parameters/ -sidebar_position: 54 -sidebar_label: Server Configuration Parameters -pagination_next: en/operations/server-configuration-parameters/settings ---- - -# Server Configuration Parameters - -This section contains descriptions of server settings that cannot be changed at the session or query level. - -These settings are stored in the `config.xml` file on the ClickHouse server. - -Other settings are described in the “[Settings](../../operations/settings/index.md#session-settings-intro)” section. 
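[Patches 0605/0606 above make `AsyncLoader::wait()` loop while there are scheduled jobs *or* live workers — a worker thread can still be running (and touching `pool.workers`) briefly after its last job finished, which was the race on destruction. A reduced sketch of the corrected loop with simplified types:]

```cpp
#include <cstddef>
#include <mutex>
#include <vector>

struct Pool { size_t workers = 0; /* thread pool omitted */ };

struct AsyncLoaderSketch
{
    std::mutex mutex;
    std::vector<Pool> pools;
    size_t scheduled_jobs = 0;

    bool hasWorker(std::unique_lock<std::mutex> &) const  // parameter proves the lock is held
    {
        for (const Pool & pool : pools)
            if (pool.workers > 0)
                return true;
        return false;
    }

    void wait()
    {
        std::unique_lock lock{mutex};
        while (scheduled_jobs != 0 || hasWorker(lock))  // `||`, not `&&` — the 0606 fix
        {
            lock.unlock();
            /* ... wait on each pool's underlying thread pool ... */
            lock.lock();
        }
    }
};
```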
- -Before studying the settings, read the [Configuration files](../../operations/configuration-files.md#configuration_files) section and note the use of substitutions (the `incl` and `optional` attributes). diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 83756097cfa..ce62d436813 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -7,6 +7,14 @@ description: This section contains descriptions of server settings that cannot b # Server Settings +This section contains descriptions of server settings that cannot be changed at the session or query level. + +These settings are stored in the `config.xml` file on the ClickHouse server. + +Other settings are described in the “[Settings](../../operations/settings/index.md#session-settings-intro)” section. + +Before studying the settings, read the [Configuration files](../../operations/configuration-files.md#configuration_files) section and note the use of substitutions (the `incl` and `optional` attributes). + ## allow_use_jemalloc_memory Allows to use jemalloc memory. From d18da7098a01a54b592b223c5ed49fd1ec7e4843 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Wed, 21 Jun 2023 17:25:09 +0200 Subject: [PATCH 0608/1997] Update LRUFileCachePriority.cpp --- src/Interpreters/Cache/LRUFileCachePriority.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/Cache/LRUFileCachePriority.cpp b/src/Interpreters/Cache/LRUFileCachePriority.cpp index ebdf402908a..2807fa2d87e 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.cpp +++ b/src/Interpreters/Cache/LRUFileCachePriority.cpp @@ -80,7 +80,7 @@ void LRUFileCachePriority::pop(const CacheGuard::Lock &) LRUFileCachePriority::LRUQueueIterator LRUFileCachePriority::remove(LRUQueueIterator it) { - /// If size is 0, entry is annuled, current_elements_num was already updated. + /// If size is 0, entry is annulled, current_elements_num was already updated. 
if (it->size) { current_size -= it->size; From b007c9f194842f27686876e63495c0e70ccaf769 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 21 Jun 2023 18:28:14 +0200 Subject: [PATCH 0609/1997] Review comments --- src/Interpreters/Cache/IFileCachePriority.h | 2 +- src/Interpreters/Cache/LRUFileCachePriority.cpp | 13 +++++++++++-- src/Interpreters/Cache/LRUFileCachePriority.h | 4 +++- src/Interpreters/Cache/Metadata.cpp | 2 +- 4 files changed, 16 insertions(+), 5 deletions(-) diff --git a/src/Interpreters/Cache/IFileCachePriority.h b/src/Interpreters/Cache/IFileCachePriority.h index 93343398783..7338d06e756 100644 --- a/src/Interpreters/Cache/IFileCachePriority.h +++ b/src/Interpreters/Cache/IFileCachePriority.h @@ -50,7 +50,7 @@ public: virtual Entry & getEntry() = 0; - virtual void annul() = 0; + virtual void invalidate() = 0; virtual void updateSize(int64_t size) = 0; }; diff --git a/src/Interpreters/Cache/LRUFileCachePriority.cpp b/src/Interpreters/Cache/LRUFileCachePriority.cpp index ebdf402908a..e2307e63bc7 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.cpp +++ b/src/Interpreters/Cache/LRUFileCachePriority.cpp @@ -25,6 +25,14 @@ IFileCachePriority::Iterator LRUFileCachePriority::add( const CacheGuard::Lock &) { const auto & key = key_metadata->key; + if (size == 0) + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Adding zero size entries to LRU queue is not allowed " + "(Key: {}, offset: {}", key, offset); + } + #ifndef NDEBUG for (const auto & entry : queue) { @@ -80,7 +88,7 @@ void LRUFileCachePriority::pop(const CacheGuard::Lock &) LRUFileCachePriority::LRUQueueIterator LRUFileCachePriority::remove(LRUQueueIterator it) { - /// If size is 0, entry is annuled, current_elements_num was already updated. + /// If size is 0, entry is invalidateed, current_elements_num was already updated. if (it->size) { current_size -= it->size; @@ -160,11 +168,12 @@ LRUFileCachePriority::LRUFileCacheIterator::remove(const CacheGuard::Lock &) cache_priority, cache_priority->remove(queue_iter)); } -void LRUFileCachePriority::LRUFileCacheIterator::annul() +void LRUFileCachePriority::LRUFileCacheIterator::invalidate() { updateSize(-queue_iter->size); chassert(queue_iter->size == 0); --cache_priority->current_elements_num; + CurrentMetrics::sub(CurrentMetrics::FilesystemCacheElements); } void LRUFileCachePriority::LRUFileCacheIterator::updateSize(int64_t size) diff --git a/src/Interpreters/Cache/LRUFileCachePriority.h b/src/Interpreters/Cache/LRUFileCachePriority.h index 208d05f9ed8..afa2cfb4613 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.h +++ b/src/Interpreters/Cache/LRUFileCachePriority.h @@ -37,6 +37,8 @@ private: Poco::Logger * log = &Poco::Logger::get("LRUFileCachePriority"); std::atomic current_size = 0; + /// current_elements_num is not always equal to queue.size() + /// because of invalidated entries. 
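[A pattern worth noting throughout these cache hunks: methods take an unused `const CacheGuard::Lock &`. The parameter is a lock witness — a call site cannot be written without producing a lock object, so "caller must hold the guard" is enforced by the type system instead of by comment. A minimal rendition of the idiom (an interpretation of the signatures shown, not the actual CacheGuard code):]

```cpp
#include <cstddef>
#include <mutex>

class CacheGuard
{
public:
    struct Lock : std::unique_lock<std::mutex>
    {
        using std::unique_lock<std::mutex>::unique_lock;
    };
    Lock lock() { return Lock(mutex); }
private:
    std::mutex mutex;
};

struct PrioritySketch
{
    size_t current_size = 0;
    // Unusable without proof that the cache guard is held.
    size_t getSize(const CacheGuard::Lock &) const { return current_size; }
};

int main()
{
    CacheGuard guard;
    PrioritySketch p;
    auto lk = guard.lock();
    return static_cast<int>(p.getSize(lk));  // p.getSize() without a lock would not compile
}
```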
std::atomic current_elements_num = 0; LRUQueueIterator remove(LRUQueueIterator it); @@ -57,7 +59,7 @@ public: Iterator remove(const CacheGuard::Lock &) override; - void annul() override; + void invalidate() override; void updateSize(int64_t size) override; diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index 20af931bbc2..dd4ac567e2a 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -402,7 +402,7 @@ KeyMetadata::iterator LockedKey::removeFileSegment(size_t offset, const FileSegm chassert(file_segment->assertCorrectnessUnlocked(segment_lock)); if (file_segment->queue_iterator) - file_segment->queue_iterator->annul(); + file_segment->queue_iterator->invalidate(); const auto path = key_metadata->getFileSegmentPath(*file_segment); bool exists = fs::exists(path); From 3ce17fd5701ed3d48bf5ec011167ff1b52076ef7 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 21 Jun 2023 18:30:46 +0200 Subject: [PATCH 0610/1997] Fix log message --- src/Interpreters/Cache/LRUFileCachePriority.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/Cache/LRUFileCachePriority.cpp b/src/Interpreters/Cache/LRUFileCachePriority.cpp index 3ecb1500a8b..ea437c51050 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.cpp +++ b/src/Interpreters/Cache/LRUFileCachePriority.cpp @@ -30,7 +30,7 @@ IFileCachePriority::Iterator LRUFileCachePriority::add( throw Exception( ErrorCodes::LOGICAL_ERROR, "Adding zero size entries to LRU queue is not allowed " - "(Key: {}, offset: {}", key, offset); + "(Key: {}, offset: {})", key, offset); } #ifndef NDEBUG From fb47eea283771228f6667cba83d05cdba47f18d8 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 21 Jun 2023 16:41:42 +0000 Subject: [PATCH 0611/1997] Docs: little cleanup of configuration-files.md I tried to understand in what ways ClickHouse's configuration can be set up. Unfortunately, the official documentation is quite lacking and sometimes self-contradictory. After reading some 3rd party guides and StackOverflow answers, I tried to clean it up a little bit but there is more work to be done. --- docs/en/operations/configuration-files.md | 41 ++++++++++++++--------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/docs/en/operations/configuration-files.md b/docs/en/operations/configuration-files.md index b3583e156ad..d3e21cb2364 100644 --- a/docs/en/operations/configuration-files.md +++ b/docs/en/operations/configuration-files.md @@ -6,32 +6,43 @@ sidebar_label: Configuration Files # Configuration Files -ClickHouse supports multi-file configuration management. The main server configuration file is `/etc/clickhouse-server/config.xml` or `/etc/clickhouse-server/config.yaml`. Other files must be in the `/etc/clickhouse-server/config.d` directory. Note, that any configuration file can be written either in XML or YAML, but mixing formats in one file is not supported. For example, you can have main configs as `config.xml` and `users.xml` and write additional files in `config.d` and `users.d` directories in `.yaml`. +The ClickHouse server can be configured with configuration files in XML or YAML syntax. In most installation types, the ClickHouse server runs with `/etc/clickhouse-server/config.xml` as default configuration file but it is also possible to specify the location of the configuration file manually at server startup using command line option `--config-file=` or `-C`. 
Additional configuration files may be placed into directory `config.d/` relative to the main configuration file, for example into directory `/etc/clickhouse-server/config.d/`. Files in this directory and the main configuration are merged in a preprocessing step before the configuration is applied in ClickHouse server. Configuration files are merged in alphabetical order. To simplify updates and improve modularization, it is best practice to keep the default `config.xml` file unmodified and place additional customization into `config.d/`. -All XML files should have the same root element, usually ``. As for YAML, `clickhouse:` should not be present, the parser will insert it automatically. +It is possible to mix XML and YAML configuration files, for example you could have a main configuration file `config.xml` and additional configuration files `config.d/network.xml`, `config.d/timezone.yaml` and `config.d/keeper.yaml`. Mixing XML and YAML within a single configuration file is not supported. XML configuration files should use `...` as top-level tag. In YAML configuration files, `clickhouse:` is optional, the parser inserts it implicitly if absent. -## Override {#override} +## Overriding Configuration {#override} -Some settings specified in the main configuration file can be overridden in other configuration files: +The merge of configuration files behaves as one intuitively expects: The contents of both files are combined recursively, children with the same name are replaced by the element of the more specific configuration file. The merge can be customized using attributes `replace` and `remove`. +- Attribute `replace` means that the element is replaced by the specified one. +- Attribute `remove` means that the element is deleted. -- The `replace` or `remove` attributes can be specified for the elements of these configuration files. -- If neither is specified, it combines the contents of elements recursively, replacing values of duplicate children. -- If `replace` is specified, it replaces the entire element with the specified one. -- If `remove` is specified, it deletes the element. +To specify that a value of an element should be replaced by the value of an environment variable, you can use attribute `from_env`. -You can also declare attributes as coming from environment variables by using `from_env="VARIABLE_NAME"`: +Example with `$MAX_QUERY_SIZE = 150000`: ```xml - - - - - + + + + + ``` -## Substitution {#substitution} +which is equal to + +``` xml + + + + 150000 + + + +``` + +## Substituting Configuration {#substitution} The config can also define “substitutions”. If an element has the `incl` attribute, the corresponding substitution from the file will be used as the value. By default, the path to the file with substitutions is `/etc/metrika.xml`. This can be changed in the [include_from](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-include_from) element in the server config. The substitution values are specified in `/clickhouse/substitution_name` elements in this file. If a substitution specified in `incl` does not exist, it is recorded in the log. To prevent ClickHouse from logging missing substitutions, specify the `optional="true"` attribute (for example, settings for [macros](../operations/server-configuration-parameters/settings.md#macros)). 
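[The merge semantics documented above — recursive combination by default, with `replace` and `remove` as escape hatches — can be modelled on a toy tree. This is a deliberately simplified stand-in: the real merger works on Poco XML nodes and also handles repeated same-name elements, `from_env`, and `incl` substitutions, none of which are shown here:]

```cpp
#include <map>
#include <memory>
#include <string>

struct Node
{
    std::string value;
    bool replace = false, remove = false;  // attributes on the overriding element
    std::map<std::string, std::unique_ptr<Node>> children;
};

std::unique_ptr<Node> clone(const Node & n)
{
    auto copy = std::make_unique<Node>();
    copy->value = n.value;
    for (const auto & [name, child] : n.children)
        copy->children[name] = clone(*child);
    return copy;
}

// Merge `over` into `base`: combine recursively by default, honour the
// `replace` / `remove` attributes, override scalar values.
void merge(Node & base, const Node & over)
{
    for (const auto & [name, child] : over.children)
    {
        if (child->remove)
        {
            base.children.erase(name);  // element is deleted outright
            continue;
        }
        auto & slot = base.children[name];
        if (!slot || child->replace)
            slot = clone(*child);       // wholesale replacement
        else
            merge(*slot, *child);       // default: recursive combination
    }
    if (!over.value.empty())
        base.value = over.value;        // the more specific file wins for scalars
}
```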
From c14930bc19df80cc3d029da5af6a827557727119 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Wed, 21 Jun 2023 13:11:54 -0400 Subject: [PATCH 0612/1997] bad path --- docs/en/sql-reference/statements/set.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/statements/set.md b/docs/en/sql-reference/statements/set.md index 14f523adc3b..3e5e86eccf7 100644 --- a/docs/en/sql-reference/statements/set.md +++ b/docs/en/sql-reference/statements/set.md @@ -10,7 +10,7 @@ sidebar_label: SET SET param = value ``` -Assigns `value` to the `param` [setting](../../operations/settings/index.md) for the current session. You cannot change [server settings](../../operations/server-configuration-parameters/index.md) this way. +Assigns `value` to the `param` [setting](../../operations/settings/index.md) for the current session. You cannot change [server settings](../../operations/server-configuration-parameters/settings.md) this way. You can also set all the values from the specified settings profile in a single query. From e87b9ee84668a56824fd197167bcd1b13f097f59 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 21 Jun 2023 20:25:10 +0300 Subject: [PATCH 0613/1997] Update AsyncLoader.cpp --- src/Common/AsyncLoader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/AsyncLoader.cpp b/src/Common/AsyncLoader.cpp index 43900c3afd5..9662146e137 100644 --- a/src/Common/AsyncLoader.cpp +++ b/src/Common/AsyncLoader.cpp @@ -202,7 +202,7 @@ void AsyncLoader::wait() { // Because job can create new jobs in other pools we have to recheck in cycle. // Also wait for all workers to finish to avoid races on `pool.workers`, - // which can be decrease even after all jobs are already finished. + // which can decrease even after all jobs are already finished. 
std::unique_lock lock{mutex}; while (!scheduled_jobs.empty() || hasWorker(lock)) { From 38466169c79410b7873c6d30ab3e266541b34c20 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 21 Jun 2023 20:02:19 +0200 Subject: [PATCH 0614/1997] Better --- .../Cache/LRUFileCachePriority.cpp | 54 ++++++++++--------- src/Interpreters/Cache/LRUFileCachePriority.h | 3 ++ 2 files changed, 32 insertions(+), 25 deletions(-) diff --git a/src/Interpreters/Cache/LRUFileCachePriority.cpp b/src/Interpreters/Cache/LRUFileCachePriority.cpp index ea437c51050..6f142c0cc6d 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.cpp +++ b/src/Interpreters/Cache/LRUFileCachePriority.cpp @@ -30,7 +30,7 @@ IFileCachePriority::Iterator LRUFileCachePriority::add( throw Exception( ErrorCodes::LOGICAL_ERROR, "Adding zero size entries to LRU queue is not allowed " - "(Key: {}, offset: {})", key, offset); + "(key: {}, offset: {})", key, offset); } #ifndef NDEBUG @@ -56,11 +56,9 @@ IFileCachePriority::Iterator LRUFileCachePriority::add( } auto iter = queue.insert(queue.end(), Entry(key, offset, size, key_metadata)); - current_size += size; - ++current_elements_num; - CurrentMetrics::add(CurrentMetrics::FilesystemCacheSize, size); - CurrentMetrics::add(CurrentMetrics::FilesystemCacheElements); + updateSize(size); + updateElementsCount(1); LOG_TEST( log, "Added entry into LRU queue, key: {}, offset: {}, size: {}", @@ -71,14 +69,11 @@ IFileCachePriority::Iterator LRUFileCachePriority::add( void LRUFileCachePriority::removeAll(const CacheGuard::Lock &) { - CurrentMetrics::sub(CurrentMetrics::FilesystemCacheSize, current_size); - CurrentMetrics::sub(CurrentMetrics::FilesystemCacheElements, current_elements_num); - LOG_TEST(log, "Removed all entries from LRU queue"); + updateSize(-current_size); + updateElementsCount(-current_elements_num); queue.clear(); - current_size = 0; - current_elements_num = 0; } void LRUFileCachePriority::pop(const CacheGuard::Lock &) @@ -91,11 +86,8 @@ LRUFileCachePriority::LRUQueueIterator LRUFileCachePriority::remove(LRUQueueIter /// If size is 0, entry is invalidated, current_elements_num was already updated. if (it->size) { - current_size -= it->size; - --current_elements_num; - - CurrentMetrics::sub(CurrentMetrics::FilesystemCacheSize, it->size); - CurrentMetrics::sub(CurrentMetrics::FilesystemCacheElements); + updateSize(-it->size); + updateElementsCount(-1); } LOG_TEST( @@ -105,6 +97,19 @@ LRUFileCachePriority::LRUQueueIterator LRUFileCachePriority::remove(LRUQueueIter return queue.erase(it); } +void LRUFileCachePriority::updateSize(int64_t size) +{ + current_size += size; + CurrentMetrics::add(CurrentMetrics::FilesystemCacheSize, size); +} + +void LRUFileCachePriority::updateElementsCount(int64_t num) +{ + current_elements_num += num; + CurrentMetrics::add(CurrentMetrics::FilesystemCacheElements, num); +} + + LRUFileCachePriority::LRUFileCacheIterator::LRUFileCacheIterator( LRUFileCachePriority * cache_priority_, LRUFileCachePriority::LRUQueueIterator queue_iter_) @@ -170,10 +175,14 @@ LRUFileCachePriority::LRUFileCacheIterator::remove(const CacheGuard::Lock &) void LRUFileCachePriority::LRUFileCacheIterator::invalidate() { - updateSize(-queue_iter->size); - chassert(queue_iter->size == 0); - --cache_priority->current_elements_num; - CurrentMetrics::sub(CurrentMetrics::FilesystemCacheElements); + LOG_TEST( + cache_priority->log, + "Invalidating entry in LRU queue. 
Key: {}, offset: {}, previous size: {}", + queue_iter->key, queue_iter->offset, queue_iter->size); + + cache_priority->updateSize(-queue_iter->size); + cache_priority->updateElementsCount(-1); + queue_iter->size = 0; } void LRUFileCachePriority::LRUFileCacheIterator::updateSize(int64_t size) @@ -183,13 +192,8 @@ void LRUFileCachePriority::LRUFileCacheIterator::updateSize(int64_t size) "Update size with {} in LRU queue for key: {}, offset: {}, previous size: {}", size, queue_iter->key, queue_iter->offset, queue_iter->size); - cache_priority->current_size += size; + cache_priority->updateSize(size); queue_iter->size += size; - - CurrentMetrics::add(CurrentMetrics::FilesystemCacheSize, size); - - chassert(cache_priority->current_size >= 0); - chassert(queue_iter->size >= 0); } size_t LRUFileCachePriority::LRUFileCacheIterator::use(const CacheGuard::Lock &) diff --git a/src/Interpreters/Cache/LRUFileCachePriority.h b/src/Interpreters/Cache/LRUFileCachePriority.h index afa2cfb4613..8dc4eb0a016 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.h +++ b/src/Interpreters/Cache/LRUFileCachePriority.h @@ -33,6 +33,9 @@ public: void iterate(IterateFunc && func, const CacheGuard::Lock &) override; private: + void updateElementsCount(int64_t num); + void updateSize(int64_t size); + LRUQueue queue; Poco::Logger * log = &Poco::Logger::get("LRUFileCachePriority"); From 4a570a05c9714c8ee94e68e4dda58efa759c8780 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Mon, 19 Jun 2023 23:35:35 +0000 Subject: [PATCH 0615/1997] Decrease default timeouts for S3 and HTTP requests --- docs/en/operations/settings/settings.md | 4 +- src/Backups/BackupIO_S3.cpp | 1 + src/Coordination/KeeperSnapshotManagerS3.cpp | 6 +-- src/Core/Defines.h | 2 +- src/Core/Settings.h | 1 + src/Core/SettingsChangesHistory.h | 2 + .../ObjectStorages/S3/S3ObjectStorage.cpp | 48 ++++++++++--------- src/Disks/ObjectStorages/S3/S3ObjectStorage.h | 16 +++++-- src/Disks/ObjectStorages/S3/diskSettings.cpp | 2 +- src/IO/S3/Client.cpp | 33 +++++++++---- src/IO/S3/Client.h | 22 +++++++-- src/IO/S3/tests/gtest_aws_s3_client.cpp | 1 + src/IO/WriteBufferFromS3.cpp | 4 +- src/IO/WriteBufferFromS3.h | 3 ++ src/IO/tests/gtest_writebuffer_s3.cpp | 1 + src/Storages/StorageS3.cpp | 5 +- src/Storages/StorageS3.h | 1 + src/Storages/StorageS3Settings.cpp | 5 +- src/Storages/StorageS3Settings.h | 3 +- 19 files changed, 109 insertions(+), 51 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index baefbb2cf6f..4916dfaaf7d 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -3501,7 +3501,7 @@ Possible values: - Any positive integer. - 0 - Disabled (infinite timeout). -Default value: 180. +Default value: 30. ## http_receive_timeout {#http_receive_timeout} @@ -3512,7 +3512,7 @@ Possible values: - Any positive integer. - 0 - Disabled (infinite timeout). -Default value: 180. +Default value: 30. 
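[Patch 0614 above routes every size/element change through the private `updateSize()` / `updateElementsCount()` helpers, so the `CurrentMetrics` counters and the priority's own fields are always adjusted together and cannot drift apart. Compressed into a model with plain atomics standing in for the metrics subsystem:]

```cpp
#include <atomic>
#include <cstdint>

std::atomic<int64_t> metric_cache_size{0}, metric_cache_elements{0};  // stand-ins for CurrentMetrics

class PriorityAccounting
{
public:
    void add(int64_t size) { updateSize(size); updateElementsCount(1); }

    void invalidate(int64_t & entry_size)
    {
        updateSize(-entry_size);    // entry stays queued with size 0
        updateElementsCount(-1);
        entry_size = 0;
    }

private:
    // The only places that touch both the local fields and the global metrics.
    void updateSize(int64_t delta)          { current_size += delta;         metric_cache_size += delta; }
    void updateElementsCount(int64_t delta) { current_elements_num += delta; metric_cache_elements += delta; }

    std::atomic<int64_t> current_size{0};
    std::atomic<int64_t> current_elements_num{0};
};
```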
## check_query_single_value_result {#check_query_single_value_result} diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index 967beba4bf5..9a2a457e13e 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -253,6 +253,7 @@ std::unique_ptr BackupWriterS3::writeFile(const String & file_name) { return std::make_unique( client, + client, // already has long timeout s3_uri.bucket, fs::path(s3_uri.key) / file_name, DBMS_DEFAULT_BUFFER_SIZE, diff --git a/src/Coordination/KeeperSnapshotManagerS3.cpp b/src/Coordination/KeeperSnapshotManagerS3.cpp index 1afe0b352c5..bf437f03ae3 100644 --- a/src/Coordination/KeeperSnapshotManagerS3.cpp +++ b/src/Coordination/KeeperSnapshotManagerS3.cpp @@ -144,14 +144,14 @@ void KeeperSnapshotManagerS3::uploadSnapshotImpl(const std::string & snapshot_pa const auto create_writer = [&](const auto & key) { - return WriteBufferFromS3 - { + return WriteBufferFromS3( + s3_client->client, s3_client->client, s3_client->uri.bucket, key, DBMS_DEFAULT_BUFFER_SIZE, request_settings_1 - }; + ); }; LOG_INFO(log, "Will try to upload snapshot on {} to S3", snapshot_path); diff --git a/src/Core/Defines.h b/src/Core/Defines.h index e9b84b71cae..efe14b93a3d 100644 --- a/src/Core/Defines.h +++ b/src/Core/Defines.h @@ -41,7 +41,7 @@ /// The boundary on which the blocks for asynchronous file operations should be aligned. #define DEFAULT_AIO_FILE_BLOCK_SIZE 4096 -#define DEFAULT_HTTP_READ_BUFFER_TIMEOUT 180 +#define DEFAULT_HTTP_READ_BUFFER_TIMEOUT 30 #define DEFAULT_HTTP_READ_BUFFER_CONNECTION_TIMEOUT 1 /// Maximum number of http-connections between two endpoints /// the number is unmotivated diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 204a27483df..5162e0f273e 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -102,6 +102,7 @@ class IColumn; M(Bool, s3_allow_parallel_part_upload, true, "Use multiple threads for s3 multipart upload. It may lead to slightly higher memory usage", 0) \ M(Bool, s3_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \ M(UInt64, s3_retry_attempts, 10, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries", 0) \ + M(UInt64, s3_request_timeout_ms, 3000, "Idleness timeout for sending and receiving data to/from S3. Fail if a single TCP read or write call blocks for this long.", 0) \ M(Bool, enable_s3_requests_logging, false, "Enable very explicit logging of S3 requests. Makes sense for debug only.", 0) \ M(UInt64, hdfs_replication, 0, "The actual number of replications can be specified when the hdfs file is created.", 0) \ M(Bool, hdfs_truncate_on_insert, false, "Enables or disables truncate before insert in s3 engine tables", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 9fd45ac16d6..2886cdd288d 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -80,6 +80,8 @@ namespace SettingsChangesHistory /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) static std::map settings_changes_history = { + {"23.6", {{"http_send_timeout", 180, 30, "3 minutes seems crazy long. 
Note that this is timeout for a single network write call, not for the whole upload operation."}, + {"http_receive_timeout", 180, 30, "See http_send_timeout."}}}, {"23.5", {{"input_format_parquet_preserve_order", true, false, "Allow Parquet reader to reorder rows for better parallelism."}, {"parallelize_output_from_storages", false, true, "Allow parallelism when executing queries that read from file/url/s3/etc. This may reorder rows."}, {"use_with_fill_by_sorting_prefix", false, true, "Columns preceding WITH FILL columns in ORDER BY clause form sorting prefix. Rows with different values in sorting prefix are filled independently"}, diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index e48924326e1..e46ca3d0828 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -149,7 +149,7 @@ private: bool S3ObjectStorage::exists(const StoredObject & object) const { auto settings_ptr = s3_settings.get(); - return S3::objectExists(*client.get(), bucket, object.remote_path, {}, settings_ptr->request_settings, /* for_disk_s3= */ true); + return S3::objectExists(*clients.get()->client, bucket, object.remote_path, {}, settings_ptr->request_settings, /* for_disk_s3= */ true); } std::unique_ptr S3ObjectStorage::readObjects( /// NOLINT @@ -168,7 +168,7 @@ std::unique_ptr S3ObjectStorage::readObjects( /// NOLINT (const std::string & path, size_t read_until_position) -> std::unique_ptr { return std::make_unique( - client.get(), + clients.get()->client, bucket, path, version_id, @@ -218,7 +218,7 @@ std::unique_ptr S3ObjectStorage::readObject( /// NOLINT { auto settings_ptr = s3_settings.get(); return std::make_unique( - client.get(), + clients.get()->client, bucket, object.remote_path, version_id, @@ -243,8 +243,10 @@ std::unique_ptr S3ObjectStorage::writeObject( /// NOLIN if (write_settings.s3_allow_parallel_part_upload) scheduler = threadPoolCallbackRunner(getThreadPoolWriter(), "VFSWrite"); + auto clients_ = clients.get(); return std::make_unique( - client.get(), + clients_->client, + clients_->client_with_long_timeout, bucket, object.remote_path, buf_size, @@ -258,7 +260,7 @@ std::unique_ptr S3ObjectStorage::writeObject( /// NOLIN ObjectStorageIteratorPtr S3ObjectStorage::iterate(const std::string & path_prefix) const { auto settings_ptr = s3_settings.get(); - auto client_ptr = client.get(); + auto client_ptr = clients.get()->client; return std::make_shared(bucket, path_prefix, client_ptr, settings_ptr->list_object_keys_size); } @@ -266,7 +268,7 @@ ObjectStorageIteratorPtr S3ObjectStorage::iterate(const std::string & path_prefi void S3ObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const { auto settings_ptr = s3_settings.get(); - auto client_ptr = client.get(); + auto client_ptr = clients.get()->client; S3::ListObjectsV2Request request; request.SetBucket(bucket); @@ -307,7 +309,7 @@ void S3ObjectStorage::listObjects(const std::string & path, RelativePathsWithMet void S3ObjectStorage::removeObjectImpl(const StoredObject & object, bool if_exists) { - auto client_ptr = client.get(); + auto client_ptr = clients.get()->client; ProfileEvents::increment(ProfileEvents::S3DeleteObjects); ProfileEvents::increment(ProfileEvents::DiskS3DeleteObjects); @@ -333,7 +335,7 @@ void S3ObjectStorage::removeObjectsImpl(const StoredObjects & objects, bool if_e } else { - auto client_ptr = client.get(); + auto client_ptr = clients.get()->client; auto 
settings_ptr = s3_settings.get(); size_t chunk_size_limit = settings_ptr->objects_chunk_size_to_delete; @@ -394,7 +396,7 @@ void S3ObjectStorage::removeObjectsIfExist(const StoredObjects & objects) std::optional S3ObjectStorage::tryGetObjectMetadata(const std::string & path) const { auto settings_ptr = s3_settings.get(); - auto object_info = S3::getObjectInfo(*client.get(), bucket, path, {}, settings_ptr->request_settings, /* with_metadata= */ true, /* for_disk_s3= */ true, /* throw_on_error= */ false); + auto object_info = S3::getObjectInfo(*clients.get()->client, bucket, path, {}, settings_ptr->request_settings, /* with_metadata= */ true, /* for_disk_s3= */ true, /* throw_on_error= */ false); if (object_info.size == 0 && object_info.last_modification_time == 0 && object_info.metadata.empty()) return {}; @@ -410,7 +412,7 @@ std::optional S3ObjectStorage::tryGetObjectMetadata(const std::s ObjectMetadata S3ObjectStorage::getObjectMetadata(const std::string & path) const { auto settings_ptr = s3_settings.get(); - auto object_info = S3::getObjectInfo(*client.get(), bucket, path, {}, settings_ptr->request_settings, /* with_metadata= */ true, /* for_disk_s3= */ true); + auto object_info = S3::getObjectInfo(*clients.get()->client, bucket, path, {}, settings_ptr->request_settings, /* with_metadata= */ true, /* for_disk_s3= */ true); ObjectMetadata result; result.size_bytes = object_info.size; @@ -429,7 +431,7 @@ void S3ObjectStorage::copyObjectToAnotherObjectStorage( // NOLINT /// Shortcut for S3 if (auto * dest_s3 = dynamic_cast(&object_storage_to); dest_s3 != nullptr) { - auto client_ptr = client.get(); + auto client_ptr = clients.get()->client; auto settings_ptr = s3_settings.get(); auto size = S3::getObjectSize(*client_ptr, bucket, object_from.remote_path, {}, settings_ptr->request_settings, /* for_disk_s3= */ true); auto scheduler = threadPoolCallbackRunner(getThreadPoolWriter(), "S3ObjStor_copy"); @@ -445,7 +447,7 @@ void S3ObjectStorage::copyObjectToAnotherObjectStorage( // NOLINT void S3ObjectStorage::copyObject( // NOLINT const StoredObject & object_from, const StoredObject & object_to, std::optional object_to_attributes) { - auto client_ptr = client.get(); + auto client_ptr = clients.get()->client; auto settings_ptr = s3_settings.get(); auto size = S3::getObjectSize(*client_ptr, bucket, object_from.remote_path, {}, settings_ptr->request_settings, /* for_disk_s3= */ true); auto scheduler = threadPoolCallbackRunner(getThreadPoolWriter(), "S3ObjStor_copy"); @@ -458,35 +460,33 @@ void S3ObjectStorage::setNewSettings(std::unique_ptr && s3_settings.set(std::move(s3_settings_)); } -void S3ObjectStorage::setNewClient(std::unique_ptr && client_) -{ - client.set(std::move(client_)); -} - void S3ObjectStorage::shutdown() { - auto client_ptr = client.get(); + auto clients_ptr = clients.get(); /// This call stops any next retry attempts for ongoing S3 requests. /// If S3 request is failed and the method below is executed S3 client immediately returns the last failed S3 request outcome. /// If S3 is healthy nothing wrong will be happened and S3 requests will be processed in a regular way without errors. /// This should significantly speed up shutdown process if S3 is unhealthy. 
- const_cast(*client_ptr).DisableRequestProcessing(); + const_cast(*clients_ptr->client).DisableRequestProcessing(); + const_cast(*clients_ptr->client_with_long_timeout).DisableRequestProcessing(); } void S3ObjectStorage::startup() { - auto client_ptr = client.get(); + auto clients_ptr = clients.get(); /// Need to be enabled if it was disabled during shutdown() call. - const_cast(*client_ptr).EnableRequestProcessing(); + const_cast(*clients_ptr->client).EnableRequestProcessing(); + const_cast(*clients_ptr->client_with_long_timeout).EnableRequestProcessing(); } void S3ObjectStorage::applyNewSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) { auto new_s3_settings = getSettings(config, config_prefix, context); auto new_client = getClient(config, config_prefix, context, *new_s3_settings); + auto new_clients = std::make_unique(std::move(new_client), *new_s3_settings); s3_settings.set(std::move(new_s3_settings)); - client.set(std::move(new_client)); + clients.set(std::move(new_clients)); } std::unique_ptr S3ObjectStorage::cloneObjectStorage( @@ -501,7 +501,9 @@ std::unique_ptr S3ObjectStorage::cloneObjectStorage( endpoint); } +S3ObjectStorage::Clients::Clients(std::shared_ptr client_, const S3ObjectStorageSettings & settings) + : client(std::move(client_)), client_with_long_timeout(client->clone(std::nullopt, settings.request_settings.long_request_timeout_ms)) {} + } - #endif diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h index 072e1354d38..527b1479d89 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h @@ -39,6 +39,16 @@ struct S3ObjectStorageSettings class S3ObjectStorage : public IObjectStorage { +public: + struct Clients + { + std::shared_ptr client; + std::shared_ptr client_with_long_timeout; + + Clients() = default; + Clients(std::shared_ptr client, const S3ObjectStorageSettings & settings); + }; + private: friend class S3PlainObjectStorage; @@ -51,7 +61,7 @@ private: String bucket_, String connection_string) : bucket(bucket_) - , client(std::move(client_)) + , clients(std::make_unique(std::move(client_), *s3_settings_)) , s3_settings(std::move(s3_settings_)) , s3_capabilities(s3_capabilities_) , version_id(std::move(version_id_)) @@ -159,14 +169,12 @@ public: private: void setNewSettings(std::unique_ptr && s3_settings_); - void setNewClient(std::unique_ptr && client_); - void removeObjectImpl(const StoredObject & object, bool if_exists); void removeObjectsImpl(const StoredObjects & objects, bool if_exists); std::string bucket; - MultiVersion client; + MultiVersion clients; MultiVersion s3_settings; S3Capabilities s3_capabilities; diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index 409eb2a3dc3..cbf0392aae9 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -129,7 +129,7 @@ std::unique_ptr getClient( throw Exception(ErrorCodes::BAD_ARGUMENTS, "S3 path must ends with '/', but '{}' doesn't.", uri.key); client_configuration.connectTimeoutMs = config.getUInt(config_prefix + ".connect_timeout_ms", 1000); - client_configuration.requestTimeoutMs = config.getUInt(config_prefix + ".request_timeout_ms", 30000); + client_configuration.requestTimeoutMs = config.getUInt(config_prefix + ".request_timeout_ms", 3000); client_configuration.maxConnections = config.getUInt(config_prefix + ".max_connections", 
100); client_configuration.endpointOverride = uri.endpoint; diff --git a/src/IO/S3/Client.cpp b/src/IO/S3/Client.cpp index 668b1a3959d..7e20b1a9e8f 100644 --- a/src/IO/S3/Client.cpp +++ b/src/IO/S3/Client.cpp @@ -100,7 +100,7 @@ std::unique_ptr Client::create( size_t max_redirects_, ServerSideEncryptionKMSConfig sse_kms_config_, const std::shared_ptr & credentials_provider, - const Aws::Client::ClientConfiguration & client_configuration, + const PocoHTTPClientConfiguration & client_configuration, Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy sign_payloads, bool use_virtual_addressing) { @@ -109,9 +109,16 @@ std::unique_ptr Client::create( new Client(max_redirects_, std::move(sse_kms_config_), credentials_provider, client_configuration, sign_payloads, use_virtual_addressing)); } -std::unique_ptr Client::create(const Client & other) +std::unique_ptr Client::clone( + std::optional> override_retry_strategy, + std::optional override_request_timeout_ms) const { - return std::unique_ptr(new Client(other)); + PocoHTTPClientConfiguration new_configuration = client_configuration; + if (override_retry_strategy.has_value()) + new_configuration.retryStrategy = *override_retry_strategy; + if (override_request_timeout_ms.has_value()) + new_configuration.requestTimeoutMs = *override_request_timeout_ms; + return std::unique_ptr(new Client(*this, new_configuration)); } namespace @@ -134,11 +141,14 @@ Client::Client( size_t max_redirects_, ServerSideEncryptionKMSConfig sse_kms_config_, const std::shared_ptr & credentials_provider_, - const Aws::Client::ClientConfiguration & client_configuration, - Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy sign_payloads, - bool use_virtual_addressing) - : Aws::S3::S3Client(credentials_provider_, client_configuration, std::move(sign_payloads), use_virtual_addressing) + const PocoHTTPClientConfiguration & client_configuration_, + Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy sign_payloads_, + bool use_virtual_addressing_) + : Aws::S3::S3Client(credentials_provider_, client_configuration_, sign_payloads_, use_virtual_addressing_) , credentials_provider(credentials_provider_) + , client_configuration(client_configuration_) + , sign_payloads(sign_payloads_) + , use_virtual_addressing(use_virtual_addressing_) , max_redirects(max_redirects_) , sse_kms_config(std::move(sse_kms_config_)) , log(&Poco::Logger::get("S3Client")) @@ -175,10 +185,15 @@ Client::Client( ClientCacheRegistry::instance().registerClient(cache); } -Client::Client(const Client & other) - : Aws::S3::S3Client(other) +Client::Client( + const Client & other, const PocoHTTPClientConfiguration & client_configuration_) + : Aws::S3::S3Client(other.credentials_provider, client_configuration_, other.sign_payloads, + other.use_virtual_addressing) , initial_endpoint(other.initial_endpoint) , credentials_provider(other.credentials_provider) + , client_configuration(client_configuration_) + , sign_payloads(other.sign_payloads) + , use_virtual_addressing(other.use_virtual_addressing) , explicit_region(other.explicit_region) , detect_region(other.detect_region) , provider_type(other.provider_type) diff --git a/src/IO/S3/Client.h b/src/IO/S3/Client.h index e1b99c893a6..8904c850553 100644 --- a/src/IO/S3/Client.h +++ b/src/IO/S3/Client.h @@ -105,6 +105,8 @@ private: class Client : private Aws::S3::S3Client { public: + class RetryStrategy; + /// we use a factory method to verify arguments before creating a client because /// there are certain requirements on arguments for it to work correctly /// e.g. 
Client::RetryStrategy should be used @@ -112,11 +114,19 @@ public: size_t max_redirects_, ServerSideEncryptionKMSConfig sse_kms_config_, const std::shared_ptr & credentials_provider, - const Aws::Client::ClientConfiguration & client_configuration, + const PocoHTTPClientConfiguration & client_configuration, Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy sign_payloads, bool use_virtual_addressing); - static std::unique_ptr create(const Client & other); + /// Create a client with adjusted settings: + /// * override_retry_strategy can be used to disable retries to avoid nested retries when we have + /// a retry loop outside of S3 client. Specifically, for read and write buffers. Currently not + /// actually used. + /// * override_request_timeout_ms is used to increase timeout for CompleteMultipartUploadRequest + /// because it often sits idle for 10 seconds: https://github.com/ClickHouse/ClickHouse/pull/42321 + std::unique_ptr clone( + std::optional> override_retry_strategy = std::nullopt, + std::optional override_request_timeout_ms = std::nullopt) const; Client & operator=(const Client &) = delete; @@ -211,11 +221,12 @@ private: Client(size_t max_redirects_, ServerSideEncryptionKMSConfig sse_kms_config_, const std::shared_ptr & credentials_provider_, - const Aws::Client::ClientConfiguration& client_configuration, + const PocoHTTPClientConfiguration & client_configuration, Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy sign_payloads, bool use_virtual_addressing); - Client(const Client & other); + Client( + const Client & other, const PocoHTTPClientConfiguration & client_configuration); /// Leave regular functions private so we don't accidentally use them /// otherwise region and endpoint redirection won't work @@ -251,6 +262,9 @@ private: String initial_endpoint; std::shared_ptr credentials_provider; + PocoHTTPClientConfiguration client_configuration; + Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy sign_payloads; + bool use_virtual_addressing; std::string explicit_region; mutable bool detect_region = true; diff --git a/src/IO/S3/tests/gtest_aws_s3_client.cpp b/src/IO/S3/tests/gtest_aws_s3_client.cpp index a9b5fa03f30..5731e9061d6 100644 --- a/src/IO/S3/tests/gtest_aws_s3_client.cpp +++ b/src/IO/S3/tests/gtest_aws_s3_client.cpp @@ -89,6 +89,7 @@ void doWriteRequest(std::shared_ptr client, const DB::S3:: DB::S3Settings::RequestSettings request_settings; request_settings.max_unexpected_write_error_retries = max_unexpected_write_error_retries; DB::WriteBufferFromS3 write_buffer( + client, client, uri.bucket, uri.key, diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index 990505adfb3..900861a7831 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -77,6 +77,7 @@ struct WriteBufferFromS3::PartData WriteBufferFromS3::WriteBufferFromS3( std::shared_ptr client_ptr_, + std::shared_ptr client_with_long_timeout_ptr_, const String & bucket_, const String & key_, size_t buf_size_, @@ -91,6 +92,7 @@ WriteBufferFromS3::WriteBufferFromS3( , upload_settings(request_settings.getUploadSettings()) , write_settings(write_settings_) , client_ptr(std::move(client_ptr_)) + , client_with_long_timeout_ptr(std::move(client_with_long_timeout_ptr_)) , object_metadata(std::move(object_metadata_)) , buffer_allocation_policy(ChooseBufferPolicy(upload_settings)) , task_tracker( @@ -551,7 +553,7 @@ void WriteBufferFromS3::completeMultipartUpload() ProfileEvents::increment(ProfileEvents::DiskS3CompleteMultipartUpload); Stopwatch watch; - auto outcome = 
client_ptr->CompleteMultipartUpload(req); + auto outcome = client_with_long_timeout_ptr->CompleteMultipartUpload(req); watch.stop(); ProfileEvents::increment(ProfileEvents::WriteBufferFromS3Microseconds, watch.elapsedMicroseconds()); diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h index f4200b0a646..32f4867a439 100644 --- a/src/IO/WriteBufferFromS3.h +++ b/src/IO/WriteBufferFromS3.h @@ -29,6 +29,8 @@ class WriteBufferFromS3 final : public WriteBufferFromFileBase public: WriteBufferFromS3( std::shared_ptr client_ptr_, + /// for CompleteMultipartUploadRequest, because it blocks on recv() for a few seconds on big uploads + std::shared_ptr client_with_long_timeout_ptr_, const String & bucket_, const String & key_, size_t buf_size_, @@ -86,6 +88,7 @@ private: const S3Settings::RequestSettings::PartUploadSettings & upload_settings; const WriteSettings write_settings; const std::shared_ptr client_ptr; + const std::shared_ptr client_with_long_timeout_ptr; const std::optional> object_metadata; Poco::Logger * log = &Poco::Logger::get("WriteBufferFromS3"); diff --git a/src/IO/tests/gtest_writebuffer_s3.cpp b/src/IO/tests/gtest_writebuffer_s3.cpp index cd38291fb31..44c0ee67669 100644 --- a/src/IO/tests/gtest_writebuffer_s3.cpp +++ b/src/IO/tests/gtest_writebuffer_s3.cpp @@ -526,6 +526,7 @@ public: getAsyncPolicy().setAutoExecute(false); return std::make_unique( + client, client, bucket, file_name, diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index f4791e45e2b..135722dbce2 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -150,7 +150,7 @@ public: KeysWithInfo * read_keys_, const S3Settings::RequestSettings & request_settings_) : WithContext(context_) - , client(S3::Client::create(client_)) + , client(client_.clone()) , globbed_uri(globbed_uri_) , query(query_) , virtual_header(virtual_header_) @@ -783,6 +783,7 @@ public: write_buf = wrapWriteBufferWithCompressionMethod( std::make_unique( configuration_.client, + configuration_.client_with_long_timeout, bucket, key, DBMS_DEFAULT_BUFFER_SIZE, @@ -1296,6 +1297,8 @@ void StorageS3::Configuration::connect(ContextPtr context) context->getConfigRef().getUInt64("s3.expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS)), auth_settings.no_sign_request.value_or(context->getConfigRef().getBool("s3.no_sign_request", false)), }); + + client_with_long_timeout = client->clone(std::nullopt, request_settings.long_request_timeout_ms); } void StorageS3::processNamedCollectionResult(StorageS3::Configuration & configuration, const NamedCollection & collection) diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index 13053833623..8d571dd796f 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -274,6 +274,7 @@ public: HTTPHeaderEntries headers_from_ast; std::shared_ptr client; + std::shared_ptr client_with_long_timeout; std::vector keys; }; diff --git a/src/Storages/StorageS3Settings.cpp b/src/Storages/StorageS3Settings.cpp index 23b4630707c..89e6ee46b4d 100644 --- a/src/Storages/StorageS3Settings.cpp +++ b/src/Storages/StorageS3Settings.cpp @@ -199,7 +199,7 @@ S3Settings::RequestSettings::RequestSettings( list_object_keys_size = config.getUInt64(key + "list_object_keys_size", settings.s3_list_object_keys_size); throw_on_zero_files_match = config.getBool(key + "throw_on_zero_files_match", settings.s3_throw_on_zero_files_match); retry_attempts = config.getUInt64(key + "retry_attempts", settings.s3_retry_attempts); - request_timeout_ms = config.getUInt64(key + 
"request_timeout_ms", request_timeout_ms); + request_timeout_ms = config.getUInt64(key + "request_timeout_ms", settings.s3_request_timeout_ms); /// NOTE: it would be better to reuse old throttlers to avoid losing token bucket state on every config reload, /// which could lead to exceeding limit for short time. But it is good enough unless very high `burst` values are used. @@ -255,6 +255,9 @@ void S3Settings::RequestSettings::updateFromSettingsImpl(const Settings & settin if (!if_changed || settings.s3_retry_attempts.changed) retry_attempts = settings.s3_retry_attempts; + + if (!if_changed || settings.s3_request_timeout_ms.changed) + request_timeout_ms = settings.s3_request_timeout_ms; } void S3Settings::RequestSettings::updateFromSettings(const Settings & settings) diff --git a/src/Storages/StorageS3Settings.h b/src/Storages/StorageS3Settings.h index 41489927e7f..991e323acb6 100644 --- a/src/Storages/StorageS3Settings.h +++ b/src/Storages/StorageS3Settings.h @@ -69,7 +69,8 @@ struct S3Settings ThrottlerPtr get_request_throttler; ThrottlerPtr put_request_throttler; size_t retry_attempts = 10; - size_t request_timeout_ms = 30000; + size_t request_timeout_ms = 3000; + size_t long_request_timeout_ms = 30000; // TODO: Take this from config like request_timeout_ms bool throw_on_zero_files_match = false; From 57070a6a6e773b7fa853eb6f7598660558d4ac8e Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 21 Jun 2023 18:16:52 +0000 Subject: [PATCH 0616/1997] Ignore "modularization" --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 73ec64e2f30..46bd0da4236 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1762,6 +1762,7 @@ misconfiguration mispredictions mmap mmapped +modularization moduloOrZero mongodb monthName From 9157314b2a8d03a87bc467e716c3557b7d9d768f Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 21 Jun 2023 20:29:32 +0200 Subject: [PATCH 0617/1997] fix --- .../ObjectStorages/DiskObjectStorage.cpp | 4 ++- .../MergeTree/DataPartStorageOnDiskBase.cpp | 27 +++++++++++++------ .../MergeTree/DataPartStorageOnDiskBase.h | 2 +- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 6 +++-- tests/integration/helpers/cluster.py | 2 +- .../configs/config.d/storage_conf.xml | 2 ++ .../test_merge_tree_s3_failover/test.py | 3 ++- 7 files changed, 32 insertions(+), 14 deletions(-) diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index e3922b6c505..90eb87a56f1 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -544,8 +544,10 @@ void DiskObjectStorage::writeFileUsingBlobWritingFunction(const String & path, W } void DiskObjectStorage::applyNewSettings( - const Poco::Util::AbstractConfiguration & config, ContextPtr context_, const String & config_prefix, const DisksMap & disk_map) + const Poco::Util::AbstractConfiguration & config, ContextPtr context_, const String & /*config_prefix*/, const DisksMap & disk_map) { + /// FIXME we cannot use config_prefix that was passed through arguments because the disk may be wrapped with cache and we need another name + const auto config_prefix = "storage_configuration.disks." 
+ name; object_storage->applyNewSettings(config, config_prefix, context_); IDisk::applyNewSettings(config, context_, config_prefix, disk_map); } diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp index c397a634db6..01fcc2698eb 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp @@ -455,23 +455,34 @@ MutableDataPartStoragePtr DataPartStorageOnDiskBase::freeze( MutableDataPartStoragePtr DataPartStorageOnDiskBase::clonePart( const std::string & to, const std::string & dir_path, - const DiskPtr & disk, - Poco::Logger *) const + const DiskPtr & dst_disk, + Poco::Logger * log) const { String path_to_clone = fs::path(to) / dir_path / ""; + auto src_disk = volume->getDisk(); - if (disk->exists(path_to_clone)) + if (dst_disk->exists(path_to_clone)) { throw Exception(ErrorCodes::DIRECTORY_ALREADY_EXISTS, "Cannot clone part {} from '{}' to '{}': path '{}' already exists", - dir_path, getRelativePath(), path_to_clone, fullPath(disk, path_to_clone)); + dir_path, getRelativePath(), path_to_clone, fullPath(dst_disk, path_to_clone)); } - disk->createDirectories(to); - volume->getDisk()->copyDirectoryContent(getRelativePath(), disk, path_to_clone); - volume->getDisk()->removeFileIfExists(fs::path(path_to_clone) / "delete-on-destroy.txt"); + try + { + dst_disk->createDirectories(to); + src_disk->copyDirectoryContent(getRelativePath(), dst_disk, path_to_clone); + } + catch (...) + { + /// It's safe to remove it recursively (even with zero-copy-replication) + /// because we've just done a full copy through copyDirectoryContent + LOG_WARNING(log, "Removing directory {} after failed attempt to move a data part", path_to_clone); + dst_disk->removeRecursive(path_to_clone); + throw; + } - auto single_disk_volume = std::make_shared(disk->getName(), disk, 0); + auto single_disk_volume = std::make_shared(dst_disk->getName(), dst_disk, 0); return create(single_disk_volume, to, dir_path, /*initialize=*/ true); } diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.h b/src/Storages/MergeTree/DataPartStorageOnDiskBase.h index 043953eb20c..59f29b76b75 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.h +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.h @@ -71,7 +71,7 @@ public: MutableDataPartStoragePtr clonePart( const std::string & to, const std::string & dir_path, - const DiskPtr & disk, + const DiskPtr & dst_disk, Poco::Logger * log) const override; void rename( diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index e1e64b82ea3..c5754f70265 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -502,8 +502,10 @@ void IMergeTreeDataPart::removeIfNeeded() throw Exception(ErrorCodes::LOGICAL_ERROR, "relative_path {} of part {} is invalid or not set", getDataPartStorage().getPartDirectory(), name); - const auto part_parent_directory = directoryPath(part_directory); - bool is_moving_part = part_parent_directory.ends_with("moving/"); + fs::path part_directory_path = getDataPartStorage().getRelativePath(); + if (part_directory_path.filename().empty()) + part_directory_path = part_directory_path.parent_path(); + bool is_moving_part = part_directory_path.parent_path().filename() == "moving"; if (!startsWith(file_name, "tmp") && !endsWith(file_name, ".tmp_proj") && !is_moving_part) { LOG_ERROR( diff --git a/tests/integration/helpers/cluster.py 
b/tests/integration/helpers/cluster.py index f57ebf40e54..2b14b2eeb25 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -36,6 +36,7 @@ try: from confluent_kafka.avro.cached_schema_registry_client import ( CachedSchemaRegistryClient, ) + from .hdfs_api import HDFSApi # imports requests_kerberos except Exception as e: logging.warning(f"Cannot import some modules, some tests may not work: {e}") @@ -51,7 +52,6 @@ from helpers.client import QueryRuntimeException import docker from .client import Client -from .hdfs_api import HDFSApi from .config_cluster import * diff --git a/tests/integration/test_merge_tree_s3_failover/configs/config.d/storage_conf.xml b/tests/integration/test_merge_tree_s3_failover/configs/config.d/storage_conf.xml index 4480327c4b5..235b9a7b7a1 100644 --- a/tests/integration/test_merge_tree_s3_failover/configs/config.d/storage_conf.xml +++ b/tests/integration/test_merge_tree_s3_failover/configs/config.d/storage_conf.xml @@ -72,4 +72,6 @@ + + true diff --git a/tests/integration/test_merge_tree_s3_failover/test.py b/tests/integration/test_merge_tree_s3_failover/test.py index 05aeeff2ec1..90dda631924 100644 --- a/tests/integration/test_merge_tree_s3_failover/test.py +++ b/tests/integration/test_merge_tree_s3_failover/test.py @@ -183,7 +183,8 @@ def test_move_failover(cluster): ) ENGINE=MergeTree() ORDER BY id TTL dt + INTERVAL 4 SECOND TO VOLUME 'external' - SETTINGS storage_policy='s3_cold' + SETTINGS storage_policy='s3_cold', temporary_directories_lifetime=1, + merge_tree_clear_old_temporary_directories_interval_seconds=1 """ ) From fc5ed7ffd7b6594beed5b3ed172ea79055358862 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 21 Jun 2023 18:45:28 +0000 Subject: [PATCH 0618/1997] Automatic style fix --- tests/integration/helpers/cluster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 2b14b2eeb25..4c356219537 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -36,7 +36,7 @@ try: from confluent_kafka.avro.cached_schema_registry_client import ( CachedSchemaRegistryClient, ) - from .hdfs_api import HDFSApi # imports requests_kerberos + from .hdfs_api import HDFSApi # imports requests_kerberos except Exception as e: logging.warning(f"Cannot import some modules, some tests may not work: {e}") From 1419bb7adbac4603439c02d8e8b68d1338437c48 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 20 Jun 2023 20:31:23 +0200 Subject: [PATCH 0619/1997] rollback changes in test --- tests/integration/test_merge_tree_s3/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_merge_tree_s3/test.py b/tests/integration/test_merge_tree_s3/test.py index 22805eb6e94..2ccd517923a 100644 --- a/tests/integration/test_merge_tree_s3/test.py +++ b/tests/integration/test_merge_tree_s3/test.py @@ -923,7 +923,7 @@ def test_merge_canceled_by_s3_errors_when_move(cluster, broken_s3, node_name): @pytest.mark.parametrize("node_name", ["node"]) @pytest.mark.parametrize( - "in_flight_memory", [(10, 288044299), (5, 193557290), (1, 128348733)] + "in_flight_memory", [(10, 245918115), (5, 156786752), (1, 106426187)] ) def test_s3_engine_heavy_write_check_mem( cluster, broken_s3, node_name, in_flight_memory From 9aa4647f4fb2c25a74df952df2d6d815c27e9ee9 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 21 Jun 2023 20:48:12 +0200 Subject: [PATCH 0620/1997] Delete feature --- 
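Note: this commit removes the `Persistent` file-segment kind and the `do_not_evict_index_and_mark_files` setting end to end. A minimal sketch of inspecting the surviving cache state after the change (the `kind` column and the remaining `Regular`/`Temporary` kinds are taken from the diff below; the cache name `'s3_cache'` is a placeholder):

```sql
-- The `persistent` column is gone from both outputs after this patch.
DESCRIBE FILESYSTEM CACHE 's3_cache';
SELECT kind, count() FROM system.filesystem_cache GROUP BY kind;
```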
.../IO/CachedOnDiskReadBufferFromFile.cpp | 5 +--- src/Disks/IO/CachedOnDiskReadBufferFromFile.h | 2 -- .../IO/CachedOnDiskWriteBufferFromFile.cpp | 5 +--- .../IO/CachedOnDiskWriteBufferFromFile.h | 2 -- .../Cached/CachedObjectStorage.cpp | 15 ---------- .../Cached/CachedObjectStorage.h | 4 --- src/IO/ReadSettings.h | 1 - src/IO/WriteSettings.h | 1 - src/Interpreters/Cache/FileCache.cpp | 14 +++------ src/Interpreters/Cache/FileCache.h | 1 - src/Interpreters/Cache/FileCacheSettings.cpp | 2 -- src/Interpreters/Cache/FileCacheSettings.h | 2 -- src/Interpreters/Cache/FileSegment.h | 7 ----- src/Interpreters/Cache/Metadata.cpp | 3 -- .../InterpreterDescribeCacheQuery.cpp | 2 -- .../System/StorageSystemFilesystemCache.cpp | 29 +++++++++---------- 16 files changed, 20 insertions(+), 75 deletions(-) diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp index 76d54f9d27c..59cf8f0174c 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp @@ -69,7 +69,6 @@ CachedOnDiskReadBufferFromFile::CachedOnDiskReadBufferFromFile( , allow_seeks_after_first_read(allow_seeks_after_first_read_) , use_external_buffer(use_external_buffer_) , query_context_holder(cache_->getQueryContextHolder(query_id, settings_)) - , is_persistent(settings_.is_file_cache_persistent) , cache_log(cache_log_) { } @@ -125,7 +124,7 @@ void CachedOnDiskReadBufferFromFile::initialize(size_t offset, size_t size) } else { - CreateFileSegmentSettings create_settings(is_persistent ? FileSegmentKind::Persistent : FileSegmentKind::Regular); + CreateFileSegmentSettings create_settings(FileSegmentKind::Regular); file_segments = cache->getOrSet(cache_key, offset, size, file_size.value(), create_settings); } @@ -149,8 +148,6 @@ CachedOnDiskReadBufferFromFile::getCacheReadBuffer(const FileSegment & file_segm { ProfileEventTimeIncrement watch(ProfileEvents::CachedReadBufferCreateBufferMicroseconds); - /// Use is_persistent flag from in-memory state of the filesegment, - /// because it is consistent with what is written on disk. 
auto path = file_segment.getPathInLocalCache(); ReadSettings local_read_settings{settings}; diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.h b/src/Disks/IO/CachedOnDiskReadBufferFromFile.h index 8faf23ad343..b4e7701de75 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.h +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.h @@ -147,8 +147,6 @@ private: FileCache::QueryContextHolderPtr query_context_holder; - bool is_persistent; - std::shared_ptr cache_log; }; diff --git a/src/Disks/IO/CachedOnDiskWriteBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskWriteBufferFromFile.cpp index b7727555480..33d4ed7b3d7 100644 --- a/src/Disks/IO/CachedOnDiskWriteBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskWriteBufferFromFile.cpp @@ -194,7 +194,6 @@ CachedOnDiskWriteBufferFromFile::CachedOnDiskWriteBufferFromFile( FileCachePtr cache_, const String & source_path_, const FileCache::Key & key_, - bool is_persistent_cache_file_, const String & query_id_, const WriteSettings & settings_) : WriteBufferFromFileDecorator(std::move(impl_)) @@ -202,7 +201,6 @@ CachedOnDiskWriteBufferFromFile::CachedOnDiskWriteBufferFromFile( , cache(cache_) , source_path(source_path_) , key(key_) - , is_persistent_cache_file(is_persistent_cache_file_) , query_id(query_id_) , enable_cache_log(!query_id_.empty() && settings_.enable_filesystem_cache_log) , throw_on_error_from_cache(settings_.throw_on_error_from_cache) @@ -255,8 +253,7 @@ void CachedOnDiskWriteBufferFromFile::cacheData(char * data, size_t size, bool t try { - auto segment_kind = is_persistent_cache_file ? FileSegmentKind::Persistent : FileSegmentKind::Regular; - if (!cache_writer->write(data, size, current_download_offset, segment_kind)) + if (!cache_writer->write(data, size, current_download_offset, FileSegmentKind::Regular)) { LOG_INFO(log, "Write-through cache is stopped as cache limit is reached and nothing can be evicted"); return; diff --git a/src/Disks/IO/CachedOnDiskWriteBufferFromFile.h b/src/Disks/IO/CachedOnDiskWriteBufferFromFile.h index 8642886d6de..8d39b6eed42 100644 --- a/src/Disks/IO/CachedOnDiskWriteBufferFromFile.h +++ b/src/Disks/IO/CachedOnDiskWriteBufferFromFile.h @@ -72,7 +72,6 @@ public: FileCachePtr cache_, const String & source_path_, const FileCache::Key & key_, - bool is_persistent_cache_file_, const String & query_id_, const WriteSettings & settings_); @@ -89,7 +88,6 @@ private: String source_path; FileCache::Key key; - bool is_persistent_cache_file; size_t current_download_offset = 0; const String query_id; diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp index b1cf8226895..3e7c4d12c42 100644 --- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp @@ -119,7 +119,6 @@ std::unique_ptr CachedObjectStorage::writeObject( /// N cache, implementation_buffer->getFileName(), key, - modified_write_settings.is_file_cache_persistent, CurrentThread::isInitialized() && CurrentThread::get().getQueryContext() ? 
std::string(CurrentThread::getQueryId()) : "", modified_write_settings); } @@ -164,20 +163,6 @@ void CachedObjectStorage::removeObjectsIfExist(const StoredObjects & objects) object_storage->removeObjectsIfExist(objects); } -ReadSettings CachedObjectStorage::getAdjustedSettingsFromMetadataFile(const ReadSettings & settings, const std::string & path) const -{ - ReadSettings new_settings{settings}; - new_settings.is_file_cache_persistent = isFileWithPersistentCache(path) && cache_settings.do_not_evict_index_and_mark_files; - return new_settings; -} - -WriteSettings CachedObjectStorage::getAdjustedSettingsFromMetadataFile(const WriteSettings & settings, const std::string & path) const -{ - WriteSettings new_settings{settings}; - new_settings.is_file_cache_persistent = isFileWithPersistentCache(path) && cache_settings.do_not_evict_index_and_mark_files; - return new_settings; -} - void CachedObjectStorage::copyObjectToAnotherObjectStorage( // NOLINT const StoredObject & object_from, const StoredObject & object_to, diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h index ba9fbd02d94..76f16c9d930 100644 --- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h +++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h @@ -108,10 +108,6 @@ public: bool supportParallelWrite() const override { return object_storage->supportParallelWrite(); } - ReadSettings getAdjustedSettingsFromMetadataFile(const ReadSettings & settings, const std::string & path) const override; - - WriteSettings getAdjustedSettingsFromMetadataFile(const WriteSettings & settings, const std::string & path) const override; - const FileCacheSettings & getCacheSettings() const { return cache_settings; } static bool canUseReadThroughCache(const ReadSettings & settings); diff --git a/src/IO/ReadSettings.h b/src/IO/ReadSettings.h index dae4261e92c..87f249823b2 100644 --- a/src/IO/ReadSettings.h +++ b/src/IO/ReadSettings.h @@ -98,7 +98,6 @@ struct ReadSettings bool enable_filesystem_cache = true; bool read_from_filesystem_cache_if_exists_otherwise_bypass_cache = false; bool enable_filesystem_cache_log = false; - bool is_file_cache_persistent = false; /// Some files can be made non-evictable. /// Don't populate cache when the read is not part of query execution (e.g. background thread). bool avoid_readthrough_cache_outside_query_context = true; diff --git a/src/IO/WriteSettings.h b/src/IO/WriteSettings.h index e160796d9a3..8f22e44145a 100644 --- a/src/IO/WriteSettings.h +++ b/src/IO/WriteSettings.h @@ -19,7 +19,6 @@ struct WriteSettings /// Filesystem cache settings bool enable_filesystem_cache_on_write_operations = false; bool enable_filesystem_cache_log = false; - bool is_file_cache_persistent = false; bool throw_on_error_from_cache = false; bool s3_allow_parallel_part_upload = true; diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index 3d1b85dfbde..1bbc02eceaf 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -52,7 +52,6 @@ namespace ErrorCodes FileCache::FileCache(const FileCacheSettings & settings) : max_file_segment_size(settings.max_file_segment_size) - , allow_persistent_files(settings.do_not_evict_index_and_mark_files) , bypass_cache_threshold(settings.enable_bypass_cache_with_threashold ? 
settings.bypass_cache_threashold : 0) , delayed_cleanup_interval_ms(settings.delayed_cleanup_interval_ms) , log(&Poco::Logger::get("FileCache")) @@ -642,10 +641,7 @@ bool FileCache::tryReserve(FileSegment & file_segment, const size_t size) { chassert(segment_metadata->file_segment->assertCorrectness()); - const bool is_persistent = allow_persistent_files && segment_metadata->file_segment->isPersistent(); - const bool releasable = segment_metadata->releasable() && !is_persistent; - - if (releasable) + if (segment_metadata->releasable()) { auto segment = segment_metadata->file_segment; if (segment->state() == FileSegment::State::DOWNLOADED) @@ -820,10 +816,6 @@ void FileCache::removeAllReleasable() { assertInitialized(); - /// Only releasable file segments are evicted. - /// `remove_persistent_files` defines whether non-evictable by some criteria files - /// (they do not comply with the cache eviction policy) should also be removed. - auto lock = lockCache(); main_priority->iterate([&](LockedKey & locked_key, FileSegmentMetadataPtr segment_metadata) @@ -926,7 +918,9 @@ void FileCache::loadMetadata() parsed = tryParse(offset, offset_with_suffix.substr(0, delim_pos)); if (offset_with_suffix.substr(delim_pos+1) == "persistent") { - segment_kind = FileSegmentKind::Persistent; + /// For compatibility. Persistent files are no longer supported. + fs::remove(offset_it->path()); + continue; } if (offset_with_suffix.substr(delim_pos+1) == "temporary") { diff --git a/src/Interpreters/Cache/FileCache.h b/src/Interpreters/Cache/FileCache.h index dc5dd67710c..c211fdfc976 100644 --- a/src/Interpreters/Cache/FileCache.h +++ b/src/Interpreters/Cache/FileCache.h @@ -131,7 +131,6 @@ private: using KeyAndOffset = FileCacheKeyAndOffset; const size_t max_file_segment_size; - const bool allow_persistent_files; const size_t bypass_cache_threshold = 0; const size_t delayed_cleanup_interval_ms; diff --git a/src/Interpreters/Cache/FileCacheSettings.cpp b/src/Interpreters/Cache/FileCacheSettings.cpp index 1fe51bf5f3e..dbe3e8ced02 100644 --- a/src/Interpreters/Cache/FileCacheSettings.cpp +++ b/src/Interpreters/Cache/FileCacheSettings.cpp @@ -47,8 +47,6 @@ void FileCacheSettings::loadFromConfig(const Poco::Util::AbstractConfiguration & else bypass_cache_threashold = FILECACHE_BYPASS_THRESHOLD; - do_not_evict_index_and_mark_files = config.getUInt64(config_prefix + ".do_not_evict_index_and_mark_files", true); - boundary_alignment = config.getUInt64(config_prefix + ".boundary_alignment", DBMS_DEFAULT_BUFFER_SIZE); delayed_cleanup_interval_ms = config.getUInt64(config_prefix + ".delayed_cleanup_interval_ms", FILECACHE_DELAYED_CLEANUP_INTERVAL_MS); diff --git a/src/Interpreters/Cache/FileCacheSettings.h b/src/Interpreters/Cache/FileCacheSettings.h index eeb2a02c131..cddb8fd459b 100644 --- a/src/Interpreters/Cache/FileCacheSettings.h +++ b/src/Interpreters/Cache/FileCacheSettings.h @@ -22,8 +22,6 @@ struct FileCacheSettings size_t cache_hits_threshold = FILECACHE_DEFAULT_HITS_THRESHOLD; bool enable_filesystem_query_cache_limit = false; - bool do_not_evict_index_and_mark_files = true; - bool enable_bypass_cache_with_threashold = false; size_t bypass_cache_threashold = FILECACHE_BYPASS_THRESHOLD; size_t delayed_cleanup_interval_ms = FILECACHE_DELAYED_CLEANUP_INTERVAL_MS; diff --git a/src/Interpreters/Cache/FileSegment.h b/src/Interpreters/Cache/FileSegment.h index 681c0d719e4..062f168a046 100644 --- a/src/Interpreters/Cache/FileSegment.h +++ b/src/Interpreters/Cache/FileSegment.h @@ -37,11 +37,6 @@ enum class FileSegmentKind */ 
Regular, - /* `Persistent` file segment can't be evicted from cache, - * it should be removed manually. - */ - Persistent, - /* `Temporary` file segment is removed right after releasing. * Also corresponding files are removed during cache loading (if any). */ @@ -155,8 +150,6 @@ public: FileSegmentKind getKind() const { return segment_kind; } - bool isPersistent() const { return segment_kind == FileSegmentKind::Persistent; } - bool isUnbound() const { return is_unbound; } String getPathInLocalCache() const; diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index 20af931bbc2..495e335b7ae 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -141,9 +141,6 @@ String CacheMetadata::getFileNameForFileSegment(size_t offset, FileSegmentKind s String file_suffix; switch (segment_kind) { - case FileSegmentKind::Persistent: - file_suffix = "_persistent"; - break; case FileSegmentKind::Temporary: file_suffix = "_temporary"; break; diff --git a/src/Interpreters/InterpreterDescribeCacheQuery.cpp b/src/Interpreters/InterpreterDescribeCacheQuery.cpp index ca875ee57b2..1d2c567d16f 100644 --- a/src/Interpreters/InterpreterDescribeCacheQuery.cpp +++ b/src/Interpreters/InterpreterDescribeCacheQuery.cpp @@ -24,7 +24,6 @@ static Block getSampleBlock() ColumnWithTypeAndName{std::make_shared(), "current_size"}, ColumnWithTypeAndName{std::make_shared(), "current_elements"}, ColumnWithTypeAndName{std::make_shared(), "path"}, - ColumnWithTypeAndName{std::make_shared>(), "do_not_evict_index_and_mark_files"}, }; return Block(columns); } @@ -49,7 +48,6 @@ BlockIO InterpreterDescribeCacheQuery::execute() res_columns[5]->insert(cache->getUsedCacheSize()); res_columns[6]->insert(cache->getFileSegmentsNum()); res_columns[7]->insert(cache->getBasePath()); - res_columns[8]->insert(settings.do_not_evict_index_and_mark_files); BlockIO res; size_t num_rows = res_columns[0]->size(); diff --git a/src/Storages/System/StorageSystemFilesystemCache.cpp b/src/Storages/System/StorageSystemFilesystemCache.cpp index 8e9ad2ac501..e03fd9ca081 100644 --- a/src/Storages/System/StorageSystemFilesystemCache.cpp +++ b/src/Storages/System/StorageSystemFilesystemCache.cpp @@ -26,7 +26,6 @@ NamesAndTypesList StorageSystemFilesystemCache::getNamesAndTypes() {"cache_hits", std::make_shared()}, {"references", std::make_shared()}, {"downloaded_size", std::make_shared()}, - {"persistent", std::make_shared>()}, {"kind", std::make_shared()}, {"unbound", std::make_shared>()}, }; @@ -48,25 +47,25 @@ void StorageSystemFilesystemCache::fillData(MutableColumns & res_columns, Contex for (const auto & file_segment : *file_segments) { - res_columns[0]->insert(cache_name); - res_columns[1]->insert(cache->getBasePath()); + size_t i = 0; + res_columns[i++]->insert(cache_name); + res_columns[i++]->insert(cache->getBasePath()); /// Do not use `file_segment->getPathInLocalCache` here because it will lead to nullptr dereference /// (because file_segments in getSnapshot doesn't have `cache` field set) - res_columns[2]->insert(cache->getPathInLocalCache(file_segment->key(), file_segment->offset(), file_segment->getKind())); - res_columns[3]->insert(file_segment->key().toString()); + res_columns[i++]->insert(cache->getPathInLocalCache(file_segment->key(), file_segment->offset(), file_segment->getKind())); + res_columns[i++]->insert(file_segment->key().toString()); const auto & range = file_segment->range(); - res_columns[4]->insert(range.left); - res_columns[5]->insert(range.right); - 
res_columns[6]->insert(range.size()); - res_columns[7]->insert(FileSegment::stateToString(file_segment->state())); - res_columns[8]->insert(file_segment->getHitsCount()); - res_columns[9]->insert(file_segment->getRefCount()); - res_columns[10]->insert(file_segment->getDownloadedSize(false)); - res_columns[11]->insert(file_segment->isPersistent()); - res_columns[12]->insert(toString(file_segment->getKind())); - res_columns[13]->insert(file_segment->isUnbound()); + res_columns[i++]->insert(range.left); + res_columns[i++]->insert(range.right); + res_columns[i++]->insert(range.size()); + res_columns[i++]->insert(FileSegment::stateToString(file_segment->state())); + res_columns[i++]->insert(file_segment->getHitsCount()); + res_columns[i++]->insert(file_segment->getRefCount()); + res_columns[i++]->insert(file_segment->getDownloadedSize(false)); + res_columns[i++]->insert(toString(file_segment->getKind())); + res_columns[i++]->insert(file_segment->isUnbound()); } } } From 9240a82c6481e3d1caf7d5c28e1e549d143a55e3 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 21 Jun 2023 21:49:00 +0300 Subject: [PATCH 0621/1997] Update DatabaseReplicated.cpp --- src/Databases/DatabaseReplicated.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 7ab450c24f2..f219c73e19d 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -710,8 +710,9 @@ BlockIO DatabaseReplicated::tryEnqueueReplicatedDDL(const ASTPtr & query, Contex static UUID getTableUUIDIfReplicated(const String & metadata, ContextPtr context) { - bool looks_like_replicated = metadata.find("ReplicatedMergeTree") != std::string::npos; - if (!looks_like_replicated) + bool looks_like_replicated = metadata.find("Replicated") != std::string::npos; + bool looks_like_merge_tree = metadata.find("MergeTree") != std::string::npos; + if (!looks_like_replicated || !looks_like_merge_tree) return UUIDHelpers::Nil; ParserCreateQuery parser; From 50ee424148913665d3209102823b3bfc1f69c800 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 21 Jun 2023 21:07:59 +0200 Subject: [PATCH 0622/1997] do not update digest during recovery --- src/Databases/DatabaseReplicated.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index f219c73e19d..e468e533818 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -1181,7 +1181,7 @@ void DatabaseReplicated::dropTable(ContextPtr local_context, const String & tabl std::lock_guard lock{metadata_mutex}; UInt64 new_digest = tables_metadata_digest; new_digest -= getMetadataHash(table_name); - if (txn && !txn->isCreateOrReplaceQuery()) + if (txn && !txn->isCreateOrReplaceQuery() && !is_recovering) txn->addOp(zkutil::makeSetRequest(replica_path + "/digest", toString(new_digest), -1)); DatabaseAtomic::dropTableImpl(local_context, table_name, sync); @@ -1235,7 +1235,7 @@ void DatabaseReplicated::renameTable(ContextPtr local_context, const String & ta new_digest -= DB::getMetadataHash(to_table_name, statement_to); new_digest += DB::getMetadataHash(table_name, statement_to); } - if (txn) + if (txn && !is_recovering) txn->addOp(zkutil::makeSetRequest(replica_path + "/digest", toString(new_digest), -1)); DatabaseAtomic::renameTable(local_context, table_name, to_database, to_table_name, exchange, dictionary); @@ -1261,7 +1261,7 @@ void 
DatabaseReplicated::commitCreateTable(const ASTCreateQuery & query, const S std::lock_guard lock{metadata_mutex}; UInt64 new_digest = tables_metadata_digest; new_digest += DB::getMetadataHash(query.getTable(), statement); - if (txn && !txn->isCreateOrReplaceQuery()) + if (txn && !txn->isCreateOrReplaceQuery() && !is_recovering) txn->addOp(zkutil::makeSetRequest(replica_path + "/digest", toString(new_digest), -1)); DatabaseAtomic::commitCreateTable(query, table, table_metadata_tmp_path, table_metadata_path, query_context); @@ -1285,7 +1285,7 @@ void DatabaseReplicated::commitAlterTable(const StorageID & table_id, UInt64 new_digest = tables_metadata_digest; new_digest -= getMetadataHash(table_id.table_name); new_digest += DB::getMetadataHash(table_id.table_name, statement); - if (txn) + if (txn && !is_recovering) txn->addOp(zkutil::makeSetRequest(replica_path + "/digest", toString(new_digest), -1)); DatabaseAtomic::commitAlterTable(table_id, table_metadata_tmp_path, table_metadata_path, statement, query_context); @@ -1308,7 +1308,7 @@ void DatabaseReplicated::detachTablePermanently(ContextPtr local_context, const std::lock_guard lock{metadata_mutex}; UInt64 new_digest = tables_metadata_digest; new_digest -= getMetadataHash(table_name); - if (txn) + if (txn && !is_recovering) txn->addOp(zkutil::makeSetRequest(replica_path + "/digest", toString(new_digest), -1)); DatabaseAtomic::detachTablePermanently(local_context, table_name); @@ -1332,7 +1332,7 @@ void DatabaseReplicated::removeDetachedPermanentlyFlag(ContextPtr local_context, if (attach) { new_digest += getMetadataHash(table_name); - if (txn) + if (txn && !is_recovering) txn->addOp(zkutil::makeSetRequest(replica_path + "/digest", toString(new_digest), -1)); } From 5229544b559366a9a10df0ecb17485a54eee51fb Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Wed, 21 Jun 2023 22:17:39 +0200 Subject: [PATCH 0623/1997] Added function getCurrrentBatchAndScheduleNext to IObjectStorageIteratorAsync --- src/Disks/ObjectStorages/ObjectStorageIterator.h | 6 ++++++ .../ObjectStorageIteratorAsync.cpp | 16 ++++++++++++++++ .../ObjectStorages/ObjectStorageIteratorAsync.h | 3 ++- src/Storages/StorageAzureBlob.cpp | 11 +++++++---- 4 files changed, 31 insertions(+), 5 deletions(-) diff --git a/src/Disks/ObjectStorages/ObjectStorageIterator.h b/src/Disks/ObjectStorages/ObjectStorageIterator.h index 2ff5ce60acc..841b0ea6664 100644 --- a/src/Disks/ObjectStorages/ObjectStorageIterator.h +++ b/src/Disks/ObjectStorages/ObjectStorageIterator.h @@ -14,6 +14,7 @@ public: virtual bool isValid() = 0; virtual RelativePathWithMetadata current() = 0; virtual RelativePathsWithMetadata currentBatch() = 0; + virtual std::optional getCurrrentBatchAndScheduleNext() = 0; virtual size_t getAccumulatedSize() const = 0; virtual ~IObjectStorageIterator() = default; @@ -53,6 +54,11 @@ public: return batch; } + virtual std::optional getCurrrentBatchAndScheduleNext() override + { + return std::nullopt; + } + size_t getAccumulatedSize() const override { return batch.size(); diff --git a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp index f91c19f2fb9..7425f629a5a 100644 --- a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp @@ -100,6 +100,22 @@ RelativePathsWithMetadata IObjectStorageIteratorAsync::currentBatch() return current_batch; } +std::optional IObjectStorageIteratorAsync::getCurrrentBatchAndScheduleNext() +{ + std::lock_guard lock(mutex); + if 
(!is_initialized) + nextBatch(); + + if (current_batch_iterator != current_batch.end()) + { + auto temp_current_batch = current_batch; + nextBatch(); + return temp_current_batch; + } + + return std::nullopt; +} + size_t IObjectStorageIteratorAsync::getAccumulatedSize() const { return accumulated_size.load(std::memory_order_relaxed); diff --git a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h index a2b06da9a91..b0dd3cef39c 100644 --- a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h +++ b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h @@ -27,6 +27,7 @@ public: RelativePathWithMetadata current() override; RelativePathsWithMetadata currentBatch() override; size_t getAccumulatedSize() const override; + std::optional getCurrrentBatchAndScheduleNext() override; ~IObjectStorageIteratorAsync() override { @@ -48,7 +49,7 @@ protected: bool is_initialized{false}; bool is_finished{false}; - mutable std::mutex mutex; + mutable std::recursive_mutex mutex; ThreadPool list_objects_pool; ThreadPoolCallbackRunner list_objects_scheduler; std::future outcome_future; diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index b9d59f04001..91dc92f09e8 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -882,7 +882,6 @@ StorageAzureBlobSource::Iterator::Iterator( RelativePathWithMetadata StorageAzureBlobSource::Iterator::next() { - std::lock_guard lock(next_mutex); if (is_finished) return {}; @@ -901,6 +900,7 @@ RelativePathWithMetadata StorageAzureBlobSource::Iterator::next() { bool need_new_batch = false; { + std::lock_guard lock(next_mutex); need_new_batch = !blobs_with_metadata || index >= blobs_with_metadata->size(); } @@ -909,10 +909,10 @@ RelativePathWithMetadata StorageAzureBlobSource::Iterator::next() RelativePathsWithMetadata new_batch; while (new_batch.empty()) { - if (object_storage_iterator->isValid()) + auto result = object_storage_iterator->getCurrrentBatchAndScheduleNext(); + if (result.has_value()) { - new_batch = object_storage_iterator->currentBatch(); - object_storage_iterator->nextBatch(); + new_batch = result.value(); } else { @@ -945,6 +945,7 @@ RelativePathWithMetadata StorageAzureBlobSource::Iterator::next() VirtualColumnUtils::filterBlockWithQuery(query, block, getContext(), filter_ast); const auto & idxs = typeid_cast(*block.getByName("_idx").column); + std::lock_guard lock(next_mutex); blob_path_with_globs.reset(); blob_path_with_globs.emplace(); for (UInt64 idx : idxs.getData()) @@ -960,6 +961,7 @@ RelativePathWithMetadata StorageAzureBlobSource::Iterator::next() if (outer_blobs) outer_blobs->insert(outer_blobs->end(), new_batch.begin(), new_batch.end()); + std::lock_guard lock(next_mutex); blobs_with_metadata = std::move(new_batch); for (const auto & [_, info] : *blobs_with_metadata) total_size.fetch_add(info.size_bytes, std::memory_order_relaxed); @@ -967,6 +969,7 @@ RelativePathWithMetadata StorageAzureBlobSource::Iterator::next() } size_t current_index = index.fetch_add(1, std::memory_order_relaxed); + std::lock_guard lock(next_mutex); return (*blobs_with_metadata)[current_index]; } } From 23120c44679902163832c27d2a9787a502e45d50 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 21 Jun 2023 22:32:33 +0200 Subject: [PATCH 0624/1997] fix build --- src/IO/LZMADeflatingWriteBuffer.h | 2 ++ src/IO/Lz4DeflatingWriteBuffer.h | 2 ++ src/IO/ZlibDeflatingWriteBuffer.h | 2 ++ src/IO/ZstdDeflatingWriteBuffer.h | 2 ++ 4 files changed, 8 
insertions(+) diff --git a/src/IO/LZMADeflatingWriteBuffer.h b/src/IO/LZMADeflatingWriteBuffer.h index 5a0864d6071..2e135455e00 100644 --- a/src/IO/LZMADeflatingWriteBuffer.h +++ b/src/IO/LZMADeflatingWriteBuffer.h @@ -21,6 +21,8 @@ public: char * existing_memory = nullptr, size_t alignment = 0); + ~LZMADeflatingWriteBuffer() override; + private: void nextImpl() override; diff --git a/src/IO/Lz4DeflatingWriteBuffer.h b/src/IO/Lz4DeflatingWriteBuffer.h index a6440f8854f..68873b5f8ee 100644 --- a/src/IO/Lz4DeflatingWriteBuffer.h +++ b/src/IO/Lz4DeflatingWriteBuffer.h @@ -21,6 +21,8 @@ public: char * existing_memory = nullptr, size_t alignment = 0); + ~Lz4DeflatingWriteBuffer() override; + private: void nextImpl() override; diff --git a/src/IO/ZlibDeflatingWriteBuffer.h b/src/IO/ZlibDeflatingWriteBuffer.h index 05d6e528a23..58e709b54e6 100644 --- a/src/IO/ZlibDeflatingWriteBuffer.h +++ b/src/IO/ZlibDeflatingWriteBuffer.h @@ -24,6 +24,8 @@ public: char * existing_memory = nullptr, size_t alignment = 0); + ~ZlibDeflatingWriteBuffer() override; + private: void nextImpl() override; diff --git a/src/IO/ZstdDeflatingWriteBuffer.h b/src/IO/ZstdDeflatingWriteBuffer.h index 11e34e2890f..ba83c18d354 100644 --- a/src/IO/ZstdDeflatingWriteBuffer.h +++ b/src/IO/ZstdDeflatingWriteBuffer.h @@ -21,6 +21,8 @@ public: char * existing_memory = nullptr, size_t alignment = 0); + ~ZstdDeflatingWriteBuffer() override; + void sync() override { out->sync(); From 0af028ab3f19dc1dc3501d5e5b2dd0e9aa540fdc Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 21 Jun 2023 20:55:27 +0000 Subject: [PATCH 0625/1997] Refactor a bit more. --- src/Interpreters/ActionsVisitor.cpp | 12 +- src/Interpreters/GlobalSubqueriesVisitor.h | 9 +- src/Interpreters/InterpreterSelectQuery.cpp | 2 +- src/Interpreters/PreparedSets.cpp | 137 +++++++++++------ src/Interpreters/PreparedSets.h | 145 +++++++++--------- src/Planner/CollectSets.cpp | 28 +--- src/Planner/Planner.cpp | 8 +- src/Processors/QueryPlan/CreatingSetsStep.cpp | 26 ++-- src/Processors/QueryPlan/CreatingSetsStep.h | 16 +- .../Transforms/CreatingSetsTransform.cpp | 60 +++----- .../Transforms/CreatingSetsTransform.h | 8 +- src/QueryPipeline/QueryPipelineBuilder.cpp | 8 +- src/QueryPipeline/QueryPipelineBuilder.h | 8 +- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 4 +- 14 files changed, 232 insertions(+), 239 deletions(-) diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 9125b38b48c..7c3af4ce691 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -1406,7 +1406,7 @@ FutureSetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool if (data.getContext()->getSettingsRef().allow_experimental_analyzer && !identifier) { InterpreterSelectQueryAnalyzer interpreter(right_in_operand, data.getContext(), SelectQueryOptions().analyze(true).subquery()); - auto query_tree = interpreter.getQueryTree(); + const auto & query_tree = interpreter.getQueryTree(); if (auto * query_node = query_tree->as()) query_node->setIsSubquery(true); set_key = query_tree->getTreeHash(); @@ -1449,11 +1449,7 @@ FutureSetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool } } - /// We get the stream of blocks for the subquery. Create Set and put it in place of the subquery. 
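
For orientation, the mechanism those comments describe — execute the subquery once and materialise its rows into a Set that `x IN (...)` probes — looks roughly like this (illustrative toy types, not ClickHouse's Block/Set):

    #include <memory>
    #include <unordered_set>
    #include <vector>

    struct Block { std::vector<int> keys; };    /// stand-in for a chunk of subquery output

    struct Set
    {
        std::unordered_set<int> data;
        void insertFromBlock(const Block & block) { data.insert(block.keys.begin(), block.keys.end()); }
        bool contains(int key) const { return data.count(key) > 0; }
    };

    /// Run once per query; afterwards `x IN (subquery)` degenerates to set->contains(x).
    std::shared_ptr<Set> buildSetFromSubquery(const std::vector<Block> & subquery_result)
    {
        auto set = std::make_shared<Set>();
        for (const auto & block : subquery_result)
            set->insertFromBlock(block);
        return set;
    }

The refactor keeps that split but moves the bookkeeping: analysis only builds a QueryPlan and registers it via addFromSubquery(), and the rows are produced later, when CreatingSetsTransform runs the plan and feeds insertFromBlock().
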
- // String set_id = right_in_operand->getColumnName(); - //bool transform_null_in = data.getContext()->getSettingsRef().transform_null_in; - SubqueryForSet subquery_for_set; // = data.prepared_sets->createOrGetSubquery(set_id, set_key, data.set_size_limit, transform_null_in); - subquery_for_set.key = PreparedSets::toString(set_key, {}); //right_in_operand->getColumnName(); + std::unique_ptr source = std::make_unique(); /** The following happens for GLOBAL INs or INs: * - in the addExternalStorage function, the IN (SELECT ...) subquery is replaced with IN _data1, @@ -1465,10 +1461,10 @@ FutureSetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool */ { auto interpreter = interpretSubquery(right_in_operand, data.getContext(), data.subquery_depth, {}); - subquery_for_set.createSource(*interpreter); + interpreter->buildQueryPlan(*source); } - return data.prepared_sets->addFromSubquery(set_key, std::move(subquery_for_set), data.getContext()->getSettingsRef(), std::move(external_table_set)); + return data.prepared_sets->addFromSubquery(set_key, std::move(source), nullptr, std::move(external_table_set), data.getContext()->getSettingsRef()); } else { diff --git a/src/Interpreters/GlobalSubqueriesVisitor.h b/src/Interpreters/GlobalSubqueriesVisitor.h index 5b8f0c60d35..8c784d3c2ff 100644 --- a/src/Interpreters/GlobalSubqueriesVisitor.h +++ b/src/Interpreters/GlobalSubqueriesVisitor.h @@ -188,20 +188,17 @@ public: { // auto & subquery_for_set = prepared_sets->getSubquery(external_table_name); // subquery_for_set.createSource(*interpreter, external_storage); - auto key = subquery_or_table_name->getColumnName(); auto set_key = database_and_table_name->getTreeHash(); // std::cerr << "====== Adding key " << set_key.toString() << std::endl; if (!prepared_sets->findSubquery(set_key)) { - SubqueryForSet subquery_for_set; - subquery_for_set.key = std::move(key); - subquery_for_set.table = std::move(external_storage); - subquery_for_set.createSource(*interpreter); + std::unique_ptr source = std::make_unique(); + interpreter->buildQueryPlan(*source); //std::cerr << reinterpret_cast(prepared_sets.get()) << std::endl; - auto future_set = prepared_sets->addFromSubquery(set_key, std::move(subquery_for_set), getContext()->getSettingsRef(), nullptr); + auto future_set = prepared_sets->addFromSubquery(set_key, std::move(source), std::move(external_storage), nullptr, getContext()->getSettingsRef()); // std::cerr << "... 
Future set " << reinterpret_cast(external_storage_holder.get()) << " " << reinterpret_cast(future_set.get()) << std::endl; external_storage_holder->future_set = std::move(future_set); } diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 52238ad5932..01736731f2e 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -3151,7 +3151,7 @@ void InterpreterSelectQuery::executeExtremes(QueryPlan & query_plan) void InterpreterSelectQuery::executeSubqueriesInSetsAndJoins(QueryPlan & query_plan) { - auto subqueries = prepared_sets->detachSubqueries(); + auto subqueries = prepared_sets->getSubqueries(); if (!subqueries.empty()) { diff --git a/src/Interpreters/PreparedSets.cpp b/src/Interpreters/PreparedSets.cpp index 03c18730cc5..2aa3e02c713 100644 --- a/src/Interpreters/PreparedSets.cpp +++ b/src/Interpreters/PreparedSets.cpp @@ -200,17 +200,42 @@ FutureSetPtr PreparedSets::addFromStorage(const Hash & key, SetPtr set_) return from_storage; } -FutureSetPtr PreparedSets::addFromSubquery(const Hash & key, SubqueryForSet subquery, const Settings & settings, FutureSetPtr external_table_set) +FutureSetPtr PreparedSets::addFromSubquery( + const Hash & key, + std::unique_ptr source, + StoragePtr external_table, + FutureSetPtr external_table_set, + const Settings & settings) { - auto from_subquery = std::make_shared(std::move(subquery), std::move(external_table_set), settings); + auto from_subquery = std::make_shared( + toString(key, {}), + std::move(source), + std::move(external_table), + std::move(external_table_set), + settings); + auto [it, inserted] = sets_from_subqueries.emplace(key, from_subquery); if (!inserted) throw Exception(ErrorCodes::LOGICAL_ERROR, "Duplicate set: {}", toString(key, {})); - // std::cerr << key.toString() << std::endl; - // std::cerr << "========= PreparedSets::addFromSubquery\n"; - // std::cerr << StackTrace().toString() << std::endl; + return from_subquery; +} + +FutureSetPtr PreparedSets::addFromSubquery( + const Hash & key, + QueryTreeNodePtr query_tree, + const Settings & settings) +{ + auto from_subquery = std::make_shared( + toString(key, {}), + std::move(query_tree), + settings); + + auto [it, inserted] = sets_from_subqueries.emplace(key, from_subquery); + + if (!inserted) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Duplicate set: {}", toString(key, {})); return from_subquery; } @@ -269,7 +294,7 @@ std::shared_ptr PreparedSets::findStorage(const Hash & key // return res; // } -std::vector> PreparedSets::detachSubqueries() +std::vector> PreparedSets::getSubqueries() { std::vector> res; res.reserve(sets_from_subqueries.size()); @@ -279,25 +304,25 @@ std::vector> PreparedSets::detachSubqueri return res; } -void SubqueryForSet::createSource(InterpreterSelectWithUnionQuery & interpreter, StoragePtr table_) -{ - source = std::make_unique(); - interpreter.buildQueryPlan(*source); - if (table_) - table = table_; -} +// void SubqueryForSet::createSource(InterpreterSelectWithUnionQuery & interpreter, StoragePtr table_) +// { +// source = std::make_unique(); +// interpreter.buildQueryPlan(*source); +// if (table_) +// table = table_; +// } -bool SubqueryForSet::hasSource() const -{ - return source != nullptr; -} +// bool SubqueryForSet::hasSource() const +// { +// return source != nullptr; +// } -QueryPlanPtr SubqueryForSet::detachSource() -{ - auto res = std::move(source); - source = nullptr; - return res; -} +// QueryPlanPtr SubqueryForSet::detachSource() +// { +// 
auto res = std::move(source); +// source = nullptr; +// return res; +// } std::variant, SharedSet> PreparedSetsCache::findOrPromiseToBuild(const String & key) @@ -339,15 +364,15 @@ SetPtr FutureSetFromSubquery::buildOrderedSetInplace(const ContextPtr & context) { auto set = external_table_set->buildOrderedSetInplace(context); if (set) - return subquery.set = set; + return set_and_key->set = set; } auto plan = buildPlan(context); if (!plan) return nullptr; - subquery.set->fillSetElements(); - subquery.set->initSetElements(); + set_and_key->set->fillSetElements(); + set_and_key->set->initSetElements(); auto builder = plan->buildQueryPipeline(QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context)); auto pipeline = QueryPipelineBuilder::getPipeline(std::move(*builder)); pipeline.complete(std::make_shared(Block())); @@ -355,15 +380,15 @@ SetPtr FutureSetFromSubquery::buildOrderedSetInplace(const ContextPtr & context) CompletedPipelineExecutor executor(pipeline); executor.execute(); - subquery.set->checkIsCreated(); + set_and_key->set->checkIsCreated(); - return subquery.set; + return set_and_key->set; } SetPtr FutureSetFromSubquery::get() const { - if (subquery.set != nullptr && subquery.set->isCreated()) - return subquery.set; + if (set_and_key->set != nullptr && set_and_key->set->isCreated()) + return set_and_key->set; return nullptr; } @@ -380,22 +405,20 @@ static SizeLimits getSizeLimitsForSet(const Settings & settings) std::unique_ptr FutureSetFromSubquery::buildPlan(const ContextPtr & context) { - if (subquery.set->isCreated()) + if (set_and_key->set->isCreated()) return nullptr; const auto & settings = context->getSettingsRef(); - auto plan = subquery.detachSource(); - auto description = subquery.key; + auto plan = std::move(source); if (!plan) return nullptr; auto creating_set = std::make_unique( plan->getCurrentDataStream(), - description, - subquery, - shared_from_this(), + set_and_key, + external_table, SizeLimits(settings.max_rows_to_transfer, settings.max_bytes_to_transfer, settings.transfer_overflow_mode), context); creating_set->setStepDescription("Create set for subquery"); @@ -421,25 +444,49 @@ FutureSetFromTuple::FutureSetFromTuple(Block block, const Settings & settings) set->finishInsert(); } -FutureSetFromSubquery::FutureSetFromSubquery(SubqueryForSet subquery_, FutureSetPtr external_table_set_, const Settings & settings) - : subquery(std::move(subquery_)), external_table_set(std::move(external_table_set_)) +FutureSetFromSubquery::FutureSetFromSubquery( + String key, + std::unique_ptr source_, + StoragePtr external_table_, + FutureSetPtr external_table_set_, + const Settings & settings) + : external_table(std::move(external_table_)) + , external_table_set(std::move(external_table_set_)) + , source(std::move(source_)) { + set_and_key = std::make_shared(); + set_and_key->key = std::move(key); + bool create_ordered_set = false; auto size_limits = getSizeLimitsForSet(settings); - subquery.set = std::make_shared(size_limits, create_ordered_set, settings.use_index_for_in_with_subqueries_max_values, settings.transform_null_in); - if (subquery.source) - subquery.set->setHeader(subquery.source->getCurrentDataStream().header.getColumnsWithTypeAndName()); + set_and_key->set = std::make_shared(size_limits, create_ordered_set, settings.use_index_for_in_with_subqueries_max_values, settings.transform_null_in); + set_and_key->set->setHeader(source->getCurrentDataStream().header.getColumnsWithTypeAndName()); } -void 
FutureSetFromSubquery::setQueryPlan(std::unique_ptr source) +FutureSetFromSubquery::FutureSetFromSubquery( + String key, + QueryTreeNodePtr query_tree_, + //FutureSetPtr external_table_set_, + const Settings & settings) + : query_tree(std::move(query_tree_)) { - subquery.source = std::move(source); - subquery.set->setHeader(subquery.source->getCurrentDataStream().header.getColumnsWithTypeAndName()); + set_and_key = std::make_shared(); + set_and_key->key = std::move(key); + + bool create_ordered_set = false; + auto size_limits = getSizeLimitsForSet(settings); + set_and_key->set = std::make_shared(size_limits, create_ordered_set, settings.use_index_for_in_with_subqueries_max_values, settings.transform_null_in); +} + +void FutureSetFromSubquery::setQueryPlan(std::unique_ptr source_) +{ + source = std::move(source_); + set_and_key->set->setHeader(source->getCurrentDataStream().header.getColumnsWithTypeAndName()); } const DataTypes & FutureSetFromSubquery::getTypes() const { - return subquery.set->getElementsTypes(); + return set_and_key->set->getElementsTypes(); } SetPtr FutureSetFromTuple::buildOrderedSetInplace(const ContextPtr & context) diff --git a/src/Interpreters/PreparedSets.h b/src/Interpreters/PreparedSets.h index 49545ad1e2a..339f4a6b435 100644 --- a/src/Interpreters/PreparedSets.h +++ b/src/Interpreters/PreparedSets.h @@ -24,6 +24,16 @@ using QueryTreeNodePtr = std::shared_ptr; struct Settings; +/// This is a structure for prepared sets cache. +/// SetPtr can be taken from cache, so we should pass holder for it. +struct SetAndKey +{ + String key; + SetPtr set; +}; + +using SetAndKeyPtr = std::shared_ptr; + /// Represents a set in a query that might be referenced at analysis time and built later during execution. /// Also it can represent a constant set that is ready to use. /// At analysis stage the FutureSets are created but not necessarily filled. Then for non-constant sets there @@ -35,22 +45,20 @@ public: virtual ~FutureSet() = default; virtual SetPtr get() const = 0; - virtual SetPtr buildOrderedSetInplace(const ContextPtr & context) = 0; - virtual const DataTypes & getTypes() const = 0; + virtual SetPtr buildOrderedSetInplace(const ContextPtr & context) = 0; }; using FutureSetPtr = std::shared_ptr; - class FutureSetFromStorage : public FutureSet { public: FutureSetFromStorage(SetPtr set_); SetPtr get() const override; - SetPtr buildOrderedSetInplace(const ContextPtr &) override; const DataTypes & getTypes() const override; + SetPtr buildOrderedSetInplace(const ContextPtr &) override; private: SetPtr set; @@ -58,80 +66,67 @@ private: /// Information on how to build set for the [GLOBAL] IN section. -class SubqueryForSet +// class SubqueryForSet +// { +// public: + +// void createSource(InterpreterSelectWithUnionQuery & interpreter, StoragePtr table_ = nullptr); + +// bool hasSource() const; + +// /// Returns query plan for the set's source +// /// and removes it from SubqueryForSet because we need to build it only once. +// std::unique_ptr detachSource(); + +// /// Build this set from the result of the subquery. +// String key; +// SetPtr set; + +// /// If set, put the result into the table. +// /// This is a temporary table for transferring to remote servers for distributed query processing. +// StoragePtr table; + +// /// The source is obtained using the InterpreterSelectQuery subquery. 
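
The comment on SetAndKey above is the crux of the refactor: instead of a step and a transform both referencing a SubqueryForSet owned elsewhere, they now share one heap-allocated holder, so a Set taken from the prepared-sets cache can be swapped into set_and_key->set and becomes visible to every owner of the same shared_ptr. The ownership pattern, compressed (types simplified for illustration):

    #include <memory>
    #include <string>
    #include <utility>

    struct Set { bool created = false; };
    using SetPtr = std::shared_ptr<Set>;

    struct SetAndKey
    {
        std::string key;
        SetPtr set;
    };
    using SetAndKeyPtr = std::shared_ptr<SetAndKey>;

    struct CreatingSetsTransformSketch
    {
        SetAndKeyPtr set_and_key;   /// same holder that FutureSetFromSubquery keeps

        void onCacheHit(SetPtr ready_set)
        {
            /// Swapping the SetPtr inside the shared holder is enough:
            /// FutureSetFromSubquery::get() reads set_and_key->set and
            /// immediately observes the cached set.
            set_and_key->set = std::move(ready_set);
        }
    };
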
+// std::unique_ptr source; +// QueryTreeNodePtr query_tree; +// }; + +class FutureSetFromSubquery : public FutureSet { public: + FutureSetFromSubquery( + String key, + std::unique_ptr source_, + StoragePtr external_table_, + FutureSetPtr external_table_set_, + const Settings & settings); - void createSource(InterpreterSelectWithUnionQuery & interpreter, StoragePtr table_ = nullptr); - - bool hasSource() const; - - /// Returns query plan for the set's source - /// and removes it from SubqueryForSet because we need to build it only once. - std::unique_ptr detachSource(); - - /// Build this set from the result of the subquery. - String key; - SetPtr set; - - /// If set, put the result into the table. - /// This is a temporary table for transferring to remote servers for distributed query processing. - StoragePtr table; - - /// The source is obtained using the InterpreterSelectQuery subquery. - std::unique_ptr source; - QueryTreeNodePtr query_tree; -}; - -class FutureSetFromSubquery : public FutureSet, public std::enable_shared_from_this -{ -public: - FutureSetFromSubquery(SubqueryForSet subquery_, FutureSetPtr external_table_set_, const Settings & settings); + FutureSetFromSubquery( + String key, + QueryTreeNodePtr query_tree_, + //FutureSetPtr external_table_set_, + const Settings & settings); SetPtr get() const override; - + const DataTypes & getTypes() const override; SetPtr buildOrderedSetInplace(const ContextPtr & context) override; std::unique_ptr build(const ContextPtr & context); - const DataTypes & getTypes() const override; - - SubqueryForSet & getSubquery() { return subquery; } - void setQueryPlan(std::unique_ptr source); + QueryTreeNodePtr detachQueryTree() { return std::move(query_tree); } + void setQueryPlan(std::unique_ptr source_); private: - SubqueryForSet subquery; + SetAndKeyPtr set_and_key; + StoragePtr external_table; FutureSetPtr external_table_set; + std::unique_ptr source; + QueryTreeNodePtr query_tree; + std::unique_ptr buildPlan(const ContextPtr & context); }; -// struct PreparedSetKey -// { -// using Hash = std::pair; - -// /// Prepared sets for tuple literals are indexed by the hash of the tree contents and by the desired -// /// data types of set elements (two different Sets can be required for two tuples with the same contents -// /// if left hand sides of the IN operators have different types). -// static PreparedSetKey forLiteral(Hash hash, DataTypes types_); - -// /// Prepared sets for subqueries are indexed only by the AST contents because the type of the resulting -// /// set is fully determined by the subquery. -// static PreparedSetKey forSubquery(Hash hash); - -// Hash ast_hash; -// DataTypes types; /// Empty for subqueries. 
- -// bool operator==(const PreparedSetKey & other) const; - -// String toString() const; - -// struct Hashing -// { -// UInt64 operator()(const PreparedSetKey & key) const { return key.ast_hash.first; } -// }; -// }; - class PreparedSets { public: @@ -142,30 +137,30 @@ public: UInt64 operator()(const Hash & key) const { return key.first ^ key.second; } }; - // struct SetAndName - // { - // String name; - // std::shared_ptr set; - // }; - using SetsFromTuple = std::unordered_map>, Hashing>; using SetsFromStorage = std::unordered_map, Hashing>; using SetsFromSubqueries = std::unordered_map, Hashing>; FutureSetPtr addFromStorage(const Hash & key, SetPtr set_); FutureSetPtr addFromTuple(const Hash & key, Block block, const Settings & settings); - FutureSetPtr addFromSubquery(const Hash & key, SubqueryForSet subquery, const Settings & settings, FutureSetPtr external_table_set); + + FutureSetPtr addFromSubquery( + const Hash & key, + std::unique_ptr source, + StoragePtr external_table, + FutureSetPtr external_table_set, + const Settings & settings); + + FutureSetPtr addFromSubquery( + const Hash & key, + QueryTreeNodePtr query_tree, + const Settings & settings); FutureSetPtr findTuple(const Hash & key, const DataTypes & types) const; std::shared_ptr findStorage(const Hash & key) const; std::shared_ptr findSubquery(const Hash & key) const; - //FutureSetPtr getFuture(const PreparedSetKey & key) const; - - /// Get subqueries and clear them. - /// We need to build a plan for subqueries just once. That's why we can clear them after accessing them. - /// SetPtr would still be available for consumers of PreparedSets. - std::vector> detachSubqueries(); + std::vector> getSubqueries(); const SetsFromTuple & getSetsFromTuple() const { return sets_from_tuple; } const SetsFromStorage & getSetsFromStorage() const { return sets_from_storage; } diff --git a/src/Planner/CollectSets.cpp b/src/Planner/CollectSets.cpp index b0e6b272c9f..df3558745fb 100644 --- a/src/Planner/CollectSets.cpp +++ b/src/Planner/CollectSets.cpp @@ -46,12 +46,6 @@ public: auto in_second_argument_node_type = in_second_argument->getNodeType(); const auto & settings = planner_context.getQueryContext()->getSettingsRef(); - - // String set_key = planner_context.createSetKey(in_second_argument); - - // if (planner_context.hasSet(set_key)) - // return; - auto & sets = planner_context.getPreparedSets(); /// Tables and table functions are replaced with subquery at Analysis stage, except special Set table. 
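
In the analyzer path the registration is deliberately two-phase: CollectSets records only the query tree, and the Planner later builds a concrete plan per subquery and hands it over with setQueryPlan(). Condensed from the CollectSets.cpp and Planner.cpp hunks of this commit (the template argument of make_unique, lost in extraction, is assumed to be QueryPlan):

    /// Phase 1, analysis (CollectSets.cpp): no plan yet, just the tree.
    ///     sets.addFromSubquery(set_key, std::move(subquery_to_execute), settings);

    /// Phase 2, planning (Planner.cpp): build and attach one plan per subquery.
    for (auto & subquery : planner_context->getPreparedSets().getSubqueries())
    {
        auto query_tree = subquery->detachQueryTree();
        Planner subquery_planner(query_tree, select_query_options.subquery(),
                                 planner_context->getGlobalPlannerContext());
        subquery_planner.buildQueryPlanIfNeeded();
        subquery->setQueryPlan(
            std::make_unique<QueryPlan>(std::move(subquery_planner).extractQueryPlan()));
    }
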
@@ -129,27 +123,7 @@ public: subquery_to_execute = std::move(subquery_for_table); } - // auto subquery_options = select_query_options.subquery(); - // Planner subquery_planner( - // in_second_argument, - // subquery_options, - // planner_context.getGlobalPlannerContext()); - // subquery_planner.buildQueryPlanIfNeeded(); - - // const auto & settings = planner_context.getQueryContext()->getSettingsRef(); - // SizeLimits size_limits_for_set = {settings.max_rows_in_set, settings.max_bytes_in_set, settings.set_overflow_mode}; - // bool tranform_null_in = settings.transform_null_in; - // auto set = std::make_shared(size_limits_for_set, false /*fill_set_elements*/, tranform_null_in); - - SubqueryForSet subquery_for_set; - subquery_for_set.key = planner_context.createSetKey(in_second_argument); - subquery_for_set.query_tree = std::move(subquery_to_execute); - //subquery_for_set.source = std::make_unique(std::move(subquery_planner).extractQueryPlan()); - - /// TODO - sets.addFromSubquery(set_key, std::move(subquery_for_set), settings, nullptr); - - //planner_context.registerSet(set_key, PlannerSet(in_second_argument)); + sets.addFromSubquery(set_key, std::move(subquery_to_execute), settings); } else { diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index e7c10f6ef7a..d524f2bed98 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -930,7 +930,7 @@ void addBuildSubqueriesForSetsStepIfNeeded( const PlannerContextPtr & planner_context, const std::vector & result_actions_to_execute) { - auto subqueries = planner_context->getPreparedSets().detachSubqueries(); + auto subqueries = planner_context->getPreparedSets().getSubqueries(); std::unordered_set useful_sets; //PreparedSets::SubqueriesForSets subqueries_for_sets; @@ -944,10 +944,10 @@ void addBuildSubqueriesForSetsStepIfNeeded( for (auto & subquery : subqueries) { - auto & subquery_for_set = subquery->getSubquery(); + auto query_tree = subquery->detachQueryTree(); auto subquery_options = select_query_options.subquery(); Planner subquery_planner( - subquery_for_set.query_tree, + query_tree, subquery_options, planner_context->getGlobalPlannerContext()); subquery_planner.buildQueryPlanIfNeeded(); @@ -955,8 +955,6 @@ void addBuildSubqueriesForSetsStepIfNeeded( subquery->setQueryPlan(std::make_unique(std::move(subquery_planner).extractQueryPlan())); } - //addCreatingSetsStep(query_plan, std::move(subqueries_for_sets), planner_context->getQueryContext()); - if (!subqueries.empty()) { auto step = std::make_unique( diff --git a/src/Processors/QueryPlan/CreatingSetsStep.cpp b/src/Processors/QueryPlan/CreatingSetsStep.cpp index 54d81dd7650..06099eef872 100644 --- a/src/Processors/QueryPlan/CreatingSetsStep.cpp +++ b/src/Processors/QueryPlan/CreatingSetsStep.cpp @@ -36,15 +36,13 @@ static ITransformingStep::Traits getTraits() CreatingSetStep::CreatingSetStep( const DataStream & input_stream_, - String description_, - SubqueryForSet & subquery_for_set_, - FutureSetPtr set_, + SetAndKeyPtr set_and_key_, + StoragePtr external_table_, SizeLimits network_transfer_limits_, ContextPtr context_) : ITransformingStep(input_stream_, Block{}, getTraits()) - , description(std::move(description_)) - , subquery_for_set(subquery_for_set_) - , set(std::move(set_)) + , set_and_key(std::move(set_and_key_)) + , external_table(std::move(external_table_)) , network_transfer_limits(std::move(network_transfer_limits_)) , context(std::move(context_)) { @@ -52,7 +50,7 @@ CreatingSetStep::CreatingSetStep( void 
CreatingSetStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { - pipeline.addCreatingSetsTransform(getOutputStream().header, subquery_for_set, std::move(set), network_transfer_limits, context->getPreparedSetsCache()); + pipeline.addCreatingSetsTransform(getOutputStream().header, std::move(set_and_key), std::move(external_table), network_transfer_limits, context->getPreparedSetsCache()); } void CreatingSetStep::updateOutputStream() @@ -65,16 +63,16 @@ void CreatingSetStep::describeActions(FormatSettings & settings) const String prefix(settings.offset, ' '); settings.out << prefix; - if (subquery_for_set.set) + if (set_and_key->set) settings.out << "Set: "; - settings.out << description << '\n'; + settings.out << set_and_key->key << '\n'; } void CreatingSetStep::describeActions(JSONBuilder::JSONMap & map) const { - if (subquery_for_set.set) - map.add("Set", description); + if (set_and_key->set) + map.add("Set", set_and_key->key); } @@ -126,7 +124,7 @@ void CreatingSetsStep::describePipeline(FormatSettings & settings) const IQueryPlanStep::describePipeline(processors, settings); } -void addCreatingSetsStep(QueryPlan & query_plan, std::vector> sets_from_subqueries, ContextPtr context) +void addCreatingSetsStep(QueryPlan & query_plan, std::vector> sets_from_subquery, ContextPtr context) { DataStreams input_streams; input_streams.emplace_back(query_plan.getCurrentDataStream()); @@ -135,7 +133,7 @@ void addCreatingSetsStep(QueryPlan & query_plan, std::vector(std::move(query_plan))); query_plan = QueryPlan(); - for (auto & future_set : sets_from_subqueries) + for (auto & future_set : sets_from_subquery) { if (future_set->get()) continue; @@ -193,7 +191,7 @@ void addCreatingSetsStep(QueryPlan & query_plan, PreparedSetsPtr prepared_sets, if (!prepared_sets) return; - auto subqueries = prepared_sets->detachSubqueries(); + auto subqueries = prepared_sets->getSubqueries(); if (subqueries.empty()) return; diff --git a/src/Processors/QueryPlan/CreatingSetsStep.h b/src/Processors/QueryPlan/CreatingSetsStep.h index c41bb782e0b..7110775da79 100644 --- a/src/Processors/QueryPlan/CreatingSetsStep.h +++ b/src/Processors/QueryPlan/CreatingSetsStep.h @@ -13,12 +13,11 @@ class CreatingSetStep : public ITransformingStep { public: CreatingSetStep( - const DataStream & input_stream_, - String description_, - SubqueryForSet & subquery_for_set_, - FutureSetPtr set_, - SizeLimits network_transfer_limits_, - ContextPtr context_); + const DataStream & input_stream_, + SetAndKeyPtr set_and_key_, + StoragePtr external_table_, + SizeLimits network_transfer_limits_, + ContextPtr context_); String getName() const override { return "CreatingSet"; } @@ -30,9 +29,8 @@ public: private: void updateOutputStream() override; - String description; - SubqueryForSet & subquery_for_set; - FutureSetPtr set; + SetAndKeyPtr set_and_key; + StoragePtr external_table; SizeLimits network_transfer_limits; ContextPtr context; }; diff --git a/src/Processors/Transforms/CreatingSetsTransform.cpp b/src/Processors/Transforms/CreatingSetsTransform.cpp index aa374446f3e..afce1355f7a 100644 --- a/src/Processors/Transforms/CreatingSetsTransform.cpp +++ b/src/Processors/Transforms/CreatingSetsTransform.cpp @@ -25,13 +25,13 @@ CreatingSetsTransform::~CreatingSetsTransform() = default; CreatingSetsTransform::CreatingSetsTransform( Block in_header_, Block out_header_, - SubqueryForSet & subquery_for_set_, - FutureSetPtr set_, + SetAndKeyPtr set_and_key_, + StoragePtr external_table_, SizeLimits 
network_transfer_limits_, PreparedSetsCachePtr prepared_sets_cache_) : IAccumulatingTransform(std::move(in_header_), std::move(out_header_)) - , subquery(subquery_for_set_) - , set(std::move(set_)) + , set_and_key(std::move(set_and_key_)) + , external_table(std::move(external_table_)) , network_transfer_limits(std::move(network_transfer_limits_)) , prepared_sets_cache(std::move(prepared_sets_cache_)) { @@ -54,31 +54,30 @@ void CreatingSetsTransform::work() void CreatingSetsTransform::startSubquery() { /// Lookup the set in the cache if we don't need to build table. - if (prepared_sets_cache && !subquery.table) + if (prepared_sets_cache && !external_table) { /// Try to find the set in the cache and wait for it to be built. /// Retry if the set from cache fails to be built. while (true) { - auto from_cache = prepared_sets_cache->findOrPromiseToBuild(subquery.key); + auto from_cache = prepared_sets_cache->findOrPromiseToBuild(set_and_key->key); if (from_cache.index() == 0) { - LOG_TRACE(log, "Building set, key: {}", subquery.key); + LOG_TRACE(log, "Building set, key: {}", set_and_key->key); promise_to_build = std::move(std::get<0>(from_cache)); } else { - LOG_TRACE(log, "Waiting for set to be build by another thread, key: {}", subquery.key); + LOG_TRACE(log, "Waiting for set to be build by another thread, key: {}", set_and_key->key); SharedSet set_built_by_another_thread = std::move(std::get<1>(from_cache)); const SetPtr & ready_set = set_built_by_another_thread.get(); if (!ready_set) { - LOG_TRACE(log, "Failed to use set from cache, key: {}", subquery.key); + LOG_TRACE(log, "Failed to use set from cache, key: {}", set_and_key->key); continue; } - //subquery.promise_to_fill_set.set_value(ready_set); - subquery.set = ready_set; //.reset(); + set_and_key->set = ready_set; done_with_set = true; set_from_cache = true; } @@ -86,21 +85,19 @@ void CreatingSetsTransform::startSubquery() } } - if (subquery.set && !set_from_cache) - LOG_TRACE(log, "Creating set, key: {}", subquery.key); - if (subquery.table) + if (set_and_key->set && !set_from_cache) + LOG_TRACE(log, "Creating set, key: {}", set_and_key->key); + if (external_table) LOG_TRACE(log, "Filling temporary table."); - // std::cerr << StackTrace().toString() << std::endl; - - if (subquery.table) + if (external_table) /// TODO: make via port - table_out = QueryPipeline(subquery.table->write({}, subquery.table->getInMemoryMetadataPtr(), nullptr, /*async_insert=*/false)); + table_out = QueryPipeline(external_table->write({}, external_table->getInMemoryMetadataPtr(), nullptr, /*async_insert=*/false)); - done_with_set = !subquery.set || set_from_cache; - done_with_table = !subquery.table; + done_with_set = !set_and_key->set || set_from_cache; + done_with_table = !external_table; - if ((done_with_set && !set_from_cache) /*&& done_with_join*/ && done_with_table) + if ((done_with_set && !set_from_cache) && done_with_table) throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: nothing to do with subquery"); if (table_out.initialized()) @@ -120,9 +117,9 @@ void CreatingSetsTransform::finishSubquery() } else if (read_rows != 0) { - if (subquery.set) - LOG_DEBUG(log, "Created Set with {} entries from {} rows in {} sec.", subquery.set->getTotalRowCount(), read_rows, seconds); - if (subquery.table) + if (set_and_key->set) + LOG_DEBUG(log, "Created Set with {} entries from {} rows in {} sec.", set_and_key->set->getTotalRowCount(), read_rows, seconds); + if (external_table) LOG_DEBUG(log, "Created Table with {} rows in {} sec.", read_rows, seconds); } 
else @@ -135,12 +132,6 @@ void CreatingSetsTransform::init() { is_initialized = true; - // if (subquery.set) - // { - // //std::cerr << "=========== " << getInputPort().getHeader().dumpStructure() << std::endl; - // subquery.set->setHeader(getInputPort().getHeader().getColumnsWithTypeAndName()); - // } - watch.restart(); startSubquery(); } @@ -152,7 +143,7 @@ void CreatingSetsTransform::consume(Chunk chunk) if (!done_with_set) { - if (!subquery.set->insertFromBlock(block.getColumnsWithTypeAndName())) + if (!set_and_key->set->insertFromBlock(block.getColumnsWithTypeAndName())) done_with_set = true; } @@ -175,12 +166,11 @@ void CreatingSetsTransform::consume(Chunk chunk) Chunk CreatingSetsTransform::generate() { - if (subquery.set && !set_from_cache) + if (set_and_key->set && !set_from_cache) { - subquery.set->finishInsert(); - //subquery.promise_to_fill_set.set_value(subquery.set); + set_and_key->set->finishInsert(); if (promise_to_build) - promise_to_build->set_value(subquery.set); + promise_to_build->set_value(set_and_key->set); } if (table_out.initialized()) diff --git a/src/Processors/Transforms/CreatingSetsTransform.h b/src/Processors/Transforms/CreatingSetsTransform.h index 27c330bdbc3..d1ec7dcbca7 100644 --- a/src/Processors/Transforms/CreatingSetsTransform.h +++ b/src/Processors/Transforms/CreatingSetsTransform.h @@ -29,8 +29,8 @@ public: CreatingSetsTransform( Block in_header_, Block out_header_, - SubqueryForSet & subquery_for_set_, - FutureSetPtr set_, + SetAndKeyPtr set_and_key_, + StoragePtr external_table_, SizeLimits network_transfer_limits_, PreparedSetsCachePtr prepared_sets_cache_); @@ -43,8 +43,8 @@ public: Chunk generate() override; private: - SubqueryForSet & subquery; - FutureSetPtr set; + SetAndKeyPtr set_and_key; + StoragePtr external_table; std::optional> promise_to_build; QueryPipeline table_out; diff --git a/src/QueryPipeline/QueryPipelineBuilder.cpp b/src/QueryPipeline/QueryPipelineBuilder.cpp index dca185cec8b..dedf85e409c 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.cpp +++ b/src/QueryPipeline/QueryPipelineBuilder.cpp @@ -571,8 +571,8 @@ std::unique_ptr QueryPipelineBuilder::joinPipelinesRightLe void QueryPipelineBuilder::addCreatingSetsTransform( const Block & res_header, - SubqueryForSet & subquery_for_set, - FutureSetPtr set, + SetAndKeyPtr set_and_key, + StoragePtr external_table, const SizeLimits & limits, PreparedSetsCachePtr prepared_sets_cache) { @@ -581,8 +581,8 @@ void QueryPipelineBuilder::addCreatingSetsTransform( auto transform = std::make_shared( getHeader(), res_header, - subquery_for_set, - std::move(set), + std::move(set_and_key), + std::move(external_table), limits, std::move(prepared_sets_cache)); diff --git a/src/QueryPipeline/QueryPipelineBuilder.h b/src/QueryPipeline/QueryPipelineBuilder.h index 134de59520d..e744e3612ce 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.h +++ b/src/QueryPipeline/QueryPipelineBuilder.h @@ -33,8 +33,8 @@ class TableJoin; class QueryPipelineBuilder; using QueryPipelineBuilderPtr = std::unique_ptr; -class FutureSet; -using FutureSetPtr = std::shared_ptr; +struct SetAndKey; +using SetAndKeyPtr = std::shared_ptr; class PreparedSetsCache; using PreparedSetsCachePtr = std::shared_ptr; @@ -146,8 +146,8 @@ public: void addCreatingSetsTransform( const Block & res_header, - SubqueryForSet & subquery_for_set, - FutureSetPtr set, + SetAndKeyPtr set_and_key, + StoragePtr external_table, const SizeLimits & limits, PreparedSetsCachePtr prepared_sets_cache); diff --git 
a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
index c3cf146e88e..125fe98203f 100644
--- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
@@ -978,7 +978,7 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd
         if (ranges.ranges.empty())
             break;

-        auto & index_and_condition = skip_indexes.useful_indices[idx];
+        const auto & index_and_condition = skip_indexes.useful_indices[idx];
         auto & stat = useful_indices_stat[idx];
         stat.total_parts.fetch_add(1, std::memory_order_relaxed);
         stat.total_granules.fetch_add(ranges.ranges.getNumberOfMarks(), std::memory_order_relaxed);
@@ -1006,7 +1006,7 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd
         if (ranges.ranges.empty())
             break;

-        auto & indices_and_condition = skip_indexes.merged_indices[idx];
+        const auto & indices_and_condition = skip_indexes.merged_indices[idx];
         auto & stat = merged_indices_stat[idx];
         stat.total_parts.fetch_add(1, std::memory_order_relaxed);

From ad0677147f6b99de4d5b4ed2b96bb9a5fdc2059f Mon Sep 17 00:00:00 2001
From: Igor Nikonov
Date: Wed, 21 Jun 2023 21:31:54 +0000
Subject: [PATCH 0626/1997] Trying to slightly improve eviction path + less
 copying of shared pointers + less indirection in overflow check

---
 src/Interpreters/Cache/FileCache.cpp        | 40 ++++++++++++---------
 src/Interpreters/Cache/IFileCachePriority.h |  2 +-
 2 files changed, 25 insertions(+), 17 deletions(-)

diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp
index 710d08a01cd..3c5cf555ac9 100644
--- a/src/Interpreters/Cache/FileCache.cpp
+++ b/src/Interpreters/Cache/FileCache.cpp
@@ -615,9 +615,9 @@ bool FileCache::tryReserve(FileSegment & file_segment, const size_t size)
     struct EvictionCandidates
     {
-        explicit EvictionCandidates(KeyMetadataPtr key_metadata_) : key_metadata(key_metadata_) {}
+        explicit EvictionCandidates(const KeyMetadataPtr & key_metadata_) : key_metadata(key_metadata_) {}

-        void add(FileSegmentMetadataPtr candidate)
+        void add(const FileSegmentMetadataPtr & candidate)
         {
             candidate->removal_candidate = true;
             candidates.push_back(candidate);
@@ -638,7 +638,7 @@ bool FileCache::tryReserve(FileSegment & file_segment, const size_t size)
     std::unordered_map to_delete;
     size_t freeable_space = 0, freeable_count = 0;

-    auto iterate_func = [&](LockedKey & locked_key, FileSegmentMetadataPtr segment_metadata)
+    auto iterate_func = [&](LockedKey & locked_key, const FileSegmentMetadataPtr & segment_metadata)
     {
         chassert(segment_metadata->file_segment->assertCorrectness());
@@ -658,7 +658,7 @@ bool FileCache::tryReserve(FileSegment & file_segment, const size_t size)
             it->second.add(segment_metadata);

             freeable_space += segment_metadata->size();
-            freeable_count += 1;
+            ++freeable_count;

             return PriorityIterationResult::CONTINUE;
         }
@@ -683,7 +683,7 @@ bool FileCache::tryReserve(FileSegment & file_segment, const size_t size)
         if (is_query_priority_overflow())
         {
             query_priority->iterate(
-                [&](LockedKey & locked_key, FileSegmentMetadataPtr segment_metadata)
+                [&](LockedKey & locked_key, const FileSegmentMetadataPtr & segment_metadata)
                 { return is_query_priority_overflow() ?
iterate_func(locked_key, segment_metadata) : PriorityIterationResult::BREAK; }, cache_lock); @@ -696,14 +696,22 @@ bool FileCache::tryReserve(FileSegment & file_segment, const size_t size) file_segment.key(), file_segment.offset()); } - auto is_main_priority_overflow = [&] + auto is_main_priority_overflow = [main_priority_size_limit = main_priority->getSizeLimit(), + main_priority_elements_limit = main_priority->getElementsLimit(), + size, + &freeable_space, + &freeable_count, + &file_segment, + &cache_lock, + main_priority = this->main_priority.get(), + log = this->log] { - /// max_size == 0 means unlimited cache size, - /// max_element_size == 0 means unlimited number of cache elements. - const bool is_overflow = (main_priority->getSizeLimit() != 0 - && (main_priority->getSize(cache_lock) + size - freeable_space > main_priority->getSizeLimit())) - || (main_priority->getElementsLimit() != 0 - && freeable_count == 0 && main_priority->getElementsCount(cache_lock) == main_priority->getElementsLimit()); + const bool is_overflow = + /// size_limit == 0 means unlimited cache size + (main_priority_size_limit != 0 && (main_priority->getSize(cache_lock) + size - freeable_space > main_priority_size_limit)) + /// elements_limit == 0 means unlimited number of cache elements + || (main_priority_elements_limit != 0 && freeable_count == 0 + && main_priority->getElementsCount(cache_lock) == main_priority_elements_limit); LOG_TEST( log, "Overflow: {}, size: {}, ready to remove: {} ({} in number), current cache size: {}/{}, elements: {}/{}, while reserving for {}:{}", @@ -718,7 +726,7 @@ bool FileCache::tryReserve(FileSegment & file_segment, const size_t size) if (is_main_priority_overflow()) { main_priority->iterate( - [&](LockedKey & locked_key, FileSegmentMetadataPtr segment_metadata) + [&](LockedKey & locked_key, const FileSegmentMetadataPtr & segment_metadata) { return is_main_priority_overflow() ? 
iterate_func(locked_key, segment_metadata) : PriorityIterationResult::BREAK; }, cache_lock); @@ -831,7 +839,7 @@ void FileCache::removeAllReleasable() auto lock = lockCache(); - main_priority->iterate([&](LockedKey & locked_key, FileSegmentMetadataPtr segment_metadata) + main_priority->iterate([&](LockedKey & locked_key, const FileSegmentMetadataPtr & segment_metadata) { if (segment_metadata->releasable()) { @@ -1065,7 +1073,7 @@ FileSegmentsHolderPtr FileCache::dumpQueue() assertInitialized(); FileSegments file_segments; - main_priority->iterate([&](LockedKey &, FileSegmentMetadataPtr segment_metadata) + main_priority->iterate([&](LockedKey &, const FileSegmentMetadataPtr & segment_metadata) { file_segments.push_back(FileSegment::getSnapshot(segment_metadata->file_segment)); return PriorityIterationResult::CONTINUE; @@ -1105,7 +1113,7 @@ size_t FileCache::getFileSegmentsNum() const void FileCache::assertCacheCorrectness() { auto lock = lockCache(); - main_priority->iterate([&](LockedKey &, FileSegmentMetadataPtr segment_metadata) + main_priority->iterate([&](LockedKey &, const FileSegmentMetadataPtr & segment_metadata) { const auto & file_segment = *segment_metadata->file_segment; UNUSED(file_segment); diff --git a/src/Interpreters/Cache/IFileCachePriority.h b/src/Interpreters/Cache/IFileCachePriority.h index 93343398783..9af869357d3 100644 --- a/src/Interpreters/Cache/IFileCachePriority.h +++ b/src/Interpreters/Cache/IFileCachePriority.h @@ -64,7 +64,7 @@ public: CONTINUE, REMOVE_AND_CONTINUE, }; - using IterateFunc = std::function; + using IterateFunc = std::function; IFileCachePriority(size_t max_size_, size_t max_elements_) : max_size(max_size_), max_elements(max_elements_) {} From e3b43c2b8862d25934b7274bf9d5f3f2a4292a55 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Wed, 21 Jun 2023 21:54:44 +0000 Subject: [PATCH 0627/1997] Move instead of copy --- src/Interpreters/Cache/FileCache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index 3c5cf555ac9..17d2c332887 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -615,7 +615,7 @@ bool FileCache::tryReserve(FileSegment & file_segment, const size_t size) struct EvictionCandidates { - explicit EvictionCandidates(const KeyMetadataPtr & key_metadata_) : key_metadata(key_metadata_) {} + explicit EvictionCandidates(KeyMetadataPtr key_metadata_) : key_metadata(std::move(key_metadata_)) {} void add(const FileSegmentMetadataPtr & candidate) { From a050b9e9d98fb25270e5cf41b98135fe44f6d8f2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 22 Jun 2023 01:28:52 +0300 Subject: [PATCH 0628/1997] Update FileCache.cpp --- src/Interpreters/Cache/FileCache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index c1256e43afa..f42fc1f8dfc 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -923,7 +923,7 @@ void FileCache::loadMetadata() parsed = tryParse(offset, offset_with_suffix.substr(0, delim_pos)); if (offset_with_suffix.substr(delim_pos+1) == "persistent") { - /// For compatibility. Persistent files are no loger supported. + /// For compatibility. Persistent files are no longer supported. 
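
Three small but deliberate choices recur through the cache commits above, all aimed at the per-segment hot path: std::shared_ptr callback parameters become const references (no atomic ref-count bump per visited segment), ownership transfers use std::move, and loop-invariant limits are captured by value so the overflow predicate stops chasing pointer indirections. A self-contained illustration of the pattern (toy types, not the real FileCache):

    #include <cstddef>
    #include <memory>
    #include <vector>

    struct Segment { size_t size = 0; };
    using SegmentPtr = std::shared_ptr<Segment>;

    struct Candidates
    {
        std::vector<SegmentPtr> items;
        /// Taking const& means the ref-count is bumped only here,
        /// at the point ownership is actually retained.
        void add(const SegmentPtr & segment) { items.push_back(segment); }
    };

    void evictUntilFits(const std::vector<SegmentPtr> & segments, size_t size_limit, size_t current_size)
    {
        /// Loop invariants captured by value once; the hot predicate
        /// no longer dereferences `this` or other members per call.
        auto is_overflow = [size_limit, &current_size]
        {
            return size_limit != 0 && current_size > size_limit;
        };

        Candidates candidates;
        for (const auto & segment : segments)   /// const&: no per-element copy
        {
            if (!is_overflow())
                break;
            candidates.add(segment);
            current_size -= segment->size;
        }
    }
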
fs::remove(offset_it->path()); continue; } From c96a3a8b15754a140d70bf4dc7acd2ccb0e657ab Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Mon, 19 Jun 2023 16:47:19 +0000 Subject: [PATCH 0629/1997] Unforget to pass callback to readBigAt() in ParallelReadBuffer --- src/IO/ParallelReadBuffer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IO/ParallelReadBuffer.cpp b/src/IO/ParallelReadBuffer.cpp index fd228336093..92af1ed0b04 100644 --- a/src/IO/ParallelReadBuffer.cpp +++ b/src/IO/ParallelReadBuffer.cpp @@ -256,7 +256,7 @@ void ParallelReadBuffer::readerThreadFunction(ReadWorkerPtr read_worker) return false; }; - size_t r = input.readBigAt(read_worker->segment.data(), read_worker->segment.size(), read_worker->start_offset); + size_t r = input.readBigAt(read_worker->segment.data(), read_worker->segment.size(), read_worker->start_offset, on_progress); if (!on_progress(r) && r < read_worker->segment.size()) throw Exception( From f0dcefa0e19c62e3200808065cf1617faa85aa97 Mon Sep 17 00:00:00 2001 From: santrancisco Date: Thu, 22 Jun 2023 18:00:39 +1000 Subject: [PATCH 0630/1997] Fixing git submodule back to our own fork as this pull https://github.com/apache/orc/pull/1542 is now merged into apache/orc repository --- .gitmodules | 2 +- contrib/orc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitmodules b/.gitmodules index 955b5cc27bb..8bf4f6e8975 100644 --- a/.gitmodules +++ b/.gitmodules @@ -86,7 +86,7 @@ url = https://github.com/ClickHouse/fastops [submodule "contrib/orc"] path = contrib/orc - url = https://github.com/wpleonardo/orc + url = https://github.com/ClickHouse/orc [submodule "contrib/sparsehash-c11"] path = contrib/sparsehash-c11 url = https://github.com/sparsehash/sparsehash-c11 diff --git a/contrib/orc b/contrib/orc index 7d90ebb7c11..568d1d60c25 160000 --- a/contrib/orc +++ b/contrib/orc @@ -1 +1 @@ -Subproject commit 7d90ebb7c11b73041dadd276088cb2c1ee2d93d1 +Subproject commit 568d1d60c250af1890f226c182bc15bd8cc94cf1 From 0c440b9d6f98bf1630e1b23ed058450086488697 Mon Sep 17 00:00:00 2001 From: Anton Kozlov Date: Wed, 3 May 2023 12:59:19 +0000 Subject: [PATCH 0631/1997] Report loading status for executable dictionaries correctly --- .../ExecutableDictionarySource.cpp | 17 +++++-- .../ExecutablePoolDictionarySource.cpp | 1 + src/Processors/Sources/ShellCommandSource.cpp | 4 ++ src/Processors/Sources/ShellCommandSource.h | 3 ++ ...utable_missing_strict_check_dictionary.xml | 29 +++++++++++ .../test_executable_dictionary/test.py | 48 +++++++++++-------- 6 files changed, 79 insertions(+), 23 deletions(-) create mode 100644 tests/integration/test_executable_dictionary/dictionaries/executable_missing_strict_check_dictionary.xml diff --git a/src/Dictionaries/ExecutableDictionarySource.cpp b/src/Dictionaries/ExecutableDictionarySource.cpp index 91f914fb7ad..e40ef07de9e 100644 --- a/src/Dictionaries/ExecutableDictionarySource.cpp +++ b/src/Dictionaries/ExecutableDictionarySource.cpp @@ -114,7 +114,10 @@ QueryPipeline ExecutableDictionarySource::loadAll() auto command = configuration.command; updateCommandIfNeeded(command, coordinator_configuration.execute_direct, context); - return QueryPipeline(coordinator->createPipe(command, configuration.command_arguments, sample_block, context)); + ShellCommandSourceConfiguration command_configuration { + .check_exit_code = true, + }; + return QueryPipeline(coordinator->createPipe(command, configuration.command_arguments, {}, sample_block, context, command_configuration)); } QueryPipeline 
ExecutableDictionarySource::loadUpdatedAll() @@ -148,7 +151,11 @@ QueryPipeline ExecutableDictionarySource::loadUpdatedAll() update_time = new_update_time; LOG_TRACE(log, "loadUpdatedAll {}", command); - return QueryPipeline(coordinator->createPipe(command, command_arguments, sample_block, context)); + + ShellCommandSourceConfiguration command_configuration { + .check_exit_code = true, + }; + return QueryPipeline(coordinator->createPipe(command, command_arguments, {}, sample_block, context, command_configuration)); } QueryPipeline ExecutableDictionarySource::loadIds(const std::vector & ids) @@ -179,7 +186,11 @@ QueryPipeline ExecutableDictionarySource::getStreamForBlock(const Block & block) Pipes shell_input_pipes; shell_input_pipes.emplace_back(std::move(shell_input_pipe)); - auto pipe = coordinator->createPipe(command, configuration.command_arguments, std::move(shell_input_pipes), sample_block, context); + ShellCommandSourceConfiguration command_configuration { + .check_exit_code = true, + }; + + auto pipe = coordinator->createPipe(command, configuration.command_arguments, std::move(shell_input_pipes), sample_block, context, command_configuration); if (configuration.implicit_key) pipe.addTransform(std::make_shared(block, pipe.getHeader())); diff --git a/src/Dictionaries/ExecutablePoolDictionarySource.cpp b/src/Dictionaries/ExecutablePoolDictionarySource.cpp index 0cc7696585f..94685060a46 100644 --- a/src/Dictionaries/ExecutablePoolDictionarySource.cpp +++ b/src/Dictionaries/ExecutablePoolDictionarySource.cpp @@ -132,6 +132,7 @@ QueryPipeline ExecutablePoolDictionarySource::getStreamForBlock(const Block & bl ShellCommandSourceConfiguration command_configuration; command_configuration.read_fixed_number_of_rows = true; command_configuration.number_of_rows_to_read = block.rows(); + command_configuration.check_exit_code = true; Pipes shell_input_pipes; shell_input_pipes.emplace_back(std::move(shell_input_pipe)); diff --git a/src/Processors/Sources/ShellCommandSource.cpp b/src/Processors/Sources/ShellCommandSource.cpp index 8a7bbcf3261..ace175f251c 100644 --- a/src/Processors/Sources/ShellCommandSource.cpp +++ b/src/Processors/Sources/ShellCommandSource.cpp @@ -352,7 +352,11 @@ namespace } if (!executor->pull(chunk)) + { + if (configuration.check_exit_code) + command->wait(); return {}; + } current_read_rows += chunk.getNumRows(); } diff --git a/src/Processors/Sources/ShellCommandSource.h b/src/Processors/Sources/ShellCommandSource.h index b9afaa345cf..6dc6781cc4c 100644 --- a/src/Processors/Sources/ShellCommandSource.h +++ b/src/Processors/Sources/ShellCommandSource.h @@ -33,6 +33,9 @@ struct ShellCommandSourceConfiguration size_t number_of_rows_to_read = 0; /// Max block size size_t max_block_size = DEFAULT_BLOCK_SIZE; + /// Will throw if the command exited with + /// non-zero status code + size_t check_exit_code = false; }; class ShellCommandSourceCoordinator diff --git a/tests/integration/test_executable_dictionary/dictionaries/executable_missing_strict_check_dictionary.xml b/tests/integration/test_executable_dictionary/dictionaries/executable_missing_strict_check_dictionary.xml new file mode 100644 index 00000000000..8562851e69c --- /dev/null +++ b/tests/integration/test_executable_dictionary/dictionaries/executable_missing_strict_check_dictionary.xml @@ -0,0 +1,29 @@ + + + executable_input_missing_executable + + + + executable_not_found + CSV + + + + 86400 + 90000 + + + + + + + input + + + result + String + + + + + diff --git a/tests/integration/test_executable_dictionary/test.py 
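
The behavioural core of the dictionary commit above is only a few lines: when check_exit_code is set and the stream ends, the source now waits on the child process, and — per the new comment in ShellCommandSource.h — that wait throws on a non-zero exit status, which is what finally drives the dictionary into FAILED instead of a silent empty result. Schematically (toy ShellCommand standing in for the real class):

    #include <stdexcept>
    #include <string>

    struct ShellCommand
    {
        int exit_code = 0;

        /// Mirrors the contract the patch relies on: waiting on a child
        /// that failed raises instead of returning quietly.
        void wait() const
        {
            if (exit_code != 0)
                throw std::runtime_error("Child exited with code " + std::to_string(exit_code));
        }
    };

    /// Simplified end-of-stream branch of the source's read loop.
    bool onEndOfStream(const ShellCommand & command, bool check_exit_code)
    {
        if (check_exit_code)
            command.wait();   /// propagates the failure to the dictionary loader
        return false;         /// no more chunks either way
    }

That is also why the integration tests below flip from expecting "Default result" to query_and_get_error: the error is no longer swallowed on the load path.
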
b/tests/integration/test_executable_dictionary/test.py index 43e6ec0a800..22f3442bb95 100644 --- a/tests/integration/test_executable_dictionary/test.py +++ b/tests/integration/test_executable_dictionary/test.py @@ -217,33 +217,21 @@ def test_executable_implicit_input_argument_python(started_cluster): def test_executable_input_signalled_python(started_cluster): skip_test_msan(node) - assert ( - node.query( - "SELECT dictGet('executable_input_signalled_python', 'result', toUInt64(1))" - ) - == "Default result\n" + assert node.query_and_get_error( + "SELECT dictGet('executable_input_signalled_python', 'result', toUInt64(1))" ) - assert ( - node.query( - "SELECT dictGet('executable_input_signalled_pool_python', 'result', toUInt64(1))" - ) - == "Default result\n" + assert node.query_and_get_error( + "SELECT dictGet('executable_input_signalled_pool_python', 'result', toUInt64(1))" ) def test_executable_implicit_input_signalled_python(started_cluster): skip_test_msan(node) - assert ( - node.query( - "SELECT dictGet('executable_implicit_input_signalled_python', 'result', toUInt64(1))" - ) - == "Default result\n" + assert node.query_and_get_error( + "SELECT dictGet('executable_implicit_input_signalled_python', 'result', toUInt64(1))" ) - assert ( - node.query( - "SELECT dictGet('executable_implicit_input_signalled_pool_python', 'result', toUInt64(1))" - ) - == "Default result\n" + assert node.query_and_get_error( + "SELECT dictGet('executable_implicit_input_signalled_pool_python', 'result', toUInt64(1))" ) @@ -482,3 +470,23 @@ def test_executable_source_updated_python(started_cluster): ) == "Value 1 1\n" ) + + +def test_executable_source_exit_code_check(started_cluster): + skip_test_msan(node) + assert "DB::Exception" in node.query_and_get_error( + "SELECT * FROM dictionary(executable_input_missing_executable) ORDER BY input" + ) + assert "DB::Exception" in node.query_and_get_error( + "SELECT dictGet('executable_input_missing_executable', 'result', toUInt64(1))" + ) + + assert ( + node.query( + "SELECT status FROM system.dictionaries WHERE name='executable_input_missing_executable'" + ) + == "FAILED\n" + ) + assert "DB::Exception" in node.query( + "SELECT last_exception FROM system.dictionaries WHERE name='executable_input_missing_executable'" + ) From 2c3a4cb90de34569277edb3e4cf9f50fa9e5d5a2 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Thu, 22 Jun 2023 10:47:07 +0000 Subject: [PATCH 0632/1997] Style fix --- src/Processors/Formats/Impl/CSVRowInputFormat.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index a727a5bc490..59b0f25f0bf 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -156,8 +156,7 @@ void CSVFormatReader::skipFieldDelimiter() { skipWhitespacesAndTabs(*buf, format_settings.csv.allow_whitespace_or_tab_as_delimiter); - bool res = checkChar(format_settings.csv.delimiter, *buf); - if (!res) + if (!checkChar(format_settings.csv.delimiter, *buf)) { if (!format_settings.csv.missing_as_default) { @@ -165,9 +164,7 @@ void CSVFormatReader::skipFieldDelimiter() throwAtAssertionFailed(err, *buf); } else - { current_row_has_missing_fields = true; - } } } From a0fde6a55b3ddb9cac0b3914fc18af58f6419eac Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Thu, 22 Jun 2023 10:50:14 +0000 Subject: [PATCH 0633/1997] Style fix --- .../Formats/Impl/CSVRowInputFormat.cpp | 17 +++++++++-------- 1 file 
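
The two style passes on skipFieldDelimiter() above and below converge on one control flow for input_format_csv_missing_as_default: a missing delimiter is either a hard parse error or a marker that the remaining columns of the row should be filled with defaults. Its skeleton, pulled out of the reader class (buffer handling simplified to raw pointers, names illustrative):

    #include <stdexcept>

    struct CsvSettings
    {
        char delimiter = ',';
        bool missing_as_default = false;
    };

    /// Returns true if the delimiter was consumed; marks the row as short
    /// when the setting allows trailing fields to be omitted.
    bool skipFieldDelimiter(const char *& pos, const char * end,
                            const CsvSettings & settings, bool & row_has_missing_fields)
    {
        if (pos != end && *pos == settings.delimiter)
        {
            ++pos;
            return true;
        }
        if (!settings.missing_as_default)
            throw std::runtime_error("Expected delimiter");
        row_has_missing_fields = true;   /// downstream reads defaults for the rest of the row
        return false;
    }
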
changed, 9 insertions(+), 8 deletions(-) diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index 59b0f25f0bf..edbc33fb3c3 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -156,16 +156,17 @@ void CSVFormatReader::skipFieldDelimiter() { skipWhitespacesAndTabs(*buf, format_settings.csv.allow_whitespace_or_tab_as_delimiter); - if (!checkChar(format_settings.csv.delimiter, *buf)) + bool res = checkChar(format_settings.csv.delimiter, *buf); + if (res) + return; + + if (!format_settings.csv.missing_as_default) { - if (!format_settings.csv.missing_as_default) - { - char err[2] = {format_settings.csv.delimiter, '\0'}; - throwAtAssertionFailed(err, *buf); - } - else - current_row_has_missing_fields = true; + char err[2] = {format_settings.csv.delimiter, '\0'}; + throwAtAssertionFailed(err, *buf); } + else + current_row_has_missing_fields = true; } template From b3ce64caa5d548edbfa93d15f55062f572b60fd7 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 22 Jun 2023 12:55:05 +0200 Subject: [PATCH 0634/1997] Fix tests --- tests/config/config.d/storage_conf.xml | 13 --- .../02344_describe_cache.reference | 4 +- ...ilesystem_cache_persistent_files.reference | 79 ------------------- ...2382_filesystem_cache_persistent_files.sql | 76 ------------------ 4 files changed, 2 insertions(+), 170 deletions(-) delete mode 100644 tests/queries/0_stateless/02382_filesystem_cache_persistent_files.reference delete mode 100644 tests/queries/0_stateless/02382_filesystem_cache_persistent_files.sql diff --git a/tests/config/config.d/storage_conf.xml b/tests/config/config.d/storage_conf.xml index dee03307177..deee71bd812 100644 --- a/tests/config/config.d/storage_conf.xml +++ b/tests/config/config.d/storage_conf.xml @@ -57,7 +57,6 @@ s3_cache/ 128Mi 1 - 0 100 @@ -65,7 +64,6 @@ s3_disk_2 s3_cache_2/ 128Mi - 0 100Mi 100 @@ -77,7 +75,6 @@ 22548578304 1 1 - 0 100 @@ -87,7 +84,6 @@ 128Mi 1 1 - 0 100 @@ -95,7 +91,6 @@ s3_disk_5 s3_cache_5/ 128Mi - 0 100 @@ -103,7 +98,6 @@ s3_disk_6 s3_cache_6/ 128Mi - 0 1 100 100 @@ -113,7 +107,6 @@ s3_disk_6 s3_cache_small/ 1000 - 1 100 @@ -122,7 +115,6 @@ s3_cache_small_segment_size/ 128Mi 10Ki - 0 1 100 @@ -146,7 +138,6 @@ local_cache/ 22548578304 1 - 0 100 @@ -154,7 +145,6 @@ local_disk local_cache_2/ 22548578304 - 0 100 @@ -164,7 +154,6 @@ 22548578304 1 1 - 0 100 @@ -173,7 +162,6 @@ s3_cache_5 s3_cache_multi/ 22548578304 - 0 100 @@ -181,7 +169,6 @@ s3_cache_multi s3_cache_multi_2/ 22548578304 - 0 100 diff --git a/tests/queries/0_stateless/02344_describe_cache.reference b/tests/queries/0_stateless/02344_describe_cache.reference index d8a2ffab1fa..2e6df4a6a0b 100644 --- a/tests/queries/0_stateless/02344_describe_cache.reference +++ b/tests/queries/0_stateless/02344_describe_cache.reference @@ -1,2 +1,2 @@ -134217728 10000000 8388608 1 0 0 0 /var/lib/clickhouse/caches/s3_cache/ 0 -134217728 10000000 104857600 0 0 0 0 /var/lib/clickhouse/caches/s3_cache_2/ 0 +134217728 10000000 8388608 1 0 0 0 /var/lib/clickhouse/caches/s3_cache/ +134217728 10000000 104857600 0 0 0 0 /var/lib/clickhouse/caches/s3_cache_2/ diff --git a/tests/queries/0_stateless/02382_filesystem_cache_persistent_files.reference b/tests/queries/0_stateless/02382_filesystem_cache_persistent_files.reference deleted file mode 100644 index e77afc98007..00000000000 --- a/tests/queries/0_stateless/02382_filesystem_cache_persistent_files.reference +++ /dev/null @@ -1,79 +0,0 @@ --- { echo } - -SET 
enable_filesystem_cache_on_write_operations=0; -SYSTEM DROP FILESYSTEM CACHE; -DROP TABLE IF EXISTS nopers; -CREATE TABLE nopers (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; -SYSTEM STOP MERGES nopers; -INSERT INTO nopers SELECT number, toString(number) FROM numbers(10); -SELECT * FROM nopers FORMAT Null; -SELECT sum(size) FROM system.filesystem_cache; -195 -SELECT extract(local_path, '.*/([\w.]+)') as file, extract(cache_path, '.*/([\w.]+)') as cache, size -FROM -( - SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path - FROM system.remote_data_paths -) AS data_paths -INNER JOIN - system.filesystem_cache AS caches -ON data_paths.cache_path = caches.cache_path -ORDER BY file, cache, size; -data.bin 0 114 -data.mrk3 0 80 -format_version.txt 0 1 -DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_small', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; -SYSTEM STOP MERGES test; -INSERT INTO test SELECT number, toString(number) FROM numbers(100); -SELECT * FROM test FORMAT Null; -SELECT sum(size) FROM system.filesystem_cache; -1021 -SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; -5 -SELECT count() FROM system.filesystem_cache; -5 -SELECT extract(local_path, '.*/([\w.]+)') as file, extract(cache_path, '.*/([\w.]+)') as cache, size -FROM -( - SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path - FROM system.remote_data_paths -) AS data_paths -INNER JOIN - system.filesystem_cache AS caches -ON data_paths.cache_path = caches.cache_path -ORDER BY file, cache, size; -data.bin 0 114 -data.bin 0 746 -data.mrk3 0 80 -data.mrk3 0_persistent 80 -format_version.txt 0 1 -DROP TABLE IF EXISTS test2; -CREATE TABLE test2 (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_small', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; -SYSTEM STOP MERGES test2; -INSERT INTO test2 SELECT number, toString(number) FROM numbers(100000); -SELECT * FROM test2 FORMAT Null; -SELECT sum(size) FROM system.filesystem_cache; -795 -SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; -5 -SELECT count() FROM system.filesystem_cache; -5 -SELECT extract(local_path, '.*/([\w.]+)') as file, extract(cache_path, '.*/([\w.]+)') as cache, size -FROM -( - SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path - FROM system.remote_data_paths -) AS data_paths -INNER JOIN - system.filesystem_cache AS caches -ON data_paths.cache_path = caches.cache_path -ORDER BY file, cache, size; -data.bin 0 114 -data.mrk3 0 80 -data.mrk3 0_persistent 80 -data.mrk3 0_persistent 520 -format_version.txt 0 1 -DROP TABLE test; -DROP TABLE test2; -DROP TABLE nopers; diff --git a/tests/queries/0_stateless/02382_filesystem_cache_persistent_files.sql b/tests/queries/0_stateless/02382_filesystem_cache_persistent_files.sql deleted file mode 100644 index 6486840602e..00000000000 --- 
a/tests/queries/0_stateless/02382_filesystem_cache_persistent_files.sql +++ /dev/null @@ -1,76 +0,0 @@ --- Tags: no-fasttest, no-parallel, no-s3-storage, no-random-settings - --- { echo } - -SET enable_filesystem_cache_on_write_operations=0; - -SYSTEM DROP FILESYSTEM CACHE; - -DROP TABLE IF EXISTS nopers; -CREATE TABLE nopers (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; -SYSTEM STOP MERGES nopers; - -INSERT INTO nopers SELECT number, toString(number) FROM numbers(10); -SELECT * FROM nopers FORMAT Null; -SELECT sum(size) FROM system.filesystem_cache; - -SELECT extract(local_path, '.*/([\w.]+)') as file, extract(cache_path, '.*/([\w.]+)') as cache, size -FROM -( - SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path - FROM system.remote_data_paths -) AS data_paths -INNER JOIN - system.filesystem_cache AS caches -ON data_paths.cache_path = caches.cache_path -ORDER BY file, cache, size; - -DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_small', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; -SYSTEM STOP MERGES test; - -INSERT INTO test SELECT number, toString(number) FROM numbers(100); -SELECT * FROM test FORMAT Null; - -SELECT sum(size) FROM system.filesystem_cache; - -SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; -SELECT count() FROM system.filesystem_cache; - -SELECT extract(local_path, '.*/([\w.]+)') as file, extract(cache_path, '.*/([\w.]+)') as cache, size -FROM -( - SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path - FROM system.remote_data_paths -) AS data_paths -INNER JOIN - system.filesystem_cache AS caches -ON data_paths.cache_path = caches.cache_path -ORDER BY file, cache, size; - -DROP TABLE IF EXISTS test2; -CREATE TABLE test2 (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_small', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; -SYSTEM STOP MERGES test2; - -INSERT INTO test2 SELECT number, toString(number) FROM numbers(100000); -SELECT * FROM test2 FORMAT Null; - -SELECT sum(size) FROM system.filesystem_cache; - -SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; -SELECT count() FROM system.filesystem_cache; - -SELECT extract(local_path, '.*/([\w.]+)') as file, extract(cache_path, '.*/([\w.]+)') as cache, size -FROM -( - SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path - FROM system.remote_data_paths -) AS data_paths -INNER JOIN - system.filesystem_cache AS caches -ON data_paths.cache_path = caches.cache_path -ORDER BY file, cache, size; - -DROP TABLE test; -DROP TABLE test2; -DROP TABLE nopers; From 95349a405b46c00fb6024a2e90defe0ee28b0b89 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 22 Jun 2023 11:09:26 +0200 Subject: [PATCH 0635/1997] release buffers with exception context --- src/Daemon/BaseDaemon.cpp | 4 ++++ .../Transforms/ExceptionKeepingTransform.cpp | 10 +++++----- .../Transforms/ExceptionKeepingTransform.h | 
2 +- src/Storages/HDFS/StorageHDFS.cpp | 2 +- src/Storages/MessageQueueSink.h | 2 +- src/Storages/PartitionedSink.cpp | 4 ++-- src/Storages/PartitionedSink.h | 2 +- src/Storages/StorageAzureBlob.cpp | 2 +- src/Storages/StorageFile.cpp | 2 +- src/Storages/StorageS3.cpp | 18 +++++++++++------- src/Storages/StorageURL.cpp | 2 +- src/Storages/StorageURL.h | 2 +- 12 files changed, 30 insertions(+), 22 deletions(-) diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index bfd5568b71d..732d5b1d2f4 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -417,6 +417,8 @@ private: { SentryWriter::onFault(sig, error_message, stack_trace); +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunreachable-code" /// Advice the user to send it manually. if constexpr (std::string_view(VERSION_OFFICIAL).contains("official build")) { @@ -436,6 +438,8 @@ private: { LOG_FATAL(log, "This ClickHouse version is not official and should be upgraded to the official build."); } +#pragma clang diagnostic pop + } /// ClickHouse Keeper does not link to some part of Settings. diff --git a/src/Processors/Transforms/ExceptionKeepingTransform.cpp b/src/Processors/Transforms/ExceptionKeepingTransform.cpp index 266407f21a5..3c40c078225 100644 --- a/src/Processors/Transforms/ExceptionKeepingTransform.cpp +++ b/src/Processors/Transforms/ExceptionKeepingTransform.cpp @@ -76,7 +76,7 @@ IProcessor::Status ExceptionKeepingTransform::prepare() if (data.exception) { stage = Stage::Exception; - onException(); + onException(data.exception); output.pushData(std::move(data)); return Status::PortFull; } @@ -139,7 +139,7 @@ void ExceptionKeepingTransform::work() stage = Stage::Exception; ready_output = true; data.exception = exception; - onException(); + onException(data.exception); } } else if (stage == Stage::Consume || stage == Stage::Generate) @@ -153,7 +153,7 @@ void ExceptionKeepingTransform::work() stage = Stage::Exception; ready_output = true; data.exception = exception; - onException(); + onException(data.exception); } else stage = Stage::Generate; @@ -167,7 +167,7 @@ void ExceptionKeepingTransform::work() stage = Stage::Exception; ready_output = true; data.exception = exception; - onException(); + onException(data.exception); } else { @@ -189,7 +189,7 @@ void ExceptionKeepingTransform::work() stage = Stage::Exception; ready_output = true; data.exception = exception; - onException(); + onException(data.exception); } } } diff --git a/src/Processors/Transforms/ExceptionKeepingTransform.h b/src/Processors/Transforms/ExceptionKeepingTransform.h index e2bc161971e..cec0e0eea31 100644 --- a/src/Processors/Transforms/ExceptionKeepingTransform.h +++ b/src/Processors/Transforms/ExceptionKeepingTransform.h @@ -52,7 +52,7 @@ protected: virtual void onConsume(Chunk chunk) = 0; virtual GenerateResult onGenerate() = 0; virtual void onFinish() {} - virtual void onException() {} + virtual void onException(std::exception_ptr /* exception */) {} public: ExceptionKeepingTransform(const Block & in_header, const Block & out_header, bool ignore_on_start_and_finish_ = true); diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 827d89e26a4..7a9d996e3c2 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -512,7 +512,7 @@ public: cancelled = true; } - void onException() override + void onException(std::exception_ptr /* exception */) override { std::lock_guard lock(cancel_mutex); finalize(); diff --git a/src/Storages/MessageQueueSink.h 
b/src/Storages/MessageQueueSink.h index 590bee7ee4f..b3c1e61734f 100644 --- a/src/Storages/MessageQueueSink.h +++ b/src/Storages/MessageQueueSink.h @@ -40,7 +40,7 @@ public: void onStart() override; void onFinish() override; void onCancel() override { onFinish(); } - void onException() override { onFinish(); } + void onException(std::exception_ptr /* exception */) override { onFinish(); } protected: /// Do some specific initialization before consuming data. diff --git a/src/Storages/PartitionedSink.cpp b/src/Storages/PartitionedSink.cpp index 363b4557290..18442a8691f 100644 --- a/src/Storages/PartitionedSink.cpp +++ b/src/Storages/PartitionedSink.cpp @@ -111,11 +111,11 @@ void PartitionedSink::consume(Chunk chunk) } } -void PartitionedSink::onException() +void PartitionedSink::onException(std::exception_ptr exception) { for (auto & [_, sink] : partition_id_to_sink) { - sink->onException(); + sink->onException(exception); } } diff --git a/src/Storages/PartitionedSink.h b/src/Storages/PartitionedSink.h index c4bf9c0622c..68edeb6fd73 100644 --- a/src/Storages/PartitionedSink.h +++ b/src/Storages/PartitionedSink.h @@ -22,7 +22,7 @@ public: void consume(Chunk chunk) override; - void onException() override; + void onException(std::exception_ptr exception) override; void onFinish() override; diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index 336c4eaed9b..65af2ed6d6e 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -489,7 +489,7 @@ public: cancelled = true; } - void onException() override + void onException(std::exception_ptr /* exception */) override { std::lock_guard lock(cancel_mutex); finalize(); diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 7c04de1a28a..50aa47ffca9 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -1031,7 +1031,7 @@ public: cancelled = true; } - void onException() override + void onException(std::exception_ptr /* exception */) override { std::lock_guard cancel_lock(cancel_mutex); finalize(); diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 3c0dbdcd766..292ae4813dd 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -813,10 +813,18 @@ public: cancelled = true; } - void onException() override + void onException(std::exception_ptr exception) override { std::lock_guard lock(cancel_mutex); - release(); + try + { + std::rethrow_exception(exception); + } + catch (...) + { + /// An exception context is needed to properly delete write buffers without finalization + release(); + } } void onFinish() override @@ -840,17 +848,13 @@ private: catch (...) { /// Stop ParallelFormattingOutputFormat correctly.
- writer.reset(); - write_buf->finalize(); + release(); throw; } } void release() { - if (!writer) - return; - writer.reset(); write_buf.reset(); } diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 1d6aed204cb..651b50518d2 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -480,7 +480,7 @@ void StorageURLSink::onCancel() cancelled = true; } -void StorageURLSink::onException() +void StorageURLSink::onException(std::exception_ptr /* exception */) { std::lock_guard lock(cancel_mutex); finalize(); diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index e3305cda89e..c4a5ce1aa7b 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -236,7 +236,7 @@ public: std::string getName() const override { return "StorageURLSink"; } void consume(Chunk chunk) override; void onCancel() override; - void onException() override; + void onException(std::exception_ptr exception) override; void onFinish() override; private:
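
The rethrow in StorageS3's onException above is deliberate: re-raising the exception_ptr makes the failure the active exception while the write buffers are destroyed, so their destructors can detect the situation and skip finalization. A minimal sketch of the idiom, assuming only the standard library (onExceptionLike and Buffers are illustrative names, not ClickHouse identifiers):

#include <exception>
#include <memory>

// Illustrative stand-in: a buffer whose destructor may check
// std::uncaught_exceptions() and skip finalization when unwinding.
struct Buffers { ~Buffers() { /* skip finalize if an exception is in flight */ } };

void onExceptionLike(std::exception_ptr exception, std::unique_ptr<Buffers> & buffers)
{
    try
    {
        std::rethrow_exception(exception);  // make the failure the active exception
    }
    catch (...)
    {
        buffers.reset();  // destructors now run with an exception in flight
    }
}
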
Stack trace: {}", count(), StackTrace().toString()); + else + throw Exception(ErrorCodes::CANNOT_WRITE_TO_OSTREAM, "Cannot write to ostream at offset {}", count()); + } } WriteBufferFromOStream::WriteBufferFromOStream( From 4fbf52f2de95983b40921d65c68df6a59d6cd570 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 22 Jun 2023 13:08:38 +0200 Subject: [PATCH 0637/1997] Less loggers --- src/Interpreters/Cache/Metadata.cpp | 16 ++++++++-------- src/Interpreters/Cache/Metadata.h | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index 20af931bbc2..af8520fab8a 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -58,6 +58,11 @@ KeyMetadata::KeyMetadata( , key_path(key_path_) , cleanup_queue(cleanup_queue_) , created_base_directory(created_base_directory_) +#ifdef ABORT_ON_LOGICAL_ERROR + , log(&Poco::Logger::get("LockedKey(" + key.toString() + ")")) +#else + , log(&Poco::Logger::get("LockedKey")) +#endif { if (created_base_directory) chassert(fs::exists(key_path)); @@ -324,11 +329,6 @@ void CacheMetadata::doCleanup() LockedKey::LockedKey(std::shared_ptr key_metadata_) : key_metadata(key_metadata_) , lock(key_metadata->guard.lock()) -#ifdef ABORT_ON_LOGICAL_ERROR - , log(&Poco::Logger::get("LockedKey(" + key_metadata_->key.toString() + ")")) -#else - , log(&Poco::Logger::get("LockedKey")) -#endif { } @@ -338,7 +338,7 @@ LockedKey::~LockedKey() return; key_metadata->key_state = KeyMetadata::KeyState::REMOVING; - LOG_DEBUG(log, "Submitting key {} for removal", getKey()); + LOG_DEBUG(key_metadata->log, "Submitting key {} for removal", getKey()); key_metadata->cleanup_queue.add(getKey()); } @@ -396,7 +396,7 @@ KeyMetadata::iterator LockedKey::removeFileSegment(size_t offset, const FileSegm auto file_segment = it->second->file_segment; LOG_DEBUG( - log, "Remove from cache. Key: {}, offset: {}, size: {}", + key_metadata->log, "Remove from cache. Key: {}, offset: {}, size: {}", getKey(), offset, file_segment->reserved_size); chassert(file_segment->assertCorrectnessUnlocked(segment_lock)); @@ -409,7 +409,7 @@ KeyMetadata::iterator LockedKey::removeFileSegment(size_t offset, const FileSegm if (exists) { fs::remove(path); - LOG_TEST(log, "Removed file segment at path: {}", path); + LOG_TEST(key_metadata->log, "Removed file segment at path: {}", path); } else if (file_segment->downloaded_size) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected path {} to exist", path); diff --git a/src/Interpreters/Cache/Metadata.h b/src/Interpreters/Cache/Metadata.h index 6172808bcf5..c9a8e97beee 100644 --- a/src/Interpreters/Cache/Metadata.h +++ b/src/Interpreters/Cache/Metadata.h @@ -70,6 +70,7 @@ private: KeyGuard guard; CleanupQueue & cleanup_queue; std::atomic created_base_directory = false; + Poco::Logger * log; }; using KeyMetadataPtr = std::shared_ptr; @@ -171,7 +172,6 @@ struct LockedKey : private boost::noncopyable private: const std::shared_ptr key_metadata; KeyGuard::Lock lock; /// `lock` must be destructed before `key_metadata`. 
From 4fbf52f2de95983b40921d65c68df6a59d6cd570 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 22 Jun 2023 13:08:38 +0200 Subject: [PATCH 0637/1997] Less loggers --- src/Interpreters/Cache/Metadata.cpp | 16 ++++++++-------- src/Interpreters/Cache/Metadata.h | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index 20af931bbc2..af8520fab8a 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -58,6 +58,11 @@ KeyMetadata::KeyMetadata( , key_path(key_path_) , cleanup_queue(cleanup_queue_) , created_base_directory(created_base_directory_) +#ifdef ABORT_ON_LOGICAL_ERROR + , log(&Poco::Logger::get("LockedKey(" + key.toString() + ")")) +#else + , log(&Poco::Logger::get("LockedKey")) +#endif { if (created_base_directory) chassert(fs::exists(key_path)); @@ -324,11 +329,6 @@ void CacheMetadata::doCleanup() LockedKey::LockedKey(std::shared_ptr<KeyMetadata> key_metadata_) : key_metadata(key_metadata_) , lock(key_metadata->guard.lock()) -#ifdef ABORT_ON_LOGICAL_ERROR - , log(&Poco::Logger::get("LockedKey(" + key_metadata_->key.toString() + ")")) -#else - , log(&Poco::Logger::get("LockedKey")) -#endif { } @@ -338,7 +338,7 @@ LockedKey::~LockedKey() return; key_metadata->key_state = KeyMetadata::KeyState::REMOVING; - LOG_DEBUG(log, "Submitting key {} for removal", getKey()); + LOG_DEBUG(key_metadata->log, "Submitting key {} for removal", getKey()); key_metadata->cleanup_queue.add(getKey()); } @@ -396,7 +396,7 @@ KeyMetadata::iterator LockedKey::removeFileSegment(size_t offset, const FileSegm auto file_segment = it->second->file_segment; LOG_DEBUG( - log, "Remove from cache. Key: {}, offset: {}, size: {}", + key_metadata->log, "Remove from cache. Key: {}, offset: {}, size: {}", getKey(), offset, file_segment->reserved_size); chassert(file_segment->assertCorrectnessUnlocked(segment_lock)); @@ -409,7 +409,7 @@ KeyMetadata::iterator LockedKey::removeFileSegm if (exists) { fs::remove(path); - LOG_TEST(log, "Removed file segment at path: {}", path); + LOG_TEST(key_metadata->log, "Removed file segment at path: {}", path); } else if (file_segment->downloaded_size) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected path {} to exist", path); diff --git a/src/Interpreters/Cache/Metadata.h b/src/Interpreters/Cache/Metadata.h index 6172808bcf5..c9a8e97beee 100644 --- a/src/Interpreters/Cache/Metadata.h +++ b/src/Interpreters/Cache/Metadata.h @@ -70,6 +70,7 @@ private: KeyGuard guard; CleanupQueue & cleanup_queue; std::atomic<bool> created_base_directory = false; + Poco::Logger * log; }; using KeyMetadataPtr = std::shared_ptr<KeyMetadata>; @@ -171,7 +172,6 @@ struct LockedKey : private boost::noncopyable private: const std::shared_ptr<KeyMetadata> key_metadata; KeyGuard::Lock lock; /// `lock` must be destructed before `key_metadata`. - Poco::Logger * log; } From 8ff8125ebda3352e802ecc46b3f74b79caad6e05 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 22 Jun 2023 13:24:50 +0200 Subject: [PATCH 0638/1997] Better --- src/Interpreters/Cache/Metadata.cpp | 7 ++++--- src/Interpreters/Cache/Metadata.h | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index af8520fab8a..b19d095692d 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -53,15 +53,16 @@ KeyMetadata::KeyMetadata( const Key & key_, const std::string & key_path_, CleanupQueue & cleanup_queue_, + [[maybe_unused]] Poco::Logger * log_, bool created_base_directory_) : key(key_) , key_path(key_path_) , cleanup_queue(cleanup_queue_) , created_base_directory(created_base_directory_) #ifdef ABORT_ON_LOGICAL_ERROR - , log(&Poco::Logger::get("LockedKey(" + key.toString() + ")")) + , log(&Poco::Logger::get("Key(" + key.toString() + ")")) #else - , log(&Poco::Logger::get("LockedKey")) + , log(log_) #endif { if (created_base_directory) @@ -195,7 +196,7 @@ LockedKeyPtr CacheMetadata::lockKeyMetadata( it = emplace( key, std::make_shared<KeyMetadata>( - key, getPathForKey(key), *cleanup_queue, is_initial_load)).first; + key, getPathForKey(key), *cleanup_queue, log, is_initial_load)).first; } key_metadata = it->second; diff --git a/src/Interpreters/Cache/Metadata.h b/src/Interpreters/Cache/Metadata.h index c9a8e97beee..635594a289a 100644 --- a/src/Interpreters/Cache/Metadata.h +++ b/src/Interpreters/Cache/Metadata.h @@ -44,6 +44,7 @@ struct KeyMetadata : public std::map, const Key & key_, const std::string & key_path_, CleanupQueue & cleanup_queue_, + Poco::Logger * log_, bool created_base_directory_ = false); enum class KeyState From a1c131c0da77e16b9cf63e8a081ac6e01f08ad7f Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 8 Jun 2023 10:29:51 +0000 Subject: [PATCH 0639/1997] Query Cache: Try to fix bad cast from ColumnConst to ColumnVector Resolves #49445 The query cache buffers query result chunks and eventually squashes them before insertion into the cache. Here, squashing failed because not all chunks were of the same type. Looks like chunks of the same underlying type (e.g. UInt8) in a query result can be of mixed const, sparse or low-cardinality type. Fix this by always materializing the data regardless of the compression. Strange thing is that the failing query in the stress test (*) isn't able to reproduce the bug, and I haven't managed to trigger the issue otherwise, so no test case is added. (*) SELECT 1 UNION ALL SELECT 1 INTERSECT SELECT 1 E.g.
here: https://s3.amazonaws.com/clickhouse-test-reports/0/18817517ed6f8849e3d979e10fbb273e0edf0eaa/stress_test__debug_/fatal_messages.txt --- src/Interpreters/Cache/QueryCache.cpp | 10 +++++----- src/Processors/Chunk.cpp | 26 ++++++++++++++++++++++++++ src/Processors/Chunk.h | 3 +++ 3 files changed, 34 insertions(+), 5 deletions(-) diff --git a/src/Interpreters/Cache/QueryCache.cpp b/src/Interpreters/Cache/QueryCache.cpp index 3118f386111..364a740abe0 100644 --- a/src/Interpreters/Cache/QueryCache.cpp +++ b/src/Interpreters/Cache/QueryCache.cpp @@ -263,23 +263,23 @@ void QueryCache::Writer::finalizeWrite() if (auto entry = cache.getWithKey(key); entry.has_value() && !IsStale()(entry->key)) { - /// same check as in ctor because a parallel Writer could have inserted the current key in the meantime + /// Same check as in ctor because a parallel Writer could have inserted the current key in the meantime LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (non-stale entry found), query: {}", key.queryStringFromAst()); return; } if (squash_partial_results) { - // Squash partial result chunks to chunks of size 'max_block_size' each. This costs some performance but provides a more natural - // compression of neither too small nor big blocks. Also, it will look like 'max_block_size' is respected when the query result is - // served later on from the query cache. + /// Squash partial result chunks to chunks of size 'max_block_size' each. This costs some performance but provides a more natural + /// compression of neither too small nor big blocks. Also, it will look like 'max_block_size' is respected when the query result is + /// served later on from the query cache. Chunks squashed_chunks; size_t rows_remaining_in_squashed = 0; /// how many further rows can the last squashed chunk consume until it reaches max_block_size for (auto & chunk : query_result->chunks) { - convertToFullIfSparse(chunk); + convertToFullIfNeeded(chunk); const size_t rows_chunk = chunk.getNumRows(); if (rows_chunk == 0) diff --git a/src/Processors/Chunk.cpp b/src/Processors/Chunk.cpp index 0a4b2413e4c..24181866087 100644 --- a/src/Processors/Chunk.cpp +++ b/src/Processors/Chunk.cpp @@ -2,6 +2,7 @@ #include #include #include +#include namespace DB { @@ -202,6 +203,24 @@ const ChunkMissingValues::RowsBitMask & ChunkMissingValues::getDefaultsBitmask(s return none; } +void convertToFullIfConst(Chunk & chunk) +{ + size_t num_rows = chunk.getNumRows(); + auto columns = chunk.detachColumns(); + for (auto & column : columns) + column = column->convertToFullColumnIfConst(); + chunk.setColumns(std::move(columns), num_rows); +} + +void convertToFullIfLowCardinality(Chunk & chunk) +{ + size_t num_rows = chunk.getNumRows(); + auto columns = chunk.detachColumns(); + for (auto & column : columns) + column = recursiveRemoveLowCardinality(column); + chunk.setColumns(std::move(columns), num_rows); +} + void convertToFullIfSparse(Chunk & chunk) { size_t num_rows = chunk.getNumRows(); @@ -212,4 +231,11 @@ void convertToFullIfSparse(Chunk & chunk) chunk.setColumns(std::move(columns), num_rows); } +void convertToFullIfNeeded(Chunk & chunk) +{ + convertToFullIfSparse(chunk); + convertToFullIfConst(chunk); + convertToFullIfLowCardinality(chunk); +} + } diff --git a/src/Processors/Chunk.h b/src/Processors/Chunk.h index 6f2097b71f1..45610ae3276 100644 --- a/src/Processors/Chunk.h +++ b/src/Processors/Chunk.h @@ -149,6 +149,9 @@ private: /// It's needed, when you have to access to the internals of the column, /// or when you need to perform 
operation with two columns /// and their structure must be equal (e.g. compareAt). +void convertToFullIfConst(Chunk & chunk); +void convertToFullIfLowCardinality(Chunk & chunk); void convertToFullIfSparse(Chunk & chunk); +void convertToFullIfNeeded(Chunk & chunk); } From a9f1393862c628f0cd46bdf7ef3e963b3f516846 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 22 Jun 2023 11:43:11 +0000 Subject: [PATCH 0640/1997] Make the fix more minimal --- src/Interpreters/Cache/QueryCache.cpp | 4 +++- src/Processors/Chunk.cpp | 17 ----------------- src/Processors/Chunk.h | 2 -- 3 files changed, 3 insertions(+), 20 deletions(-) diff --git a/src/Interpreters/Cache/QueryCache.cpp b/src/Interpreters/Cache/QueryCache.cpp index 364a740abe0..74a1c3c9b18 100644 --- a/src/Interpreters/Cache/QueryCache.cpp +++ b/src/Interpreters/Cache/QueryCache.cpp @@ -233,6 +233,7 @@ void QueryCache::Writer::buffer(Chunk && chunk, ChunkType chunk_type) auto & buffered_chunk = (chunk_type == ChunkType::Totals) ? query_result->totals : query_result->extremes; convertToFullIfSparse(chunk); + convertToFullIfConst(chunk); if (!buffered_chunk.has_value()) buffered_chunk = std::move(chunk); @@ -279,7 +280,8 @@ void QueryCache::Writer::finalizeWrite() for (auto & chunk : query_result->chunks) { - convertToFullIfNeeded(chunk); + convertToFullIfSparse(chunk); + convertToFullIfConst(chunk); const size_t rows_chunk = chunk.getNumRows(); if (rows_chunk == 0) diff --git a/src/Processors/Chunk.cpp b/src/Processors/Chunk.cpp index 24181866087..9ec5bb7adde 100644 --- a/src/Processors/Chunk.cpp +++ b/src/Processors/Chunk.cpp @@ -212,30 +212,13 @@ void convertToFullIfConst(Chunk & chunk) chunk.setColumns(std::move(columns), num_rows); } -void convertToFullIfLowCardinality(Chunk & chunk) -{ - size_t num_rows = chunk.getNumRows(); - auto columns = chunk.detachColumns(); - for (auto & column : columns) - column = recursiveRemoveLowCardinality(column); - chunk.setColumns(std::move(columns), num_rows); -} - void convertToFullIfSparse(Chunk & chunk) { size_t num_rows = chunk.getNumRows(); auto columns = chunk.detachColumns(); for (auto & column : columns) column = recursiveRemoveSparse(column); - chunk.setColumns(std::move(columns), num_rows); } -void convertToFullIfNeeded(Chunk & chunk) -{ - convertToFullIfSparse(chunk); - convertToFullIfConst(chunk); - convertToFullIfLowCardinality(chunk); -} - } diff --git a/src/Processors/Chunk.h b/src/Processors/Chunk.h index 45610ae3276..413872d512d 100644 --- a/src/Processors/Chunk.h +++ b/src/Processors/Chunk.h @@ -150,8 +150,6 @@ private: /// or when you need to perform operation with two columns /// and their structure must be equal (e.g. compareAt). void convertToFullIfConst(Chunk & chunk); -void convertToFullIfLowCardinality(Chunk & chunk); void convertToFullIfSparse(Chunk & chunk); -void convertToFullIfNeeded(Chunk & chunk); }
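
Between the two commits, const and sparse columns end up materialized both when buffering totals/extremes and when squashing the main result, so every chunk that enters the query cache holds full columns. A simplified sketch of why squashing needs that (Column here is a stand-in for illustration, not the real IColumn):

#include <vector>

// Simplified stand-in: a "const" column stores one value plus a row count,
// a full column stores every row. Appending rows only works on full columns.
struct Column
{
    std::vector<int> rows;   // full representation
    bool is_const = false;
    int const_value = 0;
    size_t const_rows = 0;

    // Rough equivalent of IColumn::convertToFullColumnIfConst().
    Column materialize() const
    {
        if (!is_const)
            return *this;
        Column full;
        full.rows.assign(const_rows, const_value);
        return full;
    }
};

// Squashing appends one chunk's rows onto the buffered chunk; materializing
// first is what avoids the bad cast from a const to a full column.
void squashInto(Column & buffered, const Column & next)
{
    Column full = next.materialize();
    buffered.rows.insert(buffered.rows.end(), full.rows.begin(), full.rows.end());
}
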
From df2284b8079dc047b621693696d5f32324b40b82 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 22 Jun 2023 11:56:00 +0000 Subject: [PATCH 0641/1997] Update version_date.tsv and changelogs after v23.3.5.9-lts --- docs/changelogs/v23.3.5.9-lts.md | 19 +++++++++++++++++++ utils/list-versions/version_date.tsv | 1 + 2 files changed, 20 insertions(+) create mode 100644 docs/changelogs/v23.3.5.9-lts.md diff --git a/docs/changelogs/v23.3.5.9-lts.md b/docs/changelogs/v23.3.5.9-lts.md new file mode 100644 index 00000000000..df1aab541ec --- /dev/null +++ b/docs/changelogs/v23.3.5.9-lts.md @@ -0,0 +1,19 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.3.5.9-lts (f5fbc2fd2b3) FIXME as compared to v23.3.4.17-lts (2c99b73ff40) + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix broken index analysis when binary operator contains a null constant argument [#50177](https://github.com/ClickHouse/ClickHouse/pull/50177) ([Amos Bird](https://github.com/amosbird)). +* Cleanup moving parts [#50489](https://github.com/ClickHouse/ClickHouse/pull/50489) ([vdimir](https://github.com/vdimir)). +* Do not apply projection if read-in-order was enabled. [#50923](https://github.com/ClickHouse/ClickHouse/pull/50923) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Increase max array size in group bitmap [#50620](https://github.com/ClickHouse/ClickHouse/pull/50620) ([Kruglov Pavel](https://github.com/Avogar)). + diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 5c32ebb2683..dd21cc7e953 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -5,6 +5,7 @@ v23.4.4.16-stable 2023-06-17 v23.4.3.48-stable 2023-06-12 v23.4.2.11-stable 2023-05-02 v23.4.1.1943-stable 2023-04-27 +v23.3.5.9-lts 2023-06-22 v23.3.4.17-lts 2023-06-17 v23.3.3.52-lts 2023-06-12 v23.3.2.37-lts 2023-04-22 From 35c5483ad25d7c34adb45adcff8c16cd73a7ad09 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 22 Jun 2023 14:56:20 +0300 Subject: [PATCH 0642/1997] Update WriteBufferFromOStream.cpp --- src/IO/WriteBufferFromOStream.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/IO/WriteBufferFromOStream.cpp b/src/IO/WriteBufferFromOStream.cpp index 5074c226dd3..e0ec0b770e2 100644 --- a/src/IO/WriteBufferFromOStream.cpp +++ b/src/IO/WriteBufferFromOStream.cpp @@ -1,4 +1,5 @@ #include +#include namespace DB From ee68e85d310a6843ed833fe9c985a540620f9940 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 22 Jun 2023 12:23:51 +0000 Subject: [PATCH 0643/1997] better reporting of broken parts --- .../IMergeTreeDataPartInfoForReader.h | 2 ++ src/Storages/MergeTree/IMergeTreeReader.cpp | 9 +++++++ src/Storages/MergeTree/IMergeTreeReader.h | 2 ++ .../LoadedMergeTreeDataPartInfoForReader.h | 2 ++ src/Storages/MergeTree/MergeTreeData.cpp | 21 --------------- .../MergeTree/MergeTreeReaderCompact.cpp | 24 ++++++++++++++--- .../MergeTree/MergeTreeReaderWide.cpp | 26 ++++++++----------- .../ReplicatedMergeTreePartCheckThread.cpp | 2 +- src/Storages/MergeTree/checkDataPart.cpp | 24 ++++++++++++++++- src/Storages/MergeTree/checkDataPart.h | 1 + 10 files changed, 72 insertions(+), 21 deletions(-) diff --git
a/src/Storages/MergeTree/IMergeTreeDataPartInfoForReader.h b/src/Storages/MergeTree/IMergeTreeDataPartInfoForReader.h index dbed58ba91c..2cc73556f04 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartInfoForReader.h +++ b/src/Storages/MergeTree/IMergeTreeDataPartInfoForReader.h @@ -66,6 +66,8 @@ public: virtual const SerializationInfoByName & getSerializationInfos() const = 0; + virtual String getTableName() const = 0; + virtual void reportBroken() = 0; }; diff --git a/src/Storages/MergeTree/IMergeTreeReader.cpp b/src/Storages/MergeTree/IMergeTreeReader.cpp index be9aedaf4d5..d93e88fdeb2 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.cpp +++ b/src/Storages/MergeTree/IMergeTreeReader.cpp @@ -278,4 +278,13 @@ void IMergeTreeReader::checkNumberOfColumns(size_t num_columns_to_read) const "Expected {}, got {}", requested_columns.size(), num_columns_to_read); } +String IMergeTreeReader::getMessageForDiagnosticOfBrokenPart(size_t from_mark, size_t max_rows_to_read) const +{ + return fmt::format( + "(while reading from part {} in table {} from mark {} with max_rows_to_read = {})", + data_part_info_for_read->getDataPartStorage()->getFullPath(), + data_part_info_for_read->getTableName(), + from_mark, max_rows_to_read); +} + } diff --git a/src/Storages/MergeTree/IMergeTreeReader.h b/src/Storages/MergeTree/IMergeTreeReader.h index a72d83a55e4..fcab35fb4c2 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.h +++ b/src/Storages/MergeTree/IMergeTreeReader.h @@ -74,6 +74,8 @@ protected: void checkNumberOfColumns(size_t num_columns_to_read) const; + String getMessageForDiagnosticOfBrokenPart(size_t from_mark, size_t max_rows_to_read) const; + /// avg_value_size_hints are used to reduce the number of reallocations when creating columns of variable size. ValueSizeMap avg_value_size_hints; /// Stores states for IDataType::deserializeBinaryBulk diff --git a/src/Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h b/src/Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h index aec102f3f7d..f1cb8b34ecf 100644 --- a/src/Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h +++ b/src/Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h @@ -56,6 +56,8 @@ public: SerializationPtr getSerialization(const NameAndTypePair & column) const override { return data_part->getSerialization(column.name); } + String getTableName() const override { return data_part->storage.getStorageID().getNameForLogs(); } + MergeTreeData::DataPartPtr getDataPart() const { return data_part; } private: diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index c3ffb892871..33d65ceb5da 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -179,8 +179,6 @@ namespace ErrorCodes extern const int ZERO_COPY_REPLICATION_ERROR; extern const int NOT_INITIALIZED; extern const int SERIALIZATION_ERROR; - extern const int NETWORK_ERROR; - extern const int SOCKET_TIMEOUT; extern const int TOO_MANY_MUTATIONS; } @@ -1174,25 +1172,6 @@ static void preparePartForRemoval(const MergeTreeMutableDataPartPtr & part) } } -static bool isRetryableException(const Exception & e) -{ - if (isNotEnoughMemoryErrorCode(e.code())) - return true; - - if (e.code() == ErrorCodes::NETWORK_ERROR || e.code() == ErrorCodes::SOCKET_TIMEOUT) - return true; - -#if USE_AWS_S3 - const auto * s3_exception = dynamic_cast<const S3Exception *>(&e); - if (s3_exception && s3_exception->isRetryableError()) - return true; -#endif - - /// In fact, there can be other similar situations.
- /// But it is OK, because there is a safety guard against deleting too many parts. - return false; -} - static constexpr size_t loading_parts_initial_backoff_ms = 100; static constexpr size_t loading_parts_max_backoff_ms = 5000; static constexpr size_t loading_parts_max_tries = 3; diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp index d22684eaa9d..f65e66ff52d 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -10,7 +11,6 @@ namespace ErrorCodes { extern const int CANNOT_READ_ALL_DATA; extern const int ARGUMENT_OUT_OF_BOUND; - extern const int MEMORY_LIMIT_EXCEEDED; } @@ -112,6 +112,12 @@ void MergeTreeReaderCompact::initialize() compressed_data_buffer = non_cached_buffer.get(); } } + catch (const Exception & e) + { + if (!isRetryableException(e)) + data_part_info_for_read->reportBroken(); + throw; + } catch (...) { data_part_info_for_read->reportBroken(); @@ -207,11 +213,11 @@ size_t MergeTreeReaderCompact::readRows( } catch (Exception & e) { - if (e.code() != ErrorCodes::MEMORY_LIMIT_EXCEEDED) + if (!isRetryableException(e)) data_part_info_for_read->reportBroken(); /// Better diagnostics. - e.addMessage("(while reading column " + columns_to_read[pos].name + ")"); + e.addMessage(getMessageForDiagnosticOfBrokenPart(from_mark, max_rows_to_read)); throw; } catch (...) @@ -315,6 +321,7 @@ void MergeTreeReaderCompact::readData( } void MergeTreeReaderCompact::prefetchBeginOfRange(Priority priority) +try { if (!initialized) { @@ -326,6 +333,17 @@ void MergeTreeReaderCompact::prefetchBeginOfRange(Priority priority) seekToMark(all_mark_ranges.front().begin, 0); data_buffer->prefetch(priority); } +catch (const Exception & e) +{ + if (!isRetryableException(e)) + data_part_info_for_read->reportBroken(); + throw; +} +catch (...) +{ + data_part_info_for_read->reportBroken(); + throw; +} void MergeTreeReaderCompact::seekToMark(size_t row_index, size_t column_index) { diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index 38032fae826..140fb6da5df 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -20,11 +21,6 @@ namespace constexpr auto DATA_FILE_EXTENSION = ".bin"; } -namespace ErrorCodes -{ - extern const int MEMORY_LIMIT_EXCEEDED; -} - MergeTreeReaderWide::MergeTreeReaderWide( MergeTreeDataPartInfoForReaderPtr data_part_info_, NamesAndTypesList columns_, @@ -51,6 +47,12 @@ MergeTreeReaderWide::MergeTreeReaderWide( for (size_t i = 0; i < columns_to_read.size(); ++i) addStreams(columns_to_read[i], serializations[i], profile_callback_, clock_type_); } + catch (const Exception & e) + { + if (!isRetryableException(e)) + data_part_info_for_read->reportBroken(); + throw; + } catch (...) { data_part_info_for_read->reportBroken(); @@ -76,9 +78,9 @@ void MergeTreeReaderWide::prefetchBeginOfRange(Priority priority) /// of range only once so there is no such problem. /// 4. continue_reading == false, as we haven't read anything yet. 
} - catch (Exception & e) + catch (const Exception & e) { - if (e.code() != ErrorCodes::MEMORY_LIMIT_EXCEEDED) + if (!isRetryableException(e)) data_part_info_for_read->reportBroken(); throw; } @@ -184,22 +186,16 @@ size_t MergeTreeReaderWide::readRows( } catch (Exception & e) { - if (e.code() != ErrorCodes::MEMORY_LIMIT_EXCEEDED) + if (!isRetryableException(e)) data_part_info_for_read->reportBroken(); /// Better diagnostics. - const auto & part_storage = data_part_info_for_read->getDataPartStorage(); - e.addMessage( - fmt::format( - "(while reading from part {} located on disk {} of type {}, from mark {} with max_rows_to_read = {})", - part_storage->getFullPath(), part_storage->getDiskName(), part_storage->getDiskType(), - toString(from_mark), toString(max_rows_to_read))); + e.addMessage(getMessageForDiagnosticOfBrokenPart(from_mark, max_rows_to_read)); throw; } catch (...) { data_part_info_for_read->reportBroken(); - throw; } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp index 7bb8d9d758e..43256505135 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp @@ -422,7 +422,7 @@ CheckResult ReplicatedMergeTreePartCheckThread::checkPart(const String & part_na /// Don't count the part as broken if there is not enough memory to load it. /// In fact, there can be many similar situations. /// But it is OK, because there is a safety guard against deleting too many parts. - if (isNotEnoughMemoryErrorCode(e.code())) + if (isRetryableException(e)) throw; tryLogCurrentException(log, __PRETTY_FUNCTION__); diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp index 5c090cdd739..1967357a840 100644 --- a/src/Storages/MergeTree/checkDataPart.cpp +++ b/src/Storages/MergeTree/checkDataPart.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include @@ -33,6 +34,8 @@ namespace ErrorCodes extern const int CANNOT_MUNMAP; extern const int CANNOT_MREMAP; extern const int UNEXPECTED_FILE_IN_DATA_PART; + extern const int NETWORK_ERROR; + extern const int SOCKET_TIMEOUT; } @@ -47,6 +50,25 @@ bool isNotEnoughMemoryErrorCode(int code) || code == ErrorCodes::CANNOT_MREMAP; } +bool isRetryableException(const Exception & e) +{ + if (isNotEnoughMemoryErrorCode(e.code())) + return true; + + if (e.code() == ErrorCodes::NETWORK_ERROR || e.code() == ErrorCodes::SOCKET_TIMEOUT) + return true; + +#if USE_AWS_S3 + const auto * s3_exception = dynamic_cast<const S3Exception *>(&e); + if (s3_exception && s3_exception->isRetryableError()) + return true; +#endif + + /// In fact, there can be other similar situations. + /// But it is OK, because there is a safety guard against deleting too many parts.
+ return false; +} + static IMergeTreeDataPart::Checksums checkDataPart( MergeTreeData::DataPartPtr data_part, @@ -302,7 +324,7 @@ IMergeTreeDataPart::Checksums checkDataPart( } catch (const Exception & e) { - if (isNotEnoughMemoryErrorCode(e.code())) + if (isRetryableException(e)) throw; return drop_cache_and_check(); } diff --git a/src/Storages/MergeTree/checkDataPart.h b/src/Storages/MergeTree/checkDataPart.h index d0cf25b87be..20ddecad3ed 100644 --- a/src/Storages/MergeTree/checkDataPart.h +++ b/src/Storages/MergeTree/checkDataPart.h @@ -13,5 +13,6 @@ IMergeTreeDataPart::Checksums checkDataPart( std::function<bool()> is_cancelled = []{ return false; }); bool isNotEnoughMemoryErrorCode(int code); +bool isRetryableException(const Exception & e); }
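
The new isRetryableException() lets callers separate transient failures from real corruption before marking a part broken. A rough sketch of the intended call-site pattern, using stand-in functions (loadPartOnce and isRetryableExceptionLike are illustrative, not ClickHouse identifiers; the backoff constants mirror the loading_parts_* values shown in the MergeTreeData.cpp hunk above):

#include <algorithm>
#include <chrono>
#include <cstddef>
#include <exception>
#include <thread>

bool isRetryableExceptionLike(const std::exception &);  // stand-in: OOM, network, retryable S3
void loadPartOnce();                                    // stand-in: operation that may throw

void loadPartWithRetries(size_t max_tries = 3)
{
    size_t backoff_ms = 100;             // cf. loading_parts_initial_backoff_ms
    const size_t max_backoff_ms = 5000;  // cf. loading_parts_max_backoff_ms
    for (size_t try_no = 0; try_no < max_tries; ++try_no)
    {
        try
        {
            loadPartOnce();
            return;
        }
        catch (const std::exception & e)
        {
            // Only transient failures are retried with backoff; anything else
            // propagates (and, in the real code, may mark the part as broken).
            if (!isRetryableExceptionLike(e) || try_no + 1 == max_tries)
                throw;
            std::this_thread::sleep_for(std::chrono::milliseconds(backoff_ms));
            backoff_ms = std::min(backoff_ms * 2, max_backoff_ms);
        }
    }
}
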
From 94a194d153488c7d1afac6aa212e9f3fea706f7f Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Thu, 22 Jun 2023 08:29:32 -0400 Subject: [PATCH 0644/1997] move redirects to docusaurus config --- docs/redirects.txt | 459 +-------------------------------------------- 1 file changed, 6 insertions(+), 453 deletions(-) diff --git a/docs/redirects.txt b/docs/redirects.txt index cea138f7237..ebeda125e01 100644 --- a/docs/redirects.txt +++ b/docs/redirects.txt @@ -1,453 +1,6 @@ -agg_functions/combinators.md query-language/agg-functions/combinators.md -agg_functions/index.md query-language/agg-functions/index.md -agg_functions/parametric_functions.md query-language/agg-functions/parametric-functions.md -agg_functions/reference.md query-language/agg-functions/reference.md -changelog/2017.md whats-new/changelog/2017.md -changelog/2018.md whats-new/changelog/2018.md -changelog/2019.md whats-new/changelog/2019.md -changelog/index.md whats-new/changelog/index.md -commercial/cloud.md https://clickhouse.com/cloud/ -data_types/array.md sql-reference/data-types/array.md -data_types/boolean.md sql-reference/data-types/boolean.md -data_types/date.md sql-reference/data-types/date.md -data_types/datetime.md sql-reference/data-types/datetime.md -data_types/datetime64.md sql-reference/data-types/datetime64.md -data_types/decimal.md sql-reference/data-types/decimal.md -data_types/domains/ipv4.md sql-reference/data-types/domains/ipv4.md -data_types/domains/ipv6.md sql-reference/data-types/domains/ipv6.md -data_types/domains/overview.md sql-reference/data-types/domains/overview.md -data_types/enum.md sql-reference/data-types/enum.md -data_types/fixedstring.md sql-reference/data-types/fixedstring.md -data_types/float.md sql-reference/data-types/float.md -data_types/index.md sql-reference/data-types/index.md -data_types/int_uint.md sql-reference/data-types/int-uint.md -data_types/nested_data_structures/aggregatefunction.md sql-reference/data-types/aggregatefunction.md -data_types/nested_data_structures/index.md sql-reference/data-types/nested-data-structures/index.md -data_types/nested_data_structures/nested.md sql-reference/data-types/nested-data-structures/nested.md -data_types/nullable.md sql-reference/data-types/nullable.md -data_types/special_data_types/expression.md sql-reference/data-types/special-data-types/expression.md -data_types/special_data_types/index.md sql-reference/data-types/special-data-types/index.md -data_types/special_data_types/interval.md sql-reference/data-types/special-data-types/interval.md -data_types/special_data_types/nothing.md sql-reference/data-types/special-data-types/nothing.md -data_types/special_data_types/set.md sql-reference/data-types/special-data-types/set.md -data_types/string.md sql-reference/data-types/string.md -data_types/tuple.md sql-reference/data-types/tuple.md -data_types/uuid.md sql-reference/data-types/uuid.md -database_engines/index.md engines/database-engines/index.md -database_engines/lazy.md engines/database-engines/lazy.md -database_engines/mysql.md engines/database-engines/mysql.md -development/browse_code.md development/browse-code.md -development/build_cross_arm.md development/build-cross-arm.md -development/build_cross_osx.md development/build-cross-osx.md -development/build_osx.md development/build-osx.md -development/developer_instruction.md development/developer-instruction.md -dicts/external_dicts.md query-language/dicts/external-dicts.md -dicts/external_dicts_dict.md query-language/dicts/external-dicts-dict.md -dicts/external_dicts_dict_layout.md query-language/dicts/external-dicts-dict-layout.md -dicts/external_dicts_dict_lifetime.md query-language/dicts/external-dicts-dict-lifetime.md -dicts/external_dicts_dict_sources.md query-language/dicts/external-dicts-dict-sources.md -dicts/external_dicts_dict_structure.md query-language/dicts/external-dicts-dict-structure.md -dicts/index.md query-language/dicts/index.md -dicts/internal_dicts.md query-language/dicts/internal-dicts.md -engines/database_engines/index.md engines/database-engines/index.md -engines/database_engines/lazy.md engines/database-engines/lazy.md -engines/database_engines/mysql.md engines/database-engines/mysql.md -engines/table-engines/log-family/log-family.md engines/table-engines/log-family/index.md -engines/table_engines/index.md engines/table-engines/index.md -engines/table_engines/integrations/hdfs.md engines/table-engines/integrations/hdfs.md -engines/table_engines/integrations/index.md engines/table-engines/integrations/index.md -engines/table_engines/integrations/jdbc.md engines/table-engines/integrations/jdbc.md -engines/table_engines/integrations/kafka.md engines/table-engines/integrations/kafka.md -engines/table_engines/integrations/mysql.md engines/table-engines/integrations/mysql.md -engines/table_engines/integrations/odbc.md engines/table-engines/integrations/odbc.md -engines/table_engines/log_family/index.md engines/table-engines/log-family/index.md -engines/table_engines/log_family/log.md engines/table-engines/log-family/log.md -engines/table_engines/log_family/log_family.md engines/table-engines/log-family/log-family.md -engines/table_engines/log_family/stripelog.md engines/table-engines/log-family/stripelog.md -engines/table_engines/log_family/tinylog.md engines/table-engines/log-family/tinylog.md -engines/table_engines/mergetree_family/aggregatingmergetree.md engines/table-engines/mergetree-family/aggregatingmergetree.md -engines/table_engines/mergetree_family/collapsingmergetree.md engines/table-engines/mergetree-family/collapsingmergetree.md -engines/table_engines/mergetree_family/custom_partitioning_key.md engines/table-engines/mergetree-family/custom-partitioning-key.md -engines/table_engines/mergetree_family/graphitemergetree.md engines/table-engines/mergetree-family/graphitemergetree.md -engines/table_engines/mergetree_family/index.md engines/table-engines/mergetree-family/index.md -engines/table_engines/mergetree_family/mergetree.md engines/table-engines/mergetree-family/mergetree.md -engines/table_engines/mergetree_family/replacingmergetree.md engines/table-engines/mergetree-family/replacingmergetree.md -engines/table_engines/mergetree_family/replication.md engines/table-engines/mergetree-family/replication.md -engines/table_engines/mergetree_family/summingmergetree.md
engines/table-engines/mergetree-family/summingmergetree.md -engines/table_engines/mergetree_family/versionedcollapsingmergetree.md engines/table-engines/mergetree-family/versionedcollapsingmergetree.md -engines/table_engines/special/buffer.md engines/table-engines/special/buffer.md -engines/table_engines/special/dictionary.md engines/table-engines/special/dictionary.md -engines/table_engines/special/distributed.md engines/table-engines/special/distributed.md -engines/table_engines/special/external_data.md engines/table-engines/special/external-data.md -engines/table_engines/special/file.md engines/table-engines/special/file.md -engines/table_engines/special/generate.md engines/table-engines/special/generate.md -engines/table_engines/special/index.md engines/table-engines/special/index.md -engines/table_engines/special/join.md engines/table-engines/special/join.md -engines/table_engines/special/materializedview.md engines/table-engines/special/materializedview.md -engines/table_engines/special/memory.md engines/table-engines/special/memory.md -engines/table_engines/special/merge.md engines/table-engines/special/merge.md -engines/table_engines/special/null.md engines/table-engines/special/null.md -engines/table_engines/special/set.md engines/table-engines/special/set.md -engines/table_engines/special/url.md engines/table-engines/special/url.md -engines/table_engines/special/view.md engines/table-engines/special/view.md -extended_roadmap.md whats-new/extended-roadmap.md -formats.md interfaces/formats.md -formats/capnproto.md interfaces/formats.md -formats/csv.md interfaces/formats.md -formats/csvwithnames.md interfaces/formats.md -formats/json.md interfaces/formats.md -formats/jsoncompact.md interfaces/formats.md -formats/jsoneachrow.md interfaces/formats.md -formats/native.md interfaces/formats.md -formats/null.md interfaces/formats.md -formats/pretty.md interfaces/formats.md -formats/prettycompact.md interfaces/formats.md -formats/prettycompactmonoblock.md interfaces/formats.md -formats/prettynoescapes.md interfaces/formats.md -formats/prettyspace.md interfaces/formats.md -formats/rowbinary.md interfaces/formats.md -formats/tabseparated.md interfaces/formats.md -formats/tabseparatedraw.md interfaces/formats.md -formats/tabseparatedwithnames.md interfaces/formats.md -formats/tabseparatedwithnamesandtypes.md interfaces/formats.md -formats/tskv.md interfaces/formats.md -formats/values.md interfaces/formats.md -formats/vertical.md interfaces/formats.md -formats/verticalraw.md interfaces/formats.md -formats/xml.md interfaces/formats.md -functions/arithmetic_functions.md query-language/functions/arithmetic-functions.md -functions/array_functions.md query-language/functions/array-functions.md -functions/array_join.md query-language/functions/array-join.md -functions/bit_functions.md query-language/functions/bit-functions.md -functions/bitmap_functions.md query-language/functions/bitmap-functions.md -functions/comparison_functions.md query-language/functions/comparison-functions.md -functions/conditional_functions.md query-language/functions/conditional-functions.md -functions/date_time_functions.md query-language/functions/date-time-functions.md -functions/encoding_functions.md query-language/functions/encoding-functions.md -functions/ext_dict_functions.md query-language/functions/ext-dict-functions.md -functions/hash_functions.md query-language/functions/hash-functions.md -functions/higher_order_functions.md query-language/functions/higher-order-functions.md -functions/in_functions.md 
query-language/functions/in-functions.md -functions/index.md query-language/functions/index.md -functions/ip_address_functions.md query-language/functions/ip-address-functions.md -functions/json_functions.md query-language/functions/json-functions.md -functions/logical_functions.md query-language/functions/logical-functions.md -functions/math_functions.md query-language/functions/math-functions.md -functions/other_functions.md query-language/functions/other-functions.md -functions/random_functions.md query-language/functions/random-functions.md -functions/rounding_functions.md query-language/functions/rounding-functions.md -functions/splitting_merging_functions.md query-language/functions/splitting-merging-functions.md -functions/string_functions.md query-language/functions/string-functions.md -functions/string_replace_functions.md query-language/functions/string-replace-functions.md -functions/string_search_functions.md query-language/functions/string-search-functions.md -functions/type_conversion_functions.md query-language/functions/type-conversion-functions.md -functions/url_functions.md query-language/functions/url-functions.md -functions/ym_dict_functions.md query-language/functions/ym-dict-functions.md -getting_started/example_datasets/amplab_benchmark.md getting-started/example-datasets/amplab-benchmark.md -getting_started/example_datasets/criteo.md getting-started/example-datasets/criteo.md -getting_started/example_datasets/index.md getting-started/example-datasets/index.md -getting_started/example_datasets/metrica.md getting-started/example-datasets/metrica.md -getting_started/example_datasets/nyc_taxi.md getting-started/example-datasets/nyc-taxi.md -getting_started/example_datasets/ontime.md getting-started/example-datasets/ontime.md -getting_started/example_datasets/star_schema.md getting-started/example-datasets/star-schema.md -getting_started/example_datasets/wikistat.md getting-started/example-datasets/wikistat.md -getting_started/index.md getting-started/index.md -getting_started/install.md getting-started/install.md -getting_started/playground.md getting-started/playground.md -getting_started/tutorial.md getting-started/tutorial.md -images/column_oriented.gif images/column-oriented.gif -images/row_oriented.gif images/row-oriented.gif -interfaces/http_interface.md interfaces/http.md -interfaces/third-party/client_libraries.md interfaces/third-party/client-libraries.md -interfaces/third-party_client_libraries.md interfaces/third-party/client-libraries.md -interfaces/third-party_gui.md interfaces/third-party/gui.md -interfaces/third_party/index.md interfaces/third-party/index.md -introduction/index.md -introduction/distinctive_features.md introduction/distinctive-features.md -introduction/features_considered_disadvantages.md introduction/distinctive-features.md -introduction/possible_silly_questions.md faq/general.md -introduction/ya_metrika_task.md introduction/history.md -operations/access_rights.md operations/access-rights.md -operations/configuration_files.md operations/configuration-files.md -operations/optimizing_performance/index.md operations/optimizing-performance/index.md -operations/optimizing_performance/sampling_query_profiler.md operations/optimizing-performance/sampling-query-profiler.md -operations/performance/sampling_query_profiler.md operations/optimizing-performance/sampling-query-profiler.md -operations/performance_test.md operations/performance-test.md -operations/server_configuration_parameters/index.md 
operations/server-configuration-parameters/index.md -operations/server_configuration_parameters/settings.md operations/server-configuration-parameters/settings.md -operations/server_settings/index.md operations/server-configuration-parameters/index.md -operations/server_settings/settings.md operations/server-configuration-parameters/settings.md -operations/settings/constraints_on_settings.md operations/settings/constraints-on-settings.md -operations/settings/permissions_for_queries.md operations/settings/permissions-for-queries.md -operations/settings/query_complexity.md operations/settings/query-complexity.md -operations/settings/settings_profiles.md operations/settings/settings-profiles.md -operations/settings/settings_users.md operations/settings/settings-users.md -operations/system_tables.md operations/system-tables.md -operations/table_engines/aggregatingmergetree.md engines/table-engines/mergetree-family/aggregatingmergetree.md -operations/table_engines/buffer.md engines/table-engines/special/buffer.md -operations/table_engines/collapsingmergetree.md engines/table-engines/mergetree-family/collapsingmergetree.md -operations/table_engines/custom_partitioning_key.md engines/table-engines/mergetree-family/custom-partitioning-key.md -operations/table_engines/dictionary.md engines/table-engines/special/dictionary.md -operations/table_engines/distributed.md engines/table-engines/special/distributed.md -operations/table_engines/external_data.md engines/table-engines/special/external-data.md -operations/table_engines/file.md engines/table-engines/special/file.md -operations/table_engines/generate.md engines/table-engines/special/generate.md -operations/table_engines/graphitemergetree.md engines/table-engines/mergetree-family/graphitemergetree.md -operations/table_engines/hdfs.md engines/table-engines/integrations/hdfs.md -operations/table_engines/index.md engines/table-engines/index.md -operations/table_engines/jdbc.md engines/table-engines/integrations/jdbc.md -operations/table_engines/join.md engines/table-engines/special/join.md -operations/table_engines/kafka.md engines/table-engines/integrations/kafka.md -operations/table_engines/log.md engines/table-engines/log-family/log.md -operations/table_engines/log_family.md engines/table-engines/log-family/log-family.md -operations/table_engines/materializedview.md engines/table-engines/special/materializedview.md -operations/table_engines/memory.md engines/table-engines/special/memory.md -operations/table_engines/merge.md engines/table-engines/special/merge.md -operations/table_engines/mergetree.md engines/table-engines/mergetree-family/mergetree.md -operations/table_engines/mysql.md engines/table-engines/integrations/mysql.md -operations/table_engines/null.md engines/table-engines/special/null.md -operations/table_engines/odbc.md engines/table-engines/integrations/odbc.md -operations/table_engines/replacingmergetree.md engines/table-engines/mergetree-family/replacingmergetree.md -operations/table_engines/replication.md engines/table-engines/mergetree-family/replication.md -operations/table_engines/set.md engines/table-engines/special/set.md -operations/table_engines/stripelog.md engines/table-engines/log-family/stripelog.md -operations/table_engines/summingmergetree.md engines/table-engines/mergetree-family/summingmergetree.md -operations/table_engines/tinylog.md engines/table-engines/log-family/tinylog.md -operations/table_engines/url.md engines/table-engines/special/url.md -operations/table_engines/versionedcollapsingmergetree.md 
engines/table-engines/mergetree-family/versionedcollapsingmergetree.md -operations/table_engines/view.md engines/table-engines/special/view.md -operations/utils/clickhouse-benchmark.md operations/utilities/clickhouse-benchmark.md -operations/utils/clickhouse-copier.md operations/utilities/clickhouse-copier.md -operations/utils/clickhouse-local.md operations/utilities/clickhouse-local.md -operations/utils/index.md operations/utilities/index.md -query_language/agg_functions/combinators.md sql-reference/aggregate-functions/combinators.md -query_language/agg_functions/index.md sql-reference/aggregate-functions/index.md -query_language/agg_functions/parametric_functions.md sql-reference/aggregate-functions/parametric-functions.md -query_language/agg_functions/reference.md sql-reference/aggregate-functions/reference.md -query_language/alter.md sql-reference/statements/alter.md -query_language/create.md sql-reference/statements/create.md -query_language/dicts/external_dicts.md sql-reference/dictionaries/external-dictionaries/external-dicts.md -query_language/dicts/external_dicts_dict.md sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md -query_language/dicts/external_dicts_dict_hierarchical.md sql-reference/dictionaries/external-dictionaries/external-dicts-dict-hierarchical.md -query_language/dicts/external_dicts_dict_layout.md sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md -query_language/dicts/external_dicts_dict_lifetime.md sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md -query_language/dicts/external_dicts_dict_sources.md sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md -query_language/dicts/external_dicts_dict_structure.md sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md -query_language/dicts/index.md sql-reference/dictionaries/index.md -query_language/dicts/internal_dicts.md sql-reference/dictionaries/internal-dicts.md -query_language/functions/arithmetic_functions.md sql-reference/functions/arithmetic-functions.md -query_language/functions/array_functions.md sql-reference/functions/array-functions.md -query_language/functions/array_join.md sql-reference/functions/array-join.md -query_language/functions/bit_functions.md sql-reference/functions/bit-functions.md -query_language/functions/bitmap_functions.md sql-reference/functions/bitmap-functions.md -query_language/functions/comparison_functions.md sql-reference/functions/comparison-functions.md -query_language/functions/conditional_functions.md sql-reference/functions/conditional-functions.md -query_language/functions/date_time_functions.md sql-reference/functions/date-time-functions.md -query_language/functions/encoding_functions.md sql-reference/functions/encoding-functions.md -query_language/functions/ext_dict_functions.md sql-reference/functions/ext-dict-functions.md -query_language/functions/functions_for_nulls.md sql-reference/functions/functions-for-nulls.md -query_language/functions/geo.md sql-reference/functions/geo.md -query_language/functions/hash_functions.md sql-reference/functions/hash-functions.md -query_language/functions/higher_order_functions.md sql-reference/functions/higher-order-functions.md -query_language/functions/in_functions.md sql-reference/functions/in-functions.md -query_language/functions/index.md sql-reference/functions/index.md -query_language/functions/introspection.md sql-reference/functions/introspection.md -query_language/functions/ip_address_functions.md 
sql-reference/functions/ip-address-functions.md -query_language/functions/json_functions.md sql-reference/functions/json-functions.md -query_language/functions/logical_functions.md sql-reference/functions/logical-functions.md -query_language/functions/machine_learning_functions.md sql-reference/functions/machine-learning-functions.md -query_language/functions/math_functions.md sql-reference/functions/math-functions.md -query_language/functions/other_functions.md sql-reference/functions/other-functions.md -query_language/functions/random_functions.md sql-reference/functions/random-functions.md -query_language/functions/rounding_functions.md sql-reference/functions/rounding-functions.md -query_language/functions/splitting_merging_functions.md sql-reference/functions/splitting-merging-functions.md -query_language/functions/string_functions.md sql-reference/functions/string-functions.md -query_language/functions/string_replace_functions.md sql-reference/functions/string-replace-functions.md -query_language/functions/string_search_functions.md sql-reference/functions/string-search-functions.md -query_language/functions/type_conversion_functions.md sql-reference/functions/type-conversion-functions.md -query_language/functions/url_functions.md sql-reference/functions/url-functions.md -query_language/functions/uuid_functions.md sql-reference/functions/uuid-functions.md -query_language/functions/ym_dict_functions.md sql-reference/functions/ym-dict-functions.md -query_language/index.md sql-reference/index.md -query_language/insert_into.md sql-reference/statements/insert-into.md -query_language/misc.md sql-reference/statements/misc.md -query_language/operators.md sql-reference/operators.md -query_language/queries.md query-language.md -query_language/select.md sql-reference/statements/select.md -query_language/show.md sql-reference/statements/show.md -query_language/syntax.md sql-reference/syntax.md -query_language/system.md sql-reference/statements/system.md -query_language/table_functions/file.md sql-reference/table-functions/file.md -query_language/table_functions/generate.md sql-reference/table-functions/generate.md -query_language/table_functions/hdfs.md sql-reference/table-functions/hdfs.md -query_language/table_functions/index.md sql-reference/table-functions/index.md -query_language/table_functions/input.md sql-reference/table-functions/input.md -query_language/table_functions/jdbc.md sql-reference/table-functions/jdbc.md -query_language/table_functions/merge.md sql-reference/table-functions/merge.md -query_language/table_functions/mysql.md sql-reference/table-functions/mysql.md -query_language/table_functions/numbers.md sql-reference/table-functions/numbers.md -query_language/table_functions/odbc.md sql-reference/table-functions/odbc.md -query_language/table_functions/remote.md sql-reference/table-functions/remote.md -query_language/table_functions/url.md sql-reference/table-functions/url.md -roadmap.md whats-new/roadmap.md -security_changelog.md whats-new/security-changelog.md -sql-reference/data-types/domains/overview.md sql-reference/data-types/domains/index.md -sql_reference/aggregate_functions/combinators.md sql-reference/aggregate-functions/combinators.md -sql_reference/aggregate_functions/index.md sql-reference/aggregate-functions/index.md -sql_reference/aggregate_functions/parametric_functions.md sql-reference/aggregate-functions/parametric-functions.md -sql_reference/aggregate_functions/reference.md sql-reference/aggregate-functions/reference.md -sql_reference/ansi.md 
sql-reference/ansi.md -sql_reference/data_types/aggregatefunction.md sql-reference/data-types/aggregatefunction.md -sql_reference/data_types/array.md sql-reference/data-types/array.md -sql_reference/data_types/boolean.md sql-reference/data-types/boolean.md -sql_reference/data_types/date.md sql-reference/data-types/date.md -sql_reference/data_types/datetime.md sql-reference/data-types/datetime.md -sql_reference/data_types/datetime64.md sql-reference/data-types/datetime64.md -sql_reference/data_types/decimal.md sql-reference/data-types/decimal.md -sql_reference/data_types/domains/index.md sql-reference/data-types/domains/index.md -sql_reference/data_types/domains/ipv4.md sql-reference/data-types/domains/ipv4.md -sql_reference/data_types/domains/ipv6.md sql-reference/data-types/domains/ipv6.md -sql_reference/data_types/domains/overview.md sql-reference/data-types/domains/overview.md -sql_reference/data_types/enum.md sql-reference/data-types/enum.md -sql_reference/data_types/fixedstring.md sql-reference/data-types/fixedstring.md -sql_reference/data_types/float.md sql-reference/data-types/float.md -sql_reference/data_types/index.md sql-reference/data-types/index.md -sql_reference/data_types/int_uint.md sql-reference/data-types/int-uint.md -sql_reference/data_types/nested_data_structures/index.md sql-reference/data-types/nested-data-structures/index.md -sql_reference/data_types/nested_data_structures/nested.md sql-reference/data-types/nested-data-structures/nested.md -sql_reference/data_types/nullable.md sql-reference/data-types/nullable.md -sql_reference/data_types/simpleaggregatefunction.md sql-reference/data-types/simpleaggregatefunction.md -sql_reference/data_types/special_data_types/expression.md sql-reference/data-types/special-data-types/expression.md -sql_reference/data_types/special_data_types/index.md sql-reference/data-types/special-data-types/index.md -sql_reference/data_types/special_data_types/interval.md sql-reference/data-types/special-data-types/interval.md -sql_reference/data_types/special_data_types/nothing.md sql-reference/data-types/special-data-types/nothing.md -sql_reference/data_types/special_data_types/set.md sql-reference/data-types/special-data-types/set.md -sql_reference/data_types/string.md sql-reference/data-types/string.md -sql_reference/data_types/tuple.md sql-reference/data-types/tuple.md -sql_reference/data_types/uuid.md sql-reference/data-types/uuid.md -sql_reference/dictionaries/external_dictionaries/external_dicts.md sql-reference/dictionaries/external-dictionaries/external-dicts.md -sql_reference/dictionaries/external_dictionaries/external_dicts_dict.md sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md -sql_reference/dictionaries/external_dictionaries/external_dicts_dict_hierarchical.md sql-reference/dictionaries/external-dictionaries/external-dicts-dict-hierarchical.md -sql_reference/dictionaries/external_dictionaries/external_dicts_dict_layout.md sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md -sql_reference/dictionaries/external_dictionaries/external_dicts_dict_lifetime.md sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md -sql_reference/dictionaries/external_dictionaries/external_dicts_dict_sources.md sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md -sql_reference/dictionaries/external_dictionaries/external_dicts_dict_structure.md sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md 
-sql_reference/dictionaries/external_dictionaries/index.md sql-reference/dictionaries/external-dictionaries/index.md -sql_reference/dictionaries/index.md sql-reference/dictionaries/index.md -sql_reference/dictionaries/internal_dicts.md sql-reference/dictionaries/internal-dicts.md -sql_reference/functions/arithmetic_functions.md sql-reference/functions/arithmetic-functions.md -sql_reference/functions/array_functions.md sql-reference/functions/array-functions.md -sql_reference/functions/array_join.md sql-reference/functions/array-join.md -sql_reference/functions/bit_functions.md sql-reference/functions/bit-functions.md -sql_reference/functions/bitmap_functions.md sql-reference/functions/bitmap-functions.md -sql_reference/functions/comparison_functions.md sql-reference/functions/comparison-functions.md -sql_reference/functions/conditional_functions.md sql-reference/functions/conditional-functions.md -sql_reference/functions/date_time_functions.md sql-reference/functions/date-time-functions.md -sql_reference/functions/encoding_functions.md sql-reference/functions/encoding-functions.md -sql_reference/functions/ext_dict_functions.md sql-reference/functions/ext-dict-functions.md -sql_reference/functions/functions_for_nulls.md sql-reference/functions/functions-for-nulls.md -sql_reference/functions/geo.md sql-reference/functions/geo.md -sql_reference/functions/hash_functions.md sql-reference/functions/hash-functions.md -sql_reference/functions/higher_order_functions.md sql-reference/functions/higher-order-functions.md -sql_reference/functions/in_functions.md sql-reference/functions/in-functions.md -sql_reference/functions/index.md sql-reference/functions/index.md -sql_reference/functions/introspection.md sql-reference/functions/introspection.md -sql_reference/functions/ip_address_functions.md sql-reference/functions/ip-address-functions.md -sql_reference/functions/json_functions.md sql-reference/functions/json-functions.md -sql_reference/functions/logical_functions.md sql-reference/functions/logical-functions.md -sql_reference/functions/machine_learning_functions.md sql-reference/functions/machine-learning-functions.md -sql_reference/functions/math_functions.md sql-reference/functions/math-functions.md -sql_reference/functions/other_functions.md sql-reference/functions/other-functions.md -sql_reference/functions/random_functions.md sql-reference/functions/random-functions.md -sql_reference/functions/rounding_functions.md sql-reference/functions/rounding-functions.md -sql_reference/functions/splitting_merging_functions.md sql-reference/functions/splitting-merging-functions.md -sql_reference/functions/string_functions.md sql-reference/functions/string-functions.md -sql_reference/functions/string_replace_functions.md sql-reference/functions/string-replace-functions.md -sql_reference/functions/string_search_functions.md sql-reference/functions/string-search-functions.md -sql_reference/functions/type_conversion_functions.md sql-reference/functions/type-conversion-functions.md -sql_reference/functions/url_functions.md sql-reference/functions/url-functions.md -sql_reference/functions/uuid_functions.md sql-reference/functions/uuid-functions.md -sql_reference/functions/ym_dict_functions.md sql-reference/functions/ym-dict-functions.md -sql_reference/index.md sql-reference/index.md -sql_reference/operators.md sql-reference/operators.md -sql_reference/statements/alter.md sql-reference/statements/alter.md -sql_reference/statements/create.md sql-reference/statements/create.md -sql_reference/statements/index.md 
sql-reference/statements/index.md -sql_reference/statements/insert_into.md sql-reference/statements/insert-into.md -sql_reference/statements/misc.md sql-reference/statements/misc.md -sql_reference/statements/select.md sql-reference/statements/select.md -sql_reference/statements/show.md sql-reference/statements/show.md -sql_reference/statements/system.md sql-reference/statements/system.md -sql_reference/syntax.md sql-reference/syntax.md -sql_reference/table_functions/file.md sql-reference/table-functions/file.md -sql_reference/table_functions/generate.md sql-reference/table-functions/generate.md -sql_reference/table_functions/hdfs.md sql-reference/table-functions/hdfs.md -sql_reference/table_functions/index.md sql-reference/table-functions/index.md -sql_reference/table_functions/input.md sql-reference/table-functions/input.md -sql_reference/table_functions/jdbc.md sql-reference/table-functions/jdbc.md -sql_reference/table_functions/merge.md sql-reference/table-functions/merge.md -sql_reference/table_functions/mysql.md sql-reference/table-functions/mysql.md -sql_reference/table_functions/numbers.md sql-reference/table-functions/numbers.md -sql_reference/table_functions/odbc.md sql-reference/table-functions/odbc.md -sql_reference/table_functions/remote.md sql-reference/table-functions/remote.md -sql_reference/table_functions/url.md sql-reference/table-functions/url.md -system_tables.md operations/system-tables.md -system_tables/system.asynchronous_metrics.md operations/system-tables.md -system_tables/system.clusters.md operations/system-tables.md -system_tables/system.columns.md operations/system-tables.md -system_tables/system.databases.md operations/system-tables.md -system_tables/system.dictionaries.md operations/system-tables.md -system_tables/system.events.md operations/system-tables.md -system_tables/system.functions.md operations/system-tables.md -system_tables/system.merges.md operations/system-tables.md -system_tables/system.metrics.md operations/system-tables.md -system_tables/system.numbers.md operations/system-tables.md -system_tables/system.numbers_mt.md operations/system-tables.md -system_tables/system.one.md operations/system-tables.md -system_tables/system.parts.md operations/system-tables.md -system_tables/system.processes.md operations/system-tables.md -system_tables/system.replicas.md operations/system-tables.md -system_tables/system.settings.md operations/system-tables.md -system_tables/system.tables.md operations/system-tables.md -system_tables/system.zookeeper.md operations/system-tables.md -table_engines.md operations/table-engines.md -table_engines/aggregatingmergetree.md operations/table-engines/aggregatingmergetree.md -table_engines/buffer.md operations/table-engines/buffer.md -table_engines/collapsingmergetree.md operations/table-engines/collapsingmergetree.md -table_engines/custom_partitioning_key.md operations/table-engines/custom-partitioning-key.md -table_engines/dictionary.md operations/table-engines/dictionary.md -table_engines/distributed.md operations/table-engines/distributed.md -table_engines/external_data.md operations/table-engines/external-data.md -table_engines/file.md operations/table-engines/file.md -table_engines/graphitemergetree.md operations/table-engines/graphitemergetree.md -table_engines/index.md operations/table-engines/index.md -table_engines/join.md operations/table-engines/join.md -table_engines/kafka.md operations/table-engines/kafka.md -table_engines/log.md operations/table-engines/log.md -table_engines/materializedview.md 
operations/table-engines/materializedview.md -table_engines/memory.md operations/table-engines/memory.md -table_engines/merge.md operations/table-engines/merge.md -table_engines/mergetree.md operations/table-engines/mergetree.md -table_engines/mysql.md operations/table-engines/mysql.md -table_engines/null.md operations/table-engines/null.md -table_engines/replacingmergetree.md operations/table-engines/replacingmergetree.md -table_engines/replication.md operations/table-engines/replication.md -table_engines/set.md operations/table-engines/set.md -table_engines/summingmergetree.md operations/table-engines/summingmergetree.md -table_engines/tinylog.md operations/table-engines/tinylog.md -table_engines/view.md operations/table-engines/view.md -table_functions/file.md query-language/table-functions/file.md -table_functions/index.md query-language/table-functions/index.md -table_functions/merge.md query-language/table-functions/merge.md -table_functions/numbers.md query-language/table-functions/numbers.md -table_functions/remote.md query-language/table-functions/remote.md -utils.md operations/utils.md -utils/clickhouse-copier.md operations/utils/clickhouse-copier.md -utils/clickhouse-local.md operations/utils/clickhouse-local.md -whats_new/changelog/2017.md whats-new/changelog/2017.md -whats_new/changelog/2018.md whats-new/changelog/2018.md -whats_new/changelog/2019.md whats-new/changelog/2019.md -whats_new/changelog/index.md whats-new/changelog/index.md -whats_new/index.md whats-new/index.md -whats_new/roadmap.md whats-new/roadmap.md -whats_new/security_changelog.md whats-new/security-changelog.md +The redirects from this file were moved to the Docusaurus configuration file. +If you need to add a redirect, please either open a PR in +https://github.com/clickhouse/clickhouse-docs adding the redirect to +https://github.com/ClickHouse/clickhouse-docs/blob/main/docusaurus.config.js +or open an issue in the same repo and provide the old URL and new URL to have +the redirect added. From d0bb9850614e9be96c489ecf3b8c3ad788aab5d3 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 22 Jun 2023 14:33:25 +0200 Subject: [PATCH 0645/1997] fix other classes based on SinkToStorage --- src/Storages/HDFS/StorageHDFS.cpp | 21 +++++++++++++++++---- src/Storages/StorageAzureBlob.cpp | 21 +++++++++++++++++---- src/Storages/StorageFile.cpp | 21 +++++++++++++++++---- src/Storages/StorageURL.cpp | 21 +++++++++++++++++---- src/Storages/StorageURL.h | 1 + 5 files changed, 69 insertions(+), 16 deletions(-) diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 7a9d996e3c2..a41c65cdb2e 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -512,10 +512,18 @@ public: cancelled = true; } - void onException(std::exception_ptr /* exception */) override + void onException(std::exception_ptr exception) override { std::lock_guard lock(cancel_mutex); - finalize(); + try + { + std::rethrow_exception(exception); + } + catch (...) + { + /// An exception context is needed to properly delete write buffers without finalization + release(); + } } void onFinish() override @@ -540,12 +548,17 @@ private: catch (...) { /// Stop ParallelFormattingOutputFormat correctly.
- writer.reset(); - write_buf->finalize(); + release(); throw; } } + void release() + { + writer.reset(); + write_buf->finalize(); + } + std::unique_ptr write_buf; OutputFormatPtr writer; std::mutex cancel_mutex; diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index 65af2ed6d6e..133dbb6740f 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -489,10 +489,18 @@ public: cancelled = true; } - void onException(std::exception_ptr /* exception */) override + void onException(std::exception_ptr exception) override { std::lock_guard lock(cancel_mutex); - finalize(); + try + { + std::rethrow_exception(exception); + } + catch (...) + { + /// An exception context is needed to properly delete write buffers without finalization + release(); + } } void onFinish() override @@ -516,12 +524,17 @@ private: catch (...) { /// Stop ParallelFormattingOutputFormat correctly. - writer.reset(); - write_buf->finalize(); + release(); throw; } } + void release() + { + writer.reset(); + write_buf->finalize(); + } + Block sample_block; std::optional format_settings; std::unique_ptr write_buf; diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 50aa47ffca9..ff67272e542 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -1031,10 +1031,18 @@ public: cancelled = true; } - void onException(std::exception_ptr /* exception */) override + void onException(std::exception_ptr exception) override { std::lock_guard cancel_lock(cancel_mutex); - finalize(); + try + { + std::rethrow_exception(exception); + } + catch (...) + { + /// An exception context is needed to properly delete write buffers without finalization + release(); + } } void onFinish() override @@ -1058,12 +1066,17 @@ private: catch (...) { /// Stop ParallelFormattingOutputFormat correctly. - writer.reset(); - write_buf->finalize(); + release(); throw; } } + void release() + { + writer.reset(); + write_buf->finalize(); + } + StorageMetadataPtr metadata_snapshot; String table_name_for_log; diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 651b50518d2..20071afb793 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -480,10 +480,18 @@ void StorageURLSink::onCancel() cancelled = true; } -void StorageURLSink::onException(std::exception_ptr /* exception */) +void StorageURLSink::onException(std::exception_ptr exception) { std::lock_guard lock(cancel_mutex); - finalize(); + try + { + std::rethrow_exception(exception); + } + catch (...) + { + /// An exception context is needed to properly delete write buffers without finalization + release(); + } } void StorageURLSink::onFinish() @@ -506,12 +514,17 @@ void StorageURLSink::finalize() catch (...) { /// Stop ParallelFormattingOutputFormat correctly.
- writer.reset(); - write_buf->finalize(); + release(); throw; } } +void StorageURLSink::release() +{ + writer.reset(); + write_buf->finalize(); +} + class PartitionedStorageURLSink : public PartitionedSink { public: diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index c4a5ce1aa7b..345f813dd7c 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -241,6 +241,7 @@ public: private: void finalize(); + void release(); std::unique_ptr write_buf; OutputFormatPtr writer; std::mutex cancel_mutex; From 3c7d749d2ce80373fe7839a08fc3174e999b6a69 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Thu, 22 Jun 2023 14:47:04 +0200 Subject: [PATCH 0646/1997] Added check of StorageView before checking parameterized view to fix seg fault when materialized view is used with brackets --- src/Interpreters/Context.cpp | 2 +- tests/queries/0_stateless/02428_parameterized_view.reference | 1 + tests/queries/0_stateless/02428_parameterized_view.sh | 3 +++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 995e78d8f0b..106264320b2 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1476,7 +1476,7 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const StoragePtr table = DatabaseCatalog::instance().tryGetTable({database_name, table_name}, getQueryContext()); if (table) { - if (table.get()->isView() && table->as()->isParameterizedView()) + if (table.get()->isView() && table->as() && table->as()->isParameterizedView()) { function->prefer_subquery_to_function_formatting = true; return table; diff --git a/tests/queries/0_stateless/02428_parameterized_view.reference b/tests/queries/0_stateless/02428_parameterized_view.reference index b73c52c478f..422fdaa4983 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.reference +++ b/tests/queries/0_stateless/02428_parameterized_view.reference @@ -40,3 +40,4 @@ ERROR 10 10 1 +ERROR diff --git a/tests/queries/0_stateless/02428_parameterized_view.sh b/tests/queries/0_stateless/02428_parameterized_view.sh index 3abfbfc22fc..ad9c672f4c5 100755 --- a/tests/queries/0_stateless/02428_parameterized_view.sh +++ b/tests/queries/0_stateless/02428_parameterized_view.sh @@ -112,7 +112,10 @@ $CLICKHOUSE_CLIENT -q "CREATE VIEW 02428_trace_view AS WITH {trace_id:String} A FROM 02428_otel_traces" $CLICKHOUSE_CLIENT -q "SELECT * FROM 02428_trace_view(trace_id='1')" +$CLICKHOUSE_CLIENT -q "CREATE MATERIALIZED VIEW test_02428_mv1 ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/materialized_view', 'r1') ORDER BY Name AS SELECT * FROM test_02428_Catalog;" +$CLICKHOUSE_CLIENT -q "SELECT * FROM test_02428_mv1(test)" 2>&1 | grep -Fq "UNKNOWN_FUNCTION" && echo 'ERROR' || echo 'OK' +$CLICKHOUSE_CLIENT -q "DROP VIEW test_02428_mv1" $CLICKHOUSE_CLIENT -q "DROP VIEW test_02428_pv1" $CLICKHOUSE_CLIENT -q "DROP VIEW test_02428_pv2" $CLICKHOUSE_CLIENT -q "DROP VIEW test_02428_pv3" From 26ba3d8f13a078d7b41be00ca9c687e59f04774a Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Thu, 22 Jun 2023 10:01:22 -0300 Subject: [PATCH 0647/1997] test for #36894 --- ...02302_lc_nullable_string_insert_as_number.reference | 1 + .../02302_lc_nullable_string_insert_as_number.sql | 10 ++++++++++ 2 files changed, 11 insertions(+) create mode 100644 tests/queries/0_stateless/02302_lc_nullable_string_insert_as_number.reference create mode 100644 tests/queries/0_stateless/02302_lc_nullable_string_insert_as_number.sql diff 
--git a/tests/queries/0_stateless/02302_lc_nullable_string_insert_as_number.reference b/tests/queries/0_stateless/02302_lc_nullable_string_insert_as_number.reference new file mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/tests/queries/0_stateless/02302_lc_nullable_string_insert_as_number.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/02302_lc_nullable_string_insert_as_number.sql b/tests/queries/0_stateless/02302_lc_nullable_string_insert_as_number.sql new file mode 100644 index 00000000000..89b31e2b2b0 --- /dev/null +++ b/tests/queries/0_stateless/02302_lc_nullable_string_insert_as_number.sql @@ -0,0 +1,10 @@ +DROP TABLE IF EXISTS lc_nullable_string; + +CREATE TABLE lc_nullable_string(`c1` LowCardinality(Nullable(String)) DEFAULT CAST(NULL, 'LowCardinality(Nullable(String))')) +ENGINE = Memory; + +INSERT INTO lc_nullable_string (c1) FORMAT Values (0); + +SELECT * FROM lc_nullable_string; + +DROP TABLE lc_nullable_string; From b1d37fbc6bbeac4613c4e58555a56b1092f4018b Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Thu, 22 Jun 2023 10:23:32 -0300 Subject: [PATCH 0648/1997] test for #36894 --- .../02302_lc_nullable_string_insert_as_number.reference | 1 + .../0_stateless/02302_lc_nullable_string_insert_as_number.sql | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02302_lc_nullable_string_insert_as_number.reference b/tests/queries/0_stateless/02302_lc_nullable_string_insert_as_number.reference index 573541ac970..0d66ea1aee9 100644 --- a/tests/queries/0_stateless/02302_lc_nullable_string_insert_as_number.reference +++ b/tests/queries/0_stateless/02302_lc_nullable_string_insert_as_number.reference @@ -1 +1,2 @@ 0 +1 diff --git a/tests/queries/0_stateless/02302_lc_nullable_string_insert_as_number.sql b/tests/queries/0_stateless/02302_lc_nullable_string_insert_as_number.sql index 89b31e2b2b0..9859c1559d5 100644 --- a/tests/queries/0_stateless/02302_lc_nullable_string_insert_as_number.sql +++ b/tests/queries/0_stateless/02302_lc_nullable_string_insert_as_number.sql @@ -4,7 +4,8 @@ CREATE TABLE lc_nullable_string(`c1` LowCardinality(Nullable(String)) DEFAULT CA ENGINE = Memory; INSERT INTO lc_nullable_string (c1) FORMAT Values (0); +INSERT INTO lc_nullable_string (c1) Values (1); -SELECT * FROM lc_nullable_string; +SELECT * FROM lc_nullable_string ORDER BY c1; DROP TABLE lc_nullable_string; From 22e49748b5ff129422fca4ff2da92b26e38d3906 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 22 Jun 2023 14:23:04 +0000 Subject: [PATCH 0649/1997] Cleanup. 
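
This cleanup consolidates prepared-set handling around a small FutureSet hierarchy. As a rough orientation, the following self-contained C++ sketch shows the approximate shape of the interface that the implementations in this diff (FutureSetFromStorage, FutureSetFromTuple, FutureSetFromSubquery) conform to. The stand-in types are invented so the sketch compiles on its own; the real declarations live in src/Interpreters/PreparedSets.h, which this patch only shows in part, so treat the exact signatures as an approximation rather than the definitive API.

#include <memory>
#include <vector>

// Stand-ins for ClickHouse types, only so this sketch is self-contained.
struct Set {};
using SetPtr = std::shared_ptr<Set>;
struct IDataType {};
using DataTypes = std::vector<std::shared_ptr<const IDataType>>;
struct Context {};
using ContextPtr = std::shared_ptr<const Context>;

/// Approximate interface after the cleanup.
class FutureSet
{
public:
    virtual ~FutureSet() = default;

    /// The built Set, or nullptr if it is not ready yet.
    virtual SetPtr get() const = 0;

    /// Element types of the set, used to match the left and right sides of IN.
    virtual const DataTypes & getTypes() const = 0;

    /// Build the set with explicit, ordered elements so that primary-key
    /// analysis can use it; may return nullptr when that is not possible.
    virtual SetPtr buildOrderedSetInplace(const ContextPtr & context) = 0;
};

int main() {} // nothing to execute; the sketch only needs to compile

Of the three concrete kinds visible below, FutureSetFromStorage wraps a set that already exists in a StorageSet, FutureSetFromTuple is built eagerly from a literal tuple or array, and FutureSetFromSubquery is built lazily from a QueryPlan or a query tree.
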
--- src/Analyzer/Passes/QueryAnalysisPass.cpp | 4 +- src/Analyzer/SetUtils.cpp | 13 +- src/Analyzer/SetUtils.h | 10 +- src/Functions/in.cpp | 9 +- src/Interpreters/ActionsVisitor.cpp | 37 +- .../ClusterProxy/SelectStreamFactory.cpp | 8 +- .../ClusterProxy/SelectStreamFactory.h | 4 +- .../ClusterProxy/executeQuery.cpp | 1 - src/Interpreters/ExpressionAnalyzer.cpp | 119 ----- src/Interpreters/ExpressionAnalyzer.h | 8 - src/Interpreters/GlobalSubqueriesVisitor.h | 48 +- src/Interpreters/InterpreterSelectQuery.cpp | 3 - src/Interpreters/PreparedSets.cpp | 493 ++++++------------ src/Interpreters/PreparedSets.h | 65 ++- src/Interpreters/Set.cpp | 13 +- src/Interpreters/Set.h | 22 +- src/Interpreters/SetKeys.h | 18 + src/Planner/CollectSets.cpp | 18 +- src/Planner/CollectSets.h | 2 +- src/Planner/Planner.cpp | 9 +- src/Planner/PlannerActionsVisitor.cpp | 6 +- src/Planner/PlannerContext.cpp | 46 -- src/Planner/PlannerContext.h | 72 +-- src/Planner/PlannerJoinTree.cpp | 21 +- src/Planner/Utils.cpp | 3 +- src/Planner/Utils.h | 1 - .../CreateSetAndFilterOnTheFlyStep.cpp | 2 +- src/Processors/QueryPlan/CreatingSetsStep.cpp | 18 +- src/Processors/QueryPlan/CreatingSetsStep.h | 10 +- .../QueryPlan/DistributedCreateLocalPlan.cpp | 3 +- .../QueryPlan/DistributedCreateLocalPlan.h | 1 - .../QueryPlan/ReadFromMergeTree.cpp | 27 - src/Processors/QueryPlan/ReadFromRemote.cpp | 6 +- src/Processors/QueryPlan/ReadFromRemote.h | 2 - src/Storages/KVStorageUtils.cpp | 1 - src/Storages/MergeTree/KeyCondition.cpp | 33 +- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 4 +- .../MergeTreeIndexConditionBloomFilter.cpp | 44 +- .../MergeTree/MergeTreeIndexFullText.cpp | 9 +- .../MergeTree/MergeTreeIndexInverted.cpp | 14 +- src/Storages/MergeTree/PartitionPruner.cpp | 9 - src/Storages/StorageDistributed.cpp | 5 +- src/Storages/StorageMergeTree.cpp | 3 +- src/Storages/StorageReplicatedMergeTree.cpp | 3 +- src/Storages/StorageSet.cpp | 4 +- .../System/StorageSystemZooKeeper.cpp | 10 +- src/Storages/VirtualColumnUtils.cpp | 20 - src/Storages/buildQueryTreeForShard.cpp | 17 +- 48 files changed, 343 insertions(+), 955 deletions(-) create mode 100644 src/Interpreters/SetKeys.h diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 98f40dc4c15..163092f1b7f 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -5211,14 +5211,14 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi const auto & settings = scope.context->getSettingsRef(); - auto result_block = makeSetForConstantValue(first_argument_constant_type, + auto result_block = getSetElementsForConstantValue(first_argument_constant_type, second_argument_constant_literal, second_argument_constant_type, settings.transform_null_in); SizeLimits size_limits_for_set = {settings.max_rows_in_set, settings.max_bytes_in_set, settings.set_overflow_mode}; - auto set = std::make_shared(size_limits_for_set, false /*fill_set_elements*/, 0, settings.transform_null_in); + auto set = std::make_shared(size_limits_for_set, 0, settings.transform_null_in); set->setHeader(result_block.cloneEmpty().getColumnsWithTypeAndName()); set->insertFromBlock(result_block.getColumnsWithTypeAndName()); diff --git a/src/Analyzer/SetUtils.cpp b/src/Analyzer/SetUtils.cpp index 2fb05449714..15eec16e899 100644 --- a/src/Analyzer/SetUtils.cpp +++ b/src/Analyzer/SetUtils.cpp @@ -118,7 +118,7 @@ Block createBlockFromCollection(const Collection & collection, const DataTypes & } -Block 
makeSetForConstantValue(const DataTypePtr & expression_type, const Field & value, const DataTypePtr & value_type, bool transform_null_in) +Block getSetElementsForConstantValue(const DataTypePtr & expression_type, const Field & value, const DataTypePtr & value_type, bool transform_null_in) { DataTypes set_element_types = {expression_type}; const auto * lhs_tuple_type = typeid_cast(expression_type.get()); @@ -135,9 +135,6 @@ Block makeSetForConstantValue(const DataTypePtr & expression_type, const Field & size_t lhs_type_depth = getCompoundTypeDepth(*expression_type); size_t rhs_type_depth = getCompoundTypeDepth(*value_type); - // SizeLimits size_limits_for_set = {settings.max_rows_in_set, settings.max_bytes_in_set, settings.set_overflow_mode}; - // bool transform_null_in = settings.transform_null_in; - Block result_block; if (lhs_type_depth == rhs_type_depth) @@ -171,14 +168,6 @@ Block makeSetForConstantValue(const DataTypePtr & expression_type, const Field & } return result_block; - - // auto set = std::make_shared(size_limits_for_set, true /*fill_set_elements*/, tranform_null_in); - - // set->setHeader(result_block.cloneEmpty().getColumnsWithTypeAndName()); - // set->insertFromBlock(result_block.getColumnsWithTypeAndName()); - // set->finishInsert(); - - // return set; } } diff --git a/src/Analyzer/SetUtils.h b/src/Analyzer/SetUtils.h index fdeaa4a3c48..c35b45dce59 100644 --- a/src/Analyzer/SetUtils.h +++ b/src/Analyzer/SetUtils.h @@ -12,18 +12,12 @@ namespace DB class Set; using SetPtr = std::shared_ptr; -/** Make set for constant part of IN subquery. +/** Get set elements for constant part of IN subquery. * Throws exception if parameters are not valid for IN function. * * Example: SELECT id FROM test_table WHERE id IN (1, 2, 3, 4); * Example: SELECT id FROM test_table WHERE id IN ((1, 2), (3, 4)); - * - * @param expression_type - type of first argument of function IN. - * @param value - constant value of second argument of function IN. - * @param value_type - type of second argument of function IN. - * - * @return SetPtr for constant value. */ -Block makeSetForConstantValue(const DataTypePtr & expression_type, const Field & value, const DataTypePtr & value_type, bool transform_null_in); +Block getSetElementsForConstantValue(const DataTypePtr & expression_type, const Field & value, const DataTypePtr & value_type, bool transform_null_in); } diff --git a/src/Functions/in.cpp b/src/Functions/in.cpp index c19d1e72003..9045ba677f2 100644 --- a/src/Functions/in.cpp +++ b/src/Functions/in.cpp @@ -123,9 +123,12 @@ public: } auto future_set = column_set->getData(); - auto set = future_set ? 
future_set->get() : nullptr; - if (!future_set || !set) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Not-ready Set passed as the second argument for function '{}'", getName()); + if (!future_set) + throw Exception(ErrorCodes::LOGICAL_ERROR, "No Set is passed as the second argument for function '{}'", getName()); + + auto set = future_set->get(); + if (!set) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Not-ready Set is passed as the second argument for function '{}'", getName()); auto set_types = set->getDataTypes(); diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 7c3af4ce691..bf78868463a 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -445,19 +445,16 @@ FutureSetPtr makeExplicitSet( if (left_tuple_type && left_tuple_type->getElements().size() != 1) set_element_types = left_tuple_type->getElements(); - for (auto & element_type : set_element_types) - if (const auto * low_cardinality_type = typeid_cast(element_type.get())) - element_type = low_cardinality_type->getDictionaryType(); - - // if (!set_element_types.empty()) - // std::cerr << "========== " << set_element_types[0]->getName() << std::endl; - auto set_element_keys = Set::getElementTypes(set_element_types, context->getSettingsRef().transform_null_in); auto set_key = right_arg->getTreeHash(); if (auto set = prepared_sets.findTuple(set_key, set_element_keys)) return set; /// Already prepared. + for (auto & element_type : set_element_types) + if (const auto * low_cardinality_type = typeid_cast(element_type.get())) + element_type = low_cardinality_type->getDictionaryType(); + Block block; const auto & right_arg_func = std::dynamic_pointer_cast(right_arg); if (right_arg_func && (right_arg_func->name == "tuple" || right_arg_func->name == "array")) @@ -1400,11 +1397,21 @@ FutureSetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool { if (no_subqueries) return {}; -//std::cerr << queryToString(right_in_operand) << std::endl; PreparedSets::Hash set_key; if (data.getContext()->getSettingsRef().allow_experimental_analyzer && !identifier) { + /// Here we can only come from the mutation interpreter; normal SELECTs with the analyzer use another interpreter. + /// This is a hacky way to allow reusing the cache for prepared sets. + /// + /// Mutation is executed in two stages: + /// * first, the query 'SELECT count() FROM table WHERE ...' is executed to get the set of affected parts (using the analyzer) + /// * second, every part is mutated separately, where the plan is built "manually", using this code as well + /// To share the Set between the first and second stages, we should use the same hash. + /// The new analyzer uses a hash from the query tree, so here we also build a query tree. + /// + /// Note: this code can be safely removed, but then the test 02581_share_big_sets will be too slow (and fail by timeout). + /// Note: we should use the new analyzer for mutations and remove this hack.
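            /// A hedged editorial illustration, not part of this patch (the table and
            /// column names below are invented). For a mutation such as
            ///     ALTER TABLE t DELETE WHERE x IN (SELECT x FROM src)
            /// the first stage runs 'SELECT count() FROM t WHERE x IN (SELECT x FROM src)'
            /// to find the affected parts and builds the set for '(SELECT x FROM src)'
            /// under some key; the second stage re-plans every part and must derive the
            /// same key, otherwise the set would be rebuilt for each part instead of
            /// being taken from the prepared-sets cache.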
InterpreterSelectQueryAnalyzer interpreter(right_in_operand, data.getContext(), SelectQueryOptions().analyze(true).subquery()); const auto & query_tree = interpreter.getQueryTree(); if (auto * query_node = query_tree->as()) @@ -1414,12 +1421,6 @@ FutureSetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool else set_key = right_in_operand->getTreeHash(); - // std::cerr << set_key.toString() << std::endl; - // std::cerr << data.prepared_sets->getSets().size() << std::endl; - // std::cerr << reinterpret_cast(data.prepared_sets.get()) << std::endl; - // for (const auto & [k, v] : data.prepared_sets->getSets()) - // std::cerr << "... " << k.toString(); - if (auto set = data.prepared_sets->findSubquery(set_key)) return set; @@ -1440,12 +1441,12 @@ FutureSetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool if (!data.getContext()->isGlobalContext()) { - // std::cerr << ".... checking for " << identifier->getColumnName() << std::endl; + /// If we are reading from storage, it can be an external table which is used for GLOBAL IN. + /// Here, we take FutureSet which is used to build external table. + /// It will be used if set is useful for primary key. During PK analysis + /// temporary table is not filled yet, so we need to fill it first. if (auto tmp_table = data.getContext()->findExternalTable(identifier->getColumnName())) - { external_table_set = tmp_table->future_set; - // std::cerr << "Found " << reinterpret_cast(tmp_table.get()) << " " << reinterpret_cast(external_table_set.get()) << std::endl; - } } } diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp index 0f8a725e144..0cf3f360994 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp @@ -92,13 +92,11 @@ SelectStreamFactory::SelectStreamFactory( const Block & header_, const ColumnsDescriptionByShardNum & objects_by_shard_, const StorageSnapshotPtr & storage_snapshot_, - QueryProcessingStage::Enum processed_stage_, - PreparedSetsPtr prepared_sets_) + QueryProcessingStage::Enum processed_stage_) : header(header_), objects_by_shard(objects_by_shard_), storage_snapshot(storage_snapshot_), - processed_stage(processed_stage_), - prepared_sets(std::move(prepared_sets_)) + processed_stage(processed_stage_) { } @@ -119,7 +117,7 @@ void SelectStreamFactory::createForShard( auto emplace_local_stream = [&]() { local_plans.emplace_back(createLocalPlan( - query_ast, header, context, processed_stage, prepared_sets, shard_info.shard_num, shard_count, /*replica_num=*/0, /*replica_count=*/0, /*coordinator=*/nullptr)); + query_ast, header, context, processed_stage, shard_info.shard_num, shard_count, /*replica_num=*/0, /*replica_count=*/0, /*coordinator=*/nullptr)); }; auto emplace_remote_stream = [&](bool lazy = false, time_t local_delay = 0) diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.h b/src/Interpreters/ClusterProxy/SelectStreamFactory.h index b19012ddba6..030c0b77dd5 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.h +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.h @@ -69,8 +69,7 @@ public: const Block & header_, const ColumnsDescriptionByShardNum & objects_by_shard_, const StorageSnapshotPtr & storage_snapshot_, - QueryProcessingStage::Enum processed_stage_, - PreparedSetsPtr prepared_sets_); + QueryProcessingStage::Enum processed_stage_); void createForShard( const Cluster::ShardInfo & shard_info, @@ -95,7 +94,6 @@ public: const 
ColumnsDescriptionByShardNum objects_by_shard; const StorageSnapshotPtr storage_snapshot; QueryProcessingStage::Enum processed_stage; - PreparedSetsPtr prepared_sets; }; } diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index a9cf3d55392..e2f1dfe8ba7 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -226,7 +226,6 @@ void executeQuery( std::move(remote_shards), header, processed_stage, - stream_factory.prepared_sets, main_table, table_func_ptr, new_context, diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index b05563cf29f..f27d23e8e94 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -450,77 +450,6 @@ void ExpressionAnalyzer::initGlobalSubqueriesAndExternalTables(bool do_global, b } -// void ExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name, const SelectQueryOptions & query_options) -// { -// if (!prepared_sets) -// return; - -// auto set_key = PreparedSetKey::forSubquery(*subquery_or_table_name); - -// if (prepared_sets->getFuture(set_key).isValid()) -// return; /// Already prepared. - -// if (auto set_ptr_from_storage_set = isPlainStorageSetInSubquery(subquery_or_table_name)) -// { -// prepared_sets->set(set_key, set_ptr_from_storage_set); -// return; -// } - -// auto build_set = [&] () -> SetPtr -// { -// LOG_TRACE(getLogger(), "Building set, key: {}", set_key.toString()); - -// auto interpreter_subquery = interpretSubquery(subquery_or_table_name, getContext(), {}, query_options); -// auto io = interpreter_subquery->execute(); -// PullingAsyncPipelineExecutor executor(io.pipeline); - -// SetPtr set = std::make_shared(settings.size_limits_for_set_used_with_index, true, getContext()->getSettingsRef().transform_null_in); -// set->setHeader(executor.getHeader().getColumnsWithTypeAndName()); - -// Block block; -// while (executor.pull(block)) -// { -// if (block.rows() == 0) -// continue; - -// /// If the limits have been exceeded, give up and let the default subquery processing actions take place. -// if (!set->insertFromBlock(block.getColumnsWithTypeAndName())) -// return nullptr; -// } - -// set->finishInsert(); - -// return set; -// }; - -// SetPtr set; - -// auto set_cache = getContext()->getPreparedSetsCache(); -// if (set_cache) -// { -// auto from_cache = set_cache->findOrPromiseToBuild(set_key.toString()); -// if (from_cache.index() == 0) -// { -// set = build_set(); -// std::get<0>(from_cache).set_value(set); -// } -// else -// { -// LOG_TRACE(getLogger(), "Waiting for set, key: {}", set_key.toString()); -// set = std::get<1>(from_cache).get(); -// } -// } -// else -// { -// set = build_set(); -// } - -// if (!set) -// return; - -// prepared_sets->set(set_key, std::move(set)); -// } - SetPtr ExpressionAnalyzer::isPlainStorageSetInSubquery(const ASTPtr & subquery_or_table_name) { const auto * table = subquery_or_table_name->as(); @@ -534,54 +463,6 @@ SetPtr ExpressionAnalyzer::isPlainStorageSetInSubquery(const ASTPtr & subquery_o return storage_set->getSet(); } - -/// Performance optimization for IN() if storage supports it. -// void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node) -// { -// if (!node || !storage() || !storage()->supportsIndexForIn()) -// return; - -// for (auto & child : node->children) -// { -// /// Don't descend into subqueries. 
-// if (child->as()) -// continue; - -// /// Don't descend into lambda functions -// const auto * func = child->as(); -// if (func && func->name == "lambda") -// continue; - -// makeSetsForIndex(child); -// } - -// const auto * func = node->as(); -// if (func && functionIsInOrGlobalInOperator(func->name)) -// { -// const IAST & args = *func->arguments; -// const ASTPtr & left_in_operand = args.children.at(0); - -// if (storage()->mayBenefitFromIndexForIn(left_in_operand, getContext(), metadata_snapshot)) -// { -// const ASTPtr & arg = args.children.at(1); -// if (arg->as() || arg->as()) -// { -// if (settings.use_index_for_in_with_subqueries) -// tryMakeSetForIndexFromSubquery(arg, query_options); -// } -// else -// { -// auto temp_actions = std::make_shared(columns_after_join); -// getRootActions(left_in_operand, true, temp_actions); - -// if (prepared_sets && temp_actions->tryFindInOutputs(left_in_operand->getColumnName())) -// makeExplicitSet(func, *temp_actions, true, getContext(), settings.size_limits_for_set, *prepared_sets); -// } -// } -// } -// } - - void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts) { LogAST log; diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index e4a9e96c517..271c3943afc 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -141,11 +141,6 @@ public: void makeWindowDescriptionFromAST(const Context & context, const WindowDescriptions & existing_descriptions, WindowDescription & desc, const IAST * ast); void makeWindowDescriptions(ActionsDAGPtr actions); - /** Create Set from a subquery or a table expression in the query. The created set is suitable for using the index. - * The set will not be created if its size hits the limit. - */ - // void tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name, const SelectQueryOptions & query_options = {}); - /** Checks if subquery is not a plain StorageSet. * Because while making set we will read data from StorageSet which is not allowed. * Returns valid SetPtr from StorageSet if the latter is used after IN or nullptr otherwise. @@ -363,9 +358,6 @@ public: /// Deletes all columns except mentioned by SELECT, arranges the remaining columns and renames them to aliases. ActionsDAGPtr appendProjectResult(ExpressionActionsChain & chain) const; - /// Create Set-s that we make from IN section to use index on them. - // void makeSetsForIndex(const ASTPtr & node); - private: StorageMetadataPtr metadata_snapshot; /// If non-empty, ignore all expressions not from this list. diff --git a/src/Interpreters/GlobalSubqueriesVisitor.h b/src/Interpreters/GlobalSubqueriesVisitor.h index 8c784d3c2ff..5b633fee9b6 100644 --- a/src/Interpreters/GlobalSubqueriesVisitor.h +++ b/src/Interpreters/GlobalSubqueriesVisitor.h @@ -69,7 +69,6 @@ public: void addExternalStorage(ASTPtr & ast, const Names & required_columns, bool set_alias = false) { - // std::cerr << "=============== addExternalStorage is remote " << is_remote << std::endl; /// With nondistributed queries, creating temporary tables does not make sense. if (!is_remote) return; @@ -163,49 +162,20 @@ public: nullptr, /*create_for_global_subquery*/ true); StoragePtr external_storage = external_storage_holder->getTable(); - - // std::cerr << "......... 
adding external table " << external_table_name << std::endl; - external_tables.emplace(external_table_name, external_storage_holder); - /// We need to materialize external tables immediately because reading from distributed - /// tables might generate local plans which can refer to external tables during index - /// analysis. It's too late to populate the external table via CreatingSetsTransform. - // if (is_explain) - // { - // /// Do not materialize external tables if it's explain statement. - // } - // else if (getContext()->getSettingsRef().use_index_for_in_with_subqueries) - // { - // auto external_table = external_storage_holder->getTable(); - // auto table_out = external_table->write({}, external_table->getInMemoryMetadataPtr(), getContext()); - // auto io = interpreter->execute(); - // io.pipeline.complete(std::move(table_out)); - // CompletedPipelineExecutor executor(io.pipeline); - // executor.execute(); - // } - // else + auto set_key = database_and_table_name->getTreeHash(); + + if (!prepared_sets->findSubquery(set_key)) { - // auto & subquery_for_set = prepared_sets->getSubquery(external_table_name); - // subquery_for_set.createSource(*interpreter, external_storage); - auto set_key = database_and_table_name->getTreeHash(); + std::unique_ptr source = std::make_unique(); + interpreter->buildQueryPlan(*source); - // std::cerr << "====== Adding key " << set_key.toString() << std::endl; - - if (!prepared_sets->findSubquery(set_key)) - { - std::unique_ptr source = std::make_unique(); - interpreter->buildQueryPlan(*source); - - //std::cerr << reinterpret_cast(prepared_sets.get()) << std::endl; - auto future_set = prepared_sets->addFromSubquery(set_key, std::move(source), std::move(external_storage), nullptr, getContext()->getSettingsRef()); - // std::cerr << "... 
Future set " << reinterpret_cast(external_storage_holder.get()) << " " << reinterpret_cast(future_set.get()) << std::endl; - external_storage_holder->future_set = std::move(future_set); - } - else - throw Exception(ErrorCodes::LOGICAL_ERROR, "Set is already created for GLOBAL IN"); - //prepared_sets->addStorageToSubquery(key, std::move(external_storage)); + auto future_set = prepared_sets->addFromSubquery(set_key, std::move(source), std::move(external_storage), nullptr, getContext()->getSettingsRef()); + external_storage_holder->future_set = std::move(future_set); } + else + throw Exception(ErrorCodes::LOGICAL_ERROR, "Set is already created for GLOBAL IN"); /** NOTE If it was written IN tmp_table - the existing temporary (but not external) table, * then a new temporary table will be created (for example, _data1), diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 85c3457c6b9..6ea15312ec4 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -953,10 +953,7 @@ Block InterpreterSelectQuery::getSampleBlockImpl() if (storage && !options.only_analyze) { - // query_analyzer->makeSetsForIndex(select_query.where()); - // query_analyzer->makeSetsForIndex(select_query.prewhere()); query_info.prepared_sets = query_analyzer->getPreparedSets(); - from_stage = storage->getQueryProcessingStage(context, options.to_stage, storage_snapshot, query_info); } diff --git a/src/Interpreters/PreparedSets.cpp b/src/Interpreters/PreparedSets.cpp index 2aa3e02c713..428ef873bc5 100644 --- a/src/Interpreters/PreparedSets.cpp +++ b/src/Interpreters/PreparedSets.cpp @@ -25,20 +25,25 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -class FutureSetFromTuple final : public FutureSet +static SizeLimits getSizeLimitsForSet(const Settings & settings) { -public: - FutureSetFromTuple(Block block, const Settings & settings); + return SizeLimits(settings.max_rows_in_set, settings.max_bytes_in_set, settings.set_overflow_mode); +} - SetPtr get() const override { return set; } - SetPtr buildOrderedSetInplace(const ContextPtr & context) override; +static bool equals(const DataTypes & lhs, const DataTypes & rhs) +{ + size_t size = lhs.size(); + if (size != rhs.size()) + return false; - const DataTypes & getTypes() const override { return set->getElementsTypes(); } + for (size_t i = 0; i < size; ++i) + { + if (!lhs[i]->equals(*rhs[i])) + return false; + } -private: - SetPtr set; - SetKeyColumns set_key_columns; -}; + return true; +} FutureSetFromStorage::FutureSetFromStorage(SetPtr set_) : set(std::move(set_)) {} @@ -51,42 +56,153 @@ SetPtr FutureSetFromStorage::buildOrderedSetInplace(const ContextPtr &) } -// PreparedSetKey PreparedSetKey::forLiteral(Hash hash, DataTypes types_) -// { -// /// Remove LowCardinality types from type list because Set doesn't support LowCardinality keys now, -// /// just converts LowCardinality to ordinary types. 
-// for (auto & type : types_) -// type = recursiveRemoveLowCardinality(type); +FutureSetFromTuple::FutureSetFromTuple(Block block, const Settings & settings) +{ + auto size_limits = getSizeLimitsForSet(settings); + set = std::make_shared(size_limits, settings.use_index_for_in_with_subqueries_max_values, settings.transform_null_in); + set->setHeader(block.cloneEmpty().getColumnsWithTypeAndName()); -// PreparedSetKey key; -// key.ast_hash = hash; -// key.types = std::move(types_); -// return key; -// } + Columns columns; + columns.reserve(block.columns()); + for (const auto & column : block) + columns.emplace_back(column.column); -// PreparedSetKey PreparedSetKey::forSubquery(Hash hash) -// { -// PreparedSetKey key; -// key.ast_hash = hash; -// return key; -// } + set_key_columns.filter = ColumnUInt8::create(block.rows()); -// bool PreparedSetKey::operator==(const PreparedSetKey & other) const -// { -// if (ast_hash != other.ast_hash) -// return false; + set->insertFromColumns(columns, set_key_columns); + set->finishInsert(); +} -// if (types.size() != other.types.size()) -// return false; +const DataTypes & FutureSetFromTuple::getTypes() const { return set->getElementsTypes(); } -// for (size_t i = 0; i < types.size(); ++i) -// { -// if (!types[i]->equals(*other.types[i])) -// return false; -// } +SetPtr FutureSetFromTuple::buildOrderedSetInplace(const ContextPtr & context) +{ + if (set->hasExplicitSetElements()) + return set; + + const auto & settings = context->getSettingsRef(); + size_t max_values = settings.use_index_for_in_with_subqueries_max_values; + bool too_many_values = max_values && max_values < set->getTotalRowCount(); + if (!too_many_values) + { + set->fillSetElements(); + set->appendSetElements(set_key_columns); + } + + return set; +} + + +FutureSetFromSubquery::FutureSetFromSubquery( + String key, + std::unique_ptr source_, + StoragePtr external_table_, + FutureSetPtr external_table_set_, + const Settings & settings) + : external_table(std::move(external_table_)) + , external_table_set(std::move(external_table_set_)) + , source(std::move(source_)) +{ + set_and_key = std::make_shared(); + set_and_key->key = std::move(key); + + auto size_limits = getSizeLimitsForSet(settings); + set_and_key->set = std::make_shared(size_limits, settings.use_index_for_in_with_subqueries_max_values, settings.transform_null_in); + set_and_key->set->setHeader(source->getCurrentDataStream().header.getColumnsWithTypeAndName()); +} + +FutureSetFromSubquery::FutureSetFromSubquery( + String key, + QueryTreeNodePtr query_tree_, + const Settings & settings) + : query_tree(std::move(query_tree_)) +{ + set_and_key = std::make_shared(); + set_and_key->key = std::move(key); + + auto size_limits = getSizeLimitsForSet(settings); + set_and_key->set = std::make_shared(size_limits, settings.use_index_for_in_with_subqueries_max_values, settings.transform_null_in); +} + +SetPtr FutureSetFromSubquery::get() const +{ + if (set_and_key->set != nullptr && set_and_key->set->isCreated()) + return set_and_key->set; + + return nullptr; +} + +void FutureSetFromSubquery::setQueryPlan(std::unique_ptr source_) +{ + source = std::move(source_); + set_and_key->set->setHeader(source->getCurrentDataStream().header.getColumnsWithTypeAndName()); +} + +const DataTypes & FutureSetFromSubquery::getTypes() const +{ + return set_and_key->set->getElementsTypes(); +} + +std::unique_ptr FutureSetFromSubquery::build(const ContextPtr & context) +{ + if (set_and_key->set->isCreated()) + return nullptr; + + const auto & settings = 
context->getSettingsRef(); + + auto plan = std::move(source); + + if (!plan) + return nullptr; + + auto creating_set = std::make_unique( + plan->getCurrentDataStream(), + set_and_key, + external_table, + SizeLimits(settings.max_rows_to_transfer, settings.max_bytes_to_transfer, settings.transfer_overflow_mode), + context); + creating_set->setStepDescription("Create set for subquery"); + plan->addStep(std::move(creating_set)); + return plan; +} + +SetPtr FutureSetFromSubquery::buildOrderedSetInplace(const ContextPtr & context) +{ + if (!context->getSettingsRef().use_index_for_in_with_subqueries) + return nullptr; + + if (auto set = get()) + { + if (set->hasExplicitSetElements()) + return set; + + return nullptr; + } + + if (external_table_set) + { + auto set = external_table_set->buildOrderedSetInplace(context); + if (set) + return set_and_key->set = set; + } + + auto plan = build(context); + if (!plan) + return nullptr; + + set_and_key->set->fillSetElements(); + auto builder = plan->buildQueryPipeline(QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context)); + auto pipeline = QueryPipelineBuilder::getPipeline(std::move(*builder)); + pipeline.complete(std::make_shared(Block())); + + CompletedPipelineExecutor executor(pipeline); + executor.execute(); + + set_and_key->set->checkIsCreated(); + + return set_and_key->set; +} -// return true; -// } String PreparedSets::toString(const PreparedSets::Hash & key, const DataTypes & types) { @@ -108,84 +224,18 @@ String PreparedSets::toString(const PreparedSets::Hash & key, const DataTypes & return buf.str(); } -// SubqueryForSet & PreparedSets::createOrGetSubquery(const String & subquery_id, const PreparedSetKey & key, -// SizeLimits set_size_limit, bool transform_null_in) -// { -// SubqueryForSet & subquery = subqueries[subquery_id]; - -// /// If you already created a Set with the same subquery / table for another ast -// /// In that case several PreparedSetKey would share same subquery and set -// /// Not sure if it's really possible case (maybe for distributed query when set was filled by external table?) -// if (subquery.set.isValid()) -// sets[key] = subquery.set; -// else -// { -// subquery.set_in_progress = std::make_shared(set_size_limit, false, transform_null_in); -// sets[key] = FutureSet(subquery.promise_to_fill_set.get_future()); -// } - -// if (!subquery.set_in_progress) -// { -// subquery.key = key.toString(); -// subquery.set_in_progress = std::make_shared(set_size_limit, false, transform_null_in); -// } - -// return subquery; -// } - -/// If the subquery is not associated with any set, create default-constructed SubqueryForSet. -/// It's aimed to fill external table passed to SubqueryForSet::createSource. 
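// [Editor's note — illustrative sketch, not part of the patch] buildOrderedSetInplace()
// above is the entry point for index analysis: it drains the subquery pipeline on the
// spot and only yields a set whose elements are stored explicitly. A hypothetical
// condensed caller, mirroring KeyCondition::tryPrepareSetIndex later in this patch:
//
//     SetPtr tryGetSetForIndex(const FutureSetPtr & future_set, const ContextPtr & context)
//     {
//         if (auto set = future_set->buildOrderedSetInplace(context))
//             if (set->hasExplicitSetElements())
//                 return set;
//         return nullptr;
//     }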
-// void PreparedSets::addStorageToSubquery(const String & subquery_id, StoragePtr storage) -// { -// auto it = subqueries.find(subquery_id); -// if (it == subqueries.end()) -// throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot find subquery {}", subquery_id); - -// it->second->addStorage(std::move(storage)); -// } -static bool equals(const DataTypes & lhs, const DataTypes & rhs) -{ - size_t size = lhs.size(); - if (size != rhs.size()) - return false; - - for (size_t i = 0; i < size; ++i) - { - if (!lhs[i]->equals(*rhs[i])) - return false; - } - - return true; -} - -static bool tryInsertSet(std::vector> & sets, FutureSetPtr new_set) -{ - auto types = new_set->getTypes(); - for (const auto & set : sets) - if (equals(set->getTypes(), new_set->getTypes())) - return false; - - sets.push_back(std::move(new_set)); - return true; -} - -static FutureSetPtr findSet(const std::vector> & sets, const DataTypes & types) -{ - for (const auto & set : sets) - if (equals(set->getTypes(), types)) - return set; - - return nullptr; -} - FutureSetPtr PreparedSets::addFromTuple(const Hash & key, Block block, const Settings & settings) { auto from_tuple = std::make_shared(std::move(block), settings); + const auto & set_types = from_tuple->getTypes(); auto & sets_by_hash = sets_from_tuple[key]; - if (!tryInsertSet(sets_by_hash, from_tuple)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Duplicate set: {}", toString(key, from_tuple->getTypes())); + auto types = from_tuple->getTypes(); + for (const auto & set : sets_by_hash) + if (equals(set->getTypes(), set_types)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Duplicate set: {}", toString(key, from_tuple->getTypes())); + sets_by_hash.push_back(std::move(from_tuple)); return from_tuple; } @@ -246,7 +296,11 @@ FutureSetPtr PreparedSets::findTuple(const Hash & key, const DataTypes & types) if (it == sets_from_tuple.end()) return nullptr; - return findSet(it->second, types); + for (const auto & set : it->second) + if (equals(set->getTypes(), types)) + return set; + + return nullptr; } std::shared_ptr PreparedSets::findSubquery(const Hash & key) const @@ -267,36 +321,9 @@ std::shared_ptr PreparedSets::findStorage(const Hash & key return it->second; } -// FutureSetPtr PreparedSets::getFuture(const PreparedSetKey & key) const -// { -// auto it = sets.find(key); -// if (it == sets.end()) -// return {}; -// return it->second; -// } - -// SetPtr PreparedSets::get(const PreparedSetKey & key) const -// { -// auto it = sets.find(key); -// if (it == sets.end() || !it->second.isReady()) -// return nullptr; -// return it->second.get(); -// } - -// std::vector PreparedSets::getByTreeHash(IAST::Hash ast_hash) const -// { -// std::vector res; -// for (const auto & it : this->sets) -// { -// if (it.first.ast_hash == ast_hash) -// res.push_back(it.second); -// } -// return res; -// } - -std::vector> PreparedSets::getSubqueries() +PreparedSets::Subqueries PreparedSets::getSubqueries() { - std::vector> res; + PreparedSets::Subqueries res; res.reserve(sets_from_subqueries.size()); for (auto & [_, set] : sets_from_subqueries) res.push_back(set); @@ -304,33 +331,10 @@ std::vector> PreparedSets::getSubqueries( return res; } -// void SubqueryForSet::createSource(InterpreterSelectWithUnionQuery & interpreter, StoragePtr table_) -// { -// source = std::make_unique(); -// interpreter.buildQueryPlan(*source); -// if (table_) -// table = table_; -// } - -// bool SubqueryForSet::hasSource() const -// { -// return source != nullptr; -// } - -// QueryPlanPtr SubqueryForSet::detachSource() -// { -// 
auto res = std::move(source); -// source = nullptr; -// return res; -// } - - std::variant, SharedSet> PreparedSetsCache::findOrPromiseToBuild(const String & key) { std::lock_guard lock(cache_mutex); - // std::cerr << "PreparedSetsCache::findOrPromiseToBuild " << key << "\n" << StackTrace().toString() << std::endl; - auto it = cache.find(key); if (it != cache.end()) { @@ -347,163 +351,4 @@ std::variant, SharedSet> PreparedSetsCache::findOrPromiseTo return promise_to_fill_set; } -SetPtr FutureSetFromSubquery::buildOrderedSetInplace(const ContextPtr & context) -{ - if (!context->getSettingsRef().use_index_for_in_with_subqueries) - return nullptr; - - if (auto set = get()) - { - if (set->hasExplicitSetElements()) - return set; - - return nullptr; - } - - if (external_table_set) - { - auto set = external_table_set->buildOrderedSetInplace(context); - if (set) - return set_and_key->set = set; - } - - auto plan = buildPlan(context); - if (!plan) - return nullptr; - - set_and_key->set->fillSetElements(); - set_and_key->set->initSetElements(); - auto builder = plan->buildQueryPipeline(QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context)); - auto pipeline = QueryPipelineBuilder::getPipeline(std::move(*builder)); - pipeline.complete(std::make_shared(Block())); - - CompletedPipelineExecutor executor(pipeline); - executor.execute(); - - set_and_key->set->checkIsCreated(); - - return set_and_key->set; -} - -SetPtr FutureSetFromSubquery::get() const -{ - if (set_and_key->set != nullptr && set_and_key->set->isCreated()) - return set_and_key->set; - - return nullptr; -} - -std::unique_ptr FutureSetFromSubquery::build(const ContextPtr & context) -{ - return buildPlan(context); -} - -static SizeLimits getSizeLimitsForSet(const Settings & settings) -{ - return SizeLimits(settings.max_rows_in_set, settings.max_bytes_in_set, settings.set_overflow_mode); -} - -std::unique_ptr FutureSetFromSubquery::buildPlan(const ContextPtr & context) -{ - if (set_and_key->set->isCreated()) - return nullptr; - - const auto & settings = context->getSettingsRef(); - - auto plan = std::move(source); - - if (!plan) - return nullptr; - - auto creating_set = std::make_unique( - plan->getCurrentDataStream(), - set_and_key, - external_table, - SizeLimits(settings.max_rows_to_transfer, settings.max_bytes_to_transfer, settings.transfer_overflow_mode), - context); - creating_set->setStepDescription("Create set for subquery"); - plan->addStep(std::move(creating_set)); - return plan; -} - -FutureSetFromTuple::FutureSetFromTuple(Block block, const Settings & settings) -{ - bool create_ordered_set = false; - auto size_limits = getSizeLimitsForSet(settings); - set = std::make_shared(size_limits, create_ordered_set, settings.use_index_for_in_with_subqueries_max_values, settings.transform_null_in); - set->setHeader(block.cloneEmpty().getColumnsWithTypeAndName()); - - Columns columns; - columns.reserve(block.columns()); - for (const auto & column : block) - columns.emplace_back(column.column); - - set_key_columns.filter = ColumnUInt8::create(block.rows()); - - set->insertFromColumns(columns, set_key_columns); - set->finishInsert(); -} - -FutureSetFromSubquery::FutureSetFromSubquery( - String key, - std::unique_ptr source_, - StoragePtr external_table_, - FutureSetPtr external_table_set_, - const Settings & settings) - : external_table(std::move(external_table_)) - , external_table_set(std::move(external_table_set_)) - , source(std::move(source_)) -{ - set_and_key = std::make_shared(); - 
set_and_key->key = std::move(key); - - bool create_ordered_set = false; - auto size_limits = getSizeLimitsForSet(settings); - set_and_key->set = std::make_shared(size_limits, create_ordered_set, settings.use_index_for_in_with_subqueries_max_values, settings.transform_null_in); - set_and_key->set->setHeader(source->getCurrentDataStream().header.getColumnsWithTypeAndName()); -} - -FutureSetFromSubquery::FutureSetFromSubquery( - String key, - QueryTreeNodePtr query_tree_, - //FutureSetPtr external_table_set_, - const Settings & settings) - : query_tree(std::move(query_tree_)) -{ - set_and_key = std::make_shared(); - set_and_key->key = std::move(key); - - bool create_ordered_set = false; - auto size_limits = getSizeLimitsForSet(settings); - set_and_key->set = std::make_shared(size_limits, create_ordered_set, settings.use_index_for_in_with_subqueries_max_values, settings.transform_null_in); -} - -void FutureSetFromSubquery::setQueryPlan(std::unique_ptr source_) -{ - source = std::move(source_); - set_and_key->set->setHeader(source->getCurrentDataStream().header.getColumnsWithTypeAndName()); -} - -const DataTypes & FutureSetFromSubquery::getTypes() const -{ - return set_and_key->set->getElementsTypes(); -} - -SetPtr FutureSetFromTuple::buildOrderedSetInplace(const ContextPtr & context) -{ - if (set->hasExplicitSetElements()) - return set; - - const auto & settings = context->getSettingsRef(); - size_t max_values = settings.use_index_for_in_with_subqueries_max_values; - bool too_many_values = max_values && max_values < set->getTotalRowCount(); - if (!too_many_values) - { - set->initSetElements(); - set->appendSetElements(set_key_columns); - } - - return set; -} - }; diff --git a/src/Interpreters/PreparedSets.h b/src/Interpreters/PreparedSets.h index 339f4a6b435..cb240f5260a 100644 --- a/src/Interpreters/PreparedSets.h +++ b/src/Interpreters/PreparedSets.h @@ -8,7 +8,7 @@ #include #include #include -#include +#include namespace DB { @@ -17,7 +17,7 @@ class QueryPlan; class Set; using SetPtr = std::shared_ptr; -class InterpreterSelectWithUnionQuery; +struct SetKeyColumns; class IQueryTreeNode; using QueryTreeNodePtr = std::shared_ptr; @@ -44,14 +44,19 @@ class FutureSet public: virtual ~FutureSet() = default; + /// Returns set if set is ready (created and filled) or nullptr if not. virtual SetPtr get() const = 0; + /// Returns set->getElementsTypes(), even if set is not created yet. virtual const DataTypes & getTypes() const = 0; + /// If possible, return set with stored elements useful for PK analysis. virtual SetPtr buildOrderedSetInplace(const ContextPtr & context) = 0; }; using FutureSetPtr = std::shared_ptr; -class FutureSetFromStorage : public FutureSet +/// Future set from already filled set. +/// Usually it is from StorageSet. +class FutureSetFromStorage final : public FutureSet { public: FutureSetFromStorage(SetPtr set_); @@ -64,34 +69,27 @@ private: SetPtr set; }; +/// Set from tuple is filled as well as set from storage. +/// Additionally, it can be converted to set useful for PK. +class FutureSetFromTuple final : public FutureSet +{ +public: + FutureSetFromTuple(Block block, const Settings & settings); -/// Information on how to build set for the [GLOBAL] IN section. 
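// [Editor's note — illustrative sketch, not part of the patch] Tuple sets are keyed by
// AST hash plus element types: the same IN expression can legitimately produce sets
// with different element types, so lookups compare types before reusing. CollectSets.cpp
// (later in this patch) uses the container roughly as:
//
//     auto set_key = in_second_argument->getTreeHash();
//     if (sets.findTuple(set_key, set_element_types))
//         return;                                             // same hash and same types -> reuse
//     sets.addFromTuple(set_key, std::move(block), settings); // throws on an exact duplicate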
-// class SubqueryForSet -// { -// public: + SetPtr get() const override { return set; } + SetPtr buildOrderedSetInplace(const ContextPtr & context) override; -// void createSource(InterpreterSelectWithUnionQuery & interpreter, StoragePtr table_ = nullptr); + const DataTypes & getTypes() const override; -// bool hasSource() const; +private: + SetPtr set; + SetKeyColumns set_key_columns; +}; -// /// Returns query plan for the set's source -// /// and removes it from SubqueryForSet because we need to build it only once. -// std::unique_ptr detachSource(); - -// /// Build this set from the result of the subquery. -// String key; -// SetPtr set; - -// /// If set, put the result into the table. -// /// This is a temporary table for transferring to remote servers for distributed query processing. -// StoragePtr table; - -// /// The source is obtained using the InterpreterSelectQuery subquery. -// std::unique_ptr source; -// QueryTreeNodePtr query_tree; -// }; - -class FutureSetFromSubquery : public FutureSet +/// Set from subquery can be built inplace for PK or in CreatingSet step. +/// If use_index_for_in_with_subqueries_max_values is reached, set for PK won't be created, +/// but ordinary set would be created instead. +class FutureSetFromSubquery final : public FutureSet { public: FutureSetFromSubquery( @@ -104,7 +102,6 @@ public: FutureSetFromSubquery( String key, QueryTreeNodePtr query_tree_, - //FutureSetPtr external_table_set_, const Settings & settings); SetPtr get() const override; @@ -123,10 +120,9 @@ private: std::unique_ptr source; QueryTreeNodePtr query_tree; - - std::unique_ptr buildPlan(const ContextPtr & context); }; +/// Container for all the sets used in query. class PreparedSets { public: @@ -137,7 +133,7 @@ public: UInt64 operator()(const Hash & key) const { return key.first ^ key.second; } }; - using SetsFromTuple = std::unordered_map>, Hashing>; + using SetsFromTuple = std::unordered_map>, Hashing>; using SetsFromStorage = std::unordered_map, Hashing>; using SetsFromSubqueries = std::unordered_map, Hashing>; @@ -160,11 +156,12 @@ public: std::shared_ptr findStorage(const Hash & key) const; std::shared_ptr findSubquery(const Hash & key) const; - std::vector> getSubqueries(); + using Subqueries = std::vector>; + Subqueries getSubqueries(); const SetsFromTuple & getSetsFromTuple() const { return sets_from_tuple; } - const SetsFromStorage & getSetsFromStorage() const { return sets_from_storage; } - const SetsFromSubqueries & getSetsFromSubquery() const { return sets_from_subqueries; } + // const SetsFromStorage & getSetsFromStorage() const { return sets_from_storage; } + // const SetsFromSubqueries & getSetsFromSubquery() const { return sets_from_subqueries; } static String toString(const Hash & key, const DataTypes & types); diff --git a/src/Interpreters/Set.cpp b/src/Interpreters/Set.cpp index 17df5064642..b42ff102f72 100644 --- a/src/Interpreters/Set.cpp +++ b/src/Interpreters/Set.cpp @@ -167,19 +167,16 @@ void Set::setHeader(const ColumnsWithTypeAndName & header) extractNestedColumnsAndNullMap(key_columns, null_map); } - if (fill_set_elements) - { - /// Create empty columns with set values in advance. - /// It is needed because set may be empty, so method 'insertFromBlock' will be never called. - initSetElements(); - } - /// Choose data structure to use for the set. 
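// [Editor's note — illustrative sketch, not part of the patch] With this change the
// ordered-elements buffer is no longer set up inside setHeader(); fillSetElements()
// (below) throws unless the set is still empty, so the expected call order is:
//
//     Set set(SizeLimits(), /*max_elements_to_fill*/ 0, /*transform_null_in*/ true);
//     set.setHeader(header.getColumnsWithTypeAndName());
//     set.fillSetElements();              // optional, but must come before any insert
//     set.insertFromColumns(columns);
//     set.finishInsert();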
data.init(data.chooseMethod(key_columns, key_sizes)); } -void Set::initSetElements() +void Set::fillSetElements() { + if (data.getTotalRowCount()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot convert set to ordered because it is not empty"); + + fill_set_elements = true; set_elements.reserve(keys_size); for (const auto & type : set_elements_types) set_elements.emplace_back(type->createColumn()); diff --git a/src/Interpreters/Set.h b/src/Interpreters/Set.h index d1218830969..9ea46e117ef 100644 --- a/src/Interpreters/Set.h +++ b/src/Interpreters/Set.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -21,17 +22,6 @@ using FunctionBasePtr = std::shared_ptr; class Chunk; -/// Prepared key columns for set which can be added to fill set elements. -/// Used only to upgrade set from tuple. -struct SetKeyColumns -{ - /// The constant columns to the right of IN are not supported directly. For this, they first materialize. - ColumnRawPtrs key_columns; - Columns materialized_columns; - ColumnPtr null_map_holder; - ColumnUInt8::MutablePtr filter; -}; - /** Data structure for implementation of IN expression. */ class Set @@ -41,14 +31,12 @@ public: /// (that is useful only for checking that some value is in the set and may not store the original values), /// store all set elements in explicit form. /// This is needed for subsequent use for index. - Set(const SizeLimits & limits_, bool fill_set_elements_, size_t max_elements_to_fill_, bool transform_null_in_) + Set(const SizeLimits & limits_, size_t max_elements_to_fill_, bool transform_null_in_) : log(&Poco::Logger::get("Set")), - limits(limits_), fill_set_elements(fill_set_elements_), max_elements_to_fill(max_elements_to_fill_), transform_null_in(transform_null_in_) + limits(limits_), max_elements_to_fill(max_elements_to_fill_), transform_null_in(transform_null_in_) { } - void fillSetElements() { fill_set_elements = true; } - /** Set can be created either from AST or from a stream of data (subquery result). */ @@ -61,7 +49,7 @@ public: bool insertFromColumns(const Columns & columns); bool insertFromBlock(const ColumnsWithTypeAndName & columns); - void initSetElements(); + void fillSetElements(); bool insertFromColumns(const Columns & columns, SetKeyColumns & holder); void appendSetElements(SetKeyColumns & holder); @@ -129,7 +117,7 @@ private: SizeLimits limits; /// Do we need to additionally store all elements of the set in explicit form for subsequent use for index. - bool fill_set_elements; + bool fill_set_elements = false; size_t max_elements_to_fill; /// If true, insert NULL values to set. diff --git a/src/Interpreters/SetKeys.h b/src/Interpreters/SetKeys.h new file mode 100644 index 00000000000..2cab9849c9b --- /dev/null +++ b/src/Interpreters/SetKeys.h @@ -0,0 +1,18 @@ +#pragma once +#include + +namespace DB +{ + +/// Prepared key columns for set which can be added to fill set elements. +/// Used only to upgrade set from tuple. +struct SetKeyColumns +{ + /// The constant columns to the right of IN are not supported directly. For this, they first materialize. 
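// [Editor's note — commentary added for clarity, not part of the patch] Based on the
// usage in FutureSetFromTuple (PreparedSets.cpp above): key_columns point into
// materialized_columns, null_map_holder keeps the extracted NULL map alive, and
// filter records which input rows were actually inserted, so appendSetElements()
// can later replay exactly those rows into the ordered set.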
+ ColumnRawPtrs key_columns; + Columns materialized_columns; + ColumnPtr null_map_holder; + ColumnUInt8::MutablePtr filter; +}; + +} diff --git a/src/Planner/CollectSets.cpp b/src/Planner/CollectSets.cpp index df3558745fb..8dd7c6637bf 100644 --- a/src/Planner/CollectSets.cpp +++ b/src/Planner/CollectSets.cpp @@ -30,9 +30,8 @@ namespace class CollectSetsVisitor : public ConstInDepthQueryTreeVisitor { public: - explicit CollectSetsVisitor(PlannerContext & planner_context_) //, const SelectQueryOptions & select_query_options_) + explicit CollectSetsVisitor(PlannerContext & planner_context_) : planner_context(planner_context_) - //, select_query_options(select_query_options_) {} void visitImpl(const QueryTreeNodePtr & node) @@ -60,7 +59,7 @@ public: } else if (const auto * constant_node = in_second_argument->as()) { - auto set = makeSetForConstantValue( + auto set = getSetElementsForConstantValue( in_first_argument->getResultType(), constant_node->getValue(), constant_node->getResultType(), @@ -72,18 +71,12 @@ public: set_element_types = left_tuple_type->getElements(); set_element_types = Set::getElementTypes(std::move(set_element_types), settings.transform_null_in); - - // for (auto & element_type : set_element_types) - // if (const auto * low_cardinality_type = typeid_cast(element_type.get())) - // element_type = low_cardinality_type->getDictionaryType(); - auto set_key = in_second_argument->getTreeHash(); + if (sets.findTuple(set_key, set_element_types)) return; sets.addFromTuple(set_key, std::move(set), settings); - - //planner_context.registerSet(set_key, PlannerSet(FutureSet(std::move(set)))); } else if (in_second_argument_node_type == QueryTreeNodeType::QUERY || in_second_argument_node_type == QueryTreeNodeType::UNION || @@ -141,14 +134,13 @@ public: private: PlannerContext & planner_context; - //const SelectQueryOptions & select_query_options; }; } -void collectSets(const QueryTreeNodePtr & node, PlannerContext & planner_context) //, const SelectQueryOptions & select_query_options) +void collectSets(const QueryTreeNodePtr & node, PlannerContext & planner_context) { - CollectSetsVisitor visitor(planner_context); //, select_query_options); + CollectSetsVisitor visitor(planner_context); visitor.visit(node); } diff --git a/src/Planner/CollectSets.h b/src/Planner/CollectSets.h index e0db802d3b4..5f9f7a5a466 100644 --- a/src/Planner/CollectSets.h +++ b/src/Planner/CollectSets.h @@ -12,6 +12,6 @@ struct SelectQueryOptions; /** Collect prepared sets and sets for subqueries that are necessary to execute IN function and its variations. * Collected sets are registered in planner context. 
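  * (Editor's note, illustrative and not part of the patch: with the signature below,
  * the planner calls collectSets(query_tree, *planner_context); once per query after
  * analysis, and buildFilterInfo() in Planner/Utils.cpp runs it again on each filter
  * sub-tree, so filter expressions get their own registered sets.)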
*/ -void collectSets(const QueryTreeNodePtr & node, PlannerContext & planner_context); //, const SelectQueryOptions & select_query_options); +void collectSets(const QueryTreeNodePtr & node, PlannerContext & planner_context); } diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index d524f2bed98..9f6c22f90f3 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -933,8 +933,6 @@ void addBuildSubqueriesForSetsStepIfNeeded( auto subqueries = planner_context->getPreparedSets().getSubqueries(); std::unordered_set useful_sets; - //PreparedSets::SubqueriesForSets subqueries_for_sets; - for (const auto & actions_to_execute : result_actions_to_execute) collectSetsFromActionsDAG(actions_to_execute, useful_sets); @@ -994,7 +992,7 @@ void addAdditionalFilterStepIfNeeded(QueryPlan & query_plan, auto storage = std::make_shared(StorageID{"dummy", "dummy"}, fake_column_descriptions); auto fake_table_expression = std::make_shared(std::move(storage), query_context); - auto filter_info = buildFilterInfo(additional_result_filter_ast, fake_table_expression, planner_context, select_query_options, std::move(fake_name_set)); + auto filter_info = buildFilterInfo(additional_result_filter_ast, fake_table_expression, planner_context, std::move(fake_name_set)); if (!filter_info.actions || !query_plan.isInitialized()) return; @@ -1225,7 +1223,7 @@ void Planner::buildPlanForQueryNode() } checkStoragesSupportTransactions(planner_context); - collectSets(query_tree, *planner_context); //, select_query_options); + collectSets(query_tree, *planner_context); collectTableExpressionData(query_tree, planner_context); const auto & settings = query_context->getSettingsRef(); @@ -1524,10 +1522,7 @@ void Planner::buildPlanForQueryNode() } if (!select_query_options.only_analyze) - { - //addCreatingSetsStep(query_plan, planner_context->getPreparedSets().detachSubqueries(planner_context->getQueryContext()), planner_context->getQueryContext()); addBuildSubqueriesForSetsStepIfNeeded(query_plan, select_query_options, planner_context, result_actions_to_execute); - } } SelectQueryInfo Planner::buildSelectQueryInfo() const diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index 40b6ec109dc..7575828e64d 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -633,7 +633,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::ma DataTypes set_element_types; auto in_second_argument_node_type = in_second_argument->getNodeType(); - // std::cerr << "=========== " << in_second_argument->getNodeTypeName() << std::endl; + bool subquery_or_table = in_second_argument_node_type == QueryTreeNodeType::QUERY || in_second_argument_node_type == QueryTreeNodeType::UNION || @@ -649,10 +649,6 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::ma if (left_tuple_type && left_tuple_type->getElements().size() != 1) set_element_types = left_tuple_type->getElements(); - // for (auto & element_type : set_element_types) - // if (const auto * low_cardinality_type = typeid_cast(element_type.get())) - // element_type = low_cardinality_type->getDictionaryType(); - set_element_types = Set::getElementTypes(std::move(set_element_types), planner_context->getQueryContext()->getSettingsRef().transform_null_in); set = planner_context->getPreparedSets().findTuple(set_key, set_element_types); } diff --git a/src/Planner/PlannerContext.cpp b/src/Planner/PlannerContext.cpp index c8aeebc17c1..3c75d4fbea8 100644 --- 
a/src/Planner/PlannerContext.cpp +++ b/src/Planner/PlannerContext.cpp @@ -118,50 +118,4 @@ PlannerContext::SetKey PlannerContext::createSetKey(const QueryTreeNodePtr & set return "__set_" + toString(set_source_hash.first) + '_' + toString(set_source_hash.second); } -// void PlannerContext::registerSet(const SetKey & key, PlannerSet planner_set) -// { -// if (!planner_set.getSet().isValid()) -// throw Exception(ErrorCodes::LOGICAL_ERROR, "Set must be initialized"); - -// const auto & subquery_node = planner_set.getSubqueryNode(); -// if (subquery_node) -// { -// auto node_type = subquery_node->getNodeType(); - -// if (node_type != QueryTreeNodeType::QUERY && -// node_type != QueryTreeNodeType::UNION && -// node_type != QueryTreeNodeType::TABLE) -// throw Exception(ErrorCodes::LOGICAL_ERROR, -// "Invalid node for set table expression. Expected query or union. Actual {}", -// subquery_node->formatASTForErrorMessage()); -// } - -// set_key_to_set.emplace(key, std::move(planner_set)); -// } - -// bool PlannerContext::hasSet(const SetKey & key) const -// { -// return set_key_to_set.contains(key); -// } - -// const PlannerSet & PlannerContext::getSetOrThrow(const SetKey & key) const -// { -// auto it = set_key_to_set.find(key); -// if (it == set_key_to_set.end()) -// throw Exception(ErrorCodes::LOGICAL_ERROR, -// "No set is registered for key {}", -// key); - -// return it->second; -// } - -// PlannerSet * PlannerContext::getSetOrNull(const SetKey & key) -// { -// auto it = set_key_to_set.find(key); -// if (it == set_key_to_set.end()) -// return nullptr; - -// return &it->second; -// } - } diff --git a/src/Planner/PlannerContext.h b/src/Planner/PlannerContext.h index 78c7f8feab5..aceb313d4b5 100644 --- a/src/Planner/PlannerContext.h +++ b/src/Planner/PlannerContext.h @@ -44,56 +44,6 @@ private: using GlobalPlannerContextPtr = std::shared_ptr; -/** PlannerSet is wrapper around Set that is used during query planning. - * - * If subquery node is null, such set is already prepared for execution. - * - * If subquery node is not null, then set must be build from the result of the subquery. - * If subquery node is not null, it must have QUERY or UNION type. 
- */ -class PlannerSet -{ - -}; - -// { -// public: -// /// Construct planner set that is ready for execution -// explicit PlannerSet(FutureSetPtr set_) -// : set(std::move(set_)) -// {} - -// /// Construct planner set with set and subquery node -// explicit PlannerSet(QueryTreeNodePtr subquery_node_) -// //: set(promise_to_build_set.get_future()) -// : subquery_node(std::move(subquery_node_)) -// {} - -// /// Get a reference to a set that might be not built yet -// const FutureSetPtr & getSet() const -// { -// return set; -// } - -// /// Get subquery node -// const QueryTreeNodePtr & getSubqueryNode() const -// { -// return subquery_node; -// } - -// /// This promise will be fulfilled when set is built and all FutureSet objects will become ready -// // std::promise extractPromiseToBuildSet() -// // { -// // return std::move(promise_to_build_set); -// // } - -// private: -// //std::promise promise_to_build_set; -// FutureSetPtr set; - -// QueryTreeNodePtr subquery_node; -// }; - class PlannerContext { public: @@ -181,29 +131,9 @@ public: using SetKey = std::string; - // using SetKeyToSet = std::unordered_map; - - // /// Create set key for set source node + /// Create set key for set source node static SetKey createSetKey(const QueryTreeNodePtr & set_source_node); - // /// Register set for set key - // void registerSet(const SetKey & key, PlannerSet planner_set); - - // /// Returns true if set is registered for key, false otherwise - // bool hasSet(const SetKey & key) const; - - // /// Get set for key, if no set is registered logical exception is thrown - // const PlannerSet & getSetOrThrow(const SetKey & key) const; - - // /// Get set for key, if no set is registered null is returned - // PlannerSet * getSetOrNull(const SetKey & key); - - // /// Get registered sets - // const SetKeyToSet & getRegisteredSets() const - // { - // return set_key_to_set; - // } - PreparedSets & getPreparedSets() { return prepared_sets; } private: diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index c27ee72437c..5d8f8ca8741 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -403,8 +403,7 @@ void updatePrewhereOutputsIfNeeded(SelectQueryInfo & table_expression_query_info FilterDAGInfo buildRowPolicyFilterIfNeeded(const StoragePtr & storage, SelectQueryInfo & table_expression_query_info, - PlannerContextPtr & planner_context, - const SelectQueryOptions & select_query_options) + PlannerContextPtr & planner_context) { auto storage_id = storage->getStorageID(); const auto & query_context = planner_context->getQueryContext(); @@ -413,13 +412,12 @@ FilterDAGInfo buildRowPolicyFilterIfNeeded(const StoragePtr & storage, if (!row_policy_filter) return {}; - return buildFilterInfo(row_policy_filter->expression, table_expression_query_info.table_expression, planner_context, select_query_options); + return buildFilterInfo(row_policy_filter->expression, table_expression_query_info.table_expression, planner_context); } FilterDAGInfo buildCustomKeyFilterIfNeeded(const StoragePtr & storage, SelectQueryInfo & table_expression_query_info, - PlannerContextPtr & planner_context, - const SelectQueryOptions & select_query_options) + PlannerContextPtr & planner_context) { const auto & query_context = planner_context->getQueryContext(); const auto & settings = query_context->getSettingsRef(); @@ -445,15 +443,14 @@ FilterDAGInfo buildCustomKeyFilterIfNeeded(const StoragePtr & storage, *storage, query_context); - return buildFilterInfo(parallel_replicas_custom_filter_ast, 
table_expression_query_info.table_expression, planner_context, select_query_options); + return buildFilterInfo(parallel_replicas_custom_filter_ast, table_expression_query_info.table_expression, planner_context); } /// Apply filters from additional_table_filters setting FilterDAGInfo buildAdditionalFiltersIfNeeded(const StoragePtr & storage, const String & table_expression_alias, SelectQueryInfo & table_expression_query_info, - PlannerContextPtr & planner_context, - const SelectQueryOptions & select_query_options) + PlannerContextPtr & planner_context) { const auto & query_context = planner_context->getQueryContext(); const auto & settings = query_context->getSettingsRef(); @@ -487,7 +484,7 @@ FilterDAGInfo buildAdditionalFiltersIfNeeded(const StoragePtr & storage, return {}; table_expression_query_info.additional_filter_ast = additional_filter_ast; - return buildFilterInfo(additional_filter_ast, table_expression_query_info.table_expression, planner_context, select_query_options); + return buildFilterInfo(additional_filter_ast, table_expression_query_info.table_expression, planner_context); } JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expression, @@ -697,14 +694,14 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres } }; - auto row_policy_filter_info = buildRowPolicyFilterIfNeeded(storage, table_expression_query_info, planner_context, select_query_options); + auto row_policy_filter_info = buildRowPolicyFilterIfNeeded(storage, table_expression_query_info, planner_context); add_filter(row_policy_filter_info, "Row-level security filter"); if (query_context->getParallelReplicasMode() == Context::ParallelReplicasMode::CUSTOM_KEY) { if (settings.parallel_replicas_count > 1) { - auto parallel_replicas_custom_key_filter_info = buildCustomKeyFilterIfNeeded(storage, table_expression_query_info, planner_context, select_query_options); + auto parallel_replicas_custom_key_filter_info = buildCustomKeyFilterIfNeeded(storage, table_expression_query_info, planner_context); add_filter(parallel_replicas_custom_key_filter_info, "Parallel replicas custom key filter"); } else @@ -719,7 +716,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres } const auto & table_expression_alias = table_expression->getAlias(); - auto additional_filters_info = buildAdditionalFiltersIfNeeded(storage, table_expression_alias, table_expression_query_info, planner_context, select_query_options); + auto additional_filters_info = buildAdditionalFiltersIfNeeded(storage, table_expression_alias, table_expression_query_info, planner_context); add_filter(additional_filters_info, "additional filter"); from_stage = storage->getQueryProcessingStage(query_context, select_query_options.to_stage, storage_snapshot, table_expression_query_info); diff --git a/src/Planner/Utils.cpp b/src/Planner/Utils.cpp index 403e3f03997..733db0f00bc 100644 --- a/src/Planner/Utils.cpp +++ b/src/Planner/Utils.cpp @@ -429,7 +429,6 @@ SelectQueryInfo buildSelectQueryInfo(const QueryTreeNodePtr & query_tree, const FilterDAGInfo buildFilterInfo(ASTPtr filter_expression, const QueryTreeNodePtr & table_expression, PlannerContextPtr & planner_context, - [[maybe_unused]] const SelectQueryOptions & select_query_options, NameSet table_expression_required_names_without_filter) { const auto & query_context = planner_context->getQueryContext(); @@ -447,7 +446,7 @@ FilterDAGInfo buildFilterInfo(ASTPtr filter_expression, } collectSourceColumns(filter_query_tree, planner_context); - 
collectSets(filter_query_tree, *planner_context); //, select_query_options); + collectSets(filter_query_tree, *planner_context); auto filter_actions_dag = std::make_shared(); diff --git a/src/Planner/Utils.h b/src/Planner/Utils.h index 8071e201f88..d9412800e61 100644 --- a/src/Planner/Utils.h +++ b/src/Planner/Utils.h @@ -82,7 +82,6 @@ SelectQueryInfo buildSelectQueryInfo(const QueryTreeNodePtr & query_tree, const FilterDAGInfo buildFilterInfo(ASTPtr filter_expression, const QueryTreeNodePtr & table_expression, PlannerContextPtr & planner_context, - const SelectQueryOptions & select_query_options, NameSet table_expression_required_names_without_filter = {}); ASTPtr parseAdditionalResultFilter(const Settings & settings); diff --git a/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.cpp b/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.cpp index ebd58c3dc95..c54d32c1385 100644 --- a/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.cpp +++ b/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.cpp @@ -105,7 +105,7 @@ CreateSetAndFilterOnTheFlyStep::CreateSetAndFilterOnTheFlyStep( : ITransformingStep(input_stream_, input_stream_.header, getTraits()) , column_names(column_names_) , max_rows_in_set(max_rows_in_set_) - , own_set(std::make_shared(SizeLimits(max_rows_in_set, 0, OverflowMode::BREAK), false, 0, true)) + , own_set(std::make_shared(SizeLimits(max_rows_in_set, 0, OverflowMode::BREAK), 0, true)) , filtering_set(nullptr) , crosswise_connection(crosswise_connection_) , position(position_) diff --git a/src/Processors/QueryPlan/CreatingSetsStep.cpp b/src/Processors/QueryPlan/CreatingSetsStep.cpp index aae632787b9..3e4dfb0c7d1 100644 --- a/src/Processors/QueryPlan/CreatingSetsStep.cpp +++ b/src/Processors/QueryPlan/CreatingSetsStep.cpp @@ -124,7 +124,7 @@ void CreatingSetsStep::describePipeline(FormatSettings & settings) const IQueryPlanStep::describePipeline(processors, settings); } -void addCreatingSetsStep(QueryPlan & query_plan, std::vector> sets_from_subquery, ContextPtr context) +void addCreatingSetsStep(QueryPlan & query_plan, PreparedSets::Subqueries subqueries, ContextPtr context) { DataStreams input_streams; input_streams.emplace_back(query_plan.getCurrentDataStream()); @@ -133,7 +133,7 @@ void addCreatingSetsStep(QueryPlan & query_plan, std::vector(std::move(query_plan))); query_plan = QueryPlan(); - for (auto & future_set : sets_from_subquery) + for (auto & future_set : subqueries) { if (future_set->get()) continue; @@ -157,18 +157,11 @@ void addCreatingSetsStep(QueryPlan & query_plan, std::vector> DelayedCreatingSetsStep::makePlansForSets(DelayedCreatingSetsStep && step) { - // DataStreams input_streams; - // input_streams.emplace_back(query_plan.getCurrentDataStream()); - std::vector> plans; - // plans.emplace_back(std::make_unique(std::move(query_plan))); - // query_plan = QueryPlan(); - for (auto & future_set : step.sets_from_subquery) + for (auto & future_set : step.subqueries) { if (future_set->get()) continue; @@ -179,7 +172,6 @@ std::vector> DelayedCreatingSetsStep::makePlansForSet plan->optimize(QueryPlanOptimizationSettings::fromContext(step.context)); - //input_streams.emplace_back(plan->getCurrentDataStream()); plans.emplace_back(std::move(plan)); } @@ -199,8 +191,8 @@ void addCreatingSetsStep(QueryPlan & query_plan, PreparedSetsPtr prepared_sets, } DelayedCreatingSetsStep::DelayedCreatingSetsStep( - DataStream input_stream, std::vector> sets_from_subquery_, ContextPtr context_) - : sets_from_subquery(std::move(sets_from_subquery_)), 
context(std::move(context_)) + DataStream input_stream, PreparedSets::Subqueries subqueries_, ContextPtr context_) + : subqueries(std::move(subqueries_)), context(std::move(context_)) { input_streams = {input_stream}; output_stream = std::move(input_stream); diff --git a/src/Processors/QueryPlan/CreatingSetsStep.h b/src/Processors/QueryPlan/CreatingSetsStep.h index 7110775da79..a90b70a2fa4 100644 --- a/src/Processors/QueryPlan/CreatingSetsStep.h +++ b/src/Processors/QueryPlan/CreatingSetsStep.h @@ -47,10 +47,12 @@ public: void describePipeline(FormatSettings & settings) const override; }; +/// This is a temporary step which is converted to CreatingSetStep after plan optimization. +/// Can't be used by itself. class DelayedCreatingSetsStep final : public IQueryPlanStep { public: - DelayedCreatingSetsStep(DataStream input_stream, std::vector> sets_from_subquery_, ContextPtr context_); + DelayedCreatingSetsStep(DataStream input_stream, PreparedSets::Subqueries subqueries_, ContextPtr context_); String getName() const override { return "DelayedCreatingSets"; } @@ -59,14 +61,14 @@ public: static std::vector> makePlansForSets(DelayedCreatingSetsStep && step); ContextPtr getContext() const { return context; } - std::vector> detachSets() { return std::move(sets_from_subquery); } + PreparedSets::Subqueries detachSets() { return std::move(subqueries); } private: - std::vector> sets_from_subquery; + PreparedSets::Subqueries subqueries; ContextPtr context; }; -void addCreatingSetsStep(QueryPlan & query_plan, std::vector> sets_from_subquery, ContextPtr context); +void addCreatingSetsStep(QueryPlan & query_plan, PreparedSets::Subqueries subqueries, ContextPtr context); void addCreatingSetsStep(QueryPlan & query_plan, PreparedSetsPtr prepared_sets, ContextPtr context); diff --git a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp index 8e2221d564d..9b9cc221ca8 100644 --- a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp +++ b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp @@ -43,7 +43,6 @@ std::unique_ptr createLocalPlan( const Block & header, ContextPtr context, QueryProcessingStage::Enum processed_stage, - [[maybe_unused]] PreparedSetsPtr prepared_sets, size_t shard_num, size_t shard_count, size_t replica_num, @@ -99,7 +98,7 @@ std::unique_ptr createLocalPlan( } else { - auto interpreter = InterpreterSelectQuery(query_ast, new_context, select_query_options); //, prepared_sets); + auto interpreter = InterpreterSelectQuery(query_ast, new_context, select_query_options); interpreter.buildQueryPlan(*query_plan); } diff --git a/src/Processors/QueryPlan/DistributedCreateLocalPlan.h b/src/Processors/QueryPlan/DistributedCreateLocalPlan.h index cf59027a33f..c08b9bdf67e 100644 --- a/src/Processors/QueryPlan/DistributedCreateLocalPlan.h +++ b/src/Processors/QueryPlan/DistributedCreateLocalPlan.h @@ -18,7 +18,6 @@ std::unique_ptr createLocalPlan( const Block & header, ContextPtr context, QueryProcessingStage::Enum processed_stage, - PreparedSetsPtr prepared_sets, size_t shard_num, size_t shard_count, size_t replica_num, diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index e549a06877e..1e21d13e2b1 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1220,13 +1220,8 @@ static void buildIndexes( const auto & partition_key = metadata_snapshot->getPartitionKey(); auto minmax_columns_names = 
data.getMinMaxColumnsNames(partition_key); auto minmax_expression_actions = data.getMinMaxExpr(partition_key, ExpressionActionsSettings::fromContext(context)); - // minmax_columns_types = data.getMinMaxColumnsTypes(partition_key); - // if (context->getSettingsRef().allow_experimental_analyzer) indexes->minmax_idx_condition.emplace(filter_actions_dag, context, minmax_columns_names, minmax_expression_actions, NameSet()); - // else - // indexes->minmax_idx_condition.emplace(query_info, context, minmax_columns_names, minmax_expression_actions); - indexes->partition_pruner.emplace(metadata_snapshot, filter_actions_dag, context, false /* strict */); } @@ -1402,32 +1397,10 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( result.column_names_to_read.push_back(ExpressionActions::getSmallestColumn(available_real_columns).name); } - // storage_snapshot->check(result.column_names_to_read); - // Build and check if primary key is used when necessary const auto & primary_key = metadata_snapshot->getPrimaryKey(); const Names & primary_key_column_names = primary_key.column_names; - // if (!key_condition) - // { - // if (settings.query_plan_optimize_primary_key) - // { - // NameSet array_join_name_set; - // if (query_info.syntax_analyzer_result) - // array_join_name_set = query_info.syntax_analyzer_result->getArrayJoinSourceNameSet(); - - // key_condition.emplace(query_info.filter_actions_dag, - // context, - // primary_key_column_names, - // primary_key.expression, - // array_join_name_set); - // } - // else - // { - // key_condition.emplace(query_info, context, primary_key_column_names, primary_key.expression); - // } - // } - if (!indexes) buildIndexes(indexes, query_info.filter_actions_dag, data, context, query_info, metadata_snapshot); diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index 16cb06a94d6..ed740e3e242 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -97,7 +97,6 @@ ReadFromRemote::ReadFromRemote( ClusterProxy::SelectStreamFactory::Shards shards_, Block header_, QueryProcessingStage::Enum stage_, - PreparedSetsPtr prepared_sets_, StorageID main_table_, ASTPtr table_func_ptr_, ContextMutablePtr context_, @@ -110,7 +109,6 @@ ReadFromRemote::ReadFromRemote( : ISourceStep(DataStream{.header = std::move(header_)}) , shards(std::move(shards_)) , stage(stage_) - , prepared_sets(std::move(prepared_sets_)) , main_table(std::move(main_table_)) , table_func_ptr(std::move(table_func_ptr_)) , context(std::move(context_)) @@ -152,7 +150,7 @@ void ReadFromRemote::addLazyPipe(Pipes & pipes, const ClusterProxy::SelectStream my_context = context, my_throttler = throttler, my_main_table = main_table, my_table_func_ptr = table_func_ptr, my_scalars = scalars, my_external_tables = external_tables, - my_stage = stage, my_prepared_sets = prepared_sets, local_delay = shard.local_delay, + my_stage = stage, local_delay = shard.local_delay, add_agg_info, add_totals, add_extremes, async_read, async_query_sending]() mutable -> QueryPipelineBuilder { @@ -187,7 +185,7 @@ void ReadFromRemote::addLazyPipe(Pipes & pipes, const ClusterProxy::SelectStream if (try_results.empty() || local_delay < max_remote_delay) { auto plan = createLocalPlan( - query, header, my_context, my_stage, my_prepared_sets, my_shard.shard_info.shard_num, my_shard_count, 0, 0, /*coordinator=*/nullptr); + query, header, my_context, my_stage, my_shard.shard_info.shard_num, my_shard_count, 0, 0, 
/*coordinator=*/nullptr); return std::move(*plan->buildQueryPipeline( QueryPlanOptimizationSettings::fromContext(my_context), diff --git a/src/Processors/QueryPlan/ReadFromRemote.h b/src/Processors/QueryPlan/ReadFromRemote.h index 405b5727ff2..d4005d81f1b 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.h +++ b/src/Processors/QueryPlan/ReadFromRemote.h @@ -26,7 +26,6 @@ public: ClusterProxy::SelectStreamFactory::Shards shards_, Block header_, QueryProcessingStage::Enum stage_, - PreparedSetsPtr prepared_sets_, StorageID main_table_, ASTPtr table_func_ptr_, ContextMutablePtr context_, @@ -47,7 +46,6 @@ public: private: ClusterProxy::SelectStreamFactory::Shards shards; QueryProcessingStage::Enum stage; - PreparedSetsPtr prepared_sets; StorageID main_table; ASTPtr table_func_ptr; ContextMutablePtr context; diff --git a/src/Storages/KVStorageUtils.cpp b/src/Storages/KVStorageUtils.cpp index 4c1fe4fc559..16ab99d03b4 100644 --- a/src/Storages/KVStorageUtils.cpp +++ b/src/Storages/KVStorageUtils.cpp @@ -77,7 +77,6 @@ bool traverseASTFilter( if (!future_set) return false; - //if (!future_set->isReady()) future_set->buildOrderedSetInplace(context); auto set = future_set->get(); diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index d41dd30098b..f909d854cf6 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -1194,8 +1194,6 @@ bool KeyCondition::tryPrepareSetIndex( RPNElement & out, size_t & out_key_column_num) { - // std::cerr << "::: tryPrepareSetIndex for " << func.getColumnName() << std::endl; - // std::cerr << StackTrace().toString() << std::endl; const auto & left_arg = func.getArgumentAt(0); out_key_column_num = 0; @@ -1239,56 +1237,27 @@ bool KeyCondition::tryPrepareSetIndex( } if (indexes_mapping.empty()) - { - // std::cerr << ".. index mapping is empty\n"; return false; - } const auto right_arg = func.getArgumentAt(1); - // LOG_TRACE(&Poco::Logger::get("KK"), "Trying to get set for {}", right_arg.getColumnName()); - auto future_set = right_arg.tryGetPreparedSet(indexes_mapping, data_types); if (!future_set) - { - // std::cerr << ".. no future set\n"; return false; - } - // LOG_TRACE(&Poco::Logger::get("KK"), "Found set for {}", right_arg.getColumnName()); - - //if (!future_set->isReady()) - //{ - // LOG_TRACE(&Poco::Logger::get("KK"), "Building set inplace for {}", right_arg.getColumnName()); - future_set->buildOrderedSetInplace(right_arg.getTreeContext().getQueryContext()); - //} - - auto prepared_set = future_set->get(); + auto prepared_set = future_set->buildOrderedSetInplace(right_arg.getTreeContext().getQueryContext()); if (!prepared_set) - { - - // std::cerr << ".. no prepared set\n"; return false; - } - - // LOG_TRACE(&Poco::Logger::get("KK"), "Set if ready for {}", right_arg.getColumnName()); /// The index can be prepared if the elements of the set were saved in advance. if (!prepared_set->hasExplicitSetElements()) - { - - // std::cerr << ".. no explicit elements\n"; return false; - } - - // LOG_TRACE(&Poco::Logger::get("KK"), "Has explicit elements for {}", right_arg.getColumnName()); prepared_set->checkColumnsNumber(left_args_count); for (size_t i = 0; i < indexes_mapping.size(); ++i) prepared_set->checkTypesEqual(indexes_mapping[i].tuple_index, data_types[i]); out.set_index = std::make_shared(prepared_set->getSetElements(), std::move(indexes_mapping)); - // std::cerr << ".. 
can use\n"; return true; } diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 125fe98203f..ee515106591 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -1839,9 +1839,7 @@ void MergeTreeDataSelectExecutor::selectPartsToRead( if (partition_pruner) { - auto val = partition_pruner->canBePruned(*part); - // std::cerr << "... part " << part->getNameWithState() << " cbp ? " << val << std::endl; - if (val) + if (partition_pruner->canBePruned(*part)) continue; } diff --git a/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp b/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp index e2bf9bde674..398a85e92ac 100644 --- a/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp @@ -31,21 +31,13 @@ namespace ColumnWithTypeAndName getPreparedSetInfo(const ConstSetPtr & prepared_set) { - // std::cerr << "====== " << prepared_set->getDataTypes().size() << std::endl; if (prepared_set->getDataTypes().size() == 1) return {prepared_set->getSetElements()[0], prepared_set->getElementsTypes()[0], "dummy"}; Columns set_elements; for (auto & set_element : prepared_set->getSetElements()) - { - // std::cerr << set_element->dumpStructure() << std::endl; - set_elements.emplace_back(set_element->convertToFullColumnIfConst()); - } - // for (auto & set_element : prepared_set->getElementsTypes()) - // { - // // std::cerr << set_element->getName() << std::endl; - // } + set_elements.emplace_back(set_element->convertToFullColumnIfConst()); return {ColumnTuple::create(set_elements), std::make_shared(prepared_set->getElementsTypes()), "dummy"}; } @@ -319,30 +311,17 @@ bool MergeTreeIndexConditionBloomFilter::traverseFunction(const RPNBuilderTreeNo if (functionIsInOrGlobalInOperator(function_name)) { - //std::cerr << StackTrace().toString() << std::endl; - - auto future_set = rhs_argument.tryGetPreparedSet(); - - //std::cerr << "==== Finding set for MergeTreeBF " << bool(future_set) << std::endl; - - if (future_set) // && !future_set->isReady()) + if (auto future_set = rhs_argument.tryGetPreparedSet(); future_set) { - //std::cerr << "==== not ready, building " << std::endl; - future_set->buildOrderedSetInplace(rhs_argument.getTreeContext().getQueryContext()); - } - - ConstSetPtr prepared_set; - if (future_set) - prepared_set = future_set->get(); - - //std::cerr << "==== Prep set for MergeTreeBF " << bool(prepared_set) << ' ' << (prepared_set ? prepared_set->hasExplicitSetElements() : false) << std::endl; - - if (prepared_set && prepared_set->hasExplicitSetElements()) - { - const auto prepared_info = getPreparedSetInfo(prepared_set); - // std::cerr << "...... 
" << prepared_info.dumpStructure() << std::endl; - if (traverseTreeIn(function_name, lhs_argument, prepared_set, prepared_info.type, prepared_info.column, out)) - maybe_useful = true; + if (auto prepared_set = future_set->buildOrderedSetInplace(rhs_argument.getTreeContext().getQueryContext()); prepared_set) + { + if (prepared_set->hasExplicitSetElements()) + { + const auto prepared_info = getPreparedSetInfo(prepared_set); + if (traverseTreeIn(function_name, lhs_argument, prepared_set, prepared_info.type, prepared_info.column, out)) + maybe_useful = true; + } + } } } else if (function_name == "equals" || @@ -387,7 +366,6 @@ bool MergeTreeIndexConditionBloomFilter::traverseTreeIn( size_t row_size = column->size(); size_t position = header.getPositionByName(key_node_column_name); const DataTypePtr & index_type = header.getByPosition(position).type; - // std::cerr << "::::: " << ColumnWithTypeAndName{column, type, ""}.dumpStructure() << " -> " << index_type->getName() << std::endl; const auto & converted_column = castColumn(ColumnWithTypeAndName{column, type, ""}, index_type); out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithColumn(index_type, converted_column, 0, row_size))); diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp index 422f504d046..b6a2cafe245 100644 --- a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp @@ -625,13 +625,10 @@ bool MergeTreeConditionFullText::tryPrepareSetBloomFilter( return false; auto future_set = right_argument.tryGetPreparedSet(data_types); - if (future_set) // && !future_set->isReady()) - future_set->buildOrderedSetInplace(right_argument.getTreeContext().getQueryContext()); - - ConstSetPtr prepared_set; - if (future_set) - prepared_set = future_set->get(); + if (!future_set) + return false; + auto prepared_set = future_set->buildOrderedSetInplace(right_argument.getTreeContext().getQueryContext()); if (!prepared_set || !prepared_set->hasExplicitSetElements()) return false; diff --git a/src/Storages/MergeTree/MergeTreeIndexInverted.cpp b/src/Storages/MergeTree/MergeTreeIndexInverted.cpp index 2814b8521f6..6b4919c545d 100644 --- a/src/Storages/MergeTree/MergeTreeIndexInverted.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexInverted.cpp @@ -655,19 +655,11 @@ bool MergeTreeConditionInverted::tryPrepareSetGinFilter( if (key_tuple_mapping.empty()) return false; - //std::cerr << "==== Finding set for MergeTreeConditionInverted\n"; - auto future_set = rhs.tryGetPreparedSet(); + if (!future_set) + return false; - //std::cerr << "==== Set for MergeTreeConditionInverted" << bool(future_set) << std::endl; - - if (future_set) // && !future_set->isReady()) - future_set->buildOrderedSetInplace(rhs.getTreeContext().getQueryContext()); - - ConstSetPtr prepared_set; - if (future_set) - prepared_set = future_set->get(); - + auto prepared_set = future_set->buildOrderedSetInplace(rhs.getTreeContext().getQueryContext()); if (!prepared_set || !prepared_set->hasExplicitSetElements()) return false; diff --git a/src/Storages/MergeTree/PartitionPruner.cpp b/src/Storages/MergeTree/PartitionPruner.cpp index a397a1475d1..97bb9f3b4d4 100644 --- a/src/Storages/MergeTree/PartitionPruner.cpp +++ b/src/Storages/MergeTree/PartitionPruner.cpp @@ -29,12 +29,6 @@ PartitionPruner::PartitionPruner(const StorageMetadataPtr & metadata, ActionsDAG , partition_condition(filter_actions_dag, context, partition_key.column_names, partition_key.expression, {}, 
true /* single_point */, strict) , useless(strict ? partition_condition.anyUnknownOrAlwaysTrue() : partition_condition.alwaysUnknownOrTrue()) { - // auto description = getKeyCondition().getDescription(); - // std::cerr << ".... " << description.condition << std::endl; - // std::cerr << filter_actions_dag->dumpDAG() << std::endl; - // for (const auto & name : partition_key.column_names) - // std::cerr << ". " << name << std::endl; - // std::cerr << partition_key.expression->dumpActions() << std::endl; } bool PartitionPruner::canBePruned(const IMergeTreeDataPart & part) @@ -52,8 +46,6 @@ bool PartitionPruner::canBePruned(const IMergeTreeDataPart & part) else { const auto & partition_value = part.partition.value; - // for (const auto & val : partition_value) - // std::cerr << val.dump() << std::endl; std::vector index_value(partition_value.begin(), partition_value.end()); for (auto & field : index_value) { @@ -64,7 +56,6 @@ bool PartitionPruner::canBePruned(const IMergeTreeDataPart & part) is_valid = partition_condition.mayBeTrueInRange( partition_value.size(), index_value.data(), index_value.data(), partition_key.data_types); - // std::cerr << "may be true " << is_valid << std::endl; partition_filter_map.emplace(partition_id, is_valid); if (!is_valid) diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index a69db7d80c0..c46192ab43b 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -88,10 +88,8 @@ #include #include #include -#include #include #include -#include #include #include #include @@ -769,8 +767,7 @@ void StorageDistributed::read( header, snapshot_data.objects_by_shard, storage_snapshot, - processed_stage, - query_info.prepared_sets); + processed_stage); auto settings = local_context->getSettingsRef(); diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 655b3d5a8f4..c02c96f62be 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -231,8 +231,7 @@ void StorageMergeTree::read( header, {}, storage_snapshot, - processed_stage, - query_info.prepared_sets); + processed_stage); ClusterProxy::executeQueryWithParallelReplicas( query_plan, getStorageID(), /*remove_table_function_ptr*/ nullptr, diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 01f663aeee4..bb99e21e4ab 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -4866,8 +4866,7 @@ void StorageReplicatedMergeTree::read( header, {}, storage_snapshot, - processed_stage, - query_info.prepared_sets); + processed_stage); ClusterProxy::executeQueryWithParallelReplicas( query_plan, getStorageID(), /*remove_table_function_ptr*/ nullptr, diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index f9fdcdf02a6..79369ab4bcb 100644 --- a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -147,7 +147,7 @@ StorageSet::StorageSet( const String & comment, bool persistent_) : StorageSetOrJoinBase{disk_, relative_path_, table_id_, columns_, constraints_, comment, persistent_} - , set(std::make_shared(SizeLimits(), false, 0, true)) + , set(std::make_shared(SizeLimits(), 0, true)) { Block header = getInMemoryMetadataPtr()->getSampleBlock(); set->setHeader(header.getColumnsWithTypeAndName()); @@ -176,7 +176,7 @@ void StorageSet::truncate(const ASTPtr &, const StorageMetadataPtr & metadata_sn Block header = metadata_snapshot->getSampleBlock(); increment = 0; - set = 
std::make_shared(SizeLimits(), false, 0, true); + set = std::make_shared(SizeLimits(), 0, true); set->setHeader(header.getColumnsWithTypeAndName()); } diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index 4cb08b90c2a..d361a4173c1 100644 --- a/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -323,14 +323,8 @@ static void extractPathImpl(const ActionsDAG::Node & node, Paths & res, ContextP if (!future_set) return; - //if (!future_set->isReady()) - future_set->buildOrderedSetInplace(context); - - auto set = future_set->get(); - if (!set) - return; - - if (!set->hasExplicitSetElements()) + auto set = future_set->buildOrderedSetInplace(context); + if (!set || !set->hasExplicitSetElements()) return; set->checkColumnsNumber(1); diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index b83afe766aa..907fc0cd22c 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -89,25 +89,6 @@ ASTPtr buildWhereExpression(const ASTs & functions) return makeASTFunction("and", functions); } -// void buildSets(const ASTPtr & expression, ExpressionAnalyzer & analyzer) -// { -// const auto * func = expression->as(); -// if (func && functionIsInOrGlobalInOperator(func->name)) -// { -// const IAST & args = *func->arguments; -// const ASTPtr & arg = args.children.at(1); -// if (arg->as() || arg->as()) -// { -// analyzer.tryMakeSetForIndexFromSubquery(arg); -// } -// } -// else -// { -// for (const auto & child : expression->children) -// buildSets(child, analyzer); -// } -// } - } namespace VirtualColumnUtils @@ -208,7 +189,6 @@ void filterBlockWithQuery(const ASTPtr & query, Block & block, ContextPtr contex /// Let's analyze and calculate the prepared expression. 
auto syntax_result = TreeRewriter(context).analyze(expression_ast, block.getNamesAndTypesList()); ExpressionAnalyzer analyzer(expression_ast, syntax_result, context); - //buildSets(expression_ast, analyzer); ExpressionActionsPtr actions = analyzer.getActions(false /* add alises */, true /* project result */, CompileExpressions::yes); for (const auto & node : actions->getNodes()) diff --git a/src/Storages/buildQueryTreeForShard.cpp b/src/Storages/buildQueryTreeForShard.cpp index fa4730cbe84..1ee7d747fcc 100644 --- a/src/Storages/buildQueryTreeForShard.cpp +++ b/src/Storages/buildQueryTreeForShard.cpp @@ -1,3 +1,4 @@ + #include #include @@ -15,6 +16,8 @@ #include #include #include +#include +#include namespace DB { @@ -276,11 +279,14 @@ TableNodePtr executeSubqueryNode(const QueryTreeNodePtr & subquery_node, temporary_table_expression_node->setTemporaryTableName(temporary_table_name); auto table_out = external_storage->write({}, external_storage->getInMemoryMetadataPtr(), mutable_context, /*async_insert=*/false); - auto io = interpreter.execute(); - io.pipeline.complete(std::move(table_out)); - CompletedPipelineExecutor executor(io.pipeline); - executor.execute(); + auto optimization_settings = QueryPlanOptimizationSettings::fromContext(mutable_context); + auto build_pipeline_settings = BuildQueryPipelineSettings::fromContext(mutable_context); + auto pipeline = QueryPipelineBuilder::getPipeline(std::move(*query_plan.buildQueryPipeline(optimization_settings, build_pipeline_settings))); + + pipeline.complete(std::move(table_out)); + CompletedPipelineExecutor executor(pipeline); + executor.execute(); mutable_context->addExternalTable(temporary_table_name, std::move(external_storage_holder)); return temporary_table_expression_node; @@ -291,14 +297,13 @@ TableNodePtr executeSubqueryNode(const QueryTreeNodePtr & subquery_node, QueryTreeNodePtr buildQueryTreeForShard(SelectQueryInfo & query_info, QueryTreeNodePtr query_tree_to_modify) { auto & planner_context = query_info.planner_context; - const auto & query_context = planner_context->getQueryContext(); CollectColumnSourceToColumnsVisitor collect_column_source_to_columns_visitor; collect_column_source_to_columns_visitor.visit(query_tree_to_modify); const auto & column_source_to_columns = collect_column_source_to_columns_visitor.getColumnSourceToColumns(); - DistributedProductModeRewriteInJoinVisitor visitor(query_info.planner_context->getQueryContext()); + DistributedProductModeRewriteInJoinVisitor visitor(planner_context->getQueryContext()); visitor.visit(query_tree_to_modify); auto replacement_map = visitor.getReplacementMap(); From dd49f4a5651076136ed8deda33ac2f1f71ba8635 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 22 Jun 2023 17:07:52 +0200 Subject: [PATCH 0650/1997] Remove key --- src/Interpreters/Cache/Metadata.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index b19d095692d..75d37072711 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -53,17 +53,13 @@ KeyMetadata::KeyMetadata( const Key & key_, const std::string & key_path_, CleanupQueue & cleanup_queue_, - [[maybe_unused]] Poco::Logger * log_, + Poco::Logger * log_, bool created_base_directory_) : key(key_) , key_path(key_path_) , cleanup_queue(cleanup_queue_) , created_base_directory(created_base_directory_) -#ifdef ABORT_ON_LOGICAL_ERROR - , log(&Poco::Logger::get("Key(" + key.toString() + ")")) -#else , log(log_) -#endif { if 
(created_base_directory) chassert(fs::exists(key_path)); From 765b4ce279af01e7905e1e0f525b24500161d1e0 Mon Sep 17 00:00:00 2001 From: Andrey Zvonov Date: Thu, 22 Jun 2023 17:19:47 +0200 Subject: [PATCH 0651/1997] added wrong path for hdfs test --- tests/integration/test_storage_hdfs/test.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index 2d0285fb9f2..cf951050c6f 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -101,6 +101,13 @@ def test_storage_with_multidirectory_glob(started_cluster): ) assert (r == f"File1\t11\nFile2\t22\n") or (r == f"File2\t22\nFile1\t11\n") + try: + node1.query("SELECT * FROM hdfs('hdfs://hdfs1:9000/multiglob/{p4/path1,p2/path3}/postfix/data{1,2}.nonexist', TSV)") + assert False, "Exception have to be thrown" + except Exception as ex: + print(ex) + assert "no files" in str(ex) + def test_read_write_table(started_cluster): hdfs_api = started_cluster.hdfs_api From 4b0d5a8a96c744912ddb514e779c40a560abb27c Mon Sep 17 00:00:00 2001 From: Thom O'Connor Date: Thu, 22 Jun 2023 09:30:23 -0600 Subject: [PATCH 0652/1997] Update remote.md Simplifying this example for remoteSecure(): * no reason to be using a ReplicatedMergeTree example here * no reason to be setting index_granularity to a default value --- docs/en/sql-reference/table-functions/remote.md | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/docs/en/sql-reference/table-functions/remote.md b/docs/en/sql-reference/table-functions/remote.md index bf0abd49fc6..8dcae6a0a79 100644 --- a/docs/en/sql-reference/table-functions/remote.md +++ b/docs/en/sql-reference/table-functions/remote.md @@ -114,9 +114,8 @@ This example uses one table from a sample dataset. The database is `imdb`, and `first_name` String, `last_name` String, `gender` FixedString(1)) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/{uuid}/{shard}', '{replica}') - ORDER BY (id, first_name, last_name, gender) - SETTINGS index_granularity = 8192 + ENGINE = MergeTree + ORDER BY (id, first_name, last_name, gender); ``` #### On the destination ClickHouse system: @@ -132,9 +131,8 @@ This example uses one table from a sample dataset. The database is `imdb`, and `first_name` String, `last_name` String, `gender` FixedString(1)) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/{uuid}/{shard}', '{replica}') - ORDER BY (id, first_name, last_name, gender) - SETTINGS index_granularity = 8192 + ENGINE = MergeTree + ORDER BY (id, first_name, last_name, gender); ``` #### Back on the source deployment: @@ -142,7 +140,7 @@ This example uses one table from a sample dataset. The database is `imdb`, and Insert into the new database and table created on the remote system. You will need the host, port, username, password, destination database, and destination table. 
```sql INSERT INTO FUNCTION -remoteSecure('remote.clickhouse.cloud:9440', 'imdb.actors', 'USER', 'PASSWORD', rand()) +remoteSecure('remote.clickhouse.cloud:9440', 'imdb.actors', 'USER', 'PASSWORD') SELECT * from imdb.actors ``` From 85d86fec8337e347b519ebd7318012e83af109ec Mon Sep 17 00:00:00 2001 From: sanjam Date: Thu, 22 Jun 2023 13:38:50 +0000 Subject: [PATCH 0653/1997] external_aggregation_fix --- .../HashTable/TwoLevelStringHashTable.h | 28 +++++++++++++------ 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/src/Common/HashTable/TwoLevelStringHashTable.h b/src/Common/HashTable/TwoLevelStringHashTable.h index ea1914348b2..ee6dcd05d9a 100644 --- a/src/Common/HashTable/TwoLevelStringHashTable.h +++ b/src/Common/HashTable/TwoLevelStringHashTable.h @@ -113,14 +113,20 @@ public: if ((reinterpret_cast(p) & 2048) == 0) { memcpy(&n[0], p, 8); - n[0] &= -1ULL >> s; - } + if constexpr (std::endian::native == std::endian::little) + n[0] &= -1ULL >> s; + else + n[0] &= -1ULL << s; + } else { const char * lp = x.data + x.size - 8; memcpy(&n[0], lp, 8); - n[0] >>= s; - } + if constexpr (std::endian::native == std::endian::little) + n[0] >>= s; + else + n[0] <<= s; + } auto res = hash(k8); auto buck = getBucketFromHash(res); keyHolderDiscardKey(key_holder); @@ -131,8 +137,11 @@ public: memcpy(&n[0], p, 8); const char * lp = x.data + x.size - 8; memcpy(&n[1], lp, 8); - n[1] >>= s; - auto res = hash(k16); + if constexpr (std::endian::native == std::endian::little) + n[1] >>= s; + else + n[1] <<= s; + auto res = hash(k16); auto buck = getBucketFromHash(res); keyHolderDiscardKey(key_holder); return func(self.impls[buck].m2, k16, res); @@ -142,8 +151,11 @@ public: memcpy(&n[0], p, 16); const char * lp = x.data + x.size - 8; memcpy(&n[2], lp, 8); - n[2] >>= s; - auto res = hash(k24); + if constexpr (std::endian::native == std::endian::little) + n[2] >>= s; + else + n[2] <<= s; + auto res = hash(k24); auto buck = getBucketFromHash(res); keyHolderDiscardKey(key_holder); return func(self.impls[buck].m3, k24, res); From a89bfb9ec507c08c732cb1088915914a42485e41 Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Thu, 22 Jun 2023 08:59:44 -0700 Subject: [PATCH 0654/1997] Revert back to value type as requested --- src/IO/WriteHelpers.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index 2ee1e1651f6..3d1a5aa49ef 100644 --- a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -1183,13 +1183,13 @@ inline void writeBinaryEndian(T x, WriteBuffer & buf) } template -inline void writeBinaryLittleEndian(const T & x, WriteBuffer & buf) +inline void writeBinaryLittleEndian(T x, WriteBuffer & buf) { writeBinaryEndian(x, buf); } template -inline void writeBinaryBigEndian(const T & x, WriteBuffer & buf) +inline void writeBinaryBigEndian(T x, WriteBuffer & buf) { writeBinaryEndian(x, buf); } From 2498170253f91f7406452b99ef1dd168c8a9ec2d Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Wed, 21 Jun 2023 23:46:12 +0000 Subject: [PATCH 0655/1997] Fix use-after-free in StorageURL when switching URLs --- src/Formats/FormatFactory.h | 1 + src/Storages/StorageURL.cpp | 2 ++ 2 files changed, 3 insertions(+) diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h index 1d258beca8d..489db944ee6 100644 --- a/src/Formats/FormatFactory.h +++ b/src/Formats/FormatFactory.h @@ -153,6 +153,7 @@ public: /// * Parallel reading. /// To enable it, make sure `buf` is a SeekableReadBuffer implementing readBigAt(). 
/// * Parallel parsing. + /// `buf` must outlive the returned IInputFormat. InputFormatPtr getInput( const String & name, ReadBuffer & buf, diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 1d6aed204cb..aa574ef11be 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -361,6 +361,8 @@ Chunk StorageURLSource::generate() pipeline->reset(); reader.reset(); + input_format.reset(); + read_buf.reset(); } return {}; } From 0823d610a398ffe435e3b3d163e691f72eca89f5 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Thu, 22 Jun 2023 00:57:50 +0000 Subject: [PATCH 0656/1997] A test --- .../0_stateless/02790_url_multiple_tsv_files.reference | 5 +++++ tests/queries/0_stateless/02790_url_multiple_tsv_files.sql | 5 +++++ 2 files changed, 10 insertions(+) create mode 100644 tests/queries/0_stateless/02790_url_multiple_tsv_files.reference create mode 100644 tests/queries/0_stateless/02790_url_multiple_tsv_files.sql diff --git a/tests/queries/0_stateless/02790_url_multiple_tsv_files.reference b/tests/queries/0_stateless/02790_url_multiple_tsv_files.reference new file mode 100644 index 00000000000..927a33afecf --- /dev/null +++ b/tests/queries/0_stateless/02790_url_multiple_tsv_files.reference @@ -0,0 +1,5 @@ +136 +136 +136 +136 +136 diff --git a/tests/queries/0_stateless/02790_url_multiple_tsv_files.sql b/tests/queries/0_stateless/02790_url_multiple_tsv_files.sql new file mode 100644 index 00000000000..b71bd7c737a --- /dev/null +++ b/tests/queries/0_stateless/02790_url_multiple_tsv_files.sql @@ -0,0 +1,5 @@ +select sum(*) from (select * from url('http://127.0.0.1:8123?query=select+{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}+as+x+format+TSV', 'TSV') settings max_threads=1, max_download_threads=1); +select sum(*) from (select * from url('http://127.0.0.1:8123?query=select+{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}+as+x+format+CSV', 'CSV') settings max_threads=1, max_download_threads=1); +select sum(*) from (select * from url('http://127.0.0.1:8123?query=select+{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}+as+x+format+JSONEachRow', 'JSONEachRow') settings max_threads=1, max_download_threads=1); +select sum(*) from (select * from url('http://127.0.0.1:8123?query=select+{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}+as+x+format+TSKV', 'TSKV') settings max_threads=1, max_download_threads=1); +select sum(*) from (select * from url('http://127.0.0.1:8123?query=select+{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}+as+x+format+Native', 'Native') settings max_threads=1, max_download_threads=1); From 456709488ead67d890d965fb04f10e5fcf4fa307 Mon Sep 17 00:00:00 2001 From: Mike Kot Date: Thu, 22 Jun 2023 20:03:36 +0300 Subject: [PATCH 0657/1997] Update ext-dict-functions.md --- docs/ru/sql-reference/functions/ext-dict-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/ext-dict-functions.md b/docs/ru/sql-reference/functions/ext-dict-functions.md index e6cb878d1c7..d14f0ddf027 100644 --- a/docs/ru/sql-reference/functions/ext-dict-functions.md +++ b/docs/ru/sql-reference/functions/ext-dict-functions.md @@ -5,7 +5,7 @@ sidebar_label: "Функции для работы с внешними слов --- :::note "Внимание" - Для словарей, созданных с помощью [DDL-запросов](../../sql-reference/statements/create/dictionary.md), в параметре `dict_name` указывается полное имя словаря вместе с базой данных, например: `.`. Если база данных не указана, используется текущая. 
+Для словарей, созданных с помощью [DDL-запросов](../../sql-reference/statements/create/dictionary.md), в параметре `dict_name` указывается полное имя словаря вместе с базой данных, например: `.`. Если база данных не указана, используется текущая. ::: # Функции для работы с внешними словарями {#ext_dict_functions} From 8afb8bf13afd654a6706b69f58da83b6096770db Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 22 Jun 2023 19:06:28 +0200 Subject: [PATCH 0658/1997] disable table structure check for secondary queries from Replicated db --- src/Databases/DatabaseReplicated.cpp | 4 +- src/Databases/DatabaseReplicatedWorker.cpp | 1 + .../MergeTree/registerStorageMergeTree.cpp | 8 ++- src/Storages/StorageReplicatedMergeTree.cpp | 39 ++++++++++----- src/Storages/StorageReplicatedMergeTree.h | 5 +- .../test_replicated_database/test.py | 49 +++++++++++++++++++ 6 files changed, 90 insertions(+), 16 deletions(-) diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index e468e533818..de40ee4d82d 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -985,7 +985,7 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep const auto & create_query_string = metadata_it->second; if (isTableExist(table_name, getContext())) { - assert(create_query_string == readMetadataFile(table_name)); + assert(create_query_string == readMetadataFile(table_name) || getTableUUIDIfReplicated(create_query_string, getContext()) != UUIDHelpers::Nil); continue; } @@ -1274,7 +1274,7 @@ void DatabaseReplicated::commitAlterTable(const StorageID & table_id, const String & statement, ContextPtr query_context) { auto txn = query_context->getZooKeeperMetadataTransaction(); - assert(!ddl_worker->isCurrentlyActive() || txn); + assert(!ddl_worker || !ddl_worker->isCurrentlyActive() || txn); if (txn && txn->isInitialQuery()) { String metadata_zk_path = zookeeper_path + "/metadata/" + escapeForFileName(table_id.table_name); diff --git a/src/Databases/DatabaseReplicatedWorker.cpp b/src/Databases/DatabaseReplicatedWorker.cpp index 66ae5cd250c..ff2675dfd6b 100644 --- a/src/Databases/DatabaseReplicatedWorker.cpp +++ b/src/Databases/DatabaseReplicatedWorker.cpp @@ -91,6 +91,7 @@ void DatabaseReplicatedDDLWorker::initializeReplication() if (zookeeper->tryGet(database->replica_path + "/digest", digest_str)) { digest = parse(digest_str); + LOG_TRACE(log, "Metadata digest in ZooKeeper: {}", digest); std::lock_guard lock{database->metadata_mutex}; local_digest = database->tables_metadata_digest; } diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 027cd1af7c9..75f1542e30e 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -23,6 +23,7 @@ #include #include #include +#include namespace DB @@ -684,6 +685,10 @@ static StoragePtr create(const StorageFactory::Arguments & args) if (replicated) { + bool need_check_table_structure = true; + if (auto txn = args.getLocalContext()->getZooKeeperMetadataTransaction()) + need_check_table_structure = txn->isInitialQuery(); + return std::make_shared( zookeeper_path, replica_name, @@ -696,7 +701,8 @@ static StoragePtr create(const StorageFactory::Arguments & args) merging_params, std::move(storage_settings), args.has_force_restore_data_flag, - renaming_restrictions); + renaming_restrictions, + need_check_table_structure); } else return std::make_shared( diff 
--git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index bb99e21e4ab..8f34fca85c5 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -292,7 +292,8 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( const MergingParams & merging_params_, std::unique_ptr settings_, bool has_force_restore_data_flag, - RenamingRestrictions renaming_restrictions_) + RenamingRestrictions renaming_restrictions_, + bool need_check_structure) : MergeTreeData(table_id_, metadata_, context_, @@ -492,11 +493,17 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( /// information in /replica/metadata. other_replicas_fixed_granularity = checkFixedGranularityInZookeeper(); - checkTableStructure(zookeeper_path, metadata_snapshot); + /// Allow structure mismatch for secondary queries from Replicated database. + /// It may happen if the table was altered just after creation. + /// Metadata will be updated in cloneMetadataIfNeeded(...), metadata_version will be 0 for a while. + bool same_structure = checkTableStructure(zookeeper_path, metadata_snapshot, need_check_structure); - Coordination::Stat metadata_stat; - current_zookeeper->get(zookeeper_path + "/metadata", &metadata_stat); - setInMemoryMetadata(metadata_snapshot->withMetadataVersion(metadata_stat.version)); + if (same_structure) + { + Coordination::Stat metadata_stat; + current_zookeeper->get(zookeeper_path + "/metadata", &metadata_stat); + setInMemoryMetadata(metadata_snapshot->withMetadataVersion(metadata_stat.version)); + } } catch (Coordination::Exception & e) { @@ -1186,7 +1193,7 @@ bool StorageReplicatedMergeTree::removeTableNodesFromZooKeeper(zkutil::ZooKeeper /** Verify that list of columns and table storage_settings_ptr match those specified in ZK (/metadata). * If not, throw an exception. */ -void StorageReplicatedMergeTree::checkTableStructure(const String & zookeeper_prefix, const StorageMetadataPtr & metadata_snapshot) +bool StorageReplicatedMergeTree::checkTableStructure(const String & zookeeper_prefix, const StorageMetadataPtr & metadata_snapshot, bool strict_check) { auto zookeeper = getZooKeeper(); @@ -1201,12 +1208,20 @@ void StorageReplicatedMergeTree::checkTableStructure(const String & zookeeper_pr auto columns_from_zk = ColumnsDescription::parse(zookeeper->get(fs::path(zookeeper_prefix) / "columns", &columns_stat)); const ColumnsDescription & old_columns = metadata_snapshot->getColumns(); - if (columns_from_zk != old_columns) + if (columns_from_zk == old_columns) + return true; + + if (!strict_check && metadata_stat.version != 0) { - throw Exception(ErrorCodes::INCOMPATIBLE_COLUMNS, - "Table columns structure in ZooKeeper is different from local table structure. Local columns:\n" - "{}\nZookeeper columns:\n{}", old_columns.toString(), columns_from_zk.toString()); + LOG_WARNING(log, "Table columns structure in ZooKeeper is different from local table structure. " + "Assuming it's because the table was altered concurrently. Metadata version: {}. Local columns:\n" + "{}\nZookeeper columns:\n{}", metadata_stat.version, old_columns.toString(), columns_from_zk.toString()); + return false; } + + throw Exception(ErrorCodes::INCOMPATIBLE_COLUMNS, + "Table columns structure in ZooKeeper is different from local table structure. Local columns:\n"
+ "{}\nZookeeper columns:\n{}", old_columns.toString(), columns_from_zk.toString()); } void StorageReplicatedMergeTree::setTableStructure(const StorageID & table_id, const ContextPtr & local_context, @@ -2993,7 +3008,9 @@ void StorageReplicatedMergeTree::cloneMetadataIfNeeded(const String & source_rep dummy_alter.alter_version = source_metadata_version; dummy_alter.create_time = time(nullptr); - zookeeper->create(replica_path + "/queue/queue-", dummy_alter.toString(), zkutil::CreateMode::PersistentSequential); + String path_created = zookeeper->create(replica_path + "/queue/queue-", dummy_alter.toString(), zkutil::CreateMode::PersistentSequential); + LOG_INFO(log, "Created an ALTER_METADATA entry {} to force metadata update after cloning replica from {}. Entry: {}", + path_created, source_replica, dummy_alter.toString()); /// We don't need to do anything with mutation_pointer, because mutation log cleanup process is different from /// replication log cleanup. A mutation is removed from ZooKeeper only if all replicas had executed the mutation, diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index c08e05090b1..bdd3f0da5bf 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -108,7 +108,8 @@ public: const MergingParams & merging_params_, std::unique_ptr settings_, bool has_force_restore_data_flag, - RenamingRestrictions renaming_restrictions_); + RenamingRestrictions renaming_restrictions_, + bool need_check_structure); void startup() override; void shutdown() override; @@ -529,7 +530,7 @@ private: */ void createNewZooKeeperNodes(); - void checkTableStructure(const String & zookeeper_prefix, const StorageMetadataPtr & metadata_snapshot); + bool checkTableStructure(const String & zookeeper_prefix, const StorageMetadataPtr & metadata_snapshot, bool strict_check = true); /// A part of ALTER: apply metadata changes only (data parts are altered separately). /// Must be called under IStorage::lockForAlter() lock.
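[Editor's note] The hunks above turn checkTableStructure() from a hard check into one that can tolerate a mismatch: it now returns a bool, and only the strict path throws. A minimal standalone sketch of that control flow follows, assuming simplified stand-in types (ColumnsDescription reduced to a comparable struct, std::runtime_error in place of ClickHouse's Exception machinery); it illustrates the pattern and is not the actual implementation.

```cpp
#include <stdexcept>
#include <string>

/// Stand-in for ClickHouse's ColumnsDescription: only equality matters here.
struct ColumnsDescription
{
    std::string serialized;
    bool operator==(const ColumnsDescription &) const = default;
};

/// Sketch of the relaxed check. strict_check == false corresponds to secondary
/// queries from a Replicated database; on a tolerated mismatch the caller skips
/// setInMemoryMetadata() and waits for cloneMetadataIfNeeded() to catch up.
bool checkTableStructure(const ColumnsDescription & local,
                         const ColumnsDescription & from_zk,
                         int zk_metadata_version,
                         bool strict_check)
{
    if (from_zk == local)
        return true;   /// structures match, the ZooKeeper metadata version can be applied

    if (!strict_check && zk_metadata_version != 0)
        return false;  /// assume a concurrent ALTER; the real code logs a warning here

    throw std::runtime_error("Table structure in ZooKeeper differs from the local structure");
}
```

In the patch itself the non-strict path also logs the ZooKeeper metadata version, and the replica stays on metadata_version 0 until the ALTER_METADATA entry created in cloneMetadataIfNeeded() brings it up to date.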
diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py index 2ab2fe499ff..8d4244b69b5 100644 --- a/tests/integration/test_replicated_database/test.py +++ b/tests/integration/test_replicated_database/test.py @@ -34,6 +34,7 @@ competing_node = cluster.add_instance( main_configs=["configs/config.xml"], user_configs=["configs/settings.xml"], with_zookeeper=True, + stay_alive=True, macros={"shard": 1, "replica": 3}, ) snapshotting_node = cluster.add_instance( @@ -1272,3 +1273,51 @@ def test_recover_digest_mismatch(started_cluster): dummy_node.query("DROP DATABASE IF EXISTS recover_digest_mismatch") print("Everything Okay") + +def test_replicated_table_structure_alter(started_cluster): + main_node.query("DROP DATABASE IF EXISTS table_structure") + dummy_node.query("DROP DATABASE IF EXISTS table_structure") + + main_node.query( + "CREATE DATABASE table_structure ENGINE = Replicated('/clickhouse/databases/table_structure', 'shard1', 'replica1');" + ) + dummy_node.query( + "CREATE DATABASE table_structure ENGINE = Replicated('/clickhouse/databases/table_structure', 'shard1', 'replica2');" + ) + competing_node.query( + "CREATE DATABASE table_structure ENGINE = Replicated('/clickhouse/databases/table_structure', 'shard1', 'replica3');" + ) + + competing_node.query("CREATE TABLE table_structure.mem (n int) ENGINE=Memory") + dummy_node.query("DETACH DATABASE table_structure") + + settings = {"distributed_ddl_task_timeout": 0} + main_node.query("CREATE TABLE table_structure.rmt (n int, v UInt64) ENGINE=ReplicatedReplacingMergeTree(v) ORDER BY n", settings=settings) + + competing_node.query("SYSTEM SYNC DATABASE REPLICA table_structure") + competing_node.query("DETACH DATABASE table_structure") + + main_node.query("ALTER TABLE table_structure.rmt ADD COLUMN m int", settings=settings) + main_node.query("ALTER TABLE table_structure.rmt COMMENT COLUMN v 'version'", settings=settings) + main_node.query("INSERT INTO table_structure.rmt VALUES (1, 2, 3)") + + command = "rm -f /var/lib/clickhouse/metadata/table_structure/mem.sql" + competing_node.exec_in_container(["bash", "-c", command]) + competing_node.restart_clickhouse(kill=True) + + dummy_node.query("ATTACH DATABASE table_structure") + dummy_node.query("SYSTEM SYNC DATABASE REPLICA table_structure") + dummy_node.query("SYSTEM SYNC REPLICA table_structure.rmt") + assert "1\t2\t3\n" == dummy_node.query("SELECT * FROM table_structure.rmt") + + competing_node.query("SYSTEM SYNC DATABASE REPLICA table_structure") + competing_node.query("SYSTEM SYNC REPLICA table_structure.rmt") + #time.sleep(600) + assert "mem" in competing_node.query("SHOW TABLES FROM table_structure") + assert "1\t2\t3\n" == competing_node.query("SELECT * FROM table_structure.rmt") + + main_node.query("ALTER TABLE table_structure.rmt ADD COLUMN k int") + main_node.query("INSERT INTO table_structure.rmt VALUES (1, 2, 3, 4)") + dummy_node.query("SYSTEM SYNC DATABASE REPLICA table_structure") + dummy_node.query("SYSTEM SYNC REPLICA table_structure.rmt") + assert "1\t2\t3\t0\n1\t2\t3\t4\n" == dummy_node.query("SELECT * FROM table_structure.rmt ORDER BY k") From 376b65ee1efeeec438d52750e57182aef7379cad Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 22 Jun 2023 19:06:49 +0200 Subject: [PATCH 0659/1997] Define Thrift version for parquet --- contrib/arrow-cmake/CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/contrib/arrow-cmake/CMakeLists.txt 
b/contrib/arrow-cmake/CMakeLists.txt index 5fe942d1cd0..836452c53ed 100644 --- a/contrib/arrow-cmake/CMakeLists.txt +++ b/contrib/arrow-cmake/CMakeLists.txt @@ -514,6 +514,10 @@ if (SANITIZE STREQUAL "undefined") target_compile_options(_arrow PRIVATE -fno-sanitize=undefined) endif () +# Define Thrift version for parquet (we use 0.16.0) +add_definitions(-DPARQUET_THRIFT_VERSION_MAJOR=0) +add_definitions(-DPARQUET_THRIFT_VERSION_MINOR=16) + # === tools set(TOOLS_DIR "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/tools/parquet") From 1d379108a314abc9744069c4e697b9e87818fa7b Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 22 Jun 2023 19:23:14 +0200 Subject: [PATCH 0660/1997] Update arrow version in cmake --- contrib/arrow-cmake/CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/contrib/arrow-cmake/CMakeLists.txt b/contrib/arrow-cmake/CMakeLists.txt index 836452c53ed..46b86cb4ddb 100644 --- a/contrib/arrow-cmake/CMakeLists.txt +++ b/contrib/arrow-cmake/CMakeLists.txt @@ -31,12 +31,12 @@ endif() set (CMAKE_CXX_STANDARD 17) -set(ARROW_VERSION "6.0.1") +set(ARROW_VERSION "11.0.0") string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" ARROW_BASE_VERSION "${ARROW_VERSION}") -set(ARROW_VERSION_MAJOR "6") +set(ARROW_VERSION_MAJOR "11") set(ARROW_VERSION_MINOR "0") -set(ARROW_VERSION_PATCH "1") +set(ARROW_VERSION_PATCH "0") if(ARROW_VERSION_MAJOR STREQUAL "0") # Arrow 0.x.y => SO version is "x", full SO version is "x.y.0" From cf082f2f9a68c21241c9b6667a8e4241da220601 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 22 Jun 2023 17:24:43 +0000 Subject: [PATCH 0661/1997] Use read_bytes/total_bytes_to_read for progress bar in s3/file/url/... table functions --- .../IO/ReadBufferFromAzureBlobStorage.cpp | 8 +- src/Disks/IO/ReadBufferFromAzureBlobStorage.h | 5 +- .../AzureBlobStorage/AzureObjectStorage.cpp | 10 +- .../AzureBlobStorage/AzureObjectStorage.h | 5 + src/IO/Progress.h | 9 +- src/IO/ReadBufferFromFileBase.cpp | 2 +- src/IO/ReadBufferFromFileBase.h | 2 +- src/IO/ReadBufferFromS3.cpp | 15 +- src/IO/ReadBufferFromS3.h | 6 +- src/IO/ReadWriteBufferFromHTTP.cpp | 15 +- src/IO/ReadWriteBufferFromHTTP.h | 5 +- .../Executors/ExecutionThreadContext.cpp | 3 + src/Processors/IProcessor.h | 1 + src/Processors/ISource.h | 1 + src/Processors/Sources/RemoteSource.cpp | 2 + src/QueryPipeline/ReadProgressCallback.cpp | 12 ++ src/QueryPipeline/ReadProgressCallback.h | 3 + src/Storages/HDFS/ReadBufferFromHDFS.cpp | 29 +++- src/Storages/HDFS/ReadBufferFromHDFS.h | 6 +- src/Storages/HDFS/StorageHDFS.cpp | 74 ++++----- src/Storages/HDFS/StorageHDFS.h | 6 - src/Storages/StorageAzureBlob.cpp | 83 ++++------ src/Storages/StorageAzureBlob.h | 18 +-- src/Storages/StorageFile.cpp | 32 +--- src/Storages/StorageS3.cpp | 152 ++++++++---------- src/Storages/StorageS3.h | 29 +--- src/Storages/StorageS3Cluster.cpp | 2 +- src/Storages/StorageURL.cpp | 37 ++--- src/Storages/StorageURL.h | 8 +- 29 files changed, 268 insertions(+), 312 deletions(-) diff --git a/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp b/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp index 129bb97be09..6a328de0341 100644 --- a/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp +++ b/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp @@ -8,6 +8,7 @@ #include #include #include +#include namespace ProfileEvents @@ -36,7 +37,8 @@ ReadBufferFromAzureBlobStorage::ReadBufferFromAzureBlobStorage( size_t max_single_download_retries_, bool use_external_buffer_, bool restricted_seek_, - size_t 
read_until_position_) + size_t read_until_position_, + std::function progress_callback_) : ReadBufferFromFileBase(use_external_buffer_ ? 0 : read_settings_.remote_fs_buffer_size, nullptr, 0) , blob_container_client(blob_container_client_) , path(path_) @@ -47,6 +49,7 @@ ReadBufferFromAzureBlobStorage::ReadBufferFromAzureBlobStorage( , use_external_buffer(use_external_buffer_) , restricted_seek(restricted_seek_) , read_until_position(read_until_position_) + , progress_callback(progress_callback_) { if (!use_external_buffer) { @@ -127,6 +130,9 @@ bool ReadBufferFromAzureBlobStorage::nextImpl() if (bytes_read == 0) return false; + if (progress_callback) + progress_callback(FileProgress(bytes_read)); + BufferBase::set(data_ptr, bytes_read, 0); offset += bytes_read; diff --git a/src/Disks/IO/ReadBufferFromAzureBlobStorage.h b/src/Disks/IO/ReadBufferFromAzureBlobStorage.h index 4e21f543653..6f683dcf1ce 100644 --- a/src/Disks/IO/ReadBufferFromAzureBlobStorage.h +++ b/src/Disks/IO/ReadBufferFromAzureBlobStorage.h @@ -25,7 +25,8 @@ public: size_t max_single_download_retries_, bool use_external_buffer_ = false, bool restricted_seek_ = false, - size_t read_until_position_ = 0); + size_t read_until_position_ = 0, + std::function progress_callback_ = {}); off_t seek(off_t off, int whence) override; @@ -74,6 +75,8 @@ private: size_t data_capacity; Poco::Logger * log = &Poco::Logger::get("ReadBufferFromAzureBlobStorage"); + + std::function progress_callback; }; } diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index dbb41851053..982c376404a 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -191,7 +191,7 @@ std::unique_ptr AzureObjectStorage::readObject( /// NOLI return std::make_unique( client.get(), object.remote_path, patchSettings(read_settings), settings_ptr->max_single_read_retries, - settings_ptr->max_single_download_retries); + settings_ptr->max_single_download_retries, false, false, 0, progress_callback); } std::unique_ptr AzureObjectStorage::readObjects( /// NOLINT @@ -216,7 +216,8 @@ std::unique_ptr AzureObjectStorage::readObjects( /// NOL settings_ptr->max_single_download_retries, /* use_external_buffer */true, /* restricted_seek */true, - read_until_position); + read_until_position, + progress_callback); }; switch (read_settings.remote_fs_method) @@ -390,6 +391,11 @@ std::unique_ptr AzureObjectStorage::cloneObjectStorage(const std ); } +void AzureObjectStorage::setProgressCallback(const ContextPtr & context) +{ + progress_callback = context->getFileProgressCallback(); +} + } #endif diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index b5f81cef235..ee144cdd56e 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -123,6 +123,9 @@ public: bool isRemote() const override { return true; } + /// Set progress callback to read buffer while reading from storage. 
+ void setProgressCallback(const ContextPtr & context); + private: const String name; /// client used to access the files in the Blob Storage cloud @@ -132,6 +135,8 @@ private: Poco::Logger * log; DataSourceDescription data_source_description; + + std::function progress_callback; }; } diff --git a/src/IO/Progress.h b/src/IO/Progress.h index c21b1b854b0..a68ff9bc5c2 100644 --- a/src/IO/Progress.h +++ b/src/IO/Progress.h @@ -40,9 +40,10 @@ struct ReadProgress UInt64 read_rows = 0; UInt64 read_bytes = 0; UInt64 total_rows_to_read = 0; + UInt64 total_bytes_to_read = 0; - ReadProgress(UInt64 read_rows_, UInt64 read_bytes_, UInt64 total_rows_to_read_ = 0) - : read_rows(read_rows_), read_bytes(read_bytes_), total_rows_to_read(total_rows_to_read_) {} + ReadProgress(UInt64 read_rows_, UInt64 read_bytes_, UInt64 total_rows_to_read_ = 0, UInt64 total_bytes_to_read_ = 0) + : read_rows(read_rows_), read_bytes(read_bytes_), total_rows_to_read(total_rows_to_read_), total_bytes_to_read(total_bytes_to_read_) {} }; struct WriteProgress @@ -98,8 +99,8 @@ struct Progress Progress() = default; - Progress(UInt64 read_rows_, UInt64 read_bytes_, UInt64 total_rows_to_read_ = 0) - : read_rows(read_rows_), read_bytes(read_bytes_), total_rows_to_read(total_rows_to_read_) {} + Progress(UInt64 read_rows_, UInt64 read_bytes_, UInt64 total_rows_to_read_ = 0, UInt64 total_bytes_to_read_ = 0) + : read_rows(read_rows_), read_bytes(read_bytes_), total_rows_to_read(total_rows_to_read_), total_bytes_to_read(total_bytes_to_read_) {} explicit Progress(ReadProgress read_progress) : read_rows(read_progress.read_rows), read_bytes(read_progress.read_bytes), total_rows_to_read(read_progress.total_rows_to_read) {} diff --git a/src/IO/ReadBufferFromFileBase.cpp b/src/IO/ReadBufferFromFileBase.cpp index 4181615bc52..4ac3f984f78 100644 --- a/src/IO/ReadBufferFromFileBase.cpp +++ b/src/IO/ReadBufferFromFileBase.cpp @@ -42,7 +42,7 @@ void ReadBufferFromFileBase::setProgressCallback(ContextPtr context) setProfileCallback([file_progress_callback](const ProfileInfo & progress) { - file_progress_callback(FileProgress(progress.bytes_read, 0)); + file_progress_callback(FileProgress(progress.bytes_read)); }); } diff --git a/src/IO/ReadBufferFromFileBase.h b/src/IO/ReadBufferFromFileBase.h index b77db29bc23..2abdf883ab0 100644 --- a/src/IO/ReadBufferFromFileBase.h +++ b/src/IO/ReadBufferFromFileBase.h @@ -52,7 +52,7 @@ public: size_t getFileSize() override; - void setProgressCallback(ContextPtr context); + virtual void setProgressCallback(ContextPtr context); protected: std::optional file_size; diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp index d1cb1ec9ab0..36ff81a85d4 100644 --- a/src/IO/ReadBufferFromS3.cpp +++ b/src/IO/ReadBufferFromS3.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -162,12 +163,13 @@ bool ReadBufferFromS3::nextImpl() offset += working_buffer.size(); if (read_settings.remote_throttler) read_settings.remote_throttler->add(working_buffer.size(), ProfileEvents::RemoteReadThrottlerBytes, ProfileEvents::RemoteReadThrottlerSleepMicroseconds); - + if (progress_callback) + progress_callback(FileProgress(working_buffer.size())); return true; } -size_t ReadBufferFromS3::readBigAt(char * to, size_t n, size_t range_begin, const std::function & progress_callback) +size_t ReadBufferFromS3::readBigAt(char * to, size_t n, size_t range_begin, const std::function & custom_progress_callback) { if (n == 0) return 0; @@ -184,7 +186,9 @@ size_t ReadBufferFromS3::readBigAt(char * to, size_t n, 
size_t range_begin, cons auto result = sendRequest(range_begin, range_begin + n - 1); std::istream & istr = result.GetBody(); - size_t bytes = copyFromIStreamWithProgressCallback(istr, to, n, progress_callback); + size_t bytes = copyFromIStreamWithProgressCallback(istr, to, n, custom_progress_callback); + if (progress_callback) + progress_callback(FileProgress(bytes, 0)); ProfileEvents::increment(ProfileEvents::ReadBufferFromS3Bytes, bytes); @@ -415,6 +419,11 @@ Aws::S3::Model::GetObjectResult ReadBufferFromS3::sendRequest(size_t range_begin } } +void ReadBufferFromS3::setProgressCallback(DB::ContextPtr context) +{ + progress_callback = context->getFileProgressCallback(); +} + } #endif diff --git a/src/IO/ReadBufferFromS3.h b/src/IO/ReadBufferFromS3.h index 0f665861a1e..824038c7af0 100644 --- a/src/IO/ReadBufferFromS3.h +++ b/src/IO/ReadBufferFromS3.h @@ -77,10 +77,12 @@ public: String getFileName() const override { return bucket + "/" + key; } - size_t readBigAt(char * to, size_t n, size_t range_begin, const std::function & progress_callback) override; + size_t readBigAt(char * to, size_t n, size_t range_begin, const std::function & custom_progress_callback) override; bool supportsReadAt() override { return true; } + void setProgressCallback(ContextPtr context) override; + private: std::unique_ptr initialize(); @@ -100,6 +102,8 @@ private: /// There is different seek policy for disk seek and for non-disk seek /// (non-disk seek is applied for seekable input formats: orc, arrow, parquet). bool restricted_seek; + + std::function progress_callback; }; } diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp index cf1159bfb4b..7bd7f4a9b8e 100644 --- a/src/IO/ReadWriteBufferFromHTTP.cpp +++ b/src/IO/ReadWriteBufferFromHTTP.cpp @@ -587,11 +587,13 @@ bool ReadWriteBufferFromHTTPBase::nextImpl() internal_buffer = impl->buffer(); working_buffer = internal_buffer; offset_from_begin_pos += working_buffer.size(); + if (progress_callback) + progress_callback(FileProgress(working_buffer.size())); return true; } template -size_t ReadWriteBufferFromHTTPBase::readBigAt(char * to, size_t n, size_t offset, const std::function & progress_callback) +size_t ReadWriteBufferFromHTTPBase::readBigAt(char * to, size_t n, size_t offset, const std::function & custom_progress_callback) { /// Caller must have checked supportsReadAt(). /// This ensures we've sent at least one HTTP request and populated saved_uri_redirect.
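[Editor's note] The same convention repeats across this patch: iterators announce a file's total size once via FileProgress(0, size) so the progress bar gets a denominator, and read buffers then report only the increment they just consumed via FileProgress(bytes). Below is a self-contained sketch of that convention with hypothetical stand-in types (the real FileProgress lives in src/IO/Progress.h); the name ProgressReportingSource is illustrative only.

```cpp
#include <cstddef>
#include <functional>
#include <utility>

/// Hypothetical stand-in for the FileProgress values the buffers emit.
struct FileProgress
{
    size_t read_bytes = 0;          /// increment of bytes just read
    size_t total_bytes_to_read = 0; /// announced once per file, when the size is known
    explicit FileProgress(size_t read_bytes_, size_t total_bytes_to_read_ = 0)
        : read_bytes(read_bytes_), total_bytes_to_read(total_bytes_to_read_) {}
};

/// Illustrative source following the patch's two-step convention.
class ProgressReportingSource
{
public:
    void setProgressCallback(std::function<void(FileProgress)> callback)
    {
        progress_callback = std::move(callback);
    }

    /// Called when an iterator discovers a file: contributes to the
    /// denominator (total_bytes_to_read) without reporting any bytes as read.
    void announceFileSize(size_t file_size)
    {
        if (progress_callback)
            progress_callback(FileProgress(0, file_size));
    }

    /// Called after each successful read: report only the new bytes.
    void onBytesRead(size_t bytes_read)
    {
        if (bytes_read && progress_callback)
            progress_callback(FileProgress(bytes_read));
    }

private:
    std::function<void(FileProgress)> progress_callback;
};
```

This split is also why RemoteSource and ReadProgressCallback gain an addTotalBytes() path elsewhere in the patch: announced totals accumulate into total_bytes_to_read while read increments flow into read_bytes.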
@@ -633,8 +635,9 @@ size_t ReadWriteBufferFromHTTPBase::readBigAt(char * to, si toString(response.getStatus()), uri_.toString(), offset, offset + n); bool cancelled; - size_t r = copyFromIStreamWithProgressCallback(*result_istr, to, n, progress_callback, &cancelled); - + size_t r = copyFromIStreamWithProgressCallback(*result_istr, to, n, custom_progress_callback, &cancelled); + if (progress_callback) + progress_callback(FileProgress(r)); return r; } catch (const Poco::Exception & e) @@ -780,6 +783,12 @@ void ReadWriteBufferFromHTTPBase::setNextCallback(NextCallb next_callback(count()); } +template +void ReadWriteBufferFromHTTPBase::setProgressCallback(std::function file_progress_callback_) +{ + progress_callback = file_progress_callback_; +} + template const std::string & ReadWriteBufferFromHTTPBase::getCompressionMethod() const { return content_encoding; } diff --git a/src/IO/ReadWriteBufferFromHTTP.h b/src/IO/ReadWriteBufferFromHTTP.h index 2d2ae5fe724..18bd31fcdce 100644 --- a/src/IO/ReadWriteBufferFromHTTP.h +++ b/src/IO/ReadWriteBufferFromHTTP.h @@ -92,6 +92,7 @@ namespace detail HTTPHeaderEntries http_header_entries; const RemoteHostFilter * remote_host_filter = nullptr; std::function next_callback; + std::function progress_callback; size_t buffer_size; bool use_external_buffer; @@ -176,7 +177,7 @@ namespace detail bool nextImpl() override; - size_t readBigAt(char * to, size_t n, size_t offset, const std::function & progress_callback) override; + size_t readBigAt(char * to, size_t n, size_t offset, const std::function & custom_progress_callback) override; off_t getPosition() override; @@ -199,6 +200,8 @@ namespace detail /// passed through the buffer void setNextCallback(NextCallback next_callback_); + void setProgressCallback(std::function progress_callback_); + const std::string & getCompressionMethod() const; std::optional getLastModificationTime(); diff --git a/src/Processors/Executors/ExecutionThreadContext.cpp b/src/Processors/Executors/ExecutionThreadContext.cpp index 794f478b272..0fa7e0b552f 100644 --- a/src/Processors/Executors/ExecutionThreadContext.cpp +++ b/src/Processors/Executors/ExecutionThreadContext.cpp @@ -56,6 +56,9 @@ static void executeJob(ExecutingGraph::Node * node, ReadProgressCallback * read_ if (read_progress->counters.total_rows_approx) read_progress_callback->addTotalRowsApprox(read_progress->counters.total_rows_approx); + if (read_progress->counters.total_bytes) + read_progress_callback->addTotalBytes(read_progress->counters.total_bytes); + if (!read_progress_callback->onProgress(read_progress->counters.read_rows, read_progress->counters.read_bytes, read_progress->limits)) node->processor->cancel(); } diff --git a/src/Processors/IProcessor.h b/src/Processors/IProcessor.h index 34322acb2af..c6bef186877 100644 --- a/src/Processors/IProcessor.h +++ b/src/Processors/IProcessor.h @@ -343,6 +343,7 @@ public: uint64_t read_rows = 0; uint64_t read_bytes = 0; uint64_t total_rows_approx = 0; + uint64_t total_bytes = 0; }; struct ReadProgress diff --git a/src/Processors/ISource.h b/src/Processors/ISource.h index 292f79ba348..2593a241c63 100644 --- a/src/Processors/ISource.h +++ b/src/Processors/ISource.h @@ -43,6 +43,7 @@ public: std::optional getReadProgress() final; void addTotalRowsApprox(size_t value) { read_progress.total_rows_approx += value; } + void addTotalBytes(size_t value) { read_progress.total_bytes += value; } }; using SourcePtr = std::shared_ptr; diff --git a/src/Processors/Sources/RemoteSource.cpp b/src/Processors/Sources/RemoteSource.cpp index 
310a1d33e28..74ab3649068 100644 --- a/src/Processors/Sources/RemoteSource.cpp +++ b/src/Processors/Sources/RemoteSource.cpp @@ -77,6 +77,8 @@ std::optional RemoteSource::tryGenerate() { if (value.total_rows_to_read) addTotalRowsApprox(value.total_rows_to_read); + if (value.total_bytes_to_read) + addTotalBytes(value.total_bytes_to_read); progress(value.read_rows, value.read_bytes); }); diff --git a/src/QueryPipeline/ReadProgressCallback.cpp b/src/QueryPipeline/ReadProgressCallback.cpp index 0f50d56f1a5..4d7c7aa0f2a 100644 --- a/src/QueryPipeline/ReadProgressCallback.cpp +++ b/src/QueryPipeline/ReadProgressCallback.cpp @@ -63,6 +63,18 @@ bool ReadProgressCallback::onProgress(uint64_t read_rows, uint64_t read_bytes, c process_list_elem->updateProgressIn(total_rows_progress); } + size_t bytes = 0; + if ((bytes = total_bytes.exchange(0)) != 0) + { + Progress total_bytes_progress = {0, 0, 0, bytes}; + + if (progress_callback) + progress_callback(total_bytes_progress); + + if (process_list_elem) + process_list_elem->updateProgressIn(total_bytes_progress); + } + Progress value {read_rows, read_bytes}; if (progress_callback) diff --git a/src/QueryPipeline/ReadProgressCallback.h b/src/QueryPipeline/ReadProgressCallback.h index 08f2f9fc99b..5dbf3344bdf 100644 --- a/src/QueryPipeline/ReadProgressCallback.h +++ b/src/QueryPipeline/ReadProgressCallback.h @@ -23,6 +23,7 @@ public: void setProcessListElement(QueryStatusPtr elem); void setProgressCallback(const ProgressCallback & callback) { progress_callback = callback; } void addTotalRowsApprox(size_t value) { total_rows_approx += value; } + void addTotalBytes(size_t value) { total_bytes += value; } /// Skip updating profile events. /// For merges in mutations it may need special logic, it's done inside ProgressCallback. @@ -37,6 +38,8 @@ private: /// The approximate total number of rows to read. For progress bar. std::atomic_size_t total_rows_approx = 0; + /// The total number of bytes to read. For progress bar. + std::atomic_size_t total_bytes = 0; std::mutex limits_and_quotas_mutex; Stopwatch total_stopwatch{CLOCK_MONOTONIC_COARSE}; /// Including waiting time diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.cpp b/src/Storages/HDFS/ReadBufferFromHDFS.cpp index ee8e0764db0..2c2c5047cb1 100644 --- a/src/Storages/HDFS/ReadBufferFromHDFS.cpp +++ b/src/Storages/HDFS/ReadBufferFromHDFS.cpp @@ -3,6 +3,7 @@ #if USE_HDFS #include #include +#include #include #include #include @@ -42,19 +43,23 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory file_size; + explicit ReadBufferFromHDFSImpl( const std::string & hdfs_uri_, const std::string & hdfs_file_path_, const Poco::Util::AbstractConfiguration & config_, const ReadSettings & read_settings_, size_t read_until_position_, - bool use_external_buffer_) + bool use_external_buffer_, + std::optional file_size_) : BufferWithOwnMemory(use_external_buffer_ ? 
0 : read_settings_.remote_fs_buffer_size) , hdfs_uri(hdfs_uri_) , hdfs_file_path(hdfs_file_path_) , builder(createHDFSBuilder(hdfs_uri_, config_)) , read_settings(read_settings_) , read_until_position(read_until_position_) + , file_size(file_size_) { fs = createHDFSFS(builder.get()); fin = hdfsOpenFile(fs.get(), hdfs_file_path.c_str(), O_RDONLY, 0, 0, 0); @@ -70,12 +75,16 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemorymSize; + file_size = static_cast(file_info->mSize); + return *file_size; } bool nextImpl() override @@ -156,10 +165,11 @@ ReadBufferFromHDFS::ReadBufferFromHDFS( const Poco::Util::AbstractConfiguration & config_, const ReadSettings & read_settings_, size_t read_until_position_, - bool use_external_buffer_) + bool use_external_buffer_, + std::optional file_size_) : ReadBufferFromFileBase(read_settings_.remote_fs_buffer_size, nullptr, 0) , impl(std::make_unique( - hdfs_uri_, hdfs_file_path_, config_, read_settings_, read_until_position_, use_external_buffer_)) + hdfs_uri_, hdfs_file_path_, config_, read_settings_, read_until_position_, use_external_buffer_, file_size_)) , use_external_buffer(use_external_buffer_) { } @@ -188,7 +198,11 @@ bool ReadBufferFromHDFS::nextImpl() auto result = impl->next(); if (result) + { BufferBase::set(impl->buffer().begin(), impl->buffer().size(), impl->offset()); /// use the buffer returned by `impl` + if (progress_callback) + progress_callback(FileProgress(working_buffer.size())); + } return result; } @@ -248,6 +262,11 @@ String ReadBufferFromHDFS::getFileName() const return impl->hdfs_file_path; } +void ReadBufferFromHDFS::setProgressCallback(DB::ContextPtr context) +{ + progress_callback = context->getFileProgressCallback(); +} + } #endif diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.h b/src/Storages/HDFS/ReadBufferFromHDFS.h index 6aed3ddff26..3dce6a93cba 100644 --- a/src/Storages/HDFS/ReadBufferFromHDFS.h +++ b/src/Storages/HDFS/ReadBufferFromHDFS.h @@ -29,7 +29,8 @@ public: const Poco::Util::AbstractConfiguration & config_, const ReadSettings & read_settings_, size_t read_until_position_ = 0, - bool use_external_buffer = false); + bool use_external_buffer = false, + std::optional file_size = std::nullopt); ~ReadBufferFromHDFS() override; @@ -47,9 +48,12 @@ public: String getFileName() const override; + void setProgressCallback(ContextPtr context) override; + private: std::unique_ptr impl; bool use_external_buffer; + std::function progress_callback; }; } diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 583c45a0633..79cda3050d6 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -259,8 +259,13 @@ public: { const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(uri); uris = getPathsList(path_from_uri, uri_without_path, context_); + auto file_progress_callback = context_->getFileProgressCallback(); for (auto & elem : uris) + { elem.path = uri_without_path + elem.path; + if (file_progress_callback && elem.info) + file_progress_callback(FileProgress(0, elem.info->size)); + } uris_iter = uris.begin(); } @@ -281,37 +286,40 @@ private: std::vector::iterator uris_iter; }; -class HDFSSource::URISIterator::Impl +class HDFSSource::URISIterator::Impl : WithContext { public: - explicit Impl(const std::vector & uris_, ContextPtr context) + explicit Impl(const std::vector & uris_, ContextPtr context_) + : WithContext(context_), uris(uris_), file_progress_callback(context_->getFileProgressCallback()) { - auto 
path_and_uri = getPathFromUriAndUriWithoutPath(uris_[0]); - HDFSBuilderWrapper builder = createHDFSBuilder(path_and_uri.second + "/", context->getGlobalContext()->getConfigRef()); - auto fs = createHDFSFS(builder.get()); - for (const auto & uri : uris_) - { - path_and_uri = getPathFromUriAndUriWithoutPath(uri); - if (!hdfsExists(fs.get(), path_and_uri.first.c_str())) - uris.push_back(uri); - } - uris_iter = uris.begin(); } StorageHDFS::PathWithInfo next() { - std::lock_guard lock(mutex); - if (uris_iter == uris.end()) + size_t current_index = index.fetch_add(1); + if (current_index >= uris.size()) return {"", {}}; - auto key = *uris_iter; - ++uris_iter; - return {key, {}}; + + auto uri = uris[current_index]; + auto path_and_uri = getPathFromUriAndUriWithoutPath(uri); + HDFSBuilderWrapper builder = createHDFSBuilder(path_and_uri.second + "/", getContext()->getGlobalContext()->getConfigRef()); + auto fs = createHDFSFS(builder.get()); + auto * hdfs_info = hdfsGetPathInfo(fs.get(), path_and_uri.first.c_str()); + std::optional info; + if (hdfs_info) + { + info = StorageHDFS::PathInfo{hdfs_info->mLastMod, static_cast(hdfs_info->mSize)}; + if (file_progress_callback && hdfs_info) + file_progress_callback(FileProgress(0, hdfs_info->mSize)); + } + + return {uri, info}; } private: - std::mutex mutex; + std::atomic_size_t index = 0; Strings uris; - Strings::iterator uris_iter; + std::function file_progress_callback; }; HDFSSource::DisclosedGlobIterator::DisclosedGlobIterator(ContextPtr context_, const String & uri) @@ -348,7 +356,7 @@ HDFSSource::HDFSSource( UInt64 max_block_size_, std::shared_ptr file_iterator_, ColumnsDescription columns_description_) - : ISource(getHeader(block_for_format_, requested_virtual_columns_)) + : ISource(getHeader(block_for_format_, requested_virtual_columns_), false) , WithContext(context_) , storage(std::move(storage_)) , block_for_format(block_for_format_) @@ -374,13 +382,17 @@ bool HDFSSource::initialize() continue; current_path = path_with_info.path; + std::optional file_size; + if (path_with_info.info) + file_size = path_with_info.info->size; const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(current_path); auto compression = chooseCompressionMethod(path_from_uri, storage->compression_method); auto impl = std::make_unique( - uri_without_path, path_from_uri, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings()); + uri_without_path, path_from_uri, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings(), 0, false, file_size); if (!skip_empty_files || !impl->eof()) { + impl->setProgressCallback(getContext()); const Int64 zstd_window_log_max = getContext()->getSettingsRef().zstd_window_log_max; read_buf = wrapReadBufferWithCompressionMethod(std::move(impl), compression, static_cast(zstd_window_log_max)); break; @@ -389,15 +401,7 @@ bool HDFSSource::initialize() current_path = path_with_info.path; - if (path_with_info.info && path_with_info.info->size) - { - /// Adjust total_rows_approx_accumulated with new total size. 
- if (total_files_size) - total_rows_approx_accumulated = static_cast(std::ceil(static_cast(total_files_size + path_with_info.info->size) / total_files_size * total_rows_approx_accumulated)); - total_files_size += path_with_info.info->size; - } - - input_format = getContext()->getInputFormat(storage->format_name, *read_buf, block_for_format, max_block_size); + auto input_format = getContext()->getInputFormat(storage->format_name, *read_buf, block_for_format, max_block_size); QueryPipelineBuilder builder; builder.init(Pipe(input_format)); @@ -434,14 +438,7 @@ Chunk HDFSSource::generate() { Columns columns = chunk.getColumns(); UInt64 num_rows = chunk.getNumRows(); - - if (num_rows && total_files_size) - { - size_t chunk_size = input_format->getApproxBytesReadForChunk(); - if (!chunk_size) - chunk_size = chunk.bytes(); - updateRowsProgressApprox(*this, num_rows, chunk_size, total_files_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max); - } + progress(num_rows, 0); for (const auto & virtual_column : requested_virtual_columns) { @@ -465,7 +462,6 @@ Chunk HDFSSource::generate() reader.reset(); pipeline.reset(); - input_format.reset(); read_buf.reset(); if (!initialize()) diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h index 74801b68f73..5a3b97a0e3c 100644 --- a/src/Storages/HDFS/StorageHDFS.h +++ b/src/Storages/HDFS/StorageHDFS.h @@ -164,16 +164,10 @@ private: ColumnsDescription columns_description; std::unique_ptr read_buf; - std::shared_ptr input_format; std::unique_ptr pipeline; std::unique_ptr reader; String current_path; - UInt64 total_rows_approx_max = 0; - size_t total_rows_count_times = 0; - UInt64 total_rows_approx_accumulated = 0; - size_t total_files_size = 0; - /// Recreate ReadBuffer and PullingPipelineExecutor for each file. 
bool initialize(); }; diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index 336c4eaed9b..8e06ceda885 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -617,13 +617,13 @@ Pipe StorageAzureBlob::read( /// Iterate through disclosed globs and make a source for each file iterator_wrapper = std::make_shared( object_storage.get(), configuration.container, std::nullopt, - configuration.blob_path, query_info.query, virtual_block, local_context, nullptr); + configuration.blob_path, query_info.query, virtual_block, local_context, nullptr, local_context->getFileProgressCallback()); } else { iterator_wrapper = std::make_shared( object_storage.get(), configuration.container, configuration.blobs_paths, - std::nullopt, query_info.query, virtual_block, local_context, nullptr); + std::nullopt, query_info.query, virtual_block, local_context, nullptr, local_context->getFileProgressCallback()); } ColumnsDescription columns_description; @@ -794,15 +794,16 @@ StorageAzureBlobSource::Iterator::Iterator( ASTPtr query_, const Block & virtual_header_, ContextPtr context_, - RelativePathsWithMetadata * outer_blobs_) + RelativePathsWithMetadata * outer_blobs_, + std::function file_progress_callback_) : WithContext(context_) , object_storage(object_storage_) , container(container_) - , keys(keys_) , blob_path_with_globs(blob_path_with_globs_) , query(query_) , virtual_header(virtual_header_) , outer_blobs(outer_blobs_) + , file_progress_callback(file_progress_callback_) { if (keys.has_value() && blob_path_with_globs.has_value()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot specify keys and glob simultaneously it's a bug"); @@ -810,11 +811,10 @@ StorageAzureBlobSource::Iterator::Iterator( if (!keys.has_value() && !blob_path_with_globs.has_value()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Both keys and glob mask are not specified"); - if (keys) + if (keys_) { Strings all_keys = *keys; - blobs_with_metadata.emplace(); /// Create a virtual block with one row to construct filter if (query && virtual_header && !all_keys.empty()) { @@ -843,29 +843,12 @@ StorageAzureBlobSource::Iterator::Iterator( all_keys = std::move(filtered_keys); } } - - for (auto && key : all_keys) - { - ObjectMetadata object_metadata = object_storage->getObjectMetadata(key); - total_size += object_metadata.size_bytes; - blobs_with_metadata->emplace_back(RelativePathWithMetadata{key, object_metadata}); - if (outer_blobs) - outer_blobs->emplace_back(blobs_with_metadata->back()); - } + keys = std::move(all_keys); } else { const String key_prefix = blob_path_with_globs->substr(0, blob_path_with_globs->find_first_of("*?{")); - - /// We don't have to list bucket, because there is no asterisks. 
- if (key_prefix.size() == blob_path_with_globs->size()) - { - ObjectMetadata object_metadata = object_storage->getObjectMetadata(*blob_path_with_globs); - blobs_with_metadata->emplace_back(*blob_path_with_globs, object_metadata); - if (outer_blobs) - outer_blobs->emplace_back(blobs_with_metadata->back()); - return; - } + assert(key_prefix.size() != blob_path_with_globs->size()); object_storage_iterator = object_storage->iterate(key_prefix); @@ -888,13 +871,17 @@ RelativePathWithMetadata StorageAzureBlobSource::Iterator::next() if (keys) { size_t current_index = index.fetch_add(1, std::memory_order_relaxed); - if (current_index >= blobs_with_metadata->size()) + if (current_index >= keys->size()) { is_finished = true; return {}; } - return (*blobs_with_metadata)[current_index]; + auto key = (*keys)[current_index]; + ObjectMetadata object_metadata = object_storage->getObjectMetadata(key); + if (file_progress_callback) + file_progress_callback(FileProgress(0, object_metadata.size_bytes)); + return {key, object_metadata}; } else { @@ -946,11 +933,12 @@ RelativePathWithMetadata StorageAzureBlobSource::Iterator::next() const auto & idxs = typeid_cast(*block.getByName("_idx").column); std::lock_guard lock(next_mutex); - blob_path_with_globs.reset(); - blob_path_with_globs.emplace(); + blobs_with_metadata.reset(); + blobs_with_metadata.emplace(); for (UInt64 idx : idxs.getData()) { - total_size.fetch_add(new_batch[idx].metadata.size_bytes, std::memory_order_relaxed); + if (file_progress_callback) + file_progress_callback(FileProgress(0, new_batch[idx].metadata.size_bytes)); blobs_with_metadata->emplace_back(std::move(new_batch[idx])); if (outer_blobs) outer_blobs->emplace_back(blobs_with_metadata->back()); @@ -963,8 +951,11 @@ RelativePathWithMetadata StorageAzureBlobSource::Iterator::next() std::lock_guard lock(next_mutex); blobs_with_metadata = std::move(new_batch); - for (const auto & [_, info] : *blobs_with_metadata) - total_size.fetch_add(info.size_bytes, std::memory_order_relaxed); + if (file_progress_callback) + { + for (const auto & [_, info] : *blobs_with_metadata) + file_progress_callback(FileProgress(0, info.size_bytes)); + } } } @@ -1011,17 +1002,9 @@ Chunk StorageAzureBlobSource::generate() if (reader->pull(chunk)) { UInt64 num_rows = chunk.getNumRows(); + progress(num_rows, 0); const auto & file_path = reader.getPath(); - if (num_rows && total_objects_size) - { - size_t chunk_size = reader.getFormat()->getApproxBytesReadForChunk(); - if (!chunk_size) - chunk_size = chunk.bytes(); - updateRowsProgressApprox( - *this, num_rows, chunk_size, total_objects_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max); - } - for (const auto & virtual_column : requested_virtual_columns) { if (virtual_column.name == "_path") @@ -1046,13 +1029,6 @@ Chunk StorageAzureBlobSource::generate() if (!reader) break; - size_t object_size = tryGetFileSizeFromReadBuffer(*reader.getReadBuffer()).value_or(0); - /// Adjust total_rows_approx_accumulated with new total size. - if (total_objects_size) - total_rows_approx_accumulated = static_cast( - std::ceil(static_cast(total_objects_size + object_size) / total_objects_size * total_rows_approx_accumulated)); - total_objects_size += object_size; - /// Even if task is finished the thread may be not freed in pool. /// So wait until it will be freed before scheduling a new task. 
create_reader_pool.wait(); @@ -1083,7 +1059,7 @@ StorageAzureBlobSource::StorageAzureBlobSource( AzureObjectStorage * object_storage_, const String & container_, std::shared_ptr file_iterator_) - :ISource(getHeader(sample_block_, requested_virtual_columns_)) + :ISource(getHeader(sample_block_, requested_virtual_columns_), false) , WithContext(context_) , requested_virtual_columns(requested_virtual_columns_) , format(format_) @@ -1101,13 +1077,7 @@ StorageAzureBlobSource::StorageAzureBlobSource( { reader = createReader(); if (reader) - { - const auto & read_buf = reader.getReadBuffer(); - if (read_buf) - total_objects_size = tryGetFileSizeFromReadBuffer(*reader.getReadBuffer()).value_or(0); - reader_future = createReaderAsync(); - } } @@ -1149,7 +1119,7 @@ StorageAzureBlobSource::ReaderHolder StorageAzureBlobSource::createReader() auto pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); auto current_reader = std::make_unique(*pipeline); - return ReaderHolder{fs::path(container) / current_key, std::move(read_buf), input_format, std::move(pipeline), std::move(current_reader)}; + return ReaderHolder{fs::path(container) / current_key, std::move(read_buf), std::move(pipeline), std::move(current_reader)}; } std::future StorageAzureBlobSource::createReaderAsync() @@ -1163,6 +1133,7 @@ std::unique_ptr StorageAzureBlobSource::createAzureReadBuffer(const read_settings.enable_filesystem_cache = false; auto download_buffer_size = getContext()->getSettings().max_download_buffer_size; const bool object_too_small = object_size <= 2 * download_buffer_size; + object_storage->setProgressCallback(getContext()); // Create a read buffer that will prefetch the first ~1 MB of the file. // When reading lots of tiny files, this prefetching almost doubles the throughput. diff --git a/src/Storages/StorageAzureBlob.h b/src/Storages/StorageAzureBlob.h index 25c791f1700..a78ba691b57 100644 --- a/src/Storages/StorageAzureBlob.h +++ b/src/Storages/StorageAzureBlob.h @@ -153,7 +153,8 @@ public: ASTPtr query_, const Block & virtual_header_, ContextPtr context_, - RelativePathsWithMetadata * outer_blobs_); + RelativePathsWithMetadata * outer_blobs_, + std::function file_progress_callback_ = {}); RelativePathWithMetadata next(); size_t getTotalSize() const; @@ -182,6 +183,8 @@ public: std::atomic is_finished = false; std::atomic is_initialized = false; std::mutex next_mutex; + + std::function file_progress_callback; }; StorageAzureBlobSource( @@ -225,12 +228,10 @@ private: ReaderHolder( String path_, std::unique_ptr read_buf_, - std::shared_ptr input_format_, std::unique_ptr pipeline_, std::unique_ptr reader_) : path(std::move(path_)) , read_buf(std::move(read_buf_)) - , input_format(input_format_) , pipeline(std::move(pipeline_)) , reader(std::move(reader_)) { @@ -251,7 +252,6 @@ private: /// reader uses pipeline, pipeline uses read_buf. 
reader = std::move(other.reader); pipeline = std::move(other.pipeline); - input_format = std::move(other.input_format); read_buf = std::move(other.read_buf); path = std::move(other.path); return *this; @@ -262,14 +262,9 @@ private: const PullingPipelineExecutor * operator->() const { return reader.get(); } const String & getPath() const { return path; } - const std::unique_ptr & getReadBuffer() const { return read_buf; } - - const std::shared_ptr & getFormat() const { return input_format; } - private: String path; std::unique_ptr read_buf; - std::shared_ptr input_format; std::unique_ptr pipeline; std::unique_ptr reader; }; @@ -282,11 +277,6 @@ private: ThreadPoolCallbackRunner create_reader_scheduler; std::future reader_future; - UInt64 total_rows_approx_max = 0; - size_t total_rows_count_times = 0; - UInt64 total_rows_approx_accumulated = 0; - size_t total_objects_size = 0; - /// Recreate ReadBuffer and Pipeline for each file. ReaderHolder createReader(); std::future createReaderAsync(); diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 7c04de1a28a..f196415e2dc 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -295,13 +295,8 @@ std::unique_ptr createReadBuffer( std::unique_ptr nested_buffer = selectReadBuffer(current_path, use_table_fd, table_fd, file_stat, context); - /// For clickhouse-local and clickhouse-client add progress callback to display progress bar. - if (context->getApplicationType() == Context::ApplicationType::LOCAL - || context->getApplicationType() == Context::ApplicationType::CLIENT) - { - auto & in = static_cast(*nested_buffer); - in.setProgressCallback(context); - } + auto & in = static_cast(*nested_buffer); + in.setProgressCallback(context); int zstd_window_log_max = static_cast(context->getSettingsRef().zstd_window_log_max); return wrapReadBufferWithCompressionMethod(std::move(nested_buffer), method, zstd_window_log_max); @@ -607,7 +602,7 @@ public: ColumnsDescription columns_description_, const Block & block_for_format_, std::unique_ptr read_buf_) - : ISource(getBlockForSource(block_for_format_, files_info_)) + : ISource(getBlockForSource(block_for_format_, files_info_), false) , storage(std::move(storage_)) , storage_snapshot(storage_snapshot_) , files_info(std::move(files_info_)) @@ -722,12 +717,6 @@ public: read_buf = createReadBuffer(current_path, file_stat, storage->use_table_fd, storage->table_fd, storage->compression_method, context); } - size_t file_size = tryGetFileSizeFromReadBuffer(*read_buf).value_or(0); - /// Adjust total_rows_approx_accumulated with new total size. - if (total_files_size) - total_rows_approx_accumulated = static_cast(std::ceil(static_cast(total_files_size + file_size) / total_files_size * total_rows_approx_accumulated)); - total_files_size += file_size; - const Settings & settings = context->getSettingsRef(); chassert(!storage->paths.empty()); const auto max_parsing_threads = std::max(settings.max_threads/ storage->paths.size(), 1UL); @@ -753,6 +742,7 @@ public: if (reader->pull(chunk)) { UInt64 num_rows = chunk.getNumRows(); + progress(num_rows, 0); /// Enrich with virtual columns. 
if (files_info->need_path_column) @@ -770,14 +760,6 @@ public: chunk.addColumn(column->convertToFullColumnIfConst()); } - if (num_rows && total_files_size) - { - size_t chunk_size = input_format->getApproxBytesReadForChunk(); - if (!chunk_size) - chunk_size = chunk.bytes(); - updateRowsProgressApprox( - *this, num_rows, chunk_size, total_files_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max); - } return chunk; } @@ -816,12 +798,6 @@ private: bool finished_generate = false; std::shared_lock shared_lock; - - UInt64 total_rows_approx_accumulated = 0; - size_t total_rows_count_times = 0; - UInt64 total_rows_approx_max = 0; - - size_t total_files_size = 0; }; diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index f4791e45e2b..d933ffe8041 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -148,7 +148,8 @@ public: const Block & virtual_header_, ContextPtr context_, KeysWithInfo * read_keys_, - const S3Settings::RequestSettings & request_settings_) + const S3Settings::RequestSettings & request_settings_, + std::function progress_callback_) : WithContext(context_) , client(S3::Client::create(client_)) , globbed_uri(globbed_uri_) @@ -158,6 +159,7 @@ public: , request_settings(request_settings_) , list_objects_pool(CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, 1) , list_objects_scheduler(threadPoolCallbackRunner(list_objects_pool, "ListObjects")) + , progress_callback(progress_callback_) { if (globbed_uri.bucket.find_first_of("*?{") != globbed_uri.bucket.npos) throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, "Expression can not have wildcards inside bucket name"); @@ -194,11 +196,6 @@ public: return nextAssumeLocked(); } - size_t getTotalSize() const - { - return total_size.load(std::memory_order_relaxed); - } - ~Impl() { list_objects_pool.wait(); @@ -312,15 +309,19 @@ private: buffer.reserve(block.rows()); for (UInt64 idx : idxs.getData()) { - total_size.fetch_add(temp_buffer[idx].info->size, std::memory_order_relaxed); + if (progress_callback) + progress_callback(FileProgress(0, temp_buffer[idx].info->size)); buffer.emplace_back(std::move(temp_buffer[idx])); } } else { buffer = std::move(temp_buffer); - for (const auto & [_, info] : buffer) - total_size.fetch_add(info->size, std::memory_order_relaxed); + if (progress_callback) + { + for (const auto & [_, info] : buffer) + progress_callback(FileProgress(0, info->size)); + } } /// Set iterator only after the whole batch is processed @@ -381,7 +382,7 @@ private: ThreadPool list_objects_pool; ThreadPoolCallbackRunner list_objects_scheduler; std::future outcome_future; - std::atomic total_size = 0; + std::function progress_callback; }; StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator( @@ -391,8 +392,9 @@ StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator( const Block & virtual_header, ContextPtr context, KeysWithInfo * read_keys_, - const S3Settings::RequestSettings & request_settings_) - : pimpl(std::make_shared(client_, globbed_uri_, query, virtual_header, context, read_keys_, request_settings_)) + const S3Settings::RequestSettings & request_settings_, + std::function progress_callback_) + : pimpl(std::make_shared(client_, globbed_uri_, query, virtual_header, context, read_keys_, request_settings_, progress_callback_)) { } @@ -401,11 +403,6 @@ StorageS3Source::KeyWithInfo StorageS3Source::DisclosedGlobIterator::next() return pimpl->next(); } -size_t StorageS3Source::DisclosedGlobIterator::getTotalSize() const -{ - 
return pimpl->getTotalSize(); -} - class StorageS3Source::KeysIterator::Impl : WithContext { public: @@ -418,23 +415,26 @@ public: ASTPtr query_, const Block & virtual_header_, ContextPtr context_, - bool need_total_size, - KeysWithInfo * read_keys_) + KeysWithInfo * read_keys_, + std::function progress_callback_) : WithContext(context_) + , keys(keys_) + , client(S3::Client::create(client_)) + , version_id(version_id_) , bucket(bucket_) + , request_settings(request_settings_) , query(query_) , virtual_header(virtual_header_) + , progress_callback(progress_callback_) { - Strings all_keys = keys_; - /// Create a virtual block with one row to construct filter - if (query && virtual_header && !all_keys.empty()) + if (query && virtual_header && !keys.empty()) { /// Append "idx" column as the filter result virtual_header.insert({ColumnUInt64::create(), std::make_shared(), "_idx"}); auto block = virtual_header.cloneEmpty(); - addPathToVirtualColumns(block, fs::path(bucket) / all_keys.front(), 0); + addPathToVirtualColumns(block, fs::path(bucket) / keys.front(), 0); ASTPtr filter_ast; VirtualColumnUtils::prepareFilterBlockWithQuery(query, getContext(), block, filter_ast); @@ -442,8 +442,8 @@ public: if (filter_ast) { block = virtual_header.cloneEmpty(); - for (size_t i = 0; i < all_keys.size(); ++i) - addPathToVirtualColumns(block, fs::path(bucket) / all_keys[i], i); + for (size_t i = 0; i < keys.size(); ++i) + addPathToVirtualColumns(block, fs::path(bucket) / keys[i], i); VirtualColumnUtils::filterBlockWithQuery(query, block, getContext(), filter_ast); const auto & idxs = typeid_cast(*block.getByName("_idx").column); @@ -451,29 +451,17 @@ public: Strings filtered_keys; filtered_keys.reserve(block.rows()); for (UInt64 idx : idxs.getData()) - filtered_keys.emplace_back(std::move(all_keys[idx])); + filtered_keys.emplace_back(std::move(keys[idx])); - all_keys = std::move(filtered_keys); + keys = std::move(filtered_keys); } } - for (auto && key : all_keys) - { - std::optional info; - /// In case all_keys.size() > 1, avoid getting object info now - /// (it will be done anyway eventually, but with delay and in parallel). - /// But progress bar will not work in this case. 
- if (need_total_size && all_keys.size() == 1) - { - info = S3::getObjectInfo(client_, bucket, key, version_id_, request_settings_); - total_size += info->size; - } - - keys.emplace_back(std::move(key), std::move(info)); - } - if (read_keys_) - *read_keys_ = keys; + { + for (const auto & key : keys) + read_keys_->push_back({key, {}}); + } } KeyWithInfo next() @@ -481,24 +469,27 @@ public: size_t current_index = index.fetch_add(1, std::memory_order_relaxed); if (current_index >= keys.size()) return {}; + auto key = keys[current_index]; + std::optional info; + if (progress_callback) + { + info = S3::getObjectInfo(*client, bucket, key, version_id, request_settings); + progress_callback(FileProgress(0, info->size)); + } - return keys[current_index]; - } - - size_t getTotalSize() const - { - return total_size; + return {key, info}; } private: - KeysWithInfo keys; + Strings keys; std::atomic_size_t index = 0; - + std::unique_ptr client; + String version_id; String bucket; + S3Settings::RequestSettings request_settings; ASTPtr query; Block virtual_header; - - size_t total_size = 0; + std::function progress_callback; }; StorageS3Source::KeysIterator::KeysIterator( @@ -510,11 +501,11 @@ StorageS3Source::KeysIterator::KeysIterator( ASTPtr query, const Block & virtual_header, ContextPtr context, - bool need_total_size, - KeysWithInfo * read_keys) + KeysWithInfo * read_keys, + std::function progress_callback_) : pimpl(std::make_shared( client_, version_id_, keys_, bucket_, request_settings_, - query, virtual_header, context, need_total_size, read_keys)) + query, virtual_header, context, read_keys, progress_callback_)) { } @@ -523,11 +514,6 @@ StorageS3Source::KeyWithInfo StorageS3Source::KeysIterator::next() return pimpl->next(); } -size_t StorageS3Source::KeysIterator::getTotalSize() const -{ - return pimpl->getTotalSize(); -} - Block StorageS3Source::getHeader(Block sample_block, const std::vector & requested_virtual_columns) { for (const auto & virtual_column : requested_virtual_columns) @@ -552,7 +538,7 @@ StorageS3Source::StorageS3Source( const String & version_id_, std::shared_ptr file_iterator_, const size_t download_thread_num_) - : ISource(getHeader(sample_block_, requested_virtual_columns_)) + : ISource(getHeader(sample_block_, requested_virtual_columns_), false) , WithContext(context_) , name(std::move(name_)) , bucket(bucket_) @@ -573,10 +559,7 @@ StorageS3Source::StorageS3Source( { reader = createReader(); if (reader) - { - total_objects_size = tryGetFileSizeFromReadBuffer(*reader.getReadBuffer()).value_or(0); reader_future = createReaderAsync(); - } } StorageS3Source::ReaderHolder StorageS3Source::createReader() @@ -614,7 +597,7 @@ StorageS3Source::ReaderHolder StorageS3Source::createReader() auto pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); auto current_reader = std::make_unique(*pipeline); - return ReaderHolder{fs::path(bucket) / key_with_info.key, std::move(read_buf), input_format, std::move(pipeline), std::move(current_reader)}; + return ReaderHolder{fs::path(bucket) / key_with_info.key, std::move(read_buf), std::move(pipeline), std::move(current_reader)}; } std::future StorageS3Source::createReaderAsync() @@ -638,10 +621,13 @@ std::unique_ptr StorageS3Source::createS3ReadBuffer(const String & k return createAsyncS3ReadBuffer(key, read_settings, object_size); } - return std::make_unique( + auto buf = std::make_unique( client, bucket, key, version_id, request_settings, read_settings, /*use_external_buffer*/ false, /*offset_*/ 0, 
         /*read_until_position_*/ 0, /*restricted_seek_*/ false, object_size);
+
+    buf->setProgressCallback(getContext());
+    return buf;
 }
 
 std::unique_ptr<ReadBuffer> StorageS3Source::createAsyncS3ReadBuffer(
@@ -652,7 +638,7 @@ std::unique_ptr<ReadBuffer> StorageS3Source::createAsyncS3ReadBuffer(
         [this, read_settings, object_size]
         (const std::string & path, size_t read_until_position) -> std::unique_ptr<ReadBufferFromFileBase>
     {
-        return std::make_unique<ReadBufferFromS3>(
+        auto buf = std::make_unique<ReadBufferFromS3>(
             client,
             bucket,
             path,
             version_id,
             request_settings,
             read_settings,
             /* use_external_buffer */true,
             /* offset */0,
             read_until_position,
             /* restricted_seek */true,
             object_size);
+        buf->setProgressCallback(getContext());
+        return buf;
     };
 
     auto s3_impl = std::make_unique<ReadBufferFromRemoteFSGather>(
@@ -713,17 +701,10 @@ Chunk StorageS3Source::generate()
         if (reader->pull(chunk))
         {
             UInt64 num_rows = chunk.getNumRows();
+            progress(num_rows, 0);
 
             const auto & file_path = reader.getPath();
-            if (num_rows && total_objects_size)
-            {
-                size_t chunk_size = reader.getFormat()->getApproxBytesReadForChunk();
-                if (!chunk_size)
-                    chunk_size = chunk.bytes();
-                updateRowsProgressApprox(*this, num_rows, chunk_size, total_objects_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max);
-            }
-
             for (const auto & virtual_column : requested_virtual_columns)
             {
                 if (virtual_column.name == "_path")
@@ -748,13 +729,6 @@ Chunk StorageS3Source::generate()
         if (!reader)
             break;
 
-        size_t object_size = tryGetFileSizeFromReadBuffer(*reader.getReadBuffer()).value_or(0);
-        /// Adjust total_rows_approx_accumulated with new total size.
-        if (total_objects_size)
-            total_rows_approx_accumulated = static_cast<UInt64>(
-                std::ceil(static_cast<double>(total_objects_size + object_size) / total_objects_size * total_rows_approx_accumulated));
-        total_objects_size += object_size;
-
         /// Even if task is finished the thread may be not freed in pool.
         /// So wait until it will be freed before scheduling a new task.
create_reader_pool.wait(); @@ -990,8 +964,8 @@ std::shared_ptr StorageS3::createFileIterator( ContextPtr local_context, ASTPtr query, const Block & virtual_block, - bool need_total_size, - KeysWithInfo * read_keys) + KeysWithInfo * read_keys, + std::function progress_callback) { if (distributed_processing) { @@ -1002,14 +976,14 @@ std::shared_ptr StorageS3::createFileIterator( /// Iterate through disclosed globs and make a source for each file return std::make_shared( *configuration.client, configuration.url, query, virtual_block, - local_context, read_keys, configuration.request_settings); + local_context, read_keys, configuration.request_settings, progress_callback); } else { return std::make_shared( *configuration.client, configuration.url.version_id, configuration.keys, configuration.url.bucket, configuration.request_settings, query, - virtual_block, local_context, need_total_size, read_keys); + virtual_block, local_context, read_keys, progress_callback); } } @@ -1059,7 +1033,7 @@ Pipe StorageS3::read( } std::shared_ptr iterator_wrapper = createFileIterator( - query_configuration, distributed_processing, local_context, query_info.query, virtual_block); + query_configuration, distributed_processing, local_context, query_info.query, virtual_block, nullptr, local_context->getFileProgressCallback()); ColumnsDescription columns_description; Block block_for_format; @@ -1459,7 +1433,7 @@ ColumnsDescription StorageS3::getTableStructureFromDataImpl( { KeysWithInfo read_keys; - auto file_iterator = createFileIterator(configuration, false, ctx, nullptr, {}, false, &read_keys); + auto file_iterator = createFileIterator(configuration, false, ctx, nullptr, {}, &read_keys); std::optional columns_from_cache; size_t prev_read_keys_size = read_keys.size(); diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index 13053833623..16d075a67d2 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -56,7 +56,6 @@ public: public: virtual ~IIterator() = default; virtual KeyWithInfo next() = 0; - virtual size_t getTotalSize() const = 0; KeyWithInfo operator ()() { return next(); } }; @@ -71,10 +70,10 @@ public: const Block & virtual_header, ContextPtr context, KeysWithInfo * read_keys_ = nullptr, - const S3Settings::RequestSettings & request_settings_ = {}); + const S3Settings::RequestSettings & request_settings_ = {}, + std::function progress_callback_ = {}); KeyWithInfo next() override; - size_t getTotalSize() const override; private: class Impl; @@ -94,11 +93,10 @@ public: ASTPtr query, const Block & virtual_header, ContextPtr context, - bool need_total_size = true, - KeysWithInfo * read_keys = nullptr); + KeysWithInfo * read_keys = nullptr, + std::function progress_callback_ = {}); KeyWithInfo next() override; - size_t getTotalSize() const override; private: class Impl; @@ -113,8 +111,6 @@ public: KeyWithInfo next() override { return {callback(), {}}; } - size_t getTotalSize() const override { return 0; } - private: ReadTaskCallback callback; }; @@ -163,12 +159,10 @@ private: ReaderHolder( String path_, std::unique_ptr read_buf_, - std::shared_ptr input_format_, std::unique_ptr pipeline_, std::unique_ptr reader_) : path(std::move(path_)) , read_buf(std::move(read_buf_)) - , input_format(input_format_) , pipeline(std::move(pipeline_)) , reader(std::move(reader_)) { @@ -189,16 +183,11 @@ private: /// reader uses pipeline, pipeline uses read_buf. 
reader = std::move(other.reader); pipeline = std::move(other.pipeline); - input_format = std::move(other.input_format); read_buf = std::move(other.read_buf); path = std::move(other.path); return *this; } - const std::unique_ptr & getReadBuffer() const { return read_buf; } - - const std::shared_ptr & getFormat() const { return input_format; } - explicit operator bool() const { return reader != nullptr; } PullingPipelineExecutor * operator->() { return reader.get(); } const PullingPipelineExecutor * operator->() const { return reader.get(); } @@ -207,7 +196,6 @@ private: private: String path; std::unique_ptr read_buf; - std::shared_ptr input_format; std::unique_ptr pipeline; std::unique_ptr reader; }; @@ -224,11 +212,6 @@ private: ThreadPoolCallbackRunner create_reader_scheduler; std::future reader_future; - UInt64 total_rows_approx_max = 0; - size_t total_rows_count_times = 0; - UInt64 total_rows_approx_accumulated = 0; - size_t total_objects_size = 0; - /// Recreate ReadBuffer and Pipeline for each file. ReaderHolder createReader(); std::future createReaderAsync(); @@ -352,8 +335,8 @@ private: ContextPtr local_context, ASTPtr query, const Block & virtual_block, - bool need_total_size = true, - KeysWithInfo * read_keys = nullptr); + KeysWithInfo * read_keys = nullptr, + std::function progress_callback = {}); static ColumnsDescription getTableStructureFromDataImpl( const Configuration & configuration, diff --git a/src/Storages/StorageS3Cluster.cpp b/src/Storages/StorageS3Cluster.cpp index 153a3b7f11b..18ae44bc1ad 100644 --- a/src/Storages/StorageS3Cluster.cpp +++ b/src/Storages/StorageS3Cluster.cpp @@ -86,7 +86,7 @@ void StorageS3Cluster::updateConfigurationIfChanged(ContextPtr local_context) RemoteQueryExecutor::Extension StorageS3Cluster::getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const { auto iterator = std::make_shared( - *s3_configuration.client, s3_configuration.url, query, virtual_block, context); + *s3_configuration.client, s3_configuration.url, query, virtual_block, context, nullptr, s3_configuration.request_settings, context->getFileProgressCallback()); auto callback = std::make_shared>([iterator]() mutable -> String { return iterator->next().key; }); return RemoteQueryExecutor::Extension{ .task_iterator = std::move(callback) }; } diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 1d6aed204cb..1ea0eb5a88e 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -234,7 +234,7 @@ StorageURLSource::StorageURLSource( const HTTPHeaderEntries & headers_, const URIParams & params, bool glob_url) - : ISource(getHeader(sample_block, requested_virtual_columns_)), name(std::move(name_)), requested_virtual_columns(requested_virtual_columns_), uri_iterator(uri_iterator_) + : ISource(getHeader(sample_block, requested_virtual_columns_), false), name(std::move(name_)), requested_virtual_columns(requested_virtual_columns_), uri_iterator(uri_iterator_) { auto headers = getHeaders(headers_); @@ -261,7 +261,8 @@ StorageURLSource::StorageURLSource( credentials, headers, glob_url, - current_uri_options.size() == 1); + current_uri_options.size() == 1, + context->getFileProgressCallback()); /// If file is empty and engine_url_skip_empty_files=1, skip it and go to the next file. 
 }
 
@@ -270,22 +271,11 @@ StorageURLSource::StorageURLSource(
         curr_uri = uri_and_buf.first;
         read_buf = std::move(uri_and_buf.second);
 
-        size_t file_size = 0;
-        try
+        if (auto progress_callback = context->getFileProgressCallback())
         {
-            file_size = getFileSizeFromReadBuffer(*read_buf);
-        }
-        catch (...)
-        {
-            // we simply continue without updating total_size
-        }
-
-        if (file_size)
-        {
-            /// Adjust total_rows_approx_accumulated with new total size.
-            if (total_size)
-                total_rows_approx_accumulated = static_cast<UInt64>(std::ceil(static_cast<double>(total_size + file_size) / total_size * total_rows_approx_accumulated));
-            total_size += file_size;
+            size_t file_size = tryGetFileSizeFromReadBuffer(*read_buf).value_or(0);
+            LOG_DEBUG(&Poco::Logger::get("URL"), "Send file size {}", file_size);
+            progress_callback(FileProgress(0, file_size));
         }
 
         // TODO: Pass max_parsing_threads and max_download_threads adjusted for num_streams.
@@ -331,14 +321,7 @@ Chunk StorageURLSource::generate()
         if (reader->pull(chunk))
         {
             UInt64 num_rows = chunk.getNumRows();
-            if (num_rows && total_size)
-            {
-                size_t chunk_size = input_format->getApproxBytesReadForChunk();
-                if (!chunk_size)
-                    chunk_size = chunk.bytes();
-                updateRowsProgressApprox(
-                    *this, num_rows, chunk_size, total_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max);
-            }
+            progress(num_rows, 0);
 
             const String & path{curr_uri.getPath()};
 
@@ -376,7 +359,8 @@ std::pair<Poco::URI, std::unique_ptr<ReadWriteBufferFromHTTP>> StorageURLSource:
     Poco::Net::HTTPBasicCredentials & credentials,
     const HTTPHeaderEntries & headers,
     bool glob_url,
-    bool delay_initialization)
+    bool delay_initialization,
+    std::function<void(FileProgress)> file_progress_callback)
 {
     String first_exception_message;
     ReadSettings read_settings = context->getReadSettings();
@@ -418,6 +402,7 @@ std::pair<Poco::URI, std::unique_ptr<ReadWriteBufferFromHTTP>> StorageURLSource:
                 continue;
             }
 
+            res->setProgressCallback(file_progress_callback);
             return std::make_tuple(request_uri, std::move(res));
         }
         catch (...)
diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index e3305cda89e..315a5f9897b 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -195,7 +195,8 @@ public: Poco::Net::HTTPBasicCredentials & credentials, const HTTPHeaderEntries & headers, bool glob_url, - bool delay_initialization); + bool delay_initialization, + std::function file_progress_callback = {}); private: using InitializeFunc = std::function; @@ -212,11 +213,6 @@ private: std::unique_ptr reader; Poco::Net::HTTPBasicCredentials credentials; - - size_t total_size = 0; - UInt64 total_rows_approx_max = 0; - size_t total_rows_count_times = 0; - UInt64 total_rows_approx_accumulated = 0; }; class StorageURLSink : public SinkToStorage From ccb42d0afa202aba6b4a8459bae971afa87a67dd Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 22 Jun 2023 17:26:42 +0000 Subject: [PATCH 0662/1997] Automatic style fix --- .../test_replicated_database/test.py | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py index 8d4244b69b5..b3ba8d4737f 100644 --- a/tests/integration/test_replicated_database/test.py +++ b/tests/integration/test_replicated_database/test.py @@ -1274,6 +1274,7 @@ def test_recover_digest_mismatch(started_cluster): print("Everything Okay") + def test_replicated_table_structure_alter(started_cluster): main_node.query("DROP DATABASE IF EXISTS table_structure") dummy_node.query("DROP DATABASE IF EXISTS table_structure") @@ -1292,13 +1293,20 @@ def test_replicated_table_structure_alter(started_cluster): dummy_node.query("DETACH DATABASE table_structure") settings = {"distributed_ddl_task_timeout": 0} - main_node.query("CREATE TABLE table_structure.rmt (n int, v UInt64) ENGINE=ReplicatedReplacingMergeTree(v) ORDER BY n", settings=settings) + main_node.query( + "CREATE TABLE table_structure.rmt (n int, v UInt64) ENGINE=ReplicatedReplacingMergeTree(v) ORDER BY n", + settings=settings, + ) competing_node.query("SYSTEM SYNC DATABASE REPLICA table_structure") competing_node.query("DETACH DATABASE table_structure") - main_node.query("ALTER TABLE table_structure.rmt ADD COLUMN m int", settings=settings) - main_node.query("ALTER TABLE table_structure.rmt COMMENT COLUMN v 'version'", settings=settings) + main_node.query( + "ALTER TABLE table_structure.rmt ADD COLUMN m int", settings=settings + ) + main_node.query( + "ALTER TABLE table_structure.rmt COMMENT COLUMN v 'version'", settings=settings + ) main_node.query("INSERT INTO table_structure.rmt VALUES (1, 2, 3)") command = "rm -f /var/lib/clickhouse/metadata/table_structure/mem.sql" @@ -1312,7 +1320,7 @@ def test_replicated_table_structure_alter(started_cluster): competing_node.query("SYSTEM SYNC DATABASE REPLICA table_structure") competing_node.query("SYSTEM SYNC REPLICA table_structure.rmt") - #time.sleep(600) + # time.sleep(600) assert "mem" in competing_node.query("SHOW TABLES FROM table_structure") assert "1\t2\t3\n" == competing_node.query("SELECT * FROM table_structure.rmt") @@ -1320,4 +1328,6 @@ def test_replicated_table_structure_alter(started_cluster): main_node.query("INSERT INTO table_structure.rmt VALUES (1, 2, 3, 4)") dummy_node.query("SYSTEM SYNC DATABASE REPLICA table_structure") dummy_node.query("SYSTEM SYNC REPLICA table_structure.rmt") - assert "1\t2\t3\t0\n1\t2\t3\t4\n" == dummy_node.query("SELECT * FROM table_structure.rmt ORDER BY k") + assert "1\t2\t3\t0\n1\t2\t3\t4\n" == dummy_node.query( + 
"SELECT * FROM table_structure.rmt ORDER BY k" + ) From f48cd0f926338d2420b123aaed3bc22fbc2969e9 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 22 Jun 2023 17:30:24 +0000 Subject: [PATCH 0663/1997] Delete updateRowsProgressApprox implementation --- src/Storages/ReadFromStorageProgress.cpp | 52 ------------------------ src/Storages/ReadFromStorageProgress.h | 18 -------- 2 files changed, 70 deletions(-) delete mode 100644 src/Storages/ReadFromStorageProgress.cpp delete mode 100644 src/Storages/ReadFromStorageProgress.h diff --git a/src/Storages/ReadFromStorageProgress.cpp b/src/Storages/ReadFromStorageProgress.cpp deleted file mode 100644 index 8ad1cf92209..00000000000 --- a/src/Storages/ReadFromStorageProgress.cpp +++ /dev/null @@ -1,52 +0,0 @@ -#include -#include -#include - -namespace DB -{ - -void updateRowsProgressApprox( - ISource & source, - size_t num_rows, - UInt64 chunk_bytes_size, - UInt64 total_result_size, - UInt64 & total_rows_approx_accumulated, - size_t & total_rows_count_times, - UInt64 & total_rows_approx_max) -{ - if (!total_result_size) - return; - - if (!num_rows) - return; - - const auto progress = source.getReadProgress(); - if (progress && !progress->limits.empty()) - { - for (const auto & limit : progress->limits) - { - if (limit.leaf_limits.max_rows || limit.leaf_limits.max_bytes - || limit.local_limits.size_limits.max_rows || limit.local_limits.size_limits.max_bytes) - return; - } - } - - const auto bytes_per_row = std::ceil(static_cast(chunk_bytes_size) / num_rows); - size_t total_rows_approx = static_cast(std::ceil(static_cast(total_result_size) / bytes_per_row)); - total_rows_approx_accumulated += total_rows_approx; - ++total_rows_count_times; - total_rows_approx = total_rows_approx_accumulated / total_rows_count_times; - - /// We need to add diff, because total_rows_approx is incremental value. - /// It would be more correct to send total_rows_approx as is (not a diff), - /// but incrementation of total_rows_to_read does not allow that. - /// A new counter can be introduced for that to be sent to client, but it does not worth it. 
- if (total_rows_approx > total_rows_approx_max) - { - size_t diff = total_rows_approx - total_rows_approx_max; - source.addTotalRowsApprox(diff); - total_rows_approx_max = total_rows_approx; - } -} - -} diff --git a/src/Storages/ReadFromStorageProgress.h b/src/Storages/ReadFromStorageProgress.h deleted file mode 100644 index 2be37d26fee..00000000000 --- a/src/Storages/ReadFromStorageProgress.h +++ /dev/null @@ -1,18 +0,0 @@ -#pragma once -#include - -namespace DB -{ - -class ISource; - -void updateRowsProgressApprox( - ISource & source, - size_t num_rows, - UInt64 chunk_bytes_size, - UInt64 total_result_size, - UInt64 & total_rows_approx_accumulated, - size_t & total_rows_count_times, - UInt64 & total_rows_approx_max); - -} From 84bb17033c7cc91adf2147456a29606eb1f48812 Mon Sep 17 00:00:00 2001 From: zvonand Date: Thu, 22 Jun 2023 19:48:28 +0200 Subject: [PATCH 0664/1997] fix black --- tests/integration/test_storage_hdfs/test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index cf951050c6f..8ff88791a3a 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -102,7 +102,9 @@ def test_storage_with_multidirectory_glob(started_cluster): assert (r == f"File1\t11\nFile2\t22\n") or (r == f"File2\t22\nFile1\t11\n") try: - node1.query("SELECT * FROM hdfs('hdfs://hdfs1:9000/multiglob/{p4/path1,p2/path3}/postfix/data{1,2}.nonexist', TSV)") + node1.query( + "SELECT * FROM hdfs('hdfs://hdfs1:9000/multiglob/{p4/path1,p2/path3}/postfix/data{1,2}.nonexist', TSV)" + ) assert False, "Exception have to be thrown" except Exception as ex: print(ex) From 3d385be9cb9236cd59baead6e4ad83d34316e357 Mon Sep 17 00:00:00 2001 From: Andrey Zvonov <32552679+zvonand@users.noreply.github.com> Date: Thu, 22 Jun 2023 20:56:50 +0300 Subject: [PATCH 0665/1997] Update Settings.h --- src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index d01caeda344..3d42bd582ed 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -770,7 +770,7 @@ class IColumn; M(Bool, allow_experimental_undrop_table_query, false, "Allow to use undrop query to restore dropped table in a limited time", 0) \ M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \ M(UInt64, extract_kvp_max_pairs_per_row, 1000, "Max number pairs that can be produced by extractKeyValuePairs function. Used to safeguard against consuming too much memory.", 0) \ - M(Timezone, session_timezone, "", "The default timezone for current session or query. The server default timezone, if empty.", 0) \ + M(Timezone, session_timezone, "", "The default timezone for current session or query. The server default timezone if empty.", 0) \ // End of COMMON_SETTINGS // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS. From ba4b5c335047f72bbed063434b5f4b7a38459fe6 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 22 Jun 2023 18:15:16 +0000 Subject: [PATCH 0666/1997] Fix stupid bug. 
--- src/Interpreters/PreparedSets.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/PreparedSets.cpp b/src/Interpreters/PreparedSets.cpp index 428ef873bc5..67822ecf440 100644 --- a/src/Interpreters/PreparedSets.cpp +++ b/src/Interpreters/PreparedSets.cpp @@ -230,12 +230,11 @@ FutureSetPtr PreparedSets::addFromTuple(const Hash & key, Block block, const Set const auto & set_types = from_tuple->getTypes(); auto & sets_by_hash = sets_from_tuple[key]; - auto types = from_tuple->getTypes(); for (const auto & set : sets_by_hash) if (equals(set->getTypes(), set_types)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Duplicate set: {}", toString(key, from_tuple->getTypes())); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Duplicate set: {}", toString(key, set_types)); - sets_by_hash.push_back(std::move(from_tuple)); + sets_by_hash.push_back(from_tuple); return from_tuple; } From 75391afbd82b3c2aa5caae8fed82324334a49bbb Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 22 Jun 2023 18:16:48 +0000 Subject: [PATCH 0667/1997] Fix typo. --- src/Interpreters/ActionsVisitor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index bf78868463a..cfbe53b5e4d 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -1406,7 +1406,7 @@ FutureSetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool /// /// Mutation is executed in two stages: /// * first, query 'SELECT count() FROM table WHERE ...' is executed to get the set of affected parts (using analyzer) - /// * second, every part is mutated separately, where plan is build "manually", usign this code as well + /// * second, every part is mutated separately, where plan is build "manually", using this code as well /// To share the Set in between first and second stage, we should use the same hash. /// New analyzer is uses a hash from query tree, so here we also build a query tree. 
/// From 369ad0aea397612a823f28b7aeb3ac72583066f8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 22 Jun 2023 20:32:29 +0200 Subject: [PATCH 0668/1997] Remove ALTER of LIVE VIEW --- src/Interpreters/InterpreterAlterQuery.cpp | 24 +------- src/Storages/LiveView/LiveViewCommands.h | 65 ---------------------- 2 files changed, 1 insertion(+), 88 deletions(-) delete mode 100644 src/Storages/LiveView/LiveViewCommands.h diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp index ec2145b38bf..e82415f1aca 100644 --- a/src/Interpreters/InterpreterAlterQuery.cpp +++ b/src/Interpreters/InterpreterAlterQuery.cpp @@ -18,8 +18,6 @@ #include #include #include -#include -#include #include #include #include @@ -117,7 +115,6 @@ BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter) AlterCommands alter_commands; PartitionCommands partition_commands; MutationCommands mutation_commands; - LiveViewCommands live_view_commands; for (const auto & child : alter.command_list->children) { auto * command_ast = child->as(); @@ -137,17 +134,13 @@ BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter) mutation_commands.emplace_back(std::move(*mut_command)); } - else if (auto live_view_command = LiveViewCommand::parse(command_ast)) - { - live_view_commands.emplace_back(std::move(*live_view_command)); - } else throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong parameter type in ALTER query"); } if (typeid_cast(database.get())) { - int command_types_count = !mutation_commands.empty() + !partition_commands.empty() + !live_view_commands.empty() + !alter_commands.empty(); + int command_types_count = !mutation_commands.empty() + !partition_commands.empty() + !alter_commands.empty(); bool mixed_settings_amd_metadata_alter = alter_commands.hasSettingsAlterCommand() && !alter_commands.isSettingsAlter(); if (1 < command_types_count || mixed_settings_amd_metadata_alter) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "For Replicated databases it's not allowed " @@ -170,21 +163,6 @@ BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter) res.pipeline = QueryPipeline(std::move(partition_commands_pipe)); } - if (!live_view_commands.empty()) - { - live_view_commands.validate(*table); - for (const LiveViewCommand & command : live_view_commands) - { - auto live_view = std::dynamic_pointer_cast(table); - switch (command.type) - { - case LiveViewCommand::REFRESH: - live_view->refresh(); - break; - } - } - } - if (!alter_commands.empty()) { auto alter_lock = table->lockForAlter(getContext()->getSettingsRef().lock_acquire_timeout); diff --git a/src/Storages/LiveView/LiveViewCommands.h b/src/Storages/LiveView/LiveViewCommands.h deleted file mode 100644 index 2bb2dfb2752..00000000000 --- a/src/Storages/LiveView/LiveViewCommands.h +++ /dev/null @@ -1,65 +0,0 @@ -#pragma once -/* Copyright (c) 2018 BlackBerry Limited - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at -http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int UNKNOWN_STORAGE; -} - -struct LiveViewCommand -{ - enum Type - { - REFRESH - }; - - Type type; - - ASTPtr values; - - static LiveViewCommand refresh(const ASTPtr & values) - { - LiveViewCommand res; - res.type = REFRESH; - res.values = values; - return res; - } - - static std::optional parse(ASTAlterCommand * command) - { - if (command->type == ASTAlterCommand::LIVE_VIEW_REFRESH) - return refresh(command->values); - return {}; - } -}; - - -class LiveViewCommands : public std::vector -{ -public: - void validate(const IStorage & table) - { - if (!empty() && !dynamic_cast(&table)) - throw Exception(DB::ErrorCodes::UNKNOWN_STORAGE, "Wrong storage type. Must be StorageLiveView"); - } -}; - -} From 24fab7bfde4557303335609949548632dbafc218 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 22 Jun 2023 18:48:15 +0000 Subject: [PATCH 0669/1997] Remove old includes --- src/Storages/HDFS/StorageHDFS.cpp | 1 - src/Storages/StorageAzureBlob.cpp | 1 - src/Storages/StorageFile.cpp | 1 - src/Storages/StorageS3.cpp | 1 - src/Storages/StorageURL.cpp | 1 - 5 files changed, 5 deletions(-) diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 79cda3050d6..c6f0bd3f18d 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -30,7 +30,6 @@ #include #include #include -#include #include #include diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index 8e06ceda885..1af7afc952f 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index f196415e2dc..914fc432907 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -5,7 +5,6 @@ #include #include #include -#include #include #include diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index d933ffe8041..5a75da7a188 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -29,7 +29,6 @@ #include #include #include -#include #include #include diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 1ea0eb5a88e..5a8f94d07fd 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -3,7 +3,6 @@ #include #include #include -#include #include #include From 2b01711565e919baa39cd6e68ca42481cea6fdb6 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 22 Jun 2023 19:59:33 +0200 Subject: [PATCH 0670/1997] fix assert in test, revert debug message --- src/IO/Lz4DeflatingWriteBuffer.cpp | 10 ++-------- .../test_checking_s3_blobs_paranoid/test.py | 4 ++-- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/src/IO/Lz4DeflatingWriteBuffer.cpp b/src/IO/Lz4DeflatingWriteBuffer.cpp index 35feafbc736..27c945f92cf 100644 --- a/src/IO/Lz4DeflatingWriteBuffer.cpp +++ b/src/IO/Lz4DeflatingWriteBuffer.cpp @@ -104,14 +104,8 @@ void Lz4DeflatingWriteBuffer::nextImpl() if (LZ4F_isError(compressed_size)) throw Exception( ErrorCodes::LZ4_ENCODER_FAILED, - "LZ4 failed to encode stream. LZ4F version: {}, CodeName: {}," - " in_capacity: {}, out_capacity: {}, cur_buffer_size: {}, min_compressed_block_size: {}", - LZ4F_VERSION, - LZ4F_getErrorName(compressed_size), - in_capacity, - out_capacity, - cur_buffer_size, - min_compressed_block_size); + "LZ4 failed to encode stream. 
LZ4F version: {}", + LZ4F_VERSION); in_capacity -= cur_buffer_size; in_data = reinterpret_cast(working_buffer.end() - in_capacity); diff --git a/tests/integration/test_checking_s3_blobs_paranoid/test.py b/tests/integration/test_checking_s3_blobs_paranoid/test.py index 244ca8a2c81..43a313f07dc 100644 --- a/tests/integration/test_checking_s3_blobs_paranoid/test.py +++ b/tests/integration/test_checking_s3_blobs_paranoid/test.py @@ -165,5 +165,5 @@ def test_upload_s3_fail_upload_part_when_multi_part_upload( assert count_create_multi_part_uploads == 1 assert count_upload_parts >= 2 assert ( - count_s3_errors == 2 - ) # the second is cancel multipart upload, s3_mock just redirects this request + count_s3_errors >= 2 + ) From f226397fa35bc7637a1a2a078018e760ecb09d6b Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 22 Jun 2023 19:21:08 +0000 Subject: [PATCH 0671/1997] Fix check. --- src/Interpreters/Set.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/Interpreters/Set.cpp b/src/Interpreters/Set.cpp index b42ff102f72..b8b61c7c11f 100644 --- a/src/Interpreters/Set.cpp +++ b/src/Interpreters/Set.cpp @@ -173,9 +173,6 @@ void Set::setHeader(const ColumnsWithTypeAndName & header) void Set::fillSetElements() { - if (data.getTotalRowCount()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot convert set to ordered because it is not empty"); - fill_set_elements = true; set_elements.reserve(keys_size); for (const auto & type : set_elements_types) From 65e5d40cae52b6cf9a2f0408f6ccb373f23a0e42 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Thu, 22 Jun 2023 19:38:41 +0000 Subject: [PATCH 0672/1997] Fix formatDateTime() with fractional negative datetime64 --- src/Functions/formatDateTime.cpp | 10 +++++++++- .../0_stateless/00718_format_datetime.reference | 5 +++++ tests/queries/0_stateless/00718_format_datetime.sql | 5 +++++ 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/src/Functions/formatDateTime.cpp b/src/Functions/formatDateTime.cpp index aac7ed1ad4d..c849b0dd933 100644 --- a/src/Functions/formatDateTime.cpp +++ b/src/Functions/formatDateTime.cpp @@ -943,7 +943,15 @@ public: { if constexpr (std::is_same_v) { - const auto c = DecimalUtils::split(vec[i], scale); + auto c = DecimalUtils::split(vec[i], scale); + + if (vec[i].value < 0 && c.fractional) + { + c.fractional = DecimalUtils::scaleMultiplier(scale) + + (c.whole ? 
DataType::FieldType(-1) : DataType::FieldType(1)) * c.fractional; + --c.whole; + } + for (auto & instruction : instructions) instruction.perform(pos, static_cast(c.whole), c.fractional, scale, time_zone); } diff --git a/tests/queries/0_stateless/00718_format_datetime.reference b/tests/queries/0_stateless/00718_format_datetime.reference index 50874ac9b2e..a51134348cc 100644 --- a/tests/queries/0_stateless/00718_format_datetime.reference +++ b/tests/queries/0_stateless/00718_format_datetime.reference @@ -61,6 +61,11 @@ no formatting pattern no formatting pattern 2022-12-08 18:11:29.123400000 2022-12-08 18:11:29.1 2022-12-08 18:11:29.000000 +1900-01-01 00:00:00.000 +1962-12-08 18:11:29.123 +1969-12-31 23:59:59.999 +1970-01-01 00:00:00.000 +1970-01-01 00:00:00.001 2022-12-08 18:11:29.000000 2022-12-08 00:00:00.000000 2022-12-08 00:00:00.000000 diff --git a/tests/queries/0_stateless/00718_format_datetime.sql b/tests/queries/0_stateless/00718_format_datetime.sql index c0db6a4f64e..14e43c31d9c 100644 --- a/tests/queries/0_stateless/00718_format_datetime.sql +++ b/tests/queries/0_stateless/00718_format_datetime.sql @@ -87,6 +87,11 @@ select formatDateTime(toDateTime64('2010-01-04 12:34:56.123456789', 9), '%f') SE select formatDateTime(toDateTime64('2022-12-08 18:11:29.1234', 9, 'UTC'), '%F %T.%f'); select formatDateTime(toDateTime64('2022-12-08 18:11:29.1234', 1, 'UTC'), '%F %T.%f'); select formatDateTime(toDateTime64('2022-12-08 18:11:29.1234', 0, 'UTC'), '%F %T.%f'); +select formatDateTime(toDateTime64('1900-01-01 00:00:00.000', 3, 'UTC'), '%F %T.%f'); +select formatDateTime(toDateTime64('1962-12-08 18:11:29.123', 3, 'UTC'), '%F %T.%f'); +select formatDateTime(toDateTime64('1969-12-31 23:59:59.999', 3, 'UTC'), '%F %T.%f'); +select formatDateTime(toDateTime64('1970-01-01 00:00:00.000', 3, 'UTC'), '%F %T.%f'); +select formatDateTime(toDateTime64('1970-01-01 00:00:00.001', 3, 'UTC'), '%F %T.%f'); select formatDateTime(toDateTime('2022-12-08 18:11:29', 'UTC'), '%F %T.%f'); select formatDateTime(toDate32('2022-12-08 18:11:29', 'UTC'), '%F %T.%f'); select formatDateTime(toDate('2022-12-08 18:11:29', 'UTC'), '%F %T.%f'); From 220520c516bea15399396b5f82aa3ab2d6cd9ca3 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 22 Jun 2023 21:45:10 +0200 Subject: [PATCH 0673/1997] fix --- src/Common/Exception.cpp | 22 +++++++++---------- src/Storages/MergeTree/MergeTreeData.h | 2 +- .../MergeTree/MergeTreePartsMover.cpp | 10 +++++++-- src/Storages/StorageReplicatedMergeTree.cpp | 8 +++---- src/Storages/StorageReplicatedMergeTree.h | 6 ++--- .../test_s3_zero_copy_ttl/configs/s3.xml | 2 ++ .../integration/test_s3_zero_copy_ttl/test.py | 2 +- 7 files changed, 30 insertions(+), 22 deletions(-) diff --git a/src/Common/Exception.cpp b/src/Common/Exception.cpp index 3fd0a929d6f..9757c24a8ec 100644 --- a/src/Common/Exception.cpp +++ b/src/Common/Exception.cpp @@ -401,17 +401,17 @@ PreformattedMessage getCurrentExceptionMessageAndPattern(bool with_stacktrace, b } catch (...) {} -#ifdef ABORT_ON_LOGICAL_ERROR - try - { - throw; - } - catch (const std::logic_error &) - { - abortOnFailedAssertion(stream.str()); - } - catch (...) {} -#endif +// #ifdef ABORT_ON_LOGICAL_ERROR +// try +// { +// throw; +// } +// catch (const std::logic_error &) +// { +// abortOnFailedAssertion(stream.str()); +// } +// catch (...) {} +// #endif } catch (...) 
{ diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index b27392b355b..f6f241c1e89 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -1040,7 +1040,7 @@ public: /// Fetch part only if some replica has it on shared storage like S3 /// Overridden in StorageReplicatedMergeTree - virtual MutableDataPartStoragePtr tryToFetchIfShared(const IMergeTreeDataPart &, const DiskPtr &, const String &) { return nullptr; } + virtual MutableDataPartPtr tryToFetchIfShared(const IMergeTreeDataPart &, const DiskPtr &, const String &) { return nullptr; } /// Check shared data usage on other replicas for detached/freezed part /// Remove local files and remote files if needed diff --git a/src/Storages/MergeTree/MergeTreePartsMover.cpp b/src/Storages/MergeTree/MergeTreePartsMover.cpp index 8fa4ac6c78a..59784935c7b 100644 --- a/src/Storages/MergeTree/MergeTreePartsMover.cpp +++ b/src/Storages/MergeTree/MergeTreePartsMover.cpp @@ -233,9 +233,15 @@ MergeTreePartsMover::TemporaryClonedPart MergeTreePartsMover::clonePart(const Me disk->createDirectories(path_to_clone); - cloned_part_storage = data->tryToFetchIfShared(*part, disk, fs::path(path_to_clone) / part->name); + auto zero_copy_part = data->tryToFetchIfShared(*part, disk, fs::path(path_to_clone) / part->name); - if (!cloned_part_storage) + if (zero_copy_part) + { + /// FIXME for some reason we cannot just use this part, we have to re-create it through MergeTreeDataPartBuilder + zero_copy_part->is_temp = false; /// Do not remove it in dtor + cloned_part_storage = zero_copy_part->getDataPartStoragePtr(); + } + else { LOG_INFO(log, "Part {} was not fetched, we are the first who move it to another disk, so we will copy it", part->name); cloned_part_storage = part->getDataPartStorage().clonePart(path_to_clone, part->getDataPartStorage().getPartDirectory(), disk, log); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index bb99e21e4ab..e96049a456a 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1972,7 +1972,7 @@ bool StorageReplicatedMergeTree::executeFetch(LogEntry & entry, bool need_to_che } -MutableDataPartStoragePtr StorageReplicatedMergeTree::executeFetchShared( +MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::executeFetchShared( const String & source_replica, const String & new_part_name, const DiskPtr & disk, @@ -4444,7 +4444,7 @@ bool StorageReplicatedMergeTree::fetchPart( } -MutableDataPartStoragePtr StorageReplicatedMergeTree::fetchExistsPart( +MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::fetchExistsPart( const String & part_name, const StorageMetadataPtr & metadata_snapshot, const String & source_replica_path, @@ -4550,7 +4550,7 @@ MutableDataPartStoragePtr StorageReplicatedMergeTree::fetchExistsPart( ProfileEvents::increment(ProfileEvents::ReplicatedPartFetches); LOG_DEBUG(log, "Fetched part {} from {}:{}", part_name, zookeeper_name, source_replica_path); - return part->getDataPartStoragePtr(); + return part; } void StorageReplicatedMergeTree::startup() @@ -8868,7 +8868,7 @@ std::pair StorageReplicatedMergeTree::unlockSharedDataByID( } -MutableDataPartStoragePtr StorageReplicatedMergeTree::tryToFetchIfShared( +MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::tryToFetchIfShared( const IMergeTreeDataPart & part, const DiskPtr & disk, const String & path) diff --git a/src/Storages/StorageReplicatedMergeTree.h 
b/src/Storages/StorageReplicatedMergeTree.h index c08e05090b1..3ba5c61d1b0 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -243,7 +243,7 @@ public: bool canExecuteFetch(const ReplicatedMergeTreeLogEntry & entry, String & disable_reason) const; /// Fetch part only when it stored on shared storage like S3 - MutableDataPartStoragePtr executeFetchShared(const String & source_replica, const String & new_part_name, const DiskPtr & disk, const String & path); + MutableDataPartPtr executeFetchShared(const String & source_replica, const String & new_part_name, const DiskPtr & disk, const String & path); /// Lock part in zookeeper for use shared data in several nodes void lockSharedData(const IMergeTreeDataPart & part, bool replace_existing_lock, std::optional hardlinked_files) const override; @@ -285,7 +285,7 @@ public: MergeTreeDataFormatVersion data_format_version); /// Fetch part only if some replica has it on shared storage like S3 - MutableDataPartStoragePtr tryToFetchIfShared(const IMergeTreeDataPart & part, const DiskPtr & disk, const String & path) override; + MutableDataPartPtr tryToFetchIfShared(const IMergeTreeDataPart & part, const DiskPtr & disk, const String & path) override; /// Get best replica having this partition on a same type remote disk String getSharedDataReplica(const IMergeTreeDataPart & part, DataSourceType data_source_type) const; @@ -716,7 +716,7 @@ private: * Used for replace local part on the same s3-shared part in hybrid storage. * Returns false if part is already fetching right now. */ - MutableDataPartStoragePtr fetchExistsPart( + MutableDataPartPtr fetchExistsPart( const String & part_name, const StorageMetadataPtr & metadata_snapshot, const String & replica_path, diff --git a/tests/integration/test_s3_zero_copy_ttl/configs/s3.xml b/tests/integration/test_s3_zero_copy_ttl/configs/s3.xml index 5ffeb0c0d01..e179c848be1 100644 --- a/tests/integration/test_s3_zero_copy_ttl/configs/s3.xml +++ b/tests/integration/test_s3_zero_copy_ttl/configs/s3.xml @@ -33,4 +33,6 @@ true + + true diff --git a/tests/integration/test_s3_zero_copy_ttl/test.py b/tests/integration/test_s3_zero_copy_ttl/test.py index 7dcf3734653..04bff4a44fb 100644 --- a/tests/integration/test_s3_zero_copy_ttl/test.py +++ b/tests/integration/test_s3_zero_copy_ttl/test.py @@ -35,7 +35,7 @@ def test_ttl_move_and_s3(started_cluster): ORDER BY id PARTITION BY id TTL date TO DISK 's3_disk' - SETTINGS storage_policy='s3_and_default' + SETTINGS storage_policy='s3_and_default', temporary_directories_lifetime=1 """.format( i ) From 9231bd9f9d544cb6b82e52b0327f25323aa644ab Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Thu, 22 Jun 2023 19:48:13 +0000 Subject: [PATCH 0674/1997] Process broken tests in report --- tests/analyzer_integration_broken_tests.txt | 138 ++++++++++++++++++++ tests/integration/ci-runner.py | 41 ++++-- 2 files changed, 169 insertions(+), 10 deletions(-) create mode 100644 tests/analyzer_integration_broken_tests.txt diff --git a/tests/analyzer_integration_broken_tests.txt b/tests/analyzer_integration_broken_tests.txt new file mode 100644 index 00000000000..3aa3b0dff2a --- /dev/null +++ b/tests/analyzer_integration_broken_tests.txt @@ -0,0 +1,138 @@ +test_access_for_functions/test.py::test_access_rights_for_function +test_backward_compatibility/test_normalized_count_comparison.py::test_select_aggregate_alias_column +test_concurrent_backups_s3/test.py::test_concurrent_backups +test_distributed_ddl/test.py::test_default_database[configs] 
+test_distributed_ddl/test.py::test_default_database[configs_secure] +test_distributed_ddl/test.py::test_on_server_fail[configs] +test_distributed_ddl/test.py::test_on_server_fail[configs_secure] +test_distributed_insert_backward_compatibility/test.py::test_distributed_in_tuple +test_distributed_inter_server_secret/test.py::test_per_user_inline_settings_secure_cluster[default-] +test_distributed_inter_server_secret/test.py::test_per_user_inline_settings_secure_cluster[nopass-] +test_distributed_inter_server_secret/test.py::test_per_user_inline_settings_secure_cluster[pass-foo] +test_distributed_inter_server_secret/test.py::test_per_user_protocol_settings_secure_cluster[default-] +test_distributed_inter_server_secret/test.py::test_per_user_protocol_settings_secure_cluster[nopass-] +test_distributed_inter_server_secret/test.py::test_per_user_protocol_settings_secure_cluster[pass-foo] +test_distributed_inter_server_secret/test.py::test_user_insecure_cluster[default-] +test_distributed_inter_server_secret/test.py::test_user_insecure_cluster[nopass-] +test_distributed_inter_server_secret/test.py::test_user_insecure_cluster[pass-foo] +test_distributed_inter_server_secret/test.py::test_user_secure_cluster[default-] +test_distributed_inter_server_secret/test.py::test_user_secure_cluster[nopass-] +test_distributed_inter_server_secret/test.py::test_user_secure_cluster[pass-foo] +test_distributed_inter_server_secret/test.py::test_user_secure_cluster_from_backward[default-] +test_distributed_inter_server_secret/test.py::test_user_secure_cluster_from_backward[nopass-] +test_distributed_inter_server_secret/test.py::test_user_secure_cluster_from_backward[pass-foo] +test_distributed_inter_server_secret/test.py::test_user_secure_cluster_with_backward[default-] +test_distributed_inter_server_secret/test.py::test_user_secure_cluster_with_backward[nopass-] +test_distributed_inter_server_secret/test.py::test_user_secure_cluster_with_backward[pass-foo] +test_distributed_load_balancing/test.py::test_distributed_replica_max_ignored_errors +test_distributed_load_balancing/test.py::test_load_balancing_default +test_distributed_load_balancing/test.py::test_load_balancing_priority_round_robin[dist_priority] +test_distributed_load_balancing/test.py::test_load_balancing_priority_round_robin[dist_priority_negative] +test_distributed_load_balancing/test.py::test_load_balancing_round_robin +test_backward_compatibility/test.py::test_backward_compatability1 +test_backward_compatibility/test_aggregate_fixed_key.py::test_two_level_merge +test_backward_compatibility/test_aggregate_function_state.py::test_backward_compatability_for_avg +test_backward_compatibility/test_aggregate_function_state.py::test_backward_compatability_for_uniq_exact[1000] +test_backward_compatibility/test_aggregate_function_state.py::test_backward_compatability_for_uniq_exact[500000] +test_backward_compatibility/test_aggregate_function_state.py::test_backward_compatability_for_uniq_exact_variadic[1000] +test_backward_compatibility/test_aggregate_function_state.py::test_backward_compatability_for_uniq_exact_variadic[500000] +test_backward_compatibility/test_ip_types_binary_compatibility.py::test_ip_types_binary_compatibility +test_backward_compatibility/test_select_aggregate_alias_column.py::test_select_aggregate_alias_column +test_backward_compatibility/test_short_strings_aggregation.py::test_backward_compatability +test_mask_sensitive_info/test.py::test_encryption_functions +test_merge_table_over_distributed/test.py::test_global_in 
+test_merge_table_over_distributed/test.py::test_select_table_name_from_merge_over_distributed +test_mutations_with_merge_tree/test.py::test_mutations_with_merge_background_task +test_passing_max_partitions_to_read_remotely/test.py::test_default_database_on_cluster +test_row_policy/test.py::test_change_of_users_xml_changes_row_policies +test_row_policy/test.py::test_change_of_users_xml_changes_row_policies +test_row_policy/test.py::test_dcl_introspection +test_row_policy/test.py::test_dcl_introspection +test_row_policy/test.py::test_dcl_management +test_row_policy/test.py::test_dcl_management +test_row_policy/test.py::test_dcl_users_with_policies_from_users_xml +test_row_policy/test.py::test_dcl_users_with_policies_from_users_xml +test_row_policy/test.py::test_grant_create_row_policy +test_row_policy/test.py::test_grant_create_row_policy +test_row_policy/test.py::test_introspection +test_row_policy/test.py::test_introspection +test_row_policy/test.py::test_join +test_row_policy/test.py::test_join +test_row_policy/test.py::test_miscellaneous_engines +test_row_policy/test.py::test_miscellaneous_engines +test_row_policy/test.py::test_policy_from_users_xml_affects_only_user_assigned +test_row_policy/test.py::test_policy_from_users_xml_affects_only_user_assigned +test_row_policy/test.py::test_policy_on_distributed_table_via_role +test_row_policy/test.py::test_policy_on_distributed_table_via_role +test_row_policy/test.py::test_reload_users_xml_by_timer +test_row_policy/test.py::test_reload_users_xml_by_timer +test_row_policy/test.py::test_row_policy_filter_with_subquery +test_row_policy/test.py::test_row_policy_filter_with_subquery +test_row_policy/test.py::test_smoke +test_row_policy/test.py::test_smoke +test_row_policy/test.py::test_some_users_without_policies +test_row_policy/test.py::test_some_users_without_policies +test_row_policy/test.py::test_tags_with_db_and_table_names +test_row_policy/test.py::test_tags_with_db_and_table_names +test_row_policy/test.py::test_throwif_error_in_prewhere_with_same_condition_as_filter +test_row_policy/test.py::test_throwif_error_in_prewhere_with_same_condition_as_filter +test_row_policy/test.py::test_throwif_error_in_where_with_same_condition_as_filter +test_row_policy/test.py::test_throwif_error_in_where_with_same_condition_as_filter +test_row_policy/test.py::test_throwif_in_prewhere_doesnt_expose_restricted_data +test_row_policy/test.py::test_throwif_in_prewhere_doesnt_expose_restricted_data +test_row_policy/test.py::test_throwif_in_where_doesnt_expose_restricted_data +test_row_policy/test.py::test_throwif_in_where_doesnt_expose_restricted_data +test_row_policy/test.py::test_users_xml_is_readonly +test_row_policy/test.py::test_users_xml_is_readonly +test_row_policy/test.py::test_with_prewhere +test_row_policy/test.py::test_with_prewhere +test_settings_constraints_distributed/test.py::test_select_clamps_settings +test_backward_compatibility/test_cte_distributed.py::test_cte_distributed +test_compression_codec_read/test.py::test_default_codec_read +test_dictionaries_update_and_reload/test.py::test_reload_after_fail_in_cache_dictionary +test_distributed_type_object/test.py::test_distributed_type_object +test_materialized_mysql_database/test.py::test_select_without_columns_5_7 +test_materialized_mysql_database/test.py::test_select_without_columns_8_0 +test_shard_level_const_function/test.py::test_remote +test_storage_postgresql/test.py::test_postgres_select_insert +test_storage_rabbitmq/test.py::test_rabbitmq_materialized_view 
+test_system_merges/test.py::test_mutation_simple[] +test_system_merges/test.py::test_mutation_simple[replicated] +test_backward_compatibility/test_insert_profile_events.py::test_new_client_compatible +test_backward_compatibility/test_insert_profile_events.py::test_old_client_compatible +test_backward_compatibility/test_vertical_merges_from_compact_parts.py::test_vertical_merges_from_compact_parts +test_disk_over_web_server/test.py::test_cache[node2] +test_disk_over_web_server/test.py::test_incorrect_usage +test_disk_over_web_server/test.py::test_replicated_database +test_disk_over_web_server/test.py::test_unavailable_server +test_disk_over_web_server/test.py::test_usage[node2] +test_distributed_backward_compatability/test.py::test_distributed_in_tuple +test_executable_table_function/test.py::test_executable_function_input_python +test_groupBitmapAnd_on_distributed/test_groupBitmapAndState_on_distributed_table.py::test_groupBitmapAndState_on_different_version_nodes +test_groupBitmapAnd_on_distributed/test_groupBitmapAndState_on_distributed_table.py::test_groupBitmapAndState_on_distributed_table +test_settings_profile/test.py::test_show_profiles +test_sql_user_defined_functions_on_cluster/test.py::test_sql_user_defined_functions_on_cluster +test_backward_compatibility/test_functions.py::test_aggregate_states +test_backward_compatibility/test_functions.py::test_string_functions +test_default_compression_codec/test.py::test_default_codec_for_compact_parts +test_default_compression_codec/test.py::test_default_codec_multiple +test_default_compression_codec/test.py::test_default_codec_single +test_default_compression_codec/test.py::test_default_codec_version_update +test_postgresql_protocol/test.py::test_python_client +test_quota/test.py::test_add_remove_interval +test_quota/test.py::test_add_remove_quota +test_quota/test.py::test_consumption_of_show_clusters +test_quota/test.py::test_consumption_of_show_databases +test_quota/test.py::test_consumption_of_show_privileges +test_quota/test.py::test_consumption_of_show_processlist +test_quota/test.py::test_consumption_of_show_tables +test_quota/test.py::test_dcl_introspection +test_quota/test.py::test_dcl_management +test_quota/test.py::test_exceed_quota +test_quota/test.py::test_query_inserts +test_quota/test.py::test_quota_from_users_xml +test_quota/test.py::test_reload_users_xml_by_timer +test_quota/test.py::test_simpliest_quota +test_quota/test.py::test_tracking_quota +test_quota/test.py::test_users_xml_is_readonly +test_replicated_merge_tree_compatibility/test.py::test_replicated_merge_tree_defaults_compatibili \ No newline at end of file diff --git a/tests/integration/ci-runner.py b/tests/integration/ci-runner.py index 59c3c82499c..5b986251c57 100755 --- a/tests/integration/ci-runner.py +++ b/tests/integration/ci-runner.py @@ -485,7 +485,7 @@ class ClickhouseIntegrationTestsRunner: result[test_file].append(test) return result - def _update_counters(self, main_counters, current_counters): + def _update_counters(self, main_counters, current_counters, broken_tests): for test in current_counters["PASSED"]: if ( test not in main_counters["PASSED"] @@ -498,10 +498,17 @@ class ClickhouseIntegrationTestsRunner: if test in main_counters["ERROR"]: main_counters["ERROR"].remove(test) is_flaky = True + if test in main_counters["BROKEN"]: + main_counters["BROKEN"].remove(test) + is_flaky = True + if is_flaky: main_counters["FLAKY"].append(test) else: - main_counters["PASSED"].append(test) + if test not in broken_tests: + 
main_counters["PASSED"].append(test) + else: + main_counters["NOT_FAILED"].append(test) for state in ("ERROR", "FAILED"): for test in current_counters[state]: @@ -511,8 +518,12 @@ class ClickhouseIntegrationTestsRunner: main_counters["PASSED"].remove(test) main_counters["FLAKY"].append(test) continue - if test not in main_counters[state]: - main_counters[state].append(test) + if test not in broken_tests: + if test not in main_counters[state]: + main_counters[state].append(test) + else: + if test not in main_counters["BROKEN"]: + main_counters["BROKEN"].append(test) for state in ("SKIPPED",): for test in current_counters[state]: @@ -570,11 +581,11 @@ class ClickhouseIntegrationTestsRunner: return res def try_run_test_group( - self, repo_path, test_group, tests_in_group, num_tries, num_workers + self, repo_path, test_group, tests_in_group, num_tries, num_workers, broken_tests ): try: return self.run_test_group( - repo_path, test_group, tests_in_group, num_tries, num_workers + repo_path, test_group, tests_in_group, num_tries, num_workers, broken_tests ) except Exception as e: logging.info("Failed to run {}:\n{}".format(str(test_group), str(e))) @@ -592,7 +603,7 @@ class ClickhouseIntegrationTestsRunner: return counters, tests_times, [] def run_test_group( - self, repo_path, test_group, tests_in_group, num_tries, num_workers + self, repo_path, test_group, tests_in_group, num_tries, num_workers, broken_tests ): counters = { "ERROR": [], @@ -600,6 +611,8 @@ class ClickhouseIntegrationTestsRunner: "FAILED": [], "SKIPPED": [], "FLAKY": [], + "BROKEN": [], + "NOT_FAILED": [], } tests_times = defaultdict(float) @@ -705,7 +718,7 @@ class ClickhouseIntegrationTestsRunner: ) times_lines = parse_test_times(info_path) new_tests_times = get_test_times(times_lines) - self._update_counters(counters, new_counters) + self._update_counters(counters, new_counters, broken_tests) for test_name, test_time in new_tests_times.items(): tests_times[test_name] = test_time @@ -778,7 +791,7 @@ class ClickhouseIntegrationTestsRunner: final_retry += 1 logging.info("Running tests for the %s time", i) counters, tests_times, log_paths = self.try_run_test_group( - repo_path, "bugfix" if should_fail else "flaky", tests_to_run, 1, 1 + repo_path, "bugfix" if should_fail else "flaky", tests_to_run, 1, 1, [] ) logs += log_paths if counters["FAILED"]: @@ -899,6 +912,8 @@ class ClickhouseIntegrationTestsRunner: "FAILED": [], "SKIPPED": [], "FLAKY": [], + "BROKEN": [], + "NOT_FAILED": [], } tests_times = defaultdict(float) tests_log_paths = defaultdict(list) @@ -910,10 +925,16 @@ class ClickhouseIntegrationTestsRunner: logging.info("Shuffling test groups") random.shuffle(items_to_run) + broken_tests = list() + if self.use_analyzer: + with open(f"{repo_path}/tests/analyzer_integration_broken_tests.txt") as f: + broken_tests = f.read().splitlines() + logging.info(f"Broken tests in the list: {len(broken_tests)}") + for group, tests in items_to_run: logging.info("Running test group %s containing %s tests", group, len(tests)) group_counters, group_test_times, log_paths = self.try_run_test_group( - repo_path, group, tests, MAX_RETRY, NUM_WORKERS + repo_path, group, tests, MAX_RETRY, NUM_WORKERS, broken_tests ) total_tests = 0 for counter, value in group_counters.items(): From 532eba18a0f2e3c4d15d3334405756dc3aae1637 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 22 Jun 2023 19:58:58 +0000 Subject: [PATCH 0675/1997] Automatic style fix --- tests/integration/ci-runner.py | 23 ++++++++++++++++++++--- 1 file changed, 20 
insertions(+), 3 deletions(-) diff --git a/tests/integration/ci-runner.py b/tests/integration/ci-runner.py index 5b986251c57..6a6134d7204 100755 --- a/tests/integration/ci-runner.py +++ b/tests/integration/ci-runner.py @@ -581,11 +581,22 @@ class ClickhouseIntegrationTestsRunner: return res def try_run_test_group( - self, repo_path, test_group, tests_in_group, num_tries, num_workers, broken_tests + self, + repo_path, + test_group, + tests_in_group, + num_tries, + num_workers, + broken_tests, ): try: return self.run_test_group( - repo_path, test_group, tests_in_group, num_tries, num_workers, broken_tests + repo_path, + test_group, + tests_in_group, + num_tries, + num_workers, + broken_tests, ) except Exception as e: logging.info("Failed to run {}:\n{}".format(str(test_group), str(e))) @@ -603,7 +614,13 @@ class ClickhouseIntegrationTestsRunner: return counters, tests_times, [] def run_test_group( - self, repo_path, test_group, tests_in_group, num_tries, num_workers, broken_tests + self, + repo_path, + test_group, + tests_in_group, + num_tries, + num_workers, + broken_tests, ): counters = { "ERROR": [], From ab903d395e6979f3885f2689bdb216986a3a4ffd Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Thu, 22 Jun 2023 20:43:26 +0000 Subject: [PATCH 0676/1997] Place new test into separate sql --- tests/queries/0_stateless/00718_format_datetime.reference | 5 ----- tests/queries/0_stateless/00718_format_datetime.sql | 5 ----- tests/queries/0_stateless/00718_format_datetime_1.reference | 5 +++++ tests/queries/0_stateless/00718_format_datetime_1.sql | 5 +++++ 4 files changed, 10 insertions(+), 10 deletions(-) create mode 100644 tests/queries/0_stateless/00718_format_datetime_1.reference create mode 100644 tests/queries/0_stateless/00718_format_datetime_1.sql diff --git a/tests/queries/0_stateless/00718_format_datetime.reference b/tests/queries/0_stateless/00718_format_datetime.reference index a51134348cc..50874ac9b2e 100644 --- a/tests/queries/0_stateless/00718_format_datetime.reference +++ b/tests/queries/0_stateless/00718_format_datetime.reference @@ -61,11 +61,6 @@ no formatting pattern no formatting pattern 2022-12-08 18:11:29.123400000 2022-12-08 18:11:29.1 2022-12-08 18:11:29.000000 -1900-01-01 00:00:00.000 -1962-12-08 18:11:29.123 -1969-12-31 23:59:59.999 -1970-01-01 00:00:00.000 -1970-01-01 00:00:00.001 2022-12-08 18:11:29.000000 2022-12-08 00:00:00.000000 2022-12-08 00:00:00.000000 diff --git a/tests/queries/0_stateless/00718_format_datetime.sql b/tests/queries/0_stateless/00718_format_datetime.sql index 14e43c31d9c..c0db6a4f64e 100644 --- a/tests/queries/0_stateless/00718_format_datetime.sql +++ b/tests/queries/0_stateless/00718_format_datetime.sql @@ -87,11 +87,6 @@ select formatDateTime(toDateTime64('2010-01-04 12:34:56.123456789', 9), '%f') SE select formatDateTime(toDateTime64('2022-12-08 18:11:29.1234', 9, 'UTC'), '%F %T.%f'); select formatDateTime(toDateTime64('2022-12-08 18:11:29.1234', 1, 'UTC'), '%F %T.%f'); select formatDateTime(toDateTime64('2022-12-08 18:11:29.1234', 0, 'UTC'), '%F %T.%f'); -select formatDateTime(toDateTime64('1900-01-01 00:00:00.000', 3, 'UTC'), '%F %T.%f'); -select formatDateTime(toDateTime64('1962-12-08 18:11:29.123', 3, 'UTC'), '%F %T.%f'); -select formatDateTime(toDateTime64('1969-12-31 23:59:59.999', 3, 'UTC'), '%F %T.%f'); -select formatDateTime(toDateTime64('1970-01-01 00:00:00.000', 3, 'UTC'), '%F %T.%f'); -select formatDateTime(toDateTime64('1970-01-01 00:00:00.001', 3, 'UTC'), '%F %T.%f'); select formatDateTime(toDateTime('2022-12-08 18:11:29', 
'UTC'), '%F %T.%f'); select formatDateTime(toDate32('2022-12-08 18:11:29', 'UTC'), '%F %T.%f'); select formatDateTime(toDate('2022-12-08 18:11:29', 'UTC'), '%F %T.%f'); diff --git a/tests/queries/0_stateless/00718_format_datetime_1.reference b/tests/queries/0_stateless/00718_format_datetime_1.reference new file mode 100644 index 00000000000..e495b69ddfc --- /dev/null +++ b/tests/queries/0_stateless/00718_format_datetime_1.reference @@ -0,0 +1,5 @@ +1900-01-01 00:00:00.000 +1962-12-08 18:11:29.123 +1969-12-31 23:59:59.999 +1970-01-01 00:00:00.000 +1970-01-01 00:00:00.001 diff --git a/tests/queries/0_stateless/00718_format_datetime_1.sql b/tests/queries/0_stateless/00718_format_datetime_1.sql new file mode 100644 index 00000000000..855b0506f44 --- /dev/null +++ b/tests/queries/0_stateless/00718_format_datetime_1.sql @@ -0,0 +1,5 @@ +select formatDateTime(toDateTime64('1900-01-01 00:00:00.000', 3, 'UTC'), '%F %T.%f'); +select formatDateTime(toDateTime64('1962-12-08 18:11:29.123', 3, 'UTC'), '%F %T.%f'); +select formatDateTime(toDateTime64('1969-12-31 23:59:59.999', 3, 'UTC'), '%F %T.%f'); +select formatDateTime(toDateTime64('1970-01-01 00:00:00.000', 3, 'UTC'), '%F %T.%f'); +select formatDateTime(toDateTime64('1970-01-01 00:00:00.001', 3, 'UTC'), '%F %T.%f'); From 506d52358a1262932df6d41f4dc97503c697038f Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 22 Jun 2023 20:52:50 +0000 Subject: [PATCH 0677/1997] Query Cache: Remove confusing defaults in server cfg --- programs/server/config.xml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/programs/server/config.xml b/programs/server/config.xml index d18b4cb2ac9..50db5fc4af6 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -1542,14 +1542,6 @@ --> - - - - - - - - + + + 1073741824 + 1024 + 1048576 + 30000000 + + + RejectCertificateHandler + @@ -25,12 +29,9 @@ true sslv2,sslv3 true - - RejectCertificateHandler - \ No newline at end of file From c4ea7ab5b15add3d3b69412224f9c53e30be1661 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 25 Jun 2023 09:31:09 +0200 Subject: [PATCH 0784/1997] Attempt to fix test_ssl_cert_authentication --- programs/server/config.xml | 7 +++++++ .../test_ssl_cert_authentication/configs/ssl_config.xml | 4 ---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/programs/server/config.xml b/programs/server/config.xml index 2f69f23a718..acd6d92a896 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -282,6 +282,13 @@ true sslv2,sslv3 true + + + + RejectCertificateHandler + diff --git a/tests/integration/test_ssl_cert_authentication/configs/ssl_config.xml b/tests/integration/test_ssl_cert_authentication/configs/ssl_config.xml index d0b58d984bf..24c9eb8891f 100644 --- a/tests/integration/test_ssl_cert_authentication/configs/ssl_config.xml +++ b/tests/integration/test_ssl_cert_authentication/configs/ssl_config.xml @@ -18,10 +18,6 @@ /etc/clickhouse-server/config.d/server-key.pem /etc/clickhouse-server/config.d/ca-cert.pem relaxed - - - RejectCertificateHandler - From 5871ca0836e9a21790f6bf3cff2d3134523f3a08 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 24 Jun 2023 20:38:03 +0200 Subject: [PATCH 0785/1997] Fix performance tests due to warnings from jemalloc about Per-CPU arena disabled jemalloc can show the following warning: Number of CPUs detected is not deterministic. 
Per-CPU arena disabled It will be shown if one of the following returns different number of CPUs: - _SC_NPROCESSORS_ONLN - _SC_NPROCESSORS_CONF - sched_getaffinity() And actually for my CPU linux returns different numbers, because there are more possible CPUs then online, from dmesg: smpboot: Allowing 128 CPUs, 64 hotplug CPUs And from sysfs: # grep . /sys/devices/system/cpu/{possible,online,offline} /sys/devices/system/cpu/possible:0-127 /sys/devices/system/cpu/online:0-63 /sys/devices/system/cpu/offline:64-127 From ACPI: # acpidump -o acpi # acpixtract -a acpi # iasl -d *.dat # grep -e 'Processor Enabled' apic.dsl | sort | uniq -c 64 Processor Enabled : 0 64 Processor Enabled : 1 So I guess this is the same as what happened in this perf run [1]. [1]: https://s3.amazonaws.com/clickhouse-test-reports/51360/5d43a64112711b339b82b1c0e8df7882546a1a3c/performance_comparison_[4_4]/report.html P.S. personally I, just use cmdline=possible_cpus=64 to fix this for my setup. Signed-off-by: Azat Khuzhin --- docker/test/performance-comparison/compare.sh | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 293ad9ac411..798d2a40b12 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -14,6 +14,13 @@ LEFT_SERVER_PORT=9001 # patched version RIGHT_SERVER_PORT=9002 +# abort_conf -- abort if some options is not recognized +# abort -- abort if something is not right in the env (i.e. per-cpu arenas does not work) +# narenas -- set them explicitly to avoid disabling per-cpu arena in env +# that returns different number of CPUs for some of the following +# _SC_NPROCESSORS_ONLN/_SC_NPROCESSORS_CONF/sched_getaffinity +export MALLOC_CONF="abort_conf:true,abort:true,narenas:$(nproc --all)" + function wait_for_server # port, pid { for _ in {1..60} @@ -109,10 +116,6 @@ function restart while pkill -f clickhouse-serv ; do echo . ; sleep 1 ; done echo all killed - # Change the jemalloc settings here. 
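
A note on the warning quoted in the commit message above: jemalloc emits it when the CPU counts it samples from different sources disagree, and it then falls back to shared arenas. A minimal way to inspect those three sources on Linux/glibc (an illustration only, not jemalloc's own check; the program is hypothetical):

    #ifndef _GNU_SOURCE
    #define _GNU_SOURCE
    #endif
    #include <sched.h>
    #include <unistd.h>
    #include <cstdio>

    int main()
    {
        long online = sysconf(_SC_NPROCESSORS_ONLN);      /// CPUs currently online
        long configured = sysconf(_SC_NPROCESSORS_CONF);  /// CPUs configured, incl. hotplug/offline
        long affinity = -1;
        cpu_set_t set;
        if (sched_getaffinity(0, sizeof(set), &set) == 0)
            affinity = CPU_COUNT(&set);                   /// CPUs this process may run on
        /// When these values disagree, jemalloc disables per-CPU arenas; pinning narenas
        /// via MALLOC_CONF, as compare.sh now does, keeps both server runs comparable.
        printf("online=%ld configured=%ld affinity=%ld\n", online, configured, affinity);
        return 0;
    }
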
- # https://github.com/jemalloc/jemalloc/wiki/Getting-Started - export MALLOC_CONF="confirm_conf:true" - set -m # Spawn servers in their own process groups local left_server_opts=( @@ -147,8 +150,6 @@ function restart set +m - unset MALLOC_CONF - wait_for_server $LEFT_SERVER_PORT $left_pid echo left ok From 769169f820bffcf99539f654e01640e419582f92 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Sun, 25 Jun 2023 18:24:02 +0800 Subject: [PATCH 0786/1997] fix heap overflow in read buffer from hdfs --- src/Storages/HDFS/ReadBufferFromHDFS.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.cpp b/src/Storages/HDFS/ReadBufferFromHDFS.cpp index ee8e0764db0..483f0894cc4 100644 --- a/src/Storages/HDFS/ReadBufferFromHDFS.cpp +++ b/src/Storages/HDFS/ReadBufferFromHDFS.cpp @@ -89,7 +89,7 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory {})", file_offset, read_until_position - 1); - num_bytes_to_read = read_until_position - file_offset; + num_bytes_to_read = std::min(read_until_position - file_offset, internal_buffer.size()); } else { From 79a03432bf688c9f6f29554f7b9548e2b36b2178 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Sun, 25 Jun 2023 13:27:07 +0200 Subject: [PATCH 0787/1997] add test, add comment --- src/Common/LoggingFormatStringHelpers.h | 5 ++++- src/Daemon/BaseDaemon.cpp | 2 ++ src/IO/tests/gtest_writebuffer_s3.cpp | 28 +++++++++++++++++++++++++ 3 files changed, 34 insertions(+), 1 deletion(-) diff --git a/src/Common/LoggingFormatStringHelpers.h b/src/Common/LoggingFormatStringHelpers.h index 5dece8cd6ea..82c260e52a6 100644 --- a/src/Common/LoggingFormatStringHelpers.h +++ b/src/Common/LoggingFormatStringHelpers.h @@ -192,7 +192,10 @@ public: }; /// This wrapper helps to avoid too noisy log messages from similar objects. -/// For the value logger_name it remembers when such a message was logged the last time. +/// Once an instance of LogSeriesLimiter type is created the decision is done +/// All followed message which use this instance is either printed or muted all together. +/// LogSeriesLimiter differs from LogFrequencyLimiterIml in a way that +/// LogSeriesLimiter is useful for accept or mute series of logs when LogFrequencyLimiterIml works for each line independently. class LogSeriesLimiter { static std::mutex mutex; diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index bfd5568b71d..6a6175b802f 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -417,6 +417,8 @@ private: { SentryWriter::onFault(sig, error_message, stack_trace); +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunreachable-code" /// Advice the user to send it manually. 
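
On the ReadBufferFromHDFS fix above: the old code sized the next read as the whole remaining range, read_until_position - file_offset, which can be larger than the internal buffer and overflow it on the heap. A minimal sketch of the clamping pattern (bytesToRead is a hypothetical helper, not the actual buffer code):

    #include <algorithm>
    #include <cstddef>

    /// How many bytes the next read may fetch into a buffer of fixed capacity.
    /// The surrounding code has already validated that file_offset < read_until_position.
    size_t bytesToRead(size_t file_offset, size_t read_until_position, size_t buffer_capacity)
    {
        size_t remaining = read_until_position - file_offset;
        return std::min(remaining, buffer_capacity); /// the fix: never read past the buffer
    }
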
if constexpr (std::string_view(VERSION_OFFICIAL).contains("official build")) { diff --git a/src/IO/tests/gtest_writebuffer_s3.cpp b/src/IO/tests/gtest_writebuffer_s3.cpp index cd38291fb31..a4433fee60e 100644 --- a/src/IO/tests/gtest_writebuffer_s3.cpp +++ b/src/IO/tests/gtest_writebuffer_s3.cpp @@ -1119,4 +1119,32 @@ TEST_P(SyncAsync, IncreaseLimited) { } } +TEST_P(SyncAsync, StrictUploadPartSize) { + getSettings().s3_check_objects_after_upload = false; + + { + getSettings().s3_max_single_part_upload_size = 10; + getSettings().s3_strict_upload_part_size = 11; + + { + auto counters = MockS3::EventCounts{.multiUploadCreate = 1, .multiUploadComplete = 1, .uploadParts = 6}; + runSimpleScenario(counters, 66); + + auto actual_parts_sizes = MockS3::BucketMemStore::GetPartSizes(getCompletedPartUploads().back().second); + ASSERT_THAT(actual_parts_sizes, testing::ElementsAre(11, 11, 11, 11, 11, 11)); + + // parts: 11 22 33 44 55 66 + // size: 11 11 11 11 11 11 + } + + { + auto counters = MockS3::EventCounts{.multiUploadCreate = 1, .multiUploadComplete = 1, .uploadParts = 7}; + runSimpleScenario(counters, 67); + + auto actual_parts_sizes = MockS3::BucketMemStore::GetPartSizes(getCompletedPartUploads().back().second); + ASSERT_THAT(actual_parts_sizes, testing::ElementsAre(11, 11, 11, 11, 11, 11, 1)); + } + } +} + #endif From f13752a2805baf77a00d1ad0f50094e553a27f17 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Sun, 25 Jun 2023 13:29:41 +0200 Subject: [PATCH 0788/1997] delete 02720_s3_strict_upload_part_size --- ...02720_s3_strict_upload_part_size.reference | 4 --- .../02720_s3_strict_upload_part_size.sh | 25 ------------------- 2 files changed, 29 deletions(-) delete mode 100644 tests/queries/0_stateless/02720_s3_strict_upload_part_size.reference delete mode 100755 tests/queries/0_stateless/02720_s3_strict_upload_part_size.sh diff --git a/tests/queries/0_stateless/02720_s3_strict_upload_part_size.reference b/tests/queries/0_stateless/02720_s3_strict_upload_part_size.reference deleted file mode 100644 index f7c4ece5f1f..00000000000 --- a/tests/queries/0_stateless/02720_s3_strict_upload_part_size.reference +++ /dev/null @@ -1,4 +0,0 @@ -part size: 6000001, part number: 1 -part size: 6000001, part number: 2 -part size: 6000001, part number: 3 -part size: 2971517, part number: 4 diff --git a/tests/queries/0_stateless/02720_s3_strict_upload_part_size.sh b/tests/queries/0_stateless/02720_s3_strict_upload_part_size.sh deleted file mode 100755 index 9799ef0478a..00000000000 --- a/tests/queries/0_stateless/02720_s3_strict_upload_part_size.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env bash -# Tags: no-fasttest, long -# Tag no-fasttest: requires S3 - -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. 
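
The StrictUploadPartSize unit test above pins down how a payload is sliced once s3_strict_upload_part_size forces fixed-size parts. A sketch of the expected slicing under that assumption (splitStrict is an illustrative helper, not WriteBufferFromS3 itself):

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    /// Split `total` bytes into parts of exactly `part_size`, plus a smaller tail if needed.
    std::vector<size_t> splitStrict(size_t total, size_t part_size)
    {
        std::vector<size_t> parts;
        for (size_t offset = 0; offset < total; offset += part_size)
            parts.push_back(std::min(part_size, total - offset));
        return parts;
    }
    /// splitStrict(66, 11) gives six parts of 11; splitStrict(67, 11) adds a 1-byte tail,
    /// matching the ElementsAre expectations in the test.
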
"$CUR_DIR"/../shell_config.sh - -in="$CUR_DIR/$CLICKHOUSE_TEST_UNIQUE_NAME.in" -out="$CUR_DIR/$CLICKHOUSE_TEST_UNIQUE_NAME.out" -log="$CUR_DIR/$CLICKHOUSE_TEST_UNIQUE_NAME.log" - -set -e -trap 'rm -f "${out:?}" "${in:?}" "${log:?}"' EXIT - -# Generate a file of 20MiB in size, with our part size it will have 4 parts -# NOTE: 1 byte is for new line, so 1023 not 1024 -$CLICKHOUSE_LOCAL -q "SELECT randomPrintableASCII(1023) FROM numbers(20*1024) FORMAT LineAsString" > "$in" - -$CLICKHOUSE_CLIENT --send_logs_level=trace --server_logs_file="$log" -q "INSERT INTO FUNCTION s3(s3_conn, filename='$CLICKHOUSE_TEST_UNIQUE_NAME', format='LineAsString', structure='line String') FORMAT LineAsString" --s3_strict_upload_part_size=6000001 < "$in" -grep -F '' "$log" || : -grep -o 'WriteBufferFromS3: writePart.*, part size: .*' "$log" | grep -o 'part size: .*' -$CLICKHOUSE_CLIENT -q "SELECT * FROM s3(s3_conn, filename='$CLICKHOUSE_TEST_UNIQUE_NAME', format='LineAsString', structure='line String') FORMAT LineAsString" > "$out" - -diff -q "$in" "$out" From cc3d27c0a30e40532541324b100f2584327b5ba3 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sun, 25 Jun 2023 15:14:29 +0000 Subject: [PATCH 0789/1997] clang-tidy fix --- src/Common/Exception.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/Exception.cpp b/src/Common/Exception.cpp index a43335ed8de..af48ce8fd99 100644 --- a/src/Common/Exception.cpp +++ b/src/Common/Exception.cpp @@ -164,7 +164,7 @@ std::string Exception::getStackTraceString() const { thread_stack_trace += "\nJob's origin stack trace:\n" + - StackTrace::toString(&frame_pointers[0], 0, std::ranges::find(frame_pointers, nullptr) - frame_pointers.begin()); + StackTrace::toString(frame_pointers.data(), 0, std::ranges::find(frame_pointers, nullptr) - frame_pointers.begin()); } ); From f1f0daa654755b2d12ec9548262adfe4e87fe9b6 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Thu, 15 Jun 2023 17:59:37 +0200 Subject: [PATCH 0790/1997] Show halves of checksums in "system.parts", "system.projection_parts" and error messages in the correct order. --- src/Compression/CompressedReadBufferBase.cpp | 4 ++-- src/Storages/Distributed/DistributedAsyncInsertHeader.cpp | 4 ++-- src/Storages/MergeTree/PartMetadataManagerWithCache.cpp | 8 ++++---- src/Storages/System/StorageSystemParts.cpp | 6 +++--- src/Storages/System/StorageSystemProjectionParts.cpp | 6 +++--- utils/checksum-for-compressed-block/main.cpp | 2 +- 6 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/Compression/CompressedReadBufferBase.cpp b/src/Compression/CompressedReadBufferBase.cpp index 662cd6bf337..bae52c8bece 100644 --- a/src/Compression/CompressedReadBufferBase.cpp +++ b/src/Compression/CompressedReadBufferBase.cpp @@ -49,8 +49,8 @@ static void validateChecksum(char * data, size_t size, const Checksum expected_c /// TODO mess up of endianness in error message. message << "Checksum doesn't match: corrupted data." - " Reference: " + getHexUIntLowercase(expected_checksum.low64) + getHexUIntLowercase(expected_checksum.high64) - + ". Actual: " + getHexUIntLowercase(calculated_checksum.low64) + getHexUIntLowercase(calculated_checksum.high64) + " Reference: " + getHexUIntLowercase(expected_checksum.high64) + getHexUIntLowercase(expected_checksum.low64) + + ". Actual: " + getHexUIntLowercase(calculated_checksum.high64) + getHexUIntLowercase(calculated_checksum.low64) + ". 
Size of compressed block: " + toString(size); const char * message_hardware_failure = "This is most likely due to hardware failure. " diff --git a/src/Storages/Distributed/DistributedAsyncInsertHeader.cpp b/src/Storages/Distributed/DistributedAsyncInsertHeader.cpp index d815f671652..e1b54304f23 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertHeader.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertHeader.cpp @@ -40,8 +40,8 @@ DistributedAsyncInsertHeader DistributedAsyncInsertHeader::read(ReadBufferFromFi { throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, "Checksum of extra info doesn't match: corrupted data. Reference: {}{}. Actual: {}{}.", - getHexUIntLowercase(expected_checksum.low64), getHexUIntLowercase(expected_checksum.high64), - getHexUIntLowercase(calculated_checksum.low64), getHexUIntLowercase(calculated_checksum.high64)); + getHexUIntLowercase(expected_checksum.high64), getHexUIntLowercase(expected_checksum.low64), + getHexUIntLowercase(calculated_checksum.high64), getHexUIntLowercase(calculated_checksum.low64)); } /// Read the parts of the header. diff --git a/src/Storages/MergeTree/PartMetadataManagerWithCache.cpp b/src/Storages/MergeTree/PartMetadataManagerWithCache.cpp index 7deae69750f..324bd4bbaee 100644 --- a/src/Storages/MergeTree/PartMetadataManagerWithCache.cpp +++ b/src/Storages/MergeTree/PartMetadataManagerWithCache.cpp @@ -250,8 +250,8 @@ std::unordered_map PartMetadataManagerWit ErrorCodes::CORRUPTED_DATA, "Checksums doesn't match in part {} for {}. Expected: {}. Found {}.", part->name, file_path, - getHexUIntUppercase(disk_checksum.low64) + getHexUIntUppercase(disk_checksum.high64), - getHexUIntUppercase(cache_checksums[i].low64) + getHexUIntUppercase(cache_checksums[i].high64)); + getHexUIntUppercase(disk_checksum.high64) + getHexUIntUppercase(disk_checksum.low64), + getHexUIntUppercase(cache_checksums[i].high64) + getHexUIntUppercase(cache_checksums[i].low64)); disk_checksums.push_back(disk_checksum); continue; @@ -287,8 +287,8 @@ std::unordered_map PartMetadataManagerWit ErrorCodes::CORRUPTED_DATA, "Checksums doesn't match in projection part {} {}. Expected: {}. 
Found {}.", part->name, proj_name, - getHexUIntUppercase(disk_checksum.low64) + getHexUIntUppercase(disk_checksum.high64), - getHexUIntUppercase(cache_checksums[i].low64) + getHexUIntUppercase(cache_checksums[i].high64)); + getHexUIntUppercase(disk_checksum.high64) + getHexUIntUppercase(disk_checksum.low64), + getHexUIntUppercase(cache_checksums[i].high64) + getHexUIntUppercase(cache_checksums[i].low64)); disk_checksums.push_back(disk_checksum); } return results; diff --git a/src/Storages/System/StorageSystemParts.cpp b/src/Storages/System/StorageSystemParts.cpp index 95bad0a20fe..b642f4b5088 100644 --- a/src/Storages/System/StorageSystemParts.cpp +++ b/src/Storages/System/StorageSystemParts.cpp @@ -252,17 +252,17 @@ void StorageSystemParts::processNextStorage( if (columns_mask[src_index++]) { auto checksum = helper.hash_of_all_files; - columns[res_index++]->insert(getHexUIntLowercase(checksum.low64) + getHexUIntLowercase(checksum.high64)); + columns[res_index++]->insert(getHexUIntLowercase(checksum.high64) + getHexUIntLowercase(checksum.low64)); } if (columns_mask[src_index++]) { auto checksum = helper.hash_of_uncompressed_files; - columns[res_index++]->insert(getHexUIntLowercase(checksum.low64) + getHexUIntLowercase(checksum.high64)); + columns[res_index++]->insert(getHexUIntLowercase(checksum.high64) + getHexUIntLowercase(checksum.low64)); } if (columns_mask[src_index++]) { auto checksum = helper.uncompressed_hash_of_compressed_files; - columns[res_index++]->insert(getHexUIntLowercase(checksum.low64) + getHexUIntLowercase(checksum.high64)); + columns[res_index++]->insert(getHexUIntLowercase(checksum.high64) + getHexUIntLowercase(checksum.low64)); } } diff --git a/src/Storages/System/StorageSystemProjectionParts.cpp b/src/Storages/System/StorageSystemProjectionParts.cpp index 6508d062d37..05c83747c4d 100644 --- a/src/Storages/System/StorageSystemProjectionParts.cpp +++ b/src/Storages/System/StorageSystemProjectionParts.cpp @@ -221,17 +221,17 @@ void StorageSystemProjectionParts::processNextStorage( if (columns_mask[src_index++]) { auto checksum = helper.hash_of_all_files; - columns[res_index++]->insert(getHexUIntLowercase(checksum.low64) + getHexUIntLowercase(checksum.high64)); + columns[res_index++]->insert(getHexUIntLowercase(checksum.high64) + getHexUIntLowercase(checksum.low64)); } if (columns_mask[src_index++]) { auto checksum = helper.hash_of_uncompressed_files; - columns[res_index++]->insert(getHexUIntLowercase(checksum.low64) + getHexUIntLowercase(checksum.high64)); + columns[res_index++]->insert(getHexUIntLowercase(checksum.high64) + getHexUIntLowercase(checksum.low64)); } if (columns_mask[src_index++]) { auto checksum = helper.uncompressed_hash_of_compressed_files; - columns[res_index++]->insert(getHexUIntLowercase(checksum.low64) + getHexUIntLowercase(checksum.high64)); + columns[res_index++]->insert(getHexUIntLowercase(checksum.high64) + getHexUIntLowercase(checksum.low64)); } } diff --git a/utils/checksum-for-compressed-block/main.cpp b/utils/checksum-for-compressed-block/main.cpp index 4f9923e7638..d30a3798820 100644 --- a/utils/checksum-for-compressed-block/main.cpp +++ b/utils/checksum-for-compressed-block/main.cpp @@ -45,7 +45,7 @@ int main(int, char **) { auto flipped = flipBit(str, pos); auto checksum = CityHash_v1_0_2::CityHash128(flipped.data(), flipped.size()); - std::cout << getHexUIntLowercase(checksum.first) << getHexUIntLowercase(checksum.second) << "\t" << pos / 8 << ", " << pos % 8 << "\n"; + std::cout << getHexUIntLowercase(checksum.high64) << 
getHexUIntLowercase(checksum.low64) << "\t" << pos / 8 << ", " << pos % 8 << "\n"; } return 0; From 5eeda0a0d24ae14a78da79273870aec9fa6bd8a0 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sat, 24 Jun 2023 22:17:53 +0200 Subject: [PATCH 0791/1997] Fix test 00961_checksums_in_system_parts_columns_table --- .../00961_checksums_in_system_parts_columns_table.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00961_checksums_in_system_parts_columns_table.reference b/tests/queries/0_stateless/00961_checksums_in_system_parts_columns_table.reference index 186f2feab79..4bf3cfe65a2 100644 --- a/tests/queries/0_stateless/00961_checksums_in_system_parts_columns_table.reference +++ b/tests/queries/0_stateless/00961_checksums_in_system_parts_columns_table.reference @@ -1 +1 @@ -20000101_1_1_0 test_00961 b5fce9c4ef1ca42ce4ed027389c208d2 fc3b062b646cd23d4c23d7f5920f89ae da96ff1e527a8a1f908ddf2b1d0af239 +20000101_1_1_0 test_00961 e4ed027389c208d2b5fce9c4ef1ca42c 4c23d7f5920f89aefc3b062b646cd23d 908ddf2b1d0af239da96ff1e527a8a1f From 71cded08ff2813f4c4757e71a773ca8cc0a293bf Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sun, 25 Jun 2023 14:51:29 +0200 Subject: [PATCH 0792/1997] Remove unnecessary include from wide_integer_impl.h --- base/base/wide_integer_impl.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/base/base/wide_integer_impl.h b/base/base/wide_integer_impl.h index dc6a49694ae..411841e6d9f 100644 --- a/base/base/wide_integer_impl.h +++ b/base/base/wide_integer_impl.h @@ -15,8 +15,6 @@ #include #include -#include - // NOLINTBEGIN(*) /// Use same extended double for all platforms @@ -29,6 +27,8 @@ using FromDoubleIntermediateType = long double; using FromDoubleIntermediateType = boost::multiprecision::cpp_bin_float_double_extended; #endif +namespace CityHash_v1_0_2 { struct uint128; } + namespace wide { @@ -283,8 +283,11 @@ struct integer::_impl } } - constexpr static void wide_integer_from_cityhash_uint128(integer & self, const CityHash_v1_0_2::uint128 & value) noexcept + template + constexpr static void wide_integer_from_cityhash_uint128(integer & self, const CityHashUInt128 & value) noexcept { + static_assert(sizeof(item_count) >= 2); + if constexpr (std::endian::native == std::endian::little) wide_integer_from_tuple_like(self, std::make_pair(value.low64, value.high64)); else From 477b707ff1765d8a2d62ad21b869d544b212de96 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 26 Jun 2023 02:02:24 +0200 Subject: [PATCH 0793/1997] Revert "Merge pull request #50951 from ZhiguoZh/20230607-toyear-fix" This reverts commit 6bbd0d144df01b07a28d3d9927ce2a6c1dc2ee56, reversing changes made to 74cb79769bbaa0c4619ca7cb382e6e37c8c7d7b5. 
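
For the checksum-ordering changes above: CityHash_v1_0_2::uint128 holds low64 and high64 halves, and the canonical 32-character hex form prints the high half first, so call sites that concatenated low-then-high produced reversed strings. A sketch of the intended formatting (Checksum and hex64 are stand-ins for the real uint128 and getHexUIntLowercase):

    #include <cstdint>
    #include <cstdio>
    #include <string>

    struct Checksum { uint64_t low64; uint64_t high64; }; /// same halves as CityHash uint128

    /// Stand-in for getHexUIntLowercase applied to one 64-bit half.
    std::string hex64(uint64_t v)
    {
        char buf[17];
        std::snprintf(buf, sizeof(buf), "%016llx", static_cast<unsigned long long>(v));
        return buf;
    }

    /// High half first, then low half: the order the fixed call sites now use.
    std::string checksumToHex(const Checksum & c)
    {
        return hex64(c.high64) + hex64(c.low64);
    }
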
--- src/Functions/DateTimeTransforms.h | 72 ------- .../FunctionDateOrDateTimeToSomething.h | 13 -- src/Functions/IFunction.h | 24 +-- src/Functions/IFunctionAdaptors.h | 7 - ...OrDateTimeConverterWithPreimageVisitor.cpp | 199 ------------------ ...teOrDateTimeConverterWithPreimageVisitor.h | 37 ---- src/Interpreters/TreeOptimizer.cpp | 19 -- ...783_date_predicate_optimizations.reference | 52 ----- .../02783_date_predicate_optimizations.sql | 76 ------- ...dicate_optimizations_ast_rewrite.reference | 87 -------- ...te_predicate_optimizations_ast_rewrite.sql | 47 ----- 11 files changed, 1 insertion(+), 632 deletions(-) delete mode 100644 src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp delete mode 100644 src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.h delete mode 100644 tests/queries/0_stateless/02785_date_predicate_optimizations_ast_rewrite.reference delete mode 100644 tests/queries/0_stateless/02785_date_predicate_optimizations_ast_rewrite.sql diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index 84c71c89b11..019e0c42cde 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -322,7 +322,6 @@ struct ToTimeImpl { throwDateTimeIsNotSupported(name); } - static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ToDateImpl; }; @@ -394,7 +393,6 @@ struct ToStartOfSecondImpl { throwDateTimeIsNotSupported(name); } - static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -442,7 +440,6 @@ struct ToStartOfMillisecondImpl { throwDateTimeIsNotSupported(name); } - static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -486,7 +483,6 @@ struct ToStartOfMicrosecondImpl { throwDateTimeIsNotSupported(name); } - static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -524,7 +520,6 @@ struct ToStartOfNanosecondImpl { throwDateTimeIsNotSupported(name); } - static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -723,28 +718,6 @@ struct ToYearImpl return time_zone.toYear(DayNum(d)); } - static inline constexpr bool hasPreimage() { return true; } - - static inline RangeOrNull getPreimage(const IDataType & type, const Field & point) - { - if (point.getType() != Field::Types::UInt64) return std::nullopt; - - auto year = point.get(); - if (year < DATE_LUT_MIN_YEAR || year >= DATE_LUT_MAX_YEAR) return std::nullopt; - - const DateLUTImpl & date_lut = DateLUT::instance(); - - auto start_time = date_lut.makeDateTime(year, 1, 1, 0, 0, 0); - auto end_time = date_lut.addYears(start_time, 1); - - if (isDateOrDate32(type) || isDateTime(type) || isDateTime64(type)) - return {std::make_pair(Field(start_time), Field(end_time))}; - else - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type {} of argument of function {}. 
Should be Date, Date32, DateTime or DateTime64", - type.getName(), name); - } - using FactorTransform = ZeroTransform; }; @@ -818,7 +791,6 @@ struct ToQuarterImpl { return time_zone.toQuarter(DayNum(d)); } - static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ToStartOfYearImpl; }; @@ -843,7 +815,6 @@ struct ToMonthImpl { return time_zone.toMonth(DayNum(d)); } - static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ToStartOfYearImpl; }; @@ -869,7 +840,6 @@ struct ToDayOfMonthImpl return time_zone.toDayOfMonth(DayNum(d)); } - static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ToStartOfMonthImpl; }; @@ -917,7 +887,6 @@ struct ToDayOfYearImpl { return time_zone.toDayOfYear(DayNum(d)); } - static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ToStartOfYearImpl; }; @@ -942,7 +911,6 @@ struct ToHourImpl { throwDateTimeIsNotSupported(name); } - static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ToDateImpl; }; @@ -971,7 +939,6 @@ struct TimezoneOffsetImpl throwDateTimeIsNotSupported(name); } - static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ToTimeImpl; }; @@ -995,7 +962,6 @@ struct ToMinuteImpl { throwDateTimeIsNotSupported(name); } - static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ToStartOfHourImpl; }; @@ -1020,7 +986,6 @@ struct ToSecondImpl { throwDateTimeIsNotSupported(name); } - static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ToStartOfMinuteImpl; }; @@ -1045,7 +1010,6 @@ struct ToISOYearImpl { return time_zone.toISOYear(DayNum(d)); } - static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -1102,7 +1066,6 @@ struct ToISOWeekImpl { return time_zone.toISOWeek(DayNum(d)); } - static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ToISOYearImpl; }; @@ -1145,7 +1108,6 @@ struct ToRelativeYearNumImpl { return time_zone.toYear(DayNum(d)); } - static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -1177,7 +1139,6 @@ struct ToRelativeQuarterNumImpl { return time_zone.toRelativeQuarterNum(DayNum(d)); } - static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -1209,7 +1170,6 @@ struct ToRelativeMonthNumImpl { return time_zone.toRelativeMonthNum(DayNum(d)); } - static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -1241,7 +1201,6 @@ struct ToRelativeWeekNumImpl { return time_zone.toRelativeWeekNum(DayNum(d)); } - static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -1273,7 +1232,6 @@ struct ToRelativeDayNumImpl { return static_cast(d); } - static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -1311,7 +1269,6 @@ struct ToRelativeHourNumImpl else return static_cast(time_zone.toRelativeHourNum(DayNum(d))); } - static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -1343,7 +1300,6 @@ struct ToRelativeMinuteNumImpl { return static_cast(time_zone.toRelativeMinuteNum(DayNum(d))); } - static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -1372,7 +1328,6 @@ struct ToRelativeSecondNumImpl { return 
static_cast(time_zone.fromDayNum(DayNum(d))); } - static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -1397,31 +1352,6 @@ struct ToYYYYMMImpl { return time_zone.toNumYYYYMM(DayNum(d)); } - static inline constexpr bool hasPreimage() { return true; } - - static inline RangeOrNull getPreimage(const IDataType & type, const Field & point) - { - if (point.getType() != Field::Types::UInt64) return std::nullopt; - - auto year_month = point.get(); - auto year = year_month / 100; - auto month = year_month % 100; - - if (year < DATE_LUT_MIN_YEAR || year > DATE_LUT_MAX_YEAR || month < 1 || month > 12 || (year == DATE_LUT_MAX_YEAR && month == 12)) - return std::nullopt; - - const DateLUTImpl & date_lut = DateLUT::instance(); - - auto start_time = date_lut.makeDateTime(year, month, 1, 0, 0, 0); - auto end_time = date_lut.addMonths(start_time, 1); - - if (isDateOrDate32(type) || isDateTime(type) || isDateTime64(type)) - return {std::make_pair(Field(start_time), Field(end_time))}; - else - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type {} of argument of function {}. Should be Date, Date32, DateTime or DateTime64", - type.getName(), name); - } using FactorTransform = ZeroTransform; }; @@ -1446,7 +1376,6 @@ struct ToYYYYMMDDImpl { return time_zone.toNumYYYYMMDD(DayNum(d)); } - static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -1471,7 +1400,6 @@ struct ToYYYYMMDDhhmmssImpl { return time_zone.toNumYYYYMMDDhhmmss(time_zone.toDate(DayNum(d))); } - static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; diff --git a/src/Functions/FunctionDateOrDateTimeToSomething.h b/src/Functions/FunctionDateOrDateTimeToSomething.h index d98b788c7d7..82818cc3d2b 100644 --- a/src/Functions/FunctionDateOrDateTimeToSomething.h +++ b/src/Functions/FunctionDateOrDateTimeToSomething.h @@ -7,7 +7,6 @@ namespace DB namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int NOT_IMPLEMENTED; } /// See DateTimeTransforms.h @@ -84,18 +83,6 @@ public: arguments[0].type->getName(), this->getName()); } - bool hasInformationAboutPreimage() const override { return Transform::hasPreimage(); } - - RangeOrNull getPreimage(const IDataType & type, const Field & point) const override - { - if constexpr (Transform::hasPreimage()) - return Transform::getPreimage(type, point); - else - throw Exception(ErrorCodes::NOT_IMPLEMENTED, - "Function {} has no information about its preimage", - Transform::name); - } - }; } diff --git a/src/Functions/IFunction.h b/src/Functions/IFunction.h index 433cb61d04e..1e4f8bf1102 100644 --- a/src/Functions/IFunction.h +++ b/src/Functions/IFunction.h @@ -2,8 +2,6 @@ #include #include -#include -#include #include #include #include @@ -32,8 +30,7 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; } -/// A left-closed and right-open interval representing the preimage of a function. -using RangeOrNull = std::optional>; +class Field; /// The simplest executable object. /// Motivation: @@ -231,12 +228,6 @@ public: */ virtual bool hasInformationAboutMonotonicity() const { return false; } - /** Lets you know if the function has its definition of preimage. - * This is used to work with predicate optimizations, where the comparison between - * f(x) and a constant c could be converted to the comparison between x and f's preimage [b, e). 
- */ - virtual bool hasInformationAboutPreimage() const { return false; } - struct ShortCircuitSettings { /// Should we enable lazy execution for the first argument of short-circuit function? @@ -290,14 +281,6 @@ public: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Function {} has no information about its monotonicity", getName()); } - /** Get the preimage of a function in the form of a left-closed and right-open interval. Call only if hasInformationAboutPreimage. - * std::nullopt might be returned if the point (a single value) is invalid for this function. - */ - virtual RangeOrNull getPreimage(const IDataType & /*type*/, const Field & /*point*/) const - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Function {} has no information about its preimage", getName()); - } - }; using FunctionBasePtr = std::shared_ptr; @@ -487,17 +470,12 @@ public: virtual bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const = 0; virtual bool hasInformationAboutMonotonicity() const { return false; } - virtual bool hasInformationAboutPreimage() const { return false; } using Monotonicity = IFunctionBase::Monotonicity; virtual Monotonicity getMonotonicityForRange(const IDataType & /*type*/, const Field & /*left*/, const Field & /*right*/) const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Function {} has no information about its monotonicity", getName()); } - virtual RangeOrNull getPreimage(const IDataType & /*type*/, const Field & /*point*/) const - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Function {} has no information about its preimage", getName()); - } /// For non-variadic functions, return number of arguments; otherwise return zero (that should be ignored). virtual size_t getNumberOfArguments() const = 0; diff --git a/src/Functions/IFunctionAdaptors.h b/src/Functions/IFunctionAdaptors.h index 123fdbc2f50..23725b1a8b1 100644 --- a/src/Functions/IFunctionAdaptors.h +++ b/src/Functions/IFunctionAdaptors.h @@ -90,17 +90,10 @@ public: bool hasInformationAboutMonotonicity() const override { return function->hasInformationAboutMonotonicity(); } - bool hasInformationAboutPreimage() const override { return function->hasInformationAboutPreimage(); } - Monotonicity getMonotonicityForRange(const IDataType & type, const Field & left, const Field & right) const override { return function->getMonotonicityForRange(type, left, right); } - - RangeOrNull getPreimage(const IDataType & type, const Field & point) const override - { - return function->getPreimage(type, point); - } private: std::shared_ptr function; DataTypes arguments; diff --git a/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp b/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp deleted file mode 100644 index a377bb4bba6..00000000000 --- a/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp +++ /dev/null @@ -1,199 +0,0 @@ -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -/** Given a monotonic non-decreasing function f(x), which satisfies f(x) = c for any value x within [b, e). - * We could convert it into its equivalent form, x >= b AND x < e, which is free from the invocation of the function. - * And we could apply the similar transformation to other comparisons. 
The suggested transformations list: - * - * f(x) == c -> x >= b AND x < e - * f(x) != c -> x < b OR x >= e - * f(x) > c -> x >= e - * f(x) >= c -> x >= b - * f(x) < c -> x < b - * f(x) <= c -> x < e - * - * This function generates a new AST with the transformed relation. - */ -ASTPtr generateOptimizedDateFilterAST(const String & comparator, const NameAndTypePair & column, const std::pair& range) -{ - const DateLUTImpl & date_lut = DateLUT::instance(); - - const String & column_name = column.name; - String start_date_or_date_time; - String end_date_or_date_time; - - if (isDateOrDate32(column.type.get())) - { - start_date_or_date_time = date_lut.dateToString(range.first.get()); - end_date_or_date_time = date_lut.dateToString(range.second.get()); - } - else if (isDateTime(column.type.get()) || isDateTime64(column.type.get())) - { - start_date_or_date_time = date_lut.timeToString(range.first.get()); - end_date_or_date_time = date_lut.timeToString(range.second.get()); - } - else [[unlikely]] return {}; - - if (comparator == "equals") - { - return makeASTFunction("and", - makeASTFunction("greaterOrEquals", - std::make_shared(column_name), - std::make_shared(start_date_or_date_time) - ), - makeASTFunction("less", - std::make_shared(column_name), - std::make_shared(end_date_or_date_time) - ) - ); - } - else if (comparator == "notEquals") - { - return makeASTFunction("or", - makeASTFunction("less", - std::make_shared(column_name), - std::make_shared(start_date_or_date_time) - ), - makeASTFunction("greaterOrEquals", - std::make_shared(column_name), - std::make_shared(end_date_or_date_time) - ) - ); - } - else if (comparator == "greater") - { - return makeASTFunction("greaterOrEquals", - std::make_shared(column_name), - std::make_shared(end_date_or_date_time) - ); - } - else if (comparator == "lessOrEquals") - { - return makeASTFunction("less", - std::make_shared(column_name), - std::make_shared(end_date_or_date_time) - ); - } - else if (comparator == "less" || comparator == "greaterOrEquals") - { - return makeASTFunction(comparator, - std::make_shared(column_name), - std::make_shared(start_date_or_date_time) - ); - } - else [[unlikely]] - { - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Expected equals, notEquals, less, lessOrEquals, greater, greaterOrEquals. Actual {}", - comparator); - } -} - -void OptimizeDateOrDateTimeConverterWithPreimageMatcher::visit(const ASTFunction & function, ASTPtr & ast, const Data & data) -{ - const static std::unordered_map swap_relations = { - {"equals", "equals"}, - {"notEquals", "notEquals"}, - {"less", "greater"}, - {"greater", "less"}, - {"lessOrEquals", "greaterOrEquals"}, - {"greaterOrEquals", "lessOrEquals"}, - }; - - if (!swap_relations.contains(function.name)) return; - - if (!function.arguments || function.arguments->children.size() != 2) return; - - size_t func_id = function.arguments->children.size(); - - for (size_t i = 0; i < function.arguments->children.size(); i++) - { - if (const auto * func = function.arguments->children[i]->as()) - { - func_id = i; - } - } - - if (func_id == function.arguments->children.size()) return; - - size_t literal_id = 1 - func_id; - const auto * literal = function.arguments->children[literal_id]->as(); - - if (!literal || literal->value.getType() != Field::Types::UInt64) return; - - String comparator = literal_id > func_id ? function.name : swap_relations.at(function.name); - - const auto * ast_func = function.arguments->children[func_id]->as(); - /// Currently we only handle single-argument functions. 
- if (!ast_func || !ast_func->arguments || ast_func->arguments->children.size() != 1) return; - - const auto * column_id = ast_func->arguments->children.at(0)->as(); - if (!column_id) return; - - auto pos = IdentifierSemantic::getMembership(*column_id); - if (!pos) - pos = IdentifierSemantic::chooseTableColumnMatch(*column_id, data.tables, true); - if (!pos) - return; - - if (*pos >= data.tables.size()) - return; - - auto data_type_and_name = data.tables[*pos].columns.tryGetByName(column_id->shortName()); - if (!data_type_and_name) return; - - const auto & converter = FunctionFactory::instance().tryGet(ast_func->name, data.context); - if (!converter) return; - - ColumnsWithTypeAndName args; - args.emplace_back(data_type_and_name->type, "tmp"); - auto converter_base = converter->build(args); - if (!converter_base || !converter_base->hasInformationAboutPreimage()) return; - - auto preimage_range = converter_base->getPreimage(*(data_type_and_name->type), literal->value); - if (!preimage_range) return; - - const auto new_ast = generateOptimizedDateFilterAST(comparator, *data_type_and_name, *preimage_range); - if (!new_ast) return; - - ast = new_ast; -} - -bool OptimizeDateOrDateTimeConverterWithPreimageMatcher::needChildVisit(ASTPtr & ast, ASTPtr & /*child*/) -{ - const static std::unordered_set relations = { - "equals", - "notEquals", - "less", - "greater", - "lessOrEquals", - "greaterOrEquals", - }; - - if (const auto * ast_function = ast->as()) - { - return !relations.contains(ast_function->name); - } - - return true; -} - -} diff --git a/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.h b/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.h deleted file mode 100644 index 778fa462364..00000000000 --- a/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.h +++ /dev/null @@ -1,37 +0,0 @@ -#pragma once - -#include -#include - -namespace DB -{ - -class ASTFunction; - -/** Replace predicate having Date/DateTime converters with their preimages to improve performance. - * Given a Date column c, toYear(c) = 2023 -> c >= '2023-01-01' AND c < '2024-01-01' - * Or if c is a DateTime column, toYear(c) = 2023 -> c >= '2023-01-01 00:00:00' AND c < '2024-01-01 00:00:00'. - * The similar optimization also applies to other converters. - */ -class OptimizeDateOrDateTimeConverterWithPreimageMatcher -{ -public: - struct Data - { - const TablesWithColumns & tables; - ContextPtr context; - }; - - static void visit(ASTPtr & ast, Data & data) - { - if (const auto * ast_function = ast->as()) - visit(*ast_function, ast, data); - } - - static void visit(const ASTFunction & function, ASTPtr & ast, const Data & data); - - static bool needChildVisit(ASTPtr & ast, ASTPtr & child); -}; - -using OptimizeDateOrDateTimeConverterWithPreimageVisitor = InDepthNodeVisitor; -} diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index fd4d2c9d846..c38b3c79026 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -25,7 +25,6 @@ #include #include #include -#include #include #include @@ -678,21 +677,6 @@ void optimizeInjectiveFunctionsInsideUniq(ASTPtr & query, ContextPtr context) RemoveInjectiveFunctionsVisitor(data).visit(query); } -void optimizeDateFilters(ASTSelectQuery * select_query, const std::vector & tables_with_columns, ContextPtr context) -{ - /// Predicates in HAVING clause has been moved to WHERE clause. 
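For reference, the rewrite being removed here rested on computing a converter's preimage as a half-open interval: for a Date column, toYear(d) = 2023 becomes d >= '2023-01-01' AND d < '2024-01-01'. A self-contained sketch of that interval computation, assuming the Gregorian calendar and ignoring time zones (the deleted implementation went through DateLUT and Field types instead):

#include <cstdio>
#include <string>
#include <utility>

// Preimage of toYear(d) == year over a Date column, as [begin, end).
// Hedged sketch: a toy string-based stand-in for the removed DateLUT logic.
std::pair<std::string, std::string> toYearPreimage(int year)
{
    char begin[16], end[16];
    std::snprintf(begin, sizeof(begin), "%04d-01-01", year);
    std::snprintf(end, sizeof(end), "%04d-01-01", year + 1);
    return {begin, end};
}

int main()
{
    auto [b, e] = toYearPreimage(2023);
    // Equivalent predicate, free of the toYear() call on every row:
    std::printf("d >= '%s' AND d < '%s'\n", b.c_str(), e.c_str());
    return 0;
}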
- if (select_query->where()) - { - OptimizeDateOrDateTimeConverterWithPreimageVisitor::Data data{tables_with_columns, context}; - OptimizeDateOrDateTimeConverterWithPreimageVisitor(data).visit(select_query->refWhere()); - } - if (select_query->prewhere()) - { - OptimizeDateOrDateTimeConverterWithPreimageVisitor::Data data{tables_with_columns, context}; - OptimizeDateOrDateTimeConverterWithPreimageVisitor(data).visit(select_query->refPrewhere()); - } -} - void transformIfStringsIntoEnum(ASTPtr & query) { std::unordered_set function_names = {"if", "transform"}; @@ -796,9 +780,6 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result, tables_with_columns, result.storage_snapshot->metadata, result.storage); } - /// Rewrite date filters to avoid the calls of converters such as toYear, toYYYYMM, etc. - optimizeDateFilters(select_query, tables_with_columns, context); - /// GROUP BY injective function elimination. optimizeGroupBy(select_query, context); diff --git a/tests/queries/0_stateless/02783_date_predicate_optimizations.reference b/tests/queries/0_stateless/02783_date_predicate_optimizations.reference index 872a5dd1d7d..cd689b93034 100644 --- a/tests/queries/0_stateless/02783_date_predicate_optimizations.reference +++ b/tests/queries/0_stateless/02783_date_predicate_optimizations.reference @@ -1,54 +1,2 @@ 2021-12-31 23:00:00 0 2021-12-31 23:00:00 0 -Date -2 -3 -2 -4 -1 -3 -3 -2 -1 -4 -1 -4 -DateTime -2 -3 -2 -4 -1 -3 -3 -2 -1 -4 -1 -4 -Date32 -2 -3 -2 -4 -1 -3 -3 -2 -1 -4 -1 -4 -DateTime64 -2 -3 -2 -4 -1 -3 -3 -2 -1 -4 -1 -4 diff --git a/tests/queries/0_stateless/02783_date_predicate_optimizations.sql b/tests/queries/0_stateless/02783_date_predicate_optimizations.sql index 0a2fa6cc93b..abb13f1005e 100644 --- a/tests/queries/0_stateless/02783_date_predicate_optimizations.sql +++ b/tests/queries/0_stateless/02783_date_predicate_optimizations.sql @@ -11,79 +11,3 @@ INSERT INTO source values ('2021-12-31 23:00:00', 0); SELECT * FROM source WHERE toYYYYMM(ts) = 202112; SELECT * FROM source WHERE toYear(ts) = 2021; - -DROP TABLE IF EXISTS source; -CREATE TABLE source -( - `dt` Date, - `ts` DateTime, - `dt_32` Date32, - `ts_64` DateTime64(3), - `n` Int32 -) -ENGINE = MergeTree -PARTITION BY toYYYYMM(ts) -ORDER BY tuple(); - -INSERT INTO source values ('2022-12-31', '2022-12-31 23:59:59', '2022-12-31', '2022-12-31 23:59:59.123', 0); -INSERT INTO source values ('2023-01-01', '2023-01-01 00:00:00', '2023-01-01', '2023-01-01 00:00:00.000', 1); -INSERT INTO source values ('2023-12-01', '2023-12-01 00:00:00', '2023-12-01', '2023-12-01 00:00:00.000', 2); -INSERT INTO source values ('2023-12-31', '2023-12-31 23:59:59', '2023-12-31', '2023-12-31 23:59:59.123', 3); -INSERT INTO source values ('2024-01-01', '2024-01-01 00:00:00', '2024-01-01', '2024-01-01 00:00:00.000', 4); - -SELECT 'Date'; -SELECT count(*) FROM source WHERE toYYYYMM(dt) = 202312; -SELECT count(*) FROM source WHERE toYYYYMM(dt) <> 202312; -SELECT count(*) FROM source WHERE toYYYYMM(dt) < 202312; -SELECT count(*) FROM source WHERE toYYYYMM(dt) <= 202312; -SELECT count(*) FROM source WHERE toYYYYMM(dt) > 202312; -SELECT count(*) FROM source WHERE toYYYYMM(dt) >= 202312; -SELECT count(*) FROM source WHERE toYear(dt) = 2023; -SELECT count(*) FROM source WHERE toYear(dt) <> 2023; -SELECT count(*) FROM source WHERE toYear(dt) < 2023; -SELECT count(*) FROM source WHERE toYear(dt) <= 2023; -SELECT count(*) FROM source WHERE toYear(dt) > 2023; -SELECT count(*) FROM source WHERE toYear(dt) >= 2023; - -SELECT 'DateTime'; -SELECT 
count(*) FROM source WHERE toYYYYMM(ts) = 202312; -SELECT count(*) FROM source WHERE toYYYYMM(ts) <> 202312; -SELECT count(*) FROM source WHERE toYYYYMM(ts) < 202312; -SELECT count(*) FROM source WHERE toYYYYMM(ts) <= 202312; -SELECT count(*) FROM source WHERE toYYYYMM(ts) > 202312; -SELECT count(*) FROM source WHERE toYYYYMM(ts) >= 202312; -SELECT count(*) FROM source WHERE toYear(ts) = 2023; -SELECT count(*) FROM source WHERE toYear(ts) <> 2023; -SELECT count(*) FROM source WHERE toYear(ts) < 2023; -SELECT count(*) FROM source WHERE toYear(ts) <= 2023; -SELECT count(*) FROM source WHERE toYear(ts) > 2023; -SELECT count(*) FROM source WHERE toYear(ts) >= 2023; - -SELECT 'Date32'; -SELECT count(*) FROM source WHERE toYYYYMM(dt_32) = 202312; -SELECT count(*) FROM source WHERE toYYYYMM(dt_32) <> 202312; -SELECT count(*) FROM source WHERE toYYYYMM(dt_32) < 202312; -SELECT count(*) FROM source WHERE toYYYYMM(dt_32) <= 202312; -SELECT count(*) FROM source WHERE toYYYYMM(dt_32) > 202312; -SELECT count(*) FROM source WHERE toYYYYMM(dt_32) >= 202312; -SELECT count(*) FROM source WHERE toYear(dt_32) = 2023; -SELECT count(*) FROM source WHERE toYear(dt_32) <> 2023; -SELECT count(*) FROM source WHERE toYear(dt_32) < 2023; -SELECT count(*) FROM source WHERE toYear(dt_32) <= 2023; -SELECT count(*) FROM source WHERE toYear(dt_32) > 2023; -SELECT count(*) FROM source WHERE toYear(dt_32) >= 2023; - -SELECT 'DateTime64'; -SELECT count(*) FROM source WHERE toYYYYMM(ts_64) = 202312; -SELECT count(*) FROM source WHERE toYYYYMM(ts_64) <> 202312; -SELECT count(*) FROM source WHERE toYYYYMM(ts_64) < 202312; -SELECT count(*) FROM source WHERE toYYYYMM(ts_64) <= 202312; -SELECT count(*) FROM source WHERE toYYYYMM(ts_64) > 202312; -SELECT count(*) FROM source WHERE toYYYYMM(ts_64) >= 202312; -SELECT count(*) FROM source WHERE toYear(ts_64) = 2023; -SELECT count(*) FROM source WHERE toYear(ts_64) <> 2023; -SELECT count(*) FROM source WHERE toYear(ts_64) < 2023; -SELECT count(*) FROM source WHERE toYear(ts_64) <= 2023; -SELECT count(*) FROM source WHERE toYear(ts_64) > 2023; -SELECT count(*) FROM source WHERE toYear(ts_64) >= 2023; -DROP TABLE source; diff --git a/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_rewrite.reference b/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_rewrite.reference deleted file mode 100644 index 9235e7e106a..00000000000 --- a/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_rewrite.reference +++ /dev/null @@ -1,87 +0,0 @@ -SELECT value1 -FROM date_t -WHERE ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE ((date1 < \'1993-01-01\') OR (date1 >= \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE (date1 < \'1993-01-01\') AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE (date1 >= \'1994-01-01\') AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE (date1 < \'1994-01-01\') AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE (date1 >= \'1993-01-01\') AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE ((date1 >= \'1993-01-01\') AND (date1 < \'1998-01-01\')) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE (((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) OR ((date1 >= \'1994-01-01\') AND (date1 < \'1995-01-01\'))) AND ((id >= 1) AND (id <= 3)) -SELECT - value1, - toYear(date1) AS year1 -FROM date_t -WHERE ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) AND 
((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE (date1 < \'1993-01-01\') AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -PREWHERE (date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\') -WHERE ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE ((id >= 1) AND (id <= 3)) AND ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) -SELECT value1 -FROM date_t -WHERE (toYYYYMM(date1) = 199300) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE (toYYYYMM(date1) = 199313) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE ((date1 >= \'1993-12-01\') AND (date1 < \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE ((date1 >= \'1992-03-01\') AND (date1 < \'1992-04-01\')) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE ((date1 < \'1992-03-01\') OR (date1 >= \'1992-04-01\')) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE (date1 < \'1992-03-01\') AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE (date1 >= \'1992-04-01\') AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE (date1 < \'1992-04-01\') AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE (date1 >= \'1992-03-01\') AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date_t -WHERE ((date1 >= \'1992-03-01\') OR ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\'))) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM datetime_t -WHERE ((date1 >= \'1993-01-01 00:00:00\') AND (date1 < \'1994-01-01 00:00:00\')) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM datetime_t -WHERE ((date1 >= \'1993-12-01 00:00:00\') AND (date1 < \'1994-01-01 00:00:00\')) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date32_t -WHERE ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM date32_t -WHERE ((date1 >= \'1993-12-01\') AND (date1 < \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM datetime64_t -WHERE ((date1 >= \'1993-01-01 00:00:00\') AND (date1 < \'1994-01-01 00:00:00\')) AND ((id >= 1) AND (id <= 3)) -SELECT value1 -FROM datetime64_t -WHERE ((date1 >= \'1993-12-01 00:00:00\') AND (date1 < \'1994-01-01 00:00:00\')) AND ((id >= 1) AND (id <= 3)) diff --git a/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_rewrite.sql b/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_rewrite.sql deleted file mode 100644 index 266be59b0a3..00000000000 --- a/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_rewrite.sql +++ /dev/null @@ -1,47 +0,0 @@ -DROP TABLE IF EXISTS date_t; -CREATE TABLE date_t (id UInt32, value1 String, date1 Date) ENGINE ReplacingMergeTree() ORDER BY id; - -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) = 1993 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) <> 1993 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) < 1993 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) > 1993 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) <= 1993 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) >= 1993 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) BETWEEN 1993 AND 1997 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE (toYear(date1) = 1993 OR toYear(date1) = 1994) 
AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1, toYear(date1) as year1 FROM date_t WHERE year1 = 1993 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE 1993 > toYear(date1) AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t PREWHERE toYear(date1) = 1993 WHERE id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE id BETWEEN 1 AND 3 HAVING toYear(date1) = 1993; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) = 199300 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) = 199313 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) = 199312 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) = 199203 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) <> 199203 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) < 199203 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) > 199203 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) <= 199203 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) >= 199203 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE (toYYYYMM(date1) >= 199203 OR toYear(date1) = 1993) AND id BETWEEN 1 AND 3; -DROP TABLE date_t; - -DROP TABLE IF EXISTS datetime_t; -CREATE TABLE datetime_t (id UInt32, value1 String, date1 Datetime) ENGINE ReplacingMergeTree() ORDER BY id; - -EXPLAIN SYNTAX SELECT value1 FROM datetime_t WHERE toYear(date1) = 1993 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM datetime_t WHERE toYYYYMM(date1) = 199312 AND id BETWEEN 1 AND 3; -DROP TABLE datetime_t; - -DROP TABLE IF EXISTS date32_t; -CREATE TABLE date32_t (id UInt32, value1 String, date1 Date32) ENGINE ReplacingMergeTree() ORDER BY id; - -EXPLAIN SYNTAX SELECT value1 FROM date32_t WHERE toYear(date1) = 1993 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM date32_t WHERE toYYYYMM(date1) = 199312 AND id BETWEEN 1 AND 3; -DROP TABLE date32_t; - -DROP TABLE IF EXISTS datetime64_t; -CREATE TABLE datetime64_t (id UInt32, value1 String, date1 Datetime64) ENGINE ReplacingMergeTree() ORDER BY id; - -EXPLAIN SYNTAX SELECT value1 FROM datetime64_t WHERE toYear(date1) = 1993 AND id BETWEEN 1 AND 3; -EXPLAIN SYNTAX SELECT value1 FROM datetime64_t WHERE toYYYYMM(date1) = 199312 AND id BETWEEN 1 AND 3; -DROP TABLE datetime64_t; From 4ee74ec213b333893ddeb89331970e0d8758adbd Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 26 Jun 2023 02:19:52 +0200 Subject: [PATCH 0794/1997] Two tests are twice longer in average with Analyzer and sometimes failing --- docker/test/util/process_functional_tests_result.py | 4 ++-- tests/{broken_tests.txt => analyzer_tech_debt.txt} | 2 ++ tests/ci/functional_test_check.py | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) rename tests/{broken_tests.txt => analyzer_tech_debt.txt} (97%) diff --git a/docker/test/util/process_functional_tests_result.py b/docker/test/util/process_functional_tests_result.py index c75a3500831..fd4cc9f4bf7 100755 --- a/docker/test/util/process_functional_tests_result.py +++ b/docker/test/util/process_functional_tests_result.py @@ -86,7 +86,7 @@ def process_test_log(log_path, broken_tests): test_name, "NOT_FAILED", test_time, - ["This test passed. Update broken_tests.txt.\n"], + ["This test passed. 
Update analyzer_tech_debt.txt.\n"], ) ) else: @@ -205,7 +205,7 @@ if __name__ == "__main__": parser.add_argument("--in-results-dir", default="/test_output/") parser.add_argument("--out-results-file", default="/test_output/test_results.tsv") parser.add_argument("--out-status-file", default="/test_output/check_status.tsv") - parser.add_argument("--broken-tests", default="/broken_tests.txt") + parser.add_argument("--broken-tests", default="/analyzer_tech_debt.txt") args = parser.parse_args() broken_tests = list() diff --git a/tests/broken_tests.txt b/tests/analyzer_tech_debt.txt similarity index 97% rename from tests/broken_tests.txt rename to tests/analyzer_tech_debt.txt index b3668b06e21..0872033aed0 100644 --- a/tests/broken_tests.txt +++ b/tests/analyzer_tech_debt.txt @@ -124,3 +124,5 @@ 02534_s3_cluster_insert_select_schema_inference 02765_parallel_replicas_final_modifier 02784_parallel_replicas_automatic_disabling +02581_share_big_sets_between_mutation_tasks_long +02581_share_big_sets_between_multiple_mutations_tasks_long diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index f77ef330ea2..9279b19b187 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -108,7 +108,7 @@ def get_run_command( env_str = " ".join(envs) volume_with_broken_test = ( - f"--volume={repo_tests_path}/broken_tests.txt:/broken_tests.txt" + f"--volume={repo_tests_path}/analyzer_tech_debt.txt:/analyzer_tech_debt.txt" if "analyzer" in check_name else "" ) From a487a1ab5acbe5fd2d5d1ee9872f895709408002 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 26 Jun 2023 02:48:30 +0200 Subject: [PATCH 0795/1997] Fix bad test --- tests/queries/0_stateless/02479_mysql_connect_to_self.sql | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/queries/0_stateless/02479_mysql_connect_to_self.sql b/tests/queries/0_stateless/02479_mysql_connect_to_self.sql index c5c5dfb8c4a..cf2220073d3 100644 --- a/tests/queries/0_stateless/02479_mysql_connect_to_self.sql +++ b/tests/queries/0_stateless/02479_mysql_connect_to_self.sql @@ -2,6 +2,8 @@ SET send_logs_level = 'fatal'; -- failed connection tries are ok, if it succeeded after retry. 
+DROP TABLE IF EXISTS foo; + CREATE TABLE foo (key UInt32, a String, b Int64, c String) ENGINE = TinyLog; INSERT INTO foo VALUES (1, 'one', -1, 'een'), (2, 'two', -2, 'twee'), (3, 'three', -3, 'drie'), (4, 'four', -4, 'vier'), (5, 'five', -5, 'vijf'); @@ -40,3 +42,5 @@ SELECT * FROM mysql( SELECT '---'; SELECT count() FROM mysql('127.0.0.1:9004', currentDatabase(), foo, 'default', '', SETTINGS connection_pool_size = 1, connect_timeout = 100, connection_wait_timeout = 100); SELECT count() FROM mysql('127.0.0.1:9004', currentDatabase(), foo, 'default', '', SETTINGS connection_pool_size = 0); -- { serverError BAD_ARGUMENTS } + +DROP TABLE foo; From 0e88aae9d7ab53d4267324e17f5a48fae00ae72c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 26 Jun 2023 04:27:50 +0200 Subject: [PATCH 0796/1997] Remove ConsoleCertificateHandler --- .../Poco/Net/ConsoleCertificateHandler.h | 53 ------------------- .../include/Poco/Net/SSLManager.h | 6 +-- .../src/CertificateHandlerFactoryMgr.cpp | 2 - .../src/ConsoleCertificateHandler.cpp | 53 ------------------- base/poco/NetSSL_OpenSSL/src/SSLManager.cpp | 2 +- .../settings.md | 2 +- .../settings.md | 2 +- .../settings.md | 2 +- 8 files changed, 7 insertions(+), 115 deletions(-) delete mode 100644 base/poco/NetSSL_OpenSSL/include/Poco/Net/ConsoleCertificateHandler.h delete mode 100644 base/poco/NetSSL_OpenSSL/src/ConsoleCertificateHandler.cpp diff --git a/base/poco/NetSSL_OpenSSL/include/Poco/Net/ConsoleCertificateHandler.h b/base/poco/NetSSL_OpenSSL/include/Poco/Net/ConsoleCertificateHandler.h deleted file mode 100644 index 8e09b6f18ae..00000000000 --- a/base/poco/NetSSL_OpenSSL/include/Poco/Net/ConsoleCertificateHandler.h +++ /dev/null @@ -1,53 +0,0 @@ -// -// ConsoleCertificateHandler.h -// -// Library: NetSSL_OpenSSL -// Package: SSLCore -// Module: ConsoleCertificateHandler -// -// Definition of the ConsoleCertificateHandler class. -// -// Copyright (c) 2006-2009, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef NetSSL_ConsoleCertificateHandler_INCLUDED -#define NetSSL_ConsoleCertificateHandler_INCLUDED - - -#include "Poco/Net/InvalidCertificateHandler.h" -#include "Poco/Net/NetSSL.h" - - -namespace Poco -{ -namespace Net -{ - - - class NetSSL_API ConsoleCertificateHandler : public InvalidCertificateHandler - /// A ConsoleCertificateHandler is invoked whenever an error occurs verifying the certificate. - /// - /// The certificate is printed to stdout and the user is asked via console if he wants to accept it. - { - public: - ConsoleCertificateHandler(bool handleErrorsOnServerSide); - /// Creates the ConsoleCertificateHandler. - - virtual ~ConsoleCertificateHandler(); - /// Destroys the ConsoleCertificateHandler. - - void onInvalidCertificate(const void * pSender, VerificationErrorArgs & errorCert); - /// Prints the certificate to stdout and waits for user input on the console - /// to decide if a certificate should be accepted/rejected. 
- }; - - -} -} // namespace Poco::Net - - -#endif // NetSSL_ConsoleCertificateHandler_INCLUDED diff --git a/base/poco/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h b/base/poco/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h index a4fde26286e..21a1ed685e5 100644 --- a/base/poco/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h +++ b/base/poco/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h @@ -85,7 +85,7 @@ namespace Net /// /// /// - /// ConsoleCertificateHandler + /// RejectCertificateHandler /// /// true|false /// someString @@ -186,7 +186,7 @@ namespace Net /// /// Valid initialization code would be: /// SharedPtr pConsoleHandler = new KeyConsoleHandler; - /// SharedPtr pInvalidCertHandler = new ConsoleCertificateHandler; + /// SharedPtr pInvalidCertHandler = new RejectCertificateHandler; /// Context::Ptr pContext = new Context(Context::SERVER_USE, "any.pem", "any.pem", "rootcert.pem", Context::VERIFY_RELAXED, 9, false, "ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH"); /// SSLManager::instance().initializeServer(pConsoleHandler, pInvalidCertHandler, pContext); @@ -203,7 +203,7 @@ namespace Net /// /// Valid initialization code would be: /// SharedPtr pConsoleHandler = new KeyConsoleHandler; - /// SharedPtr pInvalidCertHandler = new ConsoleCertificateHandler; + /// SharedPtr pInvalidCertHandler = new RejectCertificateHandler; /// Context::Ptr pContext = new Context(Context::CLIENT_USE, "", "", "rootcert.pem", Context::VERIFY_RELAXED, 9, false, "ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH"); /// SSLManager::instance().initializeClient(pConsoleHandler, pInvalidCertHandler, pContext); diff --git a/base/poco/NetSSL_OpenSSL/src/CertificateHandlerFactoryMgr.cpp b/base/poco/NetSSL_OpenSSL/src/CertificateHandlerFactoryMgr.cpp index a89bbea11f2..f570e2d3599 100644 --- a/base/poco/NetSSL_OpenSSL/src/CertificateHandlerFactoryMgr.cpp +++ b/base/poco/NetSSL_OpenSSL/src/CertificateHandlerFactoryMgr.cpp @@ -13,7 +13,6 @@ #include "Poco/Net/CertificateHandlerFactoryMgr.h" -#include "Poco/Net/ConsoleCertificateHandler.h" #include "Poco/Net/AcceptCertificateHandler.h" #include "Poco/Net/RejectCertificateHandler.h" @@ -24,7 +23,6 @@ namespace Net { CertificateHandlerFactoryMgr::CertificateHandlerFactoryMgr() { - setFactory("ConsoleCertificateHandler", new CertificateHandlerFactoryImpl()); setFactory("AcceptCertificateHandler", new CertificateHandlerFactoryImpl()); setFactory("RejectCertificateHandler", new CertificateHandlerFactoryImpl()); } diff --git a/base/poco/NetSSL_OpenSSL/src/ConsoleCertificateHandler.cpp b/base/poco/NetSSL_OpenSSL/src/ConsoleCertificateHandler.cpp deleted file mode 100644 index db64752e70c..00000000000 --- a/base/poco/NetSSL_OpenSSL/src/ConsoleCertificateHandler.cpp +++ /dev/null @@ -1,53 +0,0 @@ -// -// ConsoleCertificateHandler.cpp -// -// Library: NetSSL_OpenSSL -// Package: SSLCore -// Module: ConsoleCertificateHandler -// -// Copyright (c) 2006-2009, Applied Informatics Software Engineering GmbH. -// and Contributors. 
-// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/Net/ConsoleCertificateHandler.h" -#include - - -namespace Poco { -namespace Net { - - -ConsoleCertificateHandler::ConsoleCertificateHandler(bool server): InvalidCertificateHandler(server) -{ -} - - -ConsoleCertificateHandler::~ConsoleCertificateHandler() -{ -} - - -void ConsoleCertificateHandler::onInvalidCertificate(const void*, VerificationErrorArgs& errorCert) -{ - const X509Certificate& aCert = errorCert.certificate(); - std::cout << "\n"; - std::cout << "WARNING: Certificate verification failed\n"; - std::cout << "----------------------------------------\n"; - std::cout << "Issuer Name: " << aCert.issuerName() << "\n"; - std::cout << "Subject Name: " << aCert.subjectName() << "\n\n"; - std::cout << "The certificate yielded the error: " << errorCert.errorMessage() << "\n\n"; - std::cout << "The error occurred in the certificate chain at position " << errorCert.errorDepth() << "\n"; - std::cout << "Accept the certificate (y,n)? "; - char c = 0; - std::cin >> c; - if (c == 'y' || c == 'Y') - errorCert.setIgnoreError(true); - else - errorCert.setIgnoreError(false); -} - - -} } // namespace Poco::Net diff --git a/base/poco/NetSSL_OpenSSL/src/SSLManager.cpp b/base/poco/NetSSL_OpenSSL/src/SSLManager.cpp index 82eed1a29eb..927602ca658 100644 --- a/base/poco/NetSSL_OpenSSL/src/SSLManager.cpp +++ b/base/poco/NetSSL_OpenSSL/src/SSLManager.cpp @@ -46,7 +46,7 @@ const std::string SSLManager::CFG_PREFER_SERVER_CIPHERS("preferServerCiphers"); const std::string SSLManager::CFG_DELEGATE_HANDLER("privateKeyPassphraseHandler.name"); const std::string SSLManager::VAL_DELEGATE_HANDLER("KeyConsoleHandler"); const std::string SSLManager::CFG_CERTIFICATE_HANDLER("invalidCertificateHandler.name"); -const std::string SSLManager::VAL_CERTIFICATE_HANDLER("ConsoleCertificateHandler"); +const std::string SSLManager::VAL_CERTIFICATE_HANDLER("RejectCertificateHandler"); const std::string SSLManager::CFG_SERVER_PREFIX("openSSL.server."); const std::string SSLManager::CFG_CLIENT_PREFIX("openSSL.client."); const std::string SSLManager::CFG_CACHE_SESSIONS("cacheSessions"); diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 5643e273d39..40c1b8d64a1 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -1602,7 +1602,7 @@ Keys for server/client settings: - requireTLSv1_2 (default: false) – Require a TLSv1.2 connection. Acceptable values: `true`, `false`. - fips (default: false) – Activates OpenSSL FIPS mode. Supported if the library’s OpenSSL version supports FIPS. - privateKeyPassphraseHandler (default: `KeyConsoleHandler`)– Class (PrivateKeyPassphraseHandler subclass) that requests the passphrase for accessing the private key. For example: ``, `KeyFileHandler`, `test`, ``. -- invalidCertificateHandler (default: `ConsoleCertificateHandler`) – Class (a subclass of CertificateHandler) for verifying invalid certificates. For example: ` ConsoleCertificateHandler ` . +- invalidCertificateHandler (default: `RejectCertificateHandler`) – Class (a subclass of CertificateHandler) for verifying invalid certificates. For example: ` RejectCertificateHandler ` . - disableProtocols (default: "") – Protocols that are not allowed to use. - preferServerCiphers (default: false) – Preferred server ciphers on the client. 
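With the interactive handler removed, the default invalidCertificateHandler is RejectCertificateHandler: a failed verification now fails the handshake instead of blocking a daemonized server on a stdin prompt. A minimal client-side initialization sketch following the SSLManager documentation above (the certificate path and cipher string are illustrative):

#include <Poco/Net/Context.h>
#include <Poco/Net/KeyConsoleHandler.h>
#include <Poco/Net/RejectCertificateHandler.h>
#include <Poco/Net/SSLManager.h>
#include <Poco/SharedPtr.h>

using namespace Poco::Net;

int main()
{
    // Invalid certificates are rejected outright; no console interaction.
    Poco::SharedPtr<PrivateKeyPassphraseHandler> key_handler = new KeyConsoleHandler(false);
    Poco::SharedPtr<InvalidCertificateHandler> cert_handler = new RejectCertificateHandler(false);
    Context::Ptr context = new Context(
        Context::CLIENT_USE, "", "", "rootcert.pem",
        Context::VERIFY_RELAXED, 9, false, "ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH");
    SSLManager::instance().initializeClient(key_handler, cert_handler, context);
    return 0;
}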
diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md index 33db6df0fdd..5430469ea18 100644 --- a/docs/ru/operations/server-configuration-parameters/settings.md +++ b/docs/ru/operations/server-configuration-parameters/settings.md @@ -1067,7 +1067,7 @@ ClickHouse использует потоки из глобального пул - requireTLSv1_2 - Требование соединения TLSv1.2. Допустимые значения: `true`, `false`. - fips - Активация режима OpenSSL FIPS. Поддерживается, если версия OpenSSL, с которой собрана библиотека поддерживает fips. - privateKeyPassphraseHandler - Класс (подкласс PrivateKeyPassphraseHandler)запрашивающий кодовую фразу доступа к секретному ключу. Например, ``, `KeyFileHandler`, `test`, ``. -- invalidCertificateHandler - Класс (подкласс CertificateHandler) для подтверждения не валидных сертификатов. Например, ` ConsoleCertificateHandler `. +- invalidCertificateHandler - Класс (подкласс CertificateHandler) для подтверждения не валидных сертификатов. Например, ` RejectCertificateHandler `. - disableProtocols - Запрещенные к использованию протоколы. - preferServerCiphers - Предпочтение серверных шифров на клиенте. diff --git a/docs/zh/operations/server-configuration-parameters/settings.md b/docs/zh/operations/server-configuration-parameters/settings.md index 52142eda2e8..f6106d8734e 100644 --- a/docs/zh/operations/server-configuration-parameters/settings.md +++ b/docs/zh/operations/server-configuration-parameters/settings.md @@ -466,7 +466,7 @@ SSL客户端/服务器配置。 - requireTLSv1_2 – Require a TLSv1.2 connection. Acceptable values: `true`, `false`. - fips – Activates OpenSSL FIPS mode. Supported if the library’s OpenSSL version supports FIPS. - privateKeyPassphraseHandler – Class (PrivateKeyPassphraseHandler subclass) that requests the passphrase for accessing the private key. For example: ``, `KeyFileHandler`, `test`, ``. -- invalidCertificateHandler – Class (a subclass of CertificateHandler) for verifying invalid certificates. For example: ` ConsoleCertificateHandler ` . +- invalidCertificateHandler – Class (a subclass of CertificateHandler) for verifying invalid certificates. For example: ` RejectCertificateHandler ` . - disableProtocols – Protocols that are not allowed to use. - preferServerCiphers – Preferred server ciphers on the client. From 88048153d7af70dcfc27dc1c9bd480d9cad99d53 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 26 Jun 2023 04:37:14 +0200 Subject: [PATCH 0797/1997] Fix 00899_long_attach_memory_limit --- tests/queries/0_stateless/00899_long_attach_memory_limit.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00899_long_attach_memory_limit.sql b/tests/queries/0_stateless/00899_long_attach_memory_limit.sql index aa507cda2ac..d4aa2a0eb7b 100644 --- a/tests/queries/0_stateless/00899_long_attach_memory_limit.sql +++ b/tests/queries/0_stateless/00899_long_attach_memory_limit.sql @@ -1,4 +1,5 @@ --- Tags: long, no-debug, no-parallel, no-fasttest +-- Tags: long, no-debug, no-parallel, no-fasttest, no-msan, no-tsan +-- This test is slow under MSan or TSan. 
DROP TABLE IF EXISTS index_memory; CREATE TABLE index_memory (x UInt64) ENGINE = MergeTree ORDER BY x SETTINGS index_granularity = 1; From cf2e110c134c6934fd22e0c9ec5a598ac5b107cc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 26 Jun 2023 05:20:35 +0200 Subject: [PATCH 0798/1997] Fix test 01293_optimize_final_force --- tests/queries/0_stateless/01293_optimize_final_force.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01293_optimize_final_force.sh b/tests/queries/0_stateless/01293_optimize_final_force.sh index 994d5952dbc..eb3a2756899 100755 --- a/tests/queries/0_stateless/01293_optimize_final_force.sh +++ b/tests/queries/0_stateless/01293_optimize_final_force.sh @@ -1,5 +1,6 @@ #!/usr/bin/env bash -# Tags: no-fasttest +# Tags: no-fasttest, no-debug, no-s3-storage +# This test is too slow with S3 storage and debug modes. CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From 4cef0342979ced426f4823016ebc65f42067aab7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 26 Jun 2023 05:27:45 +0200 Subject: [PATCH 0799/1997] Fix 02481_parquet_list_monotonically_increasing_offsets.sh --- .../02481_parquet_list_monotonically_increasing_offsets.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02481_parquet_list_monotonically_increasing_offsets.sh b/tests/queries/0_stateless/02481_parquet_list_monotonically_increasing_offsets.sh index 47245eeb940..55e6ac2f758 100755 --- a/tests/queries/0_stateless/02481_parquet_list_monotonically_increasing_offsets.sh +++ b/tests/queries/0_stateless/02481_parquet_list_monotonically_increasing_offsets.sh @@ -1,5 +1,6 @@ #!/usr/bin/env bash -# Tags: no-ubsan, no-fasttest +# Tags: no-ubsan, no-fasttest, no-tsan +# It is too slow under TSan CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -13,4 +14,4 @@ ${CLICKHOUSE_CLIENT} --query="CREATE TABLE parquet_load (list Array(Int64), json cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO parquet_load FORMAT Parquet" ${CLICKHOUSE_CLIENT} --query="SELECT * FROM parquet_load" | md5sum ${CLICKHOUSE_CLIENT} --query="SELECT count() FROM parquet_load" -${CLICKHOUSE_CLIENT} --query="drop table parquet_load" \ No newline at end of file +${CLICKHOUSE_CLIENT} --query="drop table parquet_load" From e3d999b023cad921dd740354125a8cbac9c30da5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 26 Jun 2023 05:43:27 +0200 Subject: [PATCH 0800/1997] Fix test 02497_trace_events_stress_long --- .../02497_trace_events_stress_long.sh | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02497_trace_events_stress_long.sh b/tests/queries/0_stateless/02497_trace_events_stress_long.sh index 7dc72807c5a..f1dbb9c0399 100755 --- a/tests/queries/0_stateless/02497_trace_events_stress_long.sh +++ b/tests/queries/0_stateless/02497_trace_events_stress_long.sh @@ -44,4 +44,18 @@ timeout $TIMEOUT bash -c thread2 >/dev/null & wait $CLICKHOUSE_CLIENT -q "KILL QUERY WHERE query_id LIKE '02497_$CLICKHOUSE_DATABASE%' SYNC" >/dev/null -$CLICKHOUSE_CLIENT -q "SELECT count() FROM system.processes WHERE query_id LIKE '02497_$CLICKHOUSE_DATABASE%'" + +# After this moment, the server can still run another query. 
+# For example, the 'timeout' command killed all threads of thread1, +# and the 'timeout' itself has finished, and we have successfully 'wait'-ed for it, +# but just before that, one of the threads successfully sent a query to the server, +# but the server didn't start to run this query yet, +# and even when the KILL QUERY was run, the query from the thread didn't start, +# but only started after the KILL QUERY has been already processed. + +# That's why we have to run the next command in a loop. + +for _ in {1..10} +do + $CLICKHOUSE_CLIENT -q "SELECT count() FROM system.processes WHERE query_id LIKE '02497_$CLICKHOUSE_DATABASE%'" | rg '^0$' && break +done From 21c9feeeb3af922fb621688d8bfc7459d9a50f1a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 26 Jun 2023 05:46:10 +0200 Subject: [PATCH 0801/1997] Fix test --- .../02497_trace_events_stress_long.sh | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/tests/queries/0_stateless/02497_trace_events_stress_long.sh b/tests/queries/0_stateless/02497_trace_events_stress_long.sh index f1dbb9c0399..3ec729079b8 100755 --- a/tests/queries/0_stateless/02497_trace_events_stress_long.sh +++ b/tests/queries/0_stateless/02497_trace_events_stress_long.sh @@ -43,19 +43,21 @@ timeout $TIMEOUT bash -c thread2 >/dev/null & wait -$CLICKHOUSE_CLIENT -q "KILL QUERY WHERE query_id LIKE '02497_$CLICKHOUSE_DATABASE%' SYNC" >/dev/null - -# After this moment, the server can still run another query. -# For example, the 'timeout' command killed all threads of thread1, -# and the 'timeout' itself has finished, and we have successfully 'wait'-ed for it, -# but just before that, one of the threads successfully sent a query to the server, -# but the server didn't start to run this query yet, -# and even when the KILL QUERY was run, the query from the thread didn't start, -# but only started after the KILL QUERY has been already processed. - -# That's why we have to run the next command in a loop. - for _ in {1..10} do + $CLICKHOUSE_CLIENT -q "KILL QUERY WHERE query_id LIKE '02497_$CLICKHOUSE_DATABASE%' SYNC" >/dev/null + + # After this moment, the server can still run another query. + # For example, the 'timeout' command killed all threads of thread1, + # and the 'timeout' itself has finished, and we have successfully 'wait'-ed for it, + # but just before that, one of the threads successfully sent a query to the server, + # but the server didn't start to run this query yet, + # and even when the KILL QUERY was run, the query from the thread didn't start, + # but only started after the KILL QUERY has been already processed. + + # That's why we have to run this in a loop. + $CLICKHOUSE_CLIENT -q "SELECT count() FROM system.processes WHERE query_id LIKE '02497_$CLICKHOUSE_DATABASE%'" | rg '^0$' && break + + sleep 1 done From 8e6f6655853cfd1106f96c75fddb4cd57f39f8a0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 26 Jun 2023 05:57:38 +0200 Subject: [PATCH 0802/1997] Fix build --- src/Functions/IFunction.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Functions/IFunction.h b/src/Functions/IFunction.h index 1e4f8bf1102..c5b9a78015d 100644 --- a/src/Functions/IFunction.h +++ b/src/Functions/IFunction.h @@ -11,6 +11,11 @@ #include +#if USE_EMBEDDED_COMPILER +# include +#endif + + /// This file contains user interface for functions. 
namespace llvm From c85ade9c27ae56584c924b4b18541bc8615d816e Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Mon, 26 Jun 2023 07:44:19 +0000 Subject: [PATCH 0803/1997] Add const multiplier --- .../functions/date-time-functions.md | 1 - src/Core/DecimalFunctions.h | 2 +- src/Functions/DateTimeTransforms.h | 16 ++++++++-------- src/Functions/dateDiff.cpp | 9 ++++----- 4 files changed, 13 insertions(+), 15 deletions(-) diff --git a/docs/zh/sql-reference/functions/date-time-functions.md b/docs/zh/sql-reference/functions/date-time-functions.md index 270fa44a421..e4b70322477 100644 --- a/docs/zh/sql-reference/functions/date-time-functions.md +++ b/docs/zh/sql-reference/functions/date-time-functions.md @@ -625,7 +625,6 @@ SELECT date_add(YEAR, 3, toDate('2018-01-01')); │ 2021-01-01 │ └───────────────────────────────────────────────┘ ``` -## age {#age} ## date_diff {#date_diff} diff --git a/src/Core/DecimalFunctions.h b/src/Core/DecimalFunctions.h index defc21a5f43..17d95650730 100644 --- a/src/Core/DecimalFunctions.h +++ b/src/Core/DecimalFunctions.h @@ -33,7 +33,7 @@ template <> inline constexpr size_t max_precision = 38; template <> inline constexpr size_t max_precision = 76; template -constexpr inline auto scaleMultiplier(UInt32 scale) +inline auto scaleMultiplier(UInt32 scale) { if constexpr (std::is_same_v || std::is_same_v) return common::exp10_i32(scale); diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index afff8d6523d..c967d74da0c 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -19,8 +19,8 @@ namespace DB { -static constexpr auto microsecond_scale = 6; -static constexpr auto millisecond_scale = 3; +static constexpr auto microsecond_multiplier = 1000000; +static constexpr auto millisecond_multiplier = 1000; namespace ErrorCodes { @@ -1387,6 +1387,7 @@ struct ToRelativeSubsecondNumImpl static inline Int64 execute(const DateTime64 & t, DateTime64::NativeType scale, const DateLUTImpl &) { + static_assert(scale_multiplier == 1000 || scale_multiplier == 1000000); if (scale == scale_multiplier) return t.value; if (scale > scale_multiplier) @@ -1521,7 +1522,6 @@ struct ToDateTimeComponentsImpl static inline DateTimeComponentsWithFractionalPart execute(const DateTime64 & t, DateTime64::NativeType scale_multiplier, const DateLUTImpl & time_zone) { auto components = DecimalUtils::splitWithScaleMultiplier(t, scale_multiplier); - constexpr auto multiplier = DecimalUtils::scaleMultiplier(microsecond_scale); if (t.value < 0 && components.fractional) { @@ -1529,12 +1529,12 @@ struct ToDateTimeComponentsImpl --components.whole; } Int64 fractional = components.fractional; - if (scale_multiplier > multiplier) - fractional = fractional / (scale_multiplier / multiplier); - else if (scale_multiplier < multiplier) - fractional = fractional * (multiplier / scale_multiplier); + if (scale_multiplier > microsecond_multiplier) + fractional = fractional / (scale_multiplier / microsecond_multiplier); + else if (scale_multiplier < microsecond_multiplier) + fractional = fractional * (microsecond_multiplier / scale_multiplier); - constexpr auto divider = DecimalUtils::scaleMultiplier(microsecond_scale - millisecond_scale); + constexpr Int64 divider = microsecond_multiplier/ millisecond_multiplier; UInt16 millisecond = static_cast(fractional / divider); UInt16 microsecond = static_cast(fractional % divider); return DateTimeComponentsWithFractionalPart{time_zone.toDateTimeComponents(components.whole), millisecond, microsecond}; diff 
--git a/src/Functions/dateDiff.cpp b/src/Functions/dateDiff.cpp index 79be3059b2a..253ed703bb9 100644 --- a/src/Functions/dateDiff.cpp +++ b/src/Functions/dateDiff.cpp @@ -177,9 +177,8 @@ public: DateTimeComponentsWithFractionalPart a_comp; DateTimeComponentsWithFractionalPart b_comp; Int64 adjust_value; - constexpr auto multiplier = DecimalUtils::scaleMultiplier(microsecond_scale); - auto x_microseconds = TransformDateTime64>(transform_x.getScaleMultiplier()).execute(x, timezone_x); - auto y_microseconds = TransformDateTime64>(transform_y.getScaleMultiplier()).execute(y, timezone_y); + auto x_microseconds = TransformDateTime64>(transform_x.getScaleMultiplier()).execute(x, timezone_x); + auto y_microseconds = TransformDateTime64>(transform_y.getScaleMultiplier()).execute(y, timezone_y); if (x_microseconds <= y_microseconds) { @@ -399,9 +398,9 @@ public: else if (unit == "second" || unit == "ss" || unit == "s") impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "millisecond" || unit == "ms") - impl.template dispatchForColumns(millisecond_scale)>>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "microsecond" || unit == "us" || unit == "u") - impl.template dispatchForColumns(microsecond_scale)>>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function {} does not support '{}' unit", getName(), unit); From 1d2600f7068268802090b24d88d2c959325e7361 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 26 Jun 2023 16:47:13 +0800 Subject: [PATCH 0804/1997] add example for test --- src/IO/examples/read_buffer_from_hdfs.cpp | 25 +++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 src/IO/examples/read_buffer_from_hdfs.cpp diff --git a/src/IO/examples/read_buffer_from_hdfs.cpp b/src/IO/examples/read_buffer_from_hdfs.cpp new file mode 100644 index 00000000000..07c56961db0 --- /dev/null +++ b/src/IO/examples/read_buffer_from_hdfs.cpp @@ -0,0 +1,25 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace DB; + +int main() +{ + setenv("LIBHDFS3_CONF", "/data1/clickhouse_official/conf/hdfs-site.bigocluster.xml", true); /// NOLINT + String hdfs_uri = "hdfs://bigocluster"; + String hdfs_file_path = "/data/hive/report_tb.db/bigolive_wj_pos_sdk_video_stats_event_allv1/day=2023-03-14/" + "part-00014-272de29e-098c-4007-987a-f6b7ae740402-c000"; + ConfigurationPtr config = Poco::AutoPtr(new Poco::Util::MapConfiguration()); + ReadSettings read_settings; + ReadBufferFromHDFS read_buffer(hdfs_uri, hdfs_file_path, *config, read_settings, 625150306UL, false); + + String download_path = "./download"; + WriteBufferFromFile write_buffer(download_path); + copyData(read_buffer, write_buffer); +} From d208b0de3d0168b298cdc0410dc0bdaa8c33532c Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 26 Jun 2023 16:50:18 +0800 Subject: [PATCH 0805/1997] add example for test --- src/IO/examples/CMakeLists.txt | 6 ++++++ src/IO/examples/read_buffer_from_hdfs.cpp | 10 +++++----- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/IO/examples/CMakeLists.txt b/src/IO/examples/CMakeLists.txt index b42aa1a4f96..12b85c483a1 100644 --- a/src/IO/examples/CMakeLists.txt +++ b/src/IO/examples/CMakeLists.txt @@ -73,3 +73,9 @@ target_link_libraries 
(snappy_read_buffer PRIVATE clickhouse_common_io) clickhouse_add_executable (hadoop_snappy_read_buffer hadoop_snappy_read_buffer.cpp) target_link_libraries (hadoop_snappy_read_buffer PRIVATE clickhouse_common_io) +if (TARGET ch_contrib::hdfs) + clickhouse_add_executable (read_buffer_from_hdfs read_buffer_from_hdfs.cpp) + target_link_libraries (read_buffer_from_hdfs PRIVATE dbms ch_contrib::hdfs) +endif () + + diff --git a/src/IO/examples/read_buffer_from_hdfs.cpp b/src/IO/examples/read_buffer_from_hdfs.cpp index 07c56961db0..da4e5298681 100644 --- a/src/IO/examples/read_buffer_from_hdfs.cpp +++ b/src/IO/examples/read_buffer_from_hdfs.cpp @@ -11,15 +11,15 @@ using namespace DB; int main() { - setenv("LIBHDFS3_CONF", "/data1/clickhouse_official/conf/hdfs-site.bigocluster.xml", true); /// NOLINT - String hdfs_uri = "hdfs://bigocluster"; - String hdfs_file_path = "/data/hive/report_tb.db/bigolive_wj_pos_sdk_video_stats_event_allv1/day=2023-03-14/" - "part-00014-272de29e-098c-4007-987a-f6b7ae740402-c000"; + setenv("LIBHDFS3_CONF", "/path/to/hdfs-site.xml", true); /// NOLINT + String hdfs_uri = "hdfs://cluster_name"; + String hdfs_file_path = "/path/to/hdfs/file"; ConfigurationPtr config = Poco::AutoPtr(new Poco::Util::MapConfiguration()); ReadSettings read_settings; - ReadBufferFromHDFS read_buffer(hdfs_uri, hdfs_file_path, *config, read_settings, 625150306UL, false); + ReadBufferFromHDFS read_buffer(hdfs_uri, hdfs_file_path, *config, read_settings, 2097152UL, false); String download_path = "./download"; WriteBufferFromFile write_buffer(download_path); copyData(read_buffer, write_buffer); + return 0; } From ae08fb20198a953a31ea5930e5890eded39b0642 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Mon, 26 Jun 2023 12:02:09 +0200 Subject: [PATCH 0806/1997] no finalize in d-tor WriteBufferFromOStream --- src/IO/WriteBufferFromOStream.cpp | 14 +------------- src/IO/WriteBufferFromOStream.h | 2 -- 2 files changed, 1 insertion(+), 15 deletions(-) diff --git a/src/IO/WriteBufferFromOStream.cpp b/src/IO/WriteBufferFromOStream.cpp index e0ec0b770e2..ffc3e62e9a6 100644 --- a/src/IO/WriteBufferFromOStream.cpp +++ b/src/IO/WriteBufferFromOStream.cpp @@ -19,14 +19,7 @@ void WriteBufferFromOStream::nextImpl() ostr->flush(); if (!ostr->good()) - { - /// FIXME do not call finalize in dtors (and remove iostreams) - bool avoid_throwing_exceptions = std::uncaught_exceptions(); - if (avoid_throwing_exceptions) - LOG_ERROR(&Poco::Logger::get("WriteBufferFromOStream"), "Cannot write to ostream at offset {}. 
Stack trace: {}", count(), StackTrace().toString()); - else - throw Exception(ErrorCodes::CANNOT_WRITE_TO_OSTREAM, "Cannot write to ostream at offset {}", count()); - } + throw Exception(ErrorCodes::CANNOT_WRITE_TO_OSTREAM, "Cannot write to ostream at offset {}", count()); } WriteBufferFromOStream::WriteBufferFromOStream( @@ -46,9 +39,4 @@ WriteBufferFromOStream::WriteBufferFromOStream( { } -WriteBufferFromOStream::~WriteBufferFromOStream() -{ - finalize(); -} - } diff --git a/src/IO/WriteBufferFromOStream.h b/src/IO/WriteBufferFromOStream.h index f8b45c2fa59..5a933739cb1 100644 --- a/src/IO/WriteBufferFromOStream.h +++ b/src/IO/WriteBufferFromOStream.h @@ -18,8 +18,6 @@ public: char * existing_memory = nullptr, size_t alignment = 0); - ~WriteBufferFromOStream() override; - protected: explicit WriteBufferFromOStream(size_t size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, size_t alignment = 0); From 1f60a6ed4e1040623f7482a64a2ae493996be3e7 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 26 Jun 2023 12:34:11 +0200 Subject: [PATCH 0807/1997] Fix --- src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp | 5 +---- src/Interpreters/Cache/FileSegment.cpp | 2 +- src/Interpreters/Cache/FileSegment.h | 2 -- 3 files changed, 2 insertions(+), 7 deletions(-) diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp index 76d54f9d27c..960d2a72410 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp @@ -510,9 +510,6 @@ bool CachedOnDiskReadBufferFromFile::completeFileSegmentAndGetNext() current_file_segment->use(); implementation_buffer = getImplementationBuffer(*current_file_segment); - if (read_type == ReadType::CACHED) - current_file_segment->incrementHitsCount(); - LOG_TEST( log, "New segment range: {}, old range: {}", current_file_segment->range().toString(), completed_range.toString()); @@ -857,7 +854,7 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep() implementation_buffer = getImplementationBuffer(file_segments->front()); if (read_type == ReadType::CACHED) - file_segments->front().incrementHitsCount(); + file_segments->front().use(); } chassert(!internal_buffer.empty()); diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index 95592fc7c12..a77f0726d74 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -898,7 +898,7 @@ void FileSegment::use() if (it) { auto cache_lock = cache->lockCache(); - it->use(cache_lock); + hits_count = it->use(cache_lock); } } diff --git a/src/Interpreters/Cache/FileSegment.h b/src/Interpreters/Cache/FileSegment.h index 681c0d719e4..2e6bbe5657e 100644 --- a/src/Interpreters/Cache/FileSegment.h +++ b/src/Interpreters/Cache/FileSegment.h @@ -180,8 +180,6 @@ public: size_t getRefCount() const { return ref_count; } - void incrementHitsCount() { ++hits_count; } - size_t getCurrentWriteOffset(bool sync) const; size_t getFirstNonDownloadedOffset(bool sync) const; From e92035072b7b3367da12089b28041893eb90e636 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 26 Jun 2023 13:44:09 +0300 Subject: [PATCH 0808/1997] Update MergeTreeTransaction.cpp --- src/Interpreters/MergeTreeTransaction.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Interpreters/MergeTreeTransaction.cpp b/src/Interpreters/MergeTreeTransaction.cpp index 1358e3ed3c2..6b8e09a64f5 100644 --- a/src/Interpreters/MergeTreeTransaction.cpp +++ 
b/src/Interpreters/MergeTreeTransaction.cpp @@ -326,6 +326,8 @@ void MergeTreeTransaction::afterFinalize() is_read_only = storages.empty(); /// Release shared pointers just in case + creating_parts.clear(); + removing_parts.clear(); storages.clear(); mutations.clear(); finalized = true; From eb649873b379a29fec5584205558f70641f16bba Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 26 Jun 2023 13:48:21 +0300 Subject: [PATCH 0809/1997] Unify merge predicate (#51344) * unify merge predicate * Update BaseDaemon.cpp --- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 234 +++++++++--------- .../MergeTree/ReplicatedMergeTreeQueue.h | 95 +++---- 2 files changed, 175 insertions(+), 154 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 03ded2ef260..3ba3048b812 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -1530,7 +1530,7 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry( Int64 ReplicatedMergeTreeQueue::getCurrentMutationVersion( - const String & partition_id, Int64 data_version, std::lock_guard & /* state_lock */) const + const String & partition_id, Int64 data_version) const { auto in_partition = mutations_by_partition.find(partition_id); if (in_partition == mutations_by_partition.end()) @@ -2108,24 +2108,19 @@ ReplicatedMergeTreeQueue::QueueLocks ReplicatedMergeTreeQueue::lockQueue() } LocalMergePredicate::LocalMergePredicate(ReplicatedMergeTreeQueue & queue_) - : queue(queue_) { + /// Use only information that can be quickly accessed locally without querying ZooKeeper + virtual_parts_ = &queue_.virtual_parts; + mutations_state_ = &queue_; + virtual_parts_mutex = &queue_.state_mutex; } -ReplicatedMergeTreeMergePredicate::ReplicatedMergeTreeMergePredicate( - ReplicatedMergeTreeQueue & queue_, zkutil::ZooKeeperPtr & zookeeper, std::optional && partition_ids_hint_) - : nested_pred(queue_) - , queue(queue_) - , partition_ids_hint(std::move(partition_ids_hint_)) - , prev_virtual_parts(queue.format_version) -{ - { - std::lock_guard lock(queue.state_mutex); - prev_virtual_parts = queue.virtual_parts; - } - /// Load current quorum status. - auto quorum_status_future = zookeeper->asyncTryGet(fs::path(queue.zookeeper_path) / "quorum" / "status"); +template +CommittingBlocks BaseMergePredicate::getCommittingBlocks( + zkutil::ZooKeeperPtr & zookeeper, const std::string & zookeeper_path, Poco::Logger * log_) +{ + CommittingBlocks committing_blocks; /// Load current inserts /// Hint avoids listing partitions that we don't really need. @@ -2133,14 +2128,14 @@ ReplicatedMergeTreeMergePredicate::ReplicatedMergeTreeMergePredicate( /// so without hint it can do a few thousands requests (if not using MultiRead). 
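Worth highlighting in the constructor above: BaseMergePredicate composes optional information sources, so LocalMergePredicate wires up only the cheap, locally available state (virtual parts, mutation state) while the replicated predicate additionally plugs in the ZooKeeper-derived state, and each check is simply skipped when its pointer stays null. A standalone sketch of the pattern with illustrative names, not the actual ClickHouse types:

#include <cstdint>
#include <iostream>
#include <set>
#include <string>

// Each information source is optional: a null pointer means "no data,
// skip that check" (the shape of the unified merge predicate above).
struct MergePredicateSketch
{
    const std::set<std::string> * pinned_parts = nullptr;   // e.g. fetched from ZooKeeper
    const std::set<int64_t> * committing_blocks = nullptr;  // block numbers of in-flight inserts

    bool canMerge(const std::string & part_name, int64_t left_max, int64_t right_min) const
    {
        if (pinned_parts && pinned_parts->contains(part_name))
            return false;  // the part is pinned, leave it alone
        if (committing_blocks)
        {
            // An insert may still commit a block number strictly inside the gap.
            auto it = committing_blocks->upper_bound(left_max);
            if (it != committing_blocks->end() && *it < right_min)
                return false;
        }
        return true;  // checks whose sources are not wired up are skipped
    }
};

int main()
{
    std::set<int64_t> blocks{4};
    MergePredicateSketch local;          // nothing wired: the "local" flavour
    MergePredicateSketch replicated;
    replicated.committing_blocks = &blocks;
    std::cout << local.canMerge("p", 3, 6) << ' '
              << replicated.canMerge("p", 3, 6) << '\n';  // prints "1 0"
    return 0;
}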
Strings partitions; if (!partition_ids_hint) - partitions = zookeeper->getChildren(fs::path(queue.zookeeper_path) / "block_numbers"); + partitions = zookeeper->getChildren(fs::path(zookeeper_path) / "block_numbers"); else std::copy(partition_ids_hint->begin(), partition_ids_hint->end(), std::back_inserter(partitions)); std::vector paths; paths.reserve(partitions.size()); for (const String & partition : partitions) - paths.push_back(fs::path(queue.zookeeper_path) / "block_numbers" / partition); + paths.push_back(fs::path(zookeeper_path) / "block_numbers" / partition); auto locks_children = zookeeper->tryGetChildren(paths); @@ -2153,22 +2148,40 @@ ReplicatedMergeTreeMergePredicate::ReplicatedMergeTreeMergePredicate( if (response.error != Coordination::Error::ZOK) { /// Probably a wrong hint was provided (it's ok if a user passed non-existing partition to OPTIMIZE) - LOG_WARNING(queue.log, "Partition id '{}' was provided as a hint, but there's not such partition in ZooKeeper", partitions[i]); + LOG_WARNING(log_, "Partition id '{}' was provided as a hint, but there's not such partition in ZooKeeper", partitions[i]); partition_ids_hint->erase(partitions[i]); continue; } - Strings partition_block_numbers = response.names; + Strings partition_block_numbers = locks_children[i].names; for (const String & entry : partition_block_numbers) { if (!startsWith(entry, "block-")) continue; + Int64 block_number = parse(entry.substr(strlen("block-"))); - String zk_path = fs::path(queue.zookeeper_path) / "block_numbers" / partitions[i] / entry; committing_blocks[partitions[i]].insert(block_number); } } + return committing_blocks; +} + +ReplicatedMergeTreeMergePredicate::ReplicatedMergeTreeMergePredicate( + ReplicatedMergeTreeQueue & queue_, zkutil::ZooKeeperPtr & zookeeper, std::optional && partition_ids_hint_) + : BaseMergePredicate(std::move(partition_ids_hint_)) + , queue(queue_) +{ + { + std::lock_guard lock(queue.state_mutex); + prev_virtual_parts = std::make_shared(queue.virtual_parts); + } + + /// Load current quorum status. + auto quorum_status_future = zookeeper->asyncTryGet(fs::path(queue.zookeeper_path) / "quorum" / "status"); + + committing_blocks = std::make_shared(getCommittingBlocks(zookeeper, queue.zookeeper_path, queue.log)); + merges_version = queue_.pullLogsToQueue(zookeeper, {}, ReplicatedMergeTreeQueue::MERGE_PREDICATE); { @@ -2179,7 +2192,8 @@ ReplicatedMergeTreeMergePredicate::ReplicatedMergeTreeMergePredicate( /// /// If pinned parts are fetched after logs are pulled then we can safely say that it contains all locks up to `merges_version`. 
String s = zookeeper->get(queue.zookeeper_path + "/pinned_part_uuids"); - pinned_part_uuids.fromString(s); + pinned_part_uuids = std::make_shared(); + pinned_part_uuids->fromString(s); } Coordination::GetResponse quorum_status_response = quorum_status_future.get(); @@ -2187,13 +2201,21 @@ ReplicatedMergeTreeMergePredicate::ReplicatedMergeTreeMergePredicate( { ReplicatedMergeTreeQuorumEntry quorum_status; quorum_status.fromString(quorum_status_response.data); - inprogress_quorum_part = quorum_status.part_name; + inprogress_quorum_part = std::make_shared(quorum_status.part_name); } - else - inprogress_quorum_part.clear(); + + /// Use all information about parts + prev_virtual_parts_ = prev_virtual_parts.get(); + virtual_parts_ = &queue.virtual_parts; + committing_blocks_ = committing_blocks.get(); + pinned_part_uuids_ = pinned_part_uuids.get(); + inprogress_quorum_part_ = inprogress_quorum_part.get(); + mutations_state_ = &queue; + virtual_parts_mutex = &queue.state_mutex; } -bool LocalMergePredicate::operator()( +template +bool BaseMergePredicate::operator()( const MergeTreeData::DataPartPtr & left, const MergeTreeData::DataPartPtr & right, const MergeTreeTransaction *, @@ -2205,20 +2227,8 @@ bool LocalMergePredicate::operator()( return canMergeSinglePart(right, out_reason); } -bool ReplicatedMergeTreeMergePredicate::operator()( - const MergeTreeData::DataPartPtr & left, - const MergeTreeData::DataPartPtr & right, - const MergeTreeTransaction *, - String * out_reason) const -{ - if (left) - return canMergeTwoParts(left, right, out_reason); - else - return canMergeSinglePart(right, out_reason); -} - - -bool ReplicatedMergeTreeMergePredicate::canMergeTwoParts( +template +bool BaseMergePredicate::canMergeTwoParts( const MergeTreeData::DataPartPtr & left, const MergeTreeData::DataPartPtr & right, String * out_reason) const @@ -2263,21 +2273,21 @@ bool ReplicatedMergeTreeMergePredicate::canMergeTwoParts( for (const MergeTreeData::DataPartPtr & part : {left, right}) { - if (pinned_part_uuids.part_uuids.contains(part->uuid)) + if (pinned_part_uuids_ && pinned_part_uuids_->part_uuids.contains(part->uuid)) { if (out_reason) *out_reason = "Part " + part->name + " has uuid " + toString(part->uuid) + " which is currently pinned"; return false; } - if (part->name == inprogress_quorum_part) + if (inprogress_quorum_part_ && part->name == *inprogress_quorum_part_) { if (out_reason) *out_reason = "Quorum insert for part " + part->name + " is currently in progress"; return false; } - if (prev_virtual_parts.getContainingPart(part->info).empty()) + if (prev_virtual_parts_ && prev_virtual_parts_->getContainingPart(part->info).empty()) { if (out_reason) *out_reason = "Entry for part " + part->name + " hasn't been read from the replication log yet"; @@ -2290,7 +2300,7 @@ bool ReplicatedMergeTreeMergePredicate::canMergeTwoParts( if (left_max_block > right_min_block) std::swap(left_max_block, right_min_block); - if (left_max_block + 1 < right_min_block) + if (committing_blocks_ && left_max_block + 1 < right_min_block) { if (partition_ids_hint && !partition_ids_hint->contains(left->info.partition_id)) { @@ -2299,8 +2309,8 @@ bool ReplicatedMergeTreeMergePredicate::canMergeTwoParts( return false; } - auto committing_blocks_in_partition = committing_blocks.find(left->info.partition_id); - if (committing_blocks_in_partition != committing_blocks.end()) + auto committing_blocks_in_partition = committing_blocks_->find(left->info.partition_id); + if (committing_blocks_in_partition != committing_blocks_->end()) { const 
std::set & block_numbers = committing_blocks_in_partition->second; @@ -2316,109 +2326,107 @@ bool ReplicatedMergeTreeMergePredicate::canMergeTwoParts( } } - return nested_pred.canMergeTwoParts(left, right, out_reason); -} + std::unique_lock lock; + if (virtual_parts_mutex) + lock = std::unique_lock(*virtual_parts_mutex); -bool LocalMergePredicate::canMergeTwoParts( - const MergeTreeData::DataPartPtr & left, - const MergeTreeData::DataPartPtr & right, - String * out_reason) const -{ - Int64 left_max_block = left->info.max_block; - Int64 right_min_block = right->info.min_block; - - std::lock_guard lock(queue.state_mutex); - - for (const MergeTreeData::DataPartPtr & part : {left, right}) + if (virtual_parts_) { - /// We look for containing parts in queue.virtual_parts (and not in prev_virtual_parts) because queue.virtual_parts is newer - /// and it is guaranteed that it will contain all merges assigned before this object is constructed. - String containing_part = queue.virtual_parts.getContainingPart(part->info); - if (containing_part != part->name) + for (const MergeTreeData::DataPartPtr & part : {left, right}) { - if (out_reason) - *out_reason = "Part " + part->name + " has already been assigned a merge into " + containing_part; - return false; + /// We look for containing parts in queue.virtual_parts (and not in prev_virtual_parts) because queue.virtual_parts is newer + /// and it is guaranteed that it will contain all merges assigned before this object is constructed. + String containing_part = virtual_parts_->getContainingPart(part->info); + if (containing_part != part->name) + { + if (out_reason) + *out_reason = "Part " + part->name + " has already been assigned a merge into " + containing_part; + return false; + } + } + + if (left_max_block + 1 < right_min_block) + { + /// Fake part which will appear as merge result + MergeTreePartInfo gap_part_info( + left->info.partition_id, left_max_block + 1, right_min_block - 1, + MergeTreePartInfo::MAX_LEVEL, MergeTreePartInfo::MAX_BLOCK_NUMBER); + + /// We don't select parts if any smaller part covered by our merge must exist after + /// processing replication log up to log_pointer. + Strings covered = virtual_parts_->getPartsCoveredBy(gap_part_info); + if (!covered.empty()) + { + if (out_reason) + *out_reason = "There are " + toString(covered.size()) + " parts (from " + covered.front() + + " to " + covered.back() + ") that are still not present or being processed by " + + " other background process on this replica between " + left->name + " and " + right->name; + return false; + } } } - if (left_max_block + 1 < right_min_block) + if (mutations_state_) { - /// Fake part which will appear as merge result - MergeTreePartInfo gap_part_info( - left->info.partition_id, left_max_block + 1, right_min_block - 1, - MergeTreePartInfo::MAX_LEVEL, MergeTreePartInfo::MAX_BLOCK_NUMBER); + Int64 left_mutation_ver = mutations_state_->getCurrentMutationVersion( + left->info.partition_id, left->info.getDataVersion()); - /// We don't select parts if any smaller part covered by our merge must exist after - /// processing replication log up to log_pointer. 
- Strings covered = queue.virtual_parts.getPartsCoveredBy(gap_part_info); - if (!covered.empty()) + Int64 right_mutation_ver = mutations_state_->getCurrentMutationVersion( + left->info.partition_id, right->info.getDataVersion()); + + if (left_mutation_ver != right_mutation_ver) { if (out_reason) - *out_reason = "There are " + toString(covered.size()) + " parts (from " + covered.front() - + " to " + covered.back() + ") that are still not present or being processed by " - + " other background process on this replica between " + left->name + " and " + right->name; + *out_reason = "Current mutation versions of parts " + left->name + " and " + right->name + " differ: " + + toString(left_mutation_ver) + " and " + toString(right_mutation_ver) + " respectively"; return false; } } - Int64 left_mutation_ver = queue.getCurrentMutationVersion( - left->info.partition_id, left->info.getDataVersion(), lock); - - Int64 right_mutation_ver = queue.getCurrentMutationVersion( - left->info.partition_id, right->info.getDataVersion(), lock); - - if (left_mutation_ver != right_mutation_ver) - { - if (out_reason) - *out_reason = "Current mutation versions of parts " + left->name + " and " + right->name + " differ: " - + toString(left_mutation_ver) + " and " + toString(right_mutation_ver) + " respectively"; - return false; - } - return MergeTreeData::partsContainSameProjections(left, right); } -bool ReplicatedMergeTreeMergePredicate::canMergeSinglePart( +template +bool BaseMergePredicate::canMergeSinglePart( const MergeTreeData::DataPartPtr & part, String * out_reason) const { - if (pinned_part_uuids.part_uuids.contains(part->uuid)) + if (pinned_part_uuids_ && pinned_part_uuids_->part_uuids.contains(part->uuid)) { if (out_reason) *out_reason = fmt::format("Part {} has uuid {} which is currently pinned", part->name, part->uuid); return false; } - if (part->name == inprogress_quorum_part) + if (inprogress_quorum_part_ && part->name == *inprogress_quorum_part_) { if (out_reason) *out_reason = fmt::format("Quorum insert for part {} is currently in progress", part->name); return false; } - if (prev_virtual_parts.getContainingPart(part->info).empty()) + if (prev_virtual_parts_ && prev_virtual_parts_->getContainingPart(part->info).empty()) { if (out_reason) *out_reason = fmt::format("Entry for part {} hasn't been read from the replication log yet", part->name); return false; } - return nested_pred.canMergeSinglePart(part, out_reason); -} + std::unique_lock lock; + if (virtual_parts_mutex) + lock = std::unique_lock(*virtual_parts_mutex); -bool LocalMergePredicate::canMergeSinglePart(const MergeTreeData::DataPartPtr & part, String * out_reason) const -{ - std::lock_guard lock(queue.state_mutex); - - /// We look for containing parts in queue.virtual_parts (and not in prev_virtual_parts) because queue.virtual_parts is newer - /// and it is guaranteed that it will contain all merges assigned before this object is constructed. - String containing_part = queue.virtual_parts.getContainingPart(part->info); - if (containing_part != part->name) + if (virtual_parts_) { - if (out_reason) - *out_reason = fmt::format("Part {} has already been assigned a merge into {}", part->name, containing_part); - return false; + /// We look for containing parts in queue.virtual_parts (and not in prev_virtual_parts) because queue.virtual_parts is newer + /// and it is guaranteed that it will contain all merges assigned before this object is constructed. 
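+            /// Illustrative example (not from the patch, using the conventional part naming
+            /// <partition>_<min_block>_<max_block>_<level>): if a merge producing all_1_2_1 is
+            /// already queued, getContainingPart() for all_1_1_0 returns "all_1_2_1" != "all_1_1_0",
+            /// so the check below refuses to assign all_1_1_0 to a second merge.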
+ String containing_part = virtual_parts_->getContainingPart(part->info); + if (containing_part != part->name) + { + if (out_reason) + *out_reason = fmt::format("Part {} has already been assigned a merge into {}", part->name, containing_part); + return false; + } } return true; @@ -2459,7 +2467,7 @@ std::optional> ReplicatedMergeTreeMergePredicate::getDesir /// We cannot mutate part if it's being inserted with quorum and it's not /// already reached. - if (part->name == inprogress_quorum_part) + if (inprogress_quorum_part && part->name == *inprogress_quorum_part) return {}; std::lock_guard lock(queue.state_mutex); @@ -2474,7 +2482,7 @@ std::optional> ReplicatedMergeTreeMergePredicate::getDesir UInt64 mutations_limit = queue.storage.getSettings()->replicated_max_mutations_in_one_entry; UInt64 mutations_count = 0; - Int64 current_version = queue.getCurrentMutationVersion(part->info.partition_id, part->info.getDataVersion(), lock); + Int64 current_version = queue.getCurrentMutationVersion(part->info.partition_id, part->info.getDataVersion()); Int64 max_version = in_partition->second.begin()->first; int alter_version = -1; @@ -2548,8 +2556,8 @@ bool ReplicatedMergeTreeMergePredicate::isMutationFinished(const std::string & z if (partition_ids_hint && !partition_ids_hint->contains(partition_id)) throw Exception(ErrorCodes::LOGICAL_ERROR, "Partition id {} was not provided as hint, it's a bug", partition_id); - auto partition_it = committing_blocks.find(partition_id); - if (partition_it != committing_blocks.end()) + auto partition_it = committing_blocks->find(partition_id); + if (partition_it != committing_blocks->end()) { size_t blocks_count = std::distance( partition_it->second.begin(), partition_it->second.lower_bound(block_num)); @@ -2671,4 +2679,6 @@ void ReplicatedMergeTreeQueue::removeCurrentPartsFromMutations() removeCoveredPartsFromMutations(part_name, /*remove_part = */ false, /*remove_covered_parts = */ true); } +template class BaseMergePredicate; + } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index 79572e13963..f205526a660 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -34,6 +34,7 @@ private: friend class CurrentlyExecuting; friend class LocalMergePredicate; friend class ReplicatedMergeTreeMergePredicate; + template friend class BaseMergePredicate; friend class MergeFromLogEntryTask; friend class ReplicatedMergeMutateTaskBase; @@ -212,7 +213,7 @@ private: /// with getDataVersion() == data_version. (Either this mutation was already applied or the part /// was created after the mutation). /// If there is no such mutation or it has already been executed and deleted, return 0. - Int64 getCurrentMutationVersion(const String & partition_id, Int64 data_version, std::lock_guard & /* state_lock */) const; + Int64 getCurrentMutationVersion(const String & partition_id, Int64 data_version) const; /** Check that part isn't in currently generating parts and isn't covered by them. * Should be called under state_mutex. @@ -491,33 +492,14 @@ public: void createLogEntriesToFetchBrokenParts(); }; -/// Lightweight version of ReplicatedMergeTreeMergePredicate that do not make any ZooKeeper requests, -/// but may return false-positive results. Checks only a subset of required conditions. 
-class LocalMergePredicate +using CommittingBlocks = std::unordered_map>; + +template +class BaseMergePredicate { public: - LocalMergePredicate(ReplicatedMergeTreeQueue & queue_); - - bool operator()(const MergeTreeData::DataPartPtr & left, - const MergeTreeData::DataPartPtr & right, - const MergeTreeTransaction * txn, - String * out_reason = nullptr) const; - - bool canMergeTwoParts(const MergeTreeData::DataPartPtr & left, - const MergeTreeData::DataPartPtr & right, - String * out_reason = nullptr) const; - - bool canMergeSinglePart(const MergeTreeData::DataPartPtr & part, String * out_reason) const; - -private: - const ReplicatedMergeTreeQueue & queue; -}; - -class ReplicatedMergeTreeMergePredicate -{ -public: - ReplicatedMergeTreeMergePredicate(ReplicatedMergeTreeQueue & queue_, zkutil::ZooKeeperPtr & zookeeper, - std::optional && partition_ids_hint_); + BaseMergePredicate() = default; + BaseMergePredicate(std::optional && partition_ids_hint_) : partition_ids_hint(std::move(partition_ids_hint_)) {} /// Depending on the existence of left part checks a merge predicate for two parts or for single part. bool operator()(const MergeTreeData::DataPartPtr & left, @@ -537,6 +519,46 @@ public: /// This predicate is checked for the first part of each range. bool canMergeSinglePart(const MergeTreeData::DataPartPtr & part, String * out_reason) const; + CommittingBlocks getCommittingBlocks(zkutil::ZooKeeperPtr & zookeeper, const std::string & zookeeper_path, Poco::Logger * log_); + +protected: + /// A list of partitions that can be used in the merge predicate + std::optional partition_ids_hint; + + /// A snapshot of active parts that would appear if the replica executes all log entries in its queue. + const VirtualPartsT * prev_virtual_parts_ = nullptr; + const VirtualPartsT * virtual_parts_ = nullptr; + + /// partition ID -> block numbers of the inserts and mutations that are about to commit + /// (loaded at some later time than prev_virtual_parts). + const CommittingBlocks * committing_blocks_ = nullptr; + + /// List of UUIDs for parts that have their identity "pinned". + const PinnedPartUUIDs * pinned_part_uuids_ = nullptr; + + /// Quorum state taken at some later time than prev_virtual_parts. + const String * inprogress_quorum_part_ = nullptr; + + /// An object that provides current mutation version for a part + const MutationsStateT * mutations_state_ = nullptr; + + std::mutex * virtual_parts_mutex = nullptr; +}; + +/// Lightweight version of ReplicatedMergeTreeMergePredicate that do not make any ZooKeeper requests, +/// but may return false-positive results. Checks only a subset of required conditions. +class LocalMergePredicate : public BaseMergePredicate +{ +public: + LocalMergePredicate(ReplicatedMergeTreeQueue & queue_); +}; + +class ReplicatedMergeTreeMergePredicate : public BaseMergePredicate +{ +public: + ReplicatedMergeTreeMergePredicate(ReplicatedMergeTreeQueue & queue_, zkutil::ZooKeeperPtr & zookeeper, + std::optional && partition_ids_hint_); + /// Returns true if part is needed for some REPLACE_RANGE entry. /// We should not drop part in this case, because replication queue may stuck without that part. 
bool partParticipatesInReplaceRange(const MergeTreeData::DataPartPtr & part, String * out_reason) const; @@ -561,28 +583,17 @@ public: String getCoveringVirtualPart(const String & part_name) const; private: - LocalMergePredicate nested_pred; - const ReplicatedMergeTreeQueue & queue; - std::optional partition_ids_hint; - - /// A snapshot of active parts that would appear if the replica executes all log entries in its queue. - ActiveDataPartSet prev_virtual_parts; - /// partition ID -> block numbers of the inserts and mutations that are about to commit - /// (loaded at some later time than prev_virtual_parts). - std::unordered_map> committing_blocks; - - /// List of UUIDs for parts that have their identity "pinned". - PinnedPartUUIDs pinned_part_uuids; - - /// Quorum state taken at some later time than prev_virtual_parts. - String inprogress_quorum_part; + /// We copy a merge predicate when we cast it to AllowedMergingPredicate, let's keep the pointers valid + std::shared_ptr prev_virtual_parts; + std::shared_ptr committing_blocks; + std::shared_ptr pinned_part_uuids; + std::shared_ptr inprogress_quorum_part; int32_t merges_version = -1; }; - /** Convert a number to a string in the format of the suffixes of auto-incremental nodes in ZooKeeper. * Negative numbers are also supported - for them the name of the node looks somewhat silly * and does not match any auto-incremented node in ZK. From c64f929b9cb57ca171dbe28e82b60a956004daf8 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Mon, 26 Jun 2023 12:10:58 +0200 Subject: [PATCH 0810/1997] Fix broken labeling for `manual approve` --- tests/ci/workflow_approve_rerun_lambda/app.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/ci/workflow_approve_rerun_lambda/app.py b/tests/ci/workflow_approve_rerun_lambda/app.py index 3db62430d85..1a3874ad01a 100644 --- a/tests/ci/workflow_approve_rerun_lambda/app.py +++ b/tests/ci/workflow_approve_rerun_lambda/app.py @@ -224,8 +224,8 @@ def approve_run(workflow_description: WorkflowDescription, token: str) -> None: def label_manual_approve(pull_request, token): - url = f"{pull_request['url']}/labels" - data = {"labels": "manual approve"} + url = f"{pull_request['issue_url']}/labels" + data = {"labels": ["manual approve"]} _exec_post_with_retry(url, token, data) From 065b87aa30d6e6a3f7e64ef3fdfabdc79039a91d Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Mon, 26 Jun 2023 12:21:20 +0200
Subject: [PATCH 0811/1997] Improve logging a little bit, avoid double labeling

---
 tests/ci/workflow_approve_rerun_lambda/app.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/tests/ci/workflow_approve_rerun_lambda/app.py b/tests/ci/workflow_approve_rerun_lambda/app.py
index 1a3874ad01a..5e2331ece3c 100644
--- a/tests/ci/workflow_approve_rerun_lambda/app.py
+++ b/tests/ci/workflow_approve_rerun_lambda/app.py
@@ -376,11 +376,10 @@ def main(event):
 changed_files = get_changed_files_for_pull_request(pull_request, token)
 print(f"Totally have {len(changed_files)} changed files in PR:", changed_files)
 if check_suspicious_changed_files(changed_files):
- print(
- f"Pull Request {pull_request['number']} has suspicious changes, "
- "label it for manuall approve"
- )
- label_manual_approve(pull_request, token)
+ print(f"Pull Request {pull_request['number']} has suspicious changes")
+ if "manual approve" not in labels:
+ print("Label the PR as needed for manual approve")
+ label_manual_approve(pull_request, token)
 else:
 print(f"Pull Request {pull_request['number']} has no suspicious changes")
 approve_run(workflow_description, token)

From 9af56624f523b63a078815fc5e31bc16f49821c9 Mon Sep 17 00:00:00 2001
From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com>
Date: Mon, 26 Jun 2023 13:30:51 +0200
Subject: [PATCH 0812/1997] Fix flaky test test_skip_empty_files

---
 tests/integration/test_storage_s3/test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py
index a7293337a9e..6c251d2f84e 100644
--- a/tests/integration/test_storage_s3/test.py
+++ b/tests/integration/test_storage_s3/test.py
@@ -1776,7 +1776,7 @@ def test_skip_empty_files(started_cluster):
 assert int(res) == 0

 res = instance.query(
- f"select * from url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/skip_empty_files{{11|1|22}}.parquet') settings engine_url_skip_empty_files=1"
+ f"select * from url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/skip_empty_files{{11|1|22}}.parquet', auto, 'number UInt64') settings engine_url_skip_empty_files=1"
 )
 assert len(res.strip()) == 0

From 7d4e7e320d44cc067c9142596209c9139454d1af Mon Sep 17 00:00:00 2001
From: Dmitry Kardymon
Date: Mon, 26 Jun 2023 12:03:25 +0000
Subject: [PATCH 0813/1997] Implement support for function range of Nullable argument

---
 .../gtest_DataType_deserializeAsText.cpp | 1 -
 src/Functions/array/arrayDotProduct.cpp | 1 -
 src/Functions/array/arrayNorm.cpp | 1 -
 src/Functions/array/range.cpp | 37 +++++++++++++++++--
 src/Functions/concat.cpp | 1 -
 src/Functions/ifNotFinite.cpp | 1 -
 .../02790_range_nullable.reference | 3 ++
 .../0_stateless/02790_range_nullable.sql | 7 ++++
 8 files changed, 43 insertions(+), 9 deletions(-)
 create mode 100644 tests/queries/0_stateless/02790_range_nullable.reference
 create mode 100644 tests/queries/0_stateless/02790_range_nullable.sql

diff --git a/src/DataTypes/tests/gtest_DataType_deserializeAsText.cpp b/src/DataTypes/tests/gtest_DataType_deserializeAsText.cpp
index 2c0feab6d86..b755bd109d0 100644
--- a/src/DataTypes/tests/gtest_DataType_deserializeAsText.cpp
+++ b/src/DataTypes/tests/gtest_DataType_deserializeAsText.cpp
@@ -2,7 +2,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
diff --git a/src/Functions/array/arrayDotProduct.cpp b/src/Functions/array/arrayDotProduct.cpp
index
d17c223cc2f..47e865785d4 100644 --- a/src/Functions/array/arrayDotProduct.cpp +++ b/src/Functions/array/arrayDotProduct.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include #include diff --git a/src/Functions/array/arrayNorm.cpp b/src/Functions/array/arrayNorm.cpp index e14133f931f..027a33d094c 100644 --- a/src/Functions/array/arrayNorm.cpp +++ b/src/Functions/array/arrayNorm.cpp @@ -5,7 +5,6 @@ #include #include #include -#include #include #include diff --git a/src/Functions/array/range.cpp b/src/Functions/array/range.cpp index f1f0fef8fd9..b638bc3c5b7 100644 --- a/src/Functions/array/range.cpp +++ b/src/Functions/array/range.cpp @@ -5,7 +5,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -21,6 +23,7 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int BAD_ARGUMENTS; } @@ -43,6 +46,7 @@ private: size_t getNumberOfArguments() const override { return 0; } bool isVariadic() const override { return true; } + bool useDefaultImplementationForNulls() const override { return false; } bool useDefaultImplementationForConstants() const override { return true; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } @@ -58,10 +62,12 @@ private: DataTypes arg_types; for (size_t i = 0, size = arguments.size(); i < size; ++i) { - if (i < 2 && WhichDataType(arguments[i]).isIPv4()) + DataTypePtr type_no_nullable = removeNullable(arguments[i]); + + if (i < 2 && WhichDataType(type_no_nullable).isIPv4()) arg_types.emplace_back(std::make_shared()); - else if (isInteger(arguments[i])) - arg_types.push_back(arguments[i]); + else if (isInteger(type_no_nullable)) + arg_types.push_back(type_no_nullable); else throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", arguments[i]->getName(), getName()); @@ -386,10 +392,32 @@ private: "for unsigned/signed integers up to 64 bit", getName()); } + auto throwIfNullValue = [&](const ColumnWithTypeAndName & col) + { + if (!col.type->isNullable()) + { + return; + } + const auto & nullable_col = assert_cast(*col.column); + const auto & null_map = nullable_col.getNullMapData(); + + if (!memoryIsZero(null_map.data(), 0, null_map.size())) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal (null) value column {} of argument of function {}", col.column->getName(), getName()); + } + }; + ColumnPtr res; if (arguments.size() == 1) { - const auto * col = arguments[0].column.get(); + throwIfNullValue(arguments[0]); + auto * col = arguments[0].column.get(); + if (arguments[0].type->isNullable()) + { + const auto * nullable = checkAndGetColumn(*arguments[0].column); + col= nullable->getNestedColumnPtr().get(); + } + if (!((res = executeInternal(col)) || (res = executeInternal(col)) || (res = executeInternal(col)) || (res = executeInternal(col)) || (res = executeInternal(col)) || (res = executeInternal(col)) || (res = executeInternal(col)) || (res = executeInternal(col)))) @@ -404,6 +432,7 @@ private: for (size_t i = 0; i < arguments.size(); ++i) { + throwIfNullValue(arguments[i]); if (i == 1) columns_holder[i] = castColumn(arguments[i], elem_type)->convertToFullColumnIfConst(); else diff --git a/src/Functions/concat.cpp b/src/Functions/concat.cpp index 8fefc2d5b8a..8288d872f18 100644 --- a/src/Functions/concat.cpp +++ b/src/Functions/concat.cpp @@ -1,6 +1,5 @@ #include #include -#include #include 
#include #include diff --git a/src/Functions/ifNotFinite.cpp b/src/Functions/ifNotFinite.cpp index 5ce5d0ede70..d7af10eec44 100644 --- a/src/Functions/ifNotFinite.cpp +++ b/src/Functions/ifNotFinite.cpp @@ -2,7 +2,6 @@ #include #include #include -#include #include diff --git a/tests/queries/0_stateless/02790_range_nullable.reference b/tests/queries/0_stateless/02790_range_nullable.reference new file mode 100644 index 00000000000..7a98702e98c --- /dev/null +++ b/tests/queries/0_stateless/02790_range_nullable.reference @@ -0,0 +1,3 @@ +[0] +[0,2,4,6,8] +[0,2,4,6,8] diff --git a/tests/queries/0_stateless/02790_range_nullable.sql b/tests/queries/0_stateless/02790_range_nullable.sql new file mode 100644 index 00000000000..16e16512fc5 --- /dev/null +++ b/tests/queries/0_stateless/02790_range_nullable.sql @@ -0,0 +1,7 @@ +SELECT range(toNullable(1)); +SELECT range(0::Nullable(UInt64), 10::Nullable(UInt64), 2::Nullable(UInt64)); +SELECT range(0::Nullable(Int64), 10::Nullable(Int64), 2::Nullable(Int64)); +SELECT range(null); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT range(Null::Nullable(UInt64), 10::Nullable(UInt64), 2::Nullable(UInt64)); -- { serverError BAD_ARGUMENTS } +SELECT range(0::Nullable(UInt64), Null::Nullable(UInt64), 2::Nullable(UInt64)); -- { serverError BAD_ARGUMENTS } +SELECT range(0::Nullable(UInt64), 10::Nullable(UInt64), Null::Nullable(UInt64)); -- { serverError BAD_ARGUMENTS } From 7d8d19d8003ebaaac910a6af802ee4874e1821f8 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 26 Jun 2023 14:27:13 +0200 Subject: [PATCH 0814/1997] Add test --- .../tests/gtest_lru_file_cache.cpp | 77 +++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/src/Interpreters/tests/gtest_lru_file_cache.cpp b/src/Interpreters/tests/gtest_lru_file_cache.cpp index 3cba1e48e1e..9ff9f92afe4 100644 --- a/src/Interpreters/tests/gtest_lru_file_cache.cpp +++ b/src/Interpreters/tests/gtest_lru_file_cache.cpp @@ -22,6 +22,8 @@ #include #include +#include +#include #include namespace fs = std::filesystem; @@ -862,3 +864,78 @@ TEST_F(FileCacheTest, temporaryData) ASSERT_LE(file_cache.getUsedCacheSize(), size_used_before_temporary_data); ASSERT_LE(file_cache.getFileSegmentsNum(), segments_used_before_temporary_data); } + +TEST_F(FileCacheTest, CachedReadBuffer) +{ + DB::ThreadStatus thread_status; + + /// To work with cache need query_id and query context. 
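+    /// Illustrative note (not part of the patch): the file cache attributes its bookkeeping to
+    /// the current query, which is why the test below creates a query context, assigns it a
+    /// query id and enters a CurrentThread::QueryScope before reading through the cache.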
+ std::string query_id = "query_id"; + + Poco::XML::DOMParser dom_parser; + std::string xml(R"CONFIG( +)CONFIG"); + Poco::AutoPtr document = dom_parser.parseString(xml); + Poco::AutoPtr config = new Poco::Util::XMLConfiguration(document); + getMutableContext().context->setConfig(config); + + auto query_context = DB::Context::createCopy(getContext().context); + query_context->makeQueryContext(); + query_context->setCurrentQueryId(query_id); + chassert(&DB::CurrentThread::get() == &thread_status); + DB::CurrentThread::QueryScope query_scope_holder(query_context); + + DB::FileCacheSettings settings; + settings.base_path = cache_base_path; + settings.max_file_segment_size = 5; + settings.max_size = 30; + settings.max_elements = 10; + settings.boundary_alignment = 1; + + ReadSettings read_settings; + read_settings.enable_filesystem_cache = 1; + read_settings.local_fs_method = LocalFSReadMethod::pread; + + std::string file_path = fs::current_path() / "test"; + auto read_buffer_creator = [&]() + { + return createReadBufferFromFileBase(file_path, read_settings, std::nullopt, std::nullopt); + }; + + auto wb = std::make_unique(file_path, DBMS_DEFAULT_BUFFER_SIZE); + std::string s(30, '*'); + wb->write(s.data(), s.size()); + wb->next(); + wb->finalize(); + + auto cache = std::make_shared(settings); + cache->initialize(); + auto key = cache->createKeyForPath(file_path); + + { + auto cached_buffer = std::make_shared( + file_path, key, cache, read_buffer_creator, read_settings, "test", s.size(), false, false, std::nullopt, nullptr); + + WriteBufferFromOwnString result; + copyData(*cached_buffer, result); + ASSERT_EQ(result.str(), s); + + assertEqual(cache->dumpQueue(), { Range(0, 4), Range(5, 9), Range(10, 14), Range(15, 19), Range(20, 24), Range(25, 29) }); + } + + { + ReadSettings modified_settings{read_settings}; + modified_settings.local_fs_buffer_size = 10; + modified_settings.remote_fs_buffer_size = 10; + + auto cached_buffer = std::make_shared( + file_path, key, cache, read_buffer_creator, modified_settings, "test", s.size(), false, false, std::nullopt, nullptr); + + cached_buffer->next(); + assertEqual(cache->dumpQueue(), { Range(5, 9), Range(10, 14), Range(15, 19), Range(20, 24), Range(25, 29), Range(0, 4) }); + + cached_buffer->position() = cached_buffer->buffer().end(); + cached_buffer->next(); + assertEqual(cache->dumpQueue(), {Range(10, 14), Range(15, 19), Range(20, 24), Range(25, 29), Range(0, 4), Range(5, 9) }); + } +} From 179a7ce20263fd3bc50431ee234a99520595aebe Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Sun, 25 Jun 2023 11:07:15 +0800 Subject: [PATCH 0815/1997] debug --- src/Interpreters/GraceHashJoin.cpp | 30 +++++++++++++++++++++++------- tests/ci/stress.py | 3 +++ 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/src/Interpreters/GraceHashJoin.cpp b/src/Interpreters/GraceHashJoin.cpp index 197b64865e1..4218a8ea4e1 100644 --- a/src/Interpreters/GraceHashJoin.cpp +++ b/src/Interpreters/GraceHashJoin.cpp @@ -385,11 +385,23 @@ GraceHashJoin::Buckets GraceHashJoin::rehashBuckets(size_t to_size) void GraceHashJoin::addBucket(Buckets & destination) { - auto & left_file = tmp_data->createStream(left_sample_block); - auto & right_file = tmp_data->createStream(prepareRightBlock(right_sample_block)); + // There could be exceptions from createStream, In ci tests + // there is a certain probability of failure in allocating memory, see memory_tracker_fault_probability. 
+ // It may terminate this thread and leave a broken hash_join, and another thread may crash when it tries to
+ // use the broken hash_join. So we log an error message here to help with debugging.
+ try
+ {
+ auto & left_file = tmp_data->createStream(left_sample_block);
+ auto & right_file = tmp_data->createStream(prepareRightBlock(right_sample_block));

- BucketPtr new_bucket = std::make_shared(destination.size(), left_file, right_file, log);
- destination.emplace_back(std::move(new_bucket));
+ BucketPtr new_bucket = std::make_shared(destination.size(), left_file, right_file, log);
+ destination.emplace_back(std::move(new_bucket));
+ }
+ catch (...)
+ {
+ LOG_ERROR(&Poco::Logger::get("GraceHashJoin"), "Can't create bucket. Current buckets size: {}", destination.size());
+ throw;
+ }
 }

 void GraceHashJoin::checkTypesOfKeys(const Block & block) const
@@ -626,7 +638,11 @@ void GraceHashJoin::addJoinedBlockImpl(Block block)
 if (current_block.rows() > 0)
 {
 std::lock_guard lock(hash_join_mutex);
-
+ auto current_buckets = getCurrentBuckets();
+ if (!isPowerOf2(current_buckets.size())) [[unlikely]]
+ {
+ throw Exception(ErrorCodes::LOGICAL_ERROR, "Broken buckets. Its size ({}) is not a power of 2", current_buckets.size());
+ }
 if (!hash_join)
 hash_join = makeInMemoryJoin();

@@ -637,11 +653,11 @@ void GraceHashJoin::addJoinedBlockImpl(Block block)

 current_block = {};

+ // Must use the latest buckets snapshot in case it has been rehashed by other threads.
+ buckets_snapshot = rehashBuckets(current_buckets.size() * 2);
 auto right_blocks = hash_join->releaseJoinedBlocks(/* restructure */ false);
 hash_join = nullptr;

- buckets_snapshot = rehashBuckets(buckets_snapshot.size() * 2);
-
 {
 Blocks current_blocks;
 current_blocks.reserve(right_blocks.size());
diff --git a/tests/ci/stress.py b/tests/ci/stress.py
index 1aa483854fc..6d17384c63f 100755
--- a/tests/ci/stress.py
+++ b/tests/ci/stress.py
@@ -38,6 +38,9 @@ def get_options(i, upgrade_check):
 client_options.append("join_algorithm='partial_merge'")
 if join_alg_num % 5 == 2:
 client_options.append("join_algorithm='full_sorting_merge'")
+ if join_alg_num % 5 == 3 and not upgrade_check:
+ # Some crashes are not fixed in 23.2 yet, so ignore the setting in Upgrade check
+ client_options.append("join_algorithm='grace_hash'")
 if join_alg_num % 5 == 4:
 client_options.append("join_algorithm='auto'")
 client_options.append("max_rows_in_join=1000")

From aab86da4e9bc1840d740c00fa02c599a36c3d04c Mon Sep 17 00:00:00 2001
From: lgbo-ustc
Date: Mon, 26 Jun 2023 20:42:21 +0800
Subject: [PATCH 0816/1997] increase max_bytes_in_join

---
 tests/queries/0_stateless/02275_full_sort_join_long.sql.j2 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/02275_full_sort_join_long.sql.j2 b/tests/queries/0_stateless/02275_full_sort_join_long.sql.j2
index 98cc46c9cb4..7276e77dc16 100644
--- a/tests/queries/0_stateless/02275_full_sort_join_long.sql.j2
+++ b/tests/queries/0_stateless/02275_full_sort_join_long.sql.j2
@@ -30,7 +30,7 @@ SELECT 'skipped';

 {% for join_algorithm in ['full_sorting_merge', 'grace_hash'] -%}

-SET max_bytes_in_join = '{% if join_algorithm == 'grace_hash' %}1M{% else %}0{% endif %}';
+SET max_bytes_in_join = '{% if join_algorithm == 'grace_hash' %}16M{% else %}0{% endif %}';

 SELECT '-- {{ join_algorithm }} --';
 SET join_algorithm = '{{ join_algorithm }}';

From c4d190617c72436bfeae8563c636ed8e925e1849 Mon Sep 17 00:00:00 2001
From: Sema Checherinda
Date: Mon, 26 Jun 2023 14:46:51 +0200
Subject: [PATCH 0817/1997] fix flaky test
test_profile_events_s3 --- tests/integration/test_profile_events_s3/test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_profile_events_s3/test.py b/tests/integration/test_profile_events_s3/test.py index 10c9385f865..de18142d4f6 100644 --- a/tests/integration/test_profile_events_s3/test.py +++ b/tests/integration/test_profile_events_s3/test.py @@ -139,7 +139,7 @@ def test_profile_events(cluster): ) stat1 = get_query_stat(instance, query1) for metric in stat1: - assert stat1[metric] == metrics1[metric] - metrics0[metric] + assert stat1[metric] == metrics1.get(metric, 0) - metrics0.get(metric, 0) assert ( metrics1["WriteBufferFromS3Bytes"] - metrics0["WriteBufferFromS3Bytes"] == size1 ) @@ -163,7 +163,7 @@ def test_profile_events(cluster): stat2 = get_query_stat(instance, query2) for metric in stat2: - assert stat2[metric] == metrics2[metric] - metrics1[metric] + assert stat2[metric] == metrics2.get(metric, 0)- metrics1.get(metric, 0) assert ( metrics2["WriteBufferFromS3Bytes"] - metrics1["WriteBufferFromS3Bytes"] @@ -189,4 +189,4 @@ def test_profile_events(cluster): # With async reads profile events are not updated fully because reads are done in a separate thread. # for metric in stat3: # print(metric) - # assert stat3[metric] == metrics3[metric] - metrics2[metric] + # assert stat3[metric] == metrics3.get(metric, 0) - metrics2.get(metric, 0) From 047f656980f01cc339043c1b5ba225bb6f4ce07e Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Mon, 26 Jun 2023 15:04:50 +0200 Subject: [PATCH 0818/1997] add explicit finalize calls --- src/Client/QueryFuzzer.cpp | 2 +- src/Parsers/MySQL/tests/gtest_create_parser.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Client/QueryFuzzer.cpp b/src/Client/QueryFuzzer.cpp index bfcfe659982..39ca7a5ed88 100644 --- a/src/Client/QueryFuzzer.cpp +++ b/src/Client/QueryFuzzer.cpp @@ -1244,7 +1244,7 @@ void QueryFuzzer::fuzzMain(ASTPtr & ast) std::cout << std::endl; WriteBufferFromOStream ast_buf(std::cout, 4096); formatAST(*ast, ast_buf, false /*highlight*/); - ast_buf.next(); + ast_buf.finalize(); std::cout << std::endl << std::endl; } diff --git a/src/Parsers/MySQL/tests/gtest_create_parser.cpp b/src/Parsers/MySQL/tests/gtest_create_parser.cpp index 554b3f0a67d..2f65eb6e592 100644 --- a/src/Parsers/MySQL/tests/gtest_create_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_create_parser.cpp @@ -40,5 +40,5 @@ TEST(CreateTableParser, SS) ASTPtr ast = parseQuery(p_create_query, input.data(), input.data() + input.size(), "", 0, 0); WriteBufferFromOStream buf(std::cerr, 4096); ast->dumpTree(buf); - + buf.finalize(); } From 4fabce9cc85cf649af55c0bb5341dad15aa91231 Mon Sep 17 00:00:00 2001 From: Tyler Hannan Date: Mon, 26 Jun 2023 15:10:56 +0200 Subject: [PATCH 0819/1997] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 87fb4920222..eac036c2d9b 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,7 @@ curl https://clickhouse.com/ | sh * [**ClickHouse Meetup in Boston**](https://www.meetup.com/clickhouse-boston-user-group/events/293913596) - Jul 18 * [**ClickHouse Meetup in NYC**](https://www.meetup.com/clickhouse-new-york-user-group/events/293913441) - Jul 19 * [**ClickHouse Meetup in Toronto**](https://www.meetup.com/clickhouse-toronto-user-group/events/294183127) - Jul 20 +* [**ClickHouse Meetup in Singapore**](https://www.meetup.com/clickhouse-singapore-meetup-group/events/294428050/) - Jul 27 Also, keep an eye out for upcoming 
meetups around the world. Somewhere else you want us to be? Please feel free to reach out to tyler clickhouse com. From 237805705c0239d3b252b3fa3b59f0214452ccd0 Mon Sep 17 00:00:00 2001 From: Sema Checherinda <104093494+CheSema@users.noreply.github.com> Date: Mon, 26 Jun 2023 15:44:30 +0200 Subject: [PATCH 0820/1997] Update test.py --- tests/integration/test_profile_events_s3/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_profile_events_s3/test.py b/tests/integration/test_profile_events_s3/test.py index de18142d4f6..a8e037d667f 100644 --- a/tests/integration/test_profile_events_s3/test.py +++ b/tests/integration/test_profile_events_s3/test.py @@ -163,7 +163,7 @@ def test_profile_events(cluster): stat2 = get_query_stat(instance, query2) for metric in stat2: - assert stat2[metric] == metrics2.get(metric, 0)- metrics1.get(metric, 0) + assert stat2[metric] == metrics2.get(metric, 0) - metrics1.get(metric, 0) assert ( metrics2["WriteBufferFromS3Bytes"] - metrics1["WriteBufferFromS3Bytes"] From d4d85d9fe92430e08c1006f2d6c9d935445473e8 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Mon, 26 Jun 2023 16:03:47 +0200 Subject: [PATCH 0821/1997] add explicit finalize calls --- programs/client/Client.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 14516bfa939..19b601b9a7b 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -790,7 +790,7 @@ bool Client::processWithFuzzing(const String & full_query) WriteBufferFromOStream cerr_buf(std::cerr, 4096); fuzz_base->dumpTree(cerr_buf); - cerr_buf.next(); + cerr_buf.finalize(); fmt::print( stderr, @@ -928,7 +928,7 @@ bool Client::processWithFuzzing(const String & full_query) std::cout << std::endl; WriteBufferFromOStream ast_buf(std::cout, 4096); formatAST(*query, ast_buf, false /*highlight*/); - ast_buf.next(); + ast_buf.finalize(); if (const auto * insert = query->as()) { /// For inserts with data it's really useful to have the data itself available in the logs, as formatAST doesn't print it From e6535b1f875931d46421166897a2a912b6b8a17d Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 26 Jun 2023 22:22:22 +0800 Subject: [PATCH 0822/1997] update libhdfs3 version --- contrib/libhdfs3 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/libhdfs3 b/contrib/libhdfs3 index 164b89253fa..377220ef351 160000 --- a/contrib/libhdfs3 +++ b/contrib/libhdfs3 @@ -1 +1 @@ -Subproject commit 164b89253fad7991bce77882f01b51ab81d19f3d +Subproject commit 377220ef351ae24994a5fcd2b5fa3930d00c4db0 From 7cadfeac29c68dc9ac749cd48d115da02ae48f62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Mon, 26 Jun 2023 14:55:03 +0000 Subject: [PATCH 0823/1997] Add retries to flaky tests --- tests/integration/test_tlsv1_3/test.py | 67 ++++++++++++++++++-------- 1 file changed, 47 insertions(+), 20 deletions(-) diff --git a/tests/integration/test_tlsv1_3/test.py b/tests/integration/test_tlsv1_3/test.py index e25216c67df..a29dcb5fc1c 100644 --- a/tests/integration/test_tlsv1_3/test.py +++ b/tests/integration/test_tlsv1_3/test.py @@ -4,6 +4,7 @@ from helpers.ssl_context import WrapSSLContextWithSNI import urllib.request, urllib.parse import ssl import os.path +import logging # The test cluster is configured with certificate for that host name, see 'server-ext.cnf'. 
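# Illustrative note (not part of the patch): TLS handshakes in this test occasionally fail
# with a transient "Broken pipe" instead of the expected certificate error, so the hunks
# below wrap the negative-certificate checks in bounded retry loops (up to MAX_RETRY attempts).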
@@ -11,6 +12,7 @@ import os.path SSL_HOST = "integration-tests.clickhouse.com" HTTPS_PORT = 8443 SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +MAX_RETRY = 5 cluster = ClickHouseCluster(__file__) instance = cluster.add_instance( @@ -88,10 +90,18 @@ def test_https_wrong_cert(): execute_query_https("SELECT currentUser()", user="john", cert_name="client2") assert "HTTP Error 403" in str(err.value) + count = 0 # Wrong certificate: self-signed certificate. - with pytest.raises(Exception) as err: - execute_query_https("SELECT currentUser()", user="john", cert_name="wrong") - assert "unknown ca" in str(err.value) + while count <= MAX_RETRY: + with pytest.raises(Exception) as err: + execute_query_https("SELECT currentUser()", user="john", cert_name="wrong") + err_str = str(err.value) + if count < MAX_RETRY and "Broken pipe" in err_str: + count = count + 1 + logging.warning(f"Failed attempt with wrong cert, err: {err_str}") + continue + assert "unknown ca" in err_str + break # No certificate. with pytest.raises(Exception) as err: @@ -181,24 +191,41 @@ def test_https_non_ssl_auth(): == "jane\n" ) + count = 0 # However if we send a certificate it must not be wrong. - with pytest.raises(Exception) as err: - execute_query_https( - "SELECT currentUser()", - user="peter", - enable_ssl_auth=False, - cert_name="wrong", - ) - assert "unknown ca" in str(err.value) - with pytest.raises(Exception) as err: - execute_query_https( - "SELECT currentUser()", - user="jane", - enable_ssl_auth=False, - password="qwe123", - cert_name="wrong", - ) - assert "unknown ca" in str(err.value) + while count <= MAX_RETRY: + with pytest.raises(Exception) as err: + execute_query_https( + "SELECT currentUser()", + user="peter", + enable_ssl_auth=False, + cert_name="wrong", + ) + err_str = str(err.value) + if count < MAX_RETRY and "Broken pipe" in err_str: + count = count + 1 + logging.warning(f"Failed attempt with wrong cert, user: peter, err: {err_str}") + continue + assert "unknown ca" in err_str + break + + count = 0 + while count <= MAX_RETRY: + with pytest.raises(Exception) as err: + execute_query_https( + "SELECT currentUser()", + user="jane", + enable_ssl_auth=False, + password="qwe123", + cert_name="wrong", + ) + err_str = str(err.value) + if count < MAX_RETRY and "Broken pipe" in err_str: + count = count + 1 + logging.warning(f"Failed attempt with wrong cert, user: jane, err: {err_str}") + continue + assert "unknown ca" in err_str + break def test_create_user(): From 45c263cbb551c27e303f6a050df558ec63a3b463 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 26 Jun 2023 15:00:46 +0000 Subject: [PATCH 0824/1997] Replace try/catch logic in hasTokenOrNull() by sth more lightweight --- src/Common/StringSearcher.h | 16 ++++- src/Common/Volnitsky.h | 3 +- src/Functions/HasTokenImpl.h | 74 +++++++++++------------ src/Functions/hasToken.cpp | 12 ++-- src/Functions/hasTokenCaseInsensitive.cpp | 7 +-- 5 files changed, 59 insertions(+), 53 deletions(-) diff --git a/src/Common/StringSearcher.h b/src/Common/StringSearcher.h index ae440f9151b..120a79120bb 100644 --- a/src/Common/StringSearcher.h +++ b/src/Common/StringSearcher.h @@ -811,15 +811,24 @@ class TokenSearcher : public StringSearcherBase size_t needle_size; public: + + template + requires (sizeof(CharT) == 1) + static bool isValidNeedle(const CharT * needle_, size_t needle_size_) + { + if (std::any_of(needle_, needle_ + needle_size_, isTokenSeparator)) + return false; + return true; + } + template requires (sizeof(CharT) == 1) TokenSearcher(const CharT * 
needle_, size_t needle_size_) : searcher(needle_, needle_size_) , needle_size(needle_size_) { - if (std::any_of(needle_, needle_ + needle_size_, isTokenSeparator)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Needle must not contain whitespace or separator characters"); - + /// The caller is responsible for calling isValidNeedle() + chassert(isValidNeedle(needle_, needle_size_)); } template @@ -880,6 +889,7 @@ using ASCIICaseSensitiveStringSearcher = impl::StringSearcher; using ASCIICaseInsensitiveStringSearcher = impl::StringSearcher; using UTF8CaseSensitiveStringSearcher = impl::StringSearcher; using UTF8CaseInsensitiveStringSearcher = impl::StringSearcher; + using ASCIICaseSensitiveTokenSearcher = impl::TokenSearcher; using ASCIICaseInsensitiveTokenSearcher = impl::TokenSearcher; diff --git a/src/Common/Volnitsky.h b/src/Common/Volnitsky.h index 64c5bf4c0d3..8f9aa23a38a 100644 --- a/src/Common/Volnitsky.h +++ b/src/Common/Volnitsky.h @@ -406,8 +406,7 @@ public: /// And also adding from the end guarantees that we will find first occurrence because we will lookup bigger offsets first. for (auto i = static_cast(needle_size - sizeof(VolnitskyTraits::Ngram)); i >= 0; --i) { - bool ok = VolnitskyTraits::putNGram( - needle + i, static_cast(i + 1), needle, needle_size, callback); + bool ok = VolnitskyTraits::putNGram(needle + i, static_cast(i + 1), needle, needle_size, callback); /** `putNGramUTF8CaseInsensitive` does not work if characters with lower and upper cases * are represented by different number of bytes or code points. diff --git a/src/Functions/HasTokenImpl.h b/src/Functions/HasTokenImpl.h index 18e629b58fb..8cacdfff99d 100644 --- a/src/Functions/HasTokenImpl.h +++ b/src/Functions/HasTokenImpl.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include @@ -9,6 +10,7 @@ namespace DB namespace ErrorCodes { + extern const int BAD_ARGUMENTS; extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; } @@ -44,51 +46,47 @@ struct HasTokenImpl const UInt8 * const end = haystack_data.data() + haystack_data.size(); const UInt8 * pos = begin; - try + if (!ASCIICaseSensitiveTokenSearcher::isValidNeedle(pattern.data(), pattern.size())) { - /// Parameter `pattern` is supposed to be a literal of letters and/or numbers. - /// Otherwise, an exception from the constructor of `TokenSearcher` is thrown. - /// If no exception is thrown at that point, then no further error cases may occur. - TokenSearcher searcher(pattern.data(), pattern.size(), end - pos); if (res_null) - std::ranges::fill(res_null->getData(), false); - - /// The current index in the array of strings. - size_t i = 0; - /// We will search for the next occurrence in all rows at once. - while (pos < end && end != (pos = searcher.search(pos, end - pos))) - { - /// Let's determine which index it refers to. - while (begin + haystack_offsets[i] <= pos) - { - res[i] = negate; - ++i; - } - - /// We check that the entry does not pass through the boundaries of strings. - if (pos + pattern.size() < begin + haystack_offsets[i]) - res[i] = !negate; - else - res[i] = negate; - - pos = begin + haystack_offsets[i]; - ++i; - } - - /// Tail, in which there can be no substring. - if (i < res.size()) - memset(&res[i], negate, (res.size() - i) * sizeof(res[0])); - } - catch (...) 
- { - if (!res_null) - throw; - else { std::ranges::fill(res, 0); std::ranges::fill(res_null->getData(), true); + return; } + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Needle must not contain whitespace or separator characters"); } + + TokenSearcher searcher(pattern.data(), pattern.size(), end - pos); + if (res_null) + std::ranges::fill(res_null->getData(), false); + + /// The current index in the array of strings. + size_t i = 0; + /// We will search for the next occurrence in all rows at once. + while (pos < end && end != (pos = searcher.search(pos, end - pos))) + { + /// Let's determine which index it refers to. + while (begin + haystack_offsets[i] <= pos) + { + res[i] = negate; + ++i; + } + + /// We check that the entry does not pass through the boundaries of strings. + if (pos + pattern.size() < begin + haystack_offsets[i]) + res[i] = !negate; + else + res[i] = negate; + + pos = begin + haystack_offsets[i]; + ++i; + } + + /// Tail, in which there can be no substring. + if (i < res.size()) + memset(&res[i], negate, (res.size() - i) * sizeof(res[0])); } template diff --git a/src/Functions/hasToken.cpp b/src/Functions/hasToken.cpp index de67e80e752..b90750ea233 100644 --- a/src/Functions/hasToken.cpp +++ b/src/Functions/hasToken.cpp @@ -16,18 +16,18 @@ struct NameHasTokenOrNull static constexpr auto name = "hasTokenOrNull"; }; -using FunctionHasToken = DB::FunctionsStringSearch>; -using FunctionHasTokenOrNull = DB:: - FunctionsStringSearch, DB::ExecutionErrorPolicy::Null>; +using FunctionHasToken + = FunctionsStringSearch>; +using FunctionHasTokenOrNull + = FunctionsStringSearch, ExecutionErrorPolicy::Null>; REGISTER_FUNCTION(HasToken) { factory.registerFunction(FunctionDocumentation - {.description="Performs lookup of needle in haystack using tokenbf_v1 index."}, DB::FunctionFactory::CaseSensitive); + {.description="Performs lookup of needle in haystack using tokenbf_v1 index."}, FunctionFactory::CaseSensitive); factory.registerFunction(FunctionDocumentation - {.description="Performs lookup of needle in haystack using tokenbf_v1 index. Returns null if needle is ill-formed."}, - DB::FunctionFactory::CaseSensitive); + {.description="Performs lookup of needle in haystack using tokenbf_v1 index. 
Returns null if needle is ill-formed."}, FunctionFactory::CaseSensitive); } } diff --git a/src/Functions/hasTokenCaseInsensitive.cpp b/src/Functions/hasTokenCaseInsensitive.cpp index a6e8ecf3e9d..d7381e336b5 100644 --- a/src/Functions/hasTokenCaseInsensitive.cpp +++ b/src/Functions/hasTokenCaseInsensitive.cpp @@ -17,10 +17,9 @@ struct NameHasTokenCaseInsensitiveOrNull }; using FunctionHasTokenCaseInsensitive - = DB::FunctionsStringSearch>; -using FunctionHasTokenCaseInsensitiveOrNull = DB::FunctionsStringSearch< - DB::HasTokenImpl, - DB::ExecutionErrorPolicy::Null>; + = FunctionsStringSearch>; +using FunctionHasTokenCaseInsensitiveOrNull + = FunctionsStringSearch, ExecutionErrorPolicy::Null>; REGISTER_FUNCTION(HasTokenCaseInsensitive) { From 5de8c4ac679c0c79f0554681a3113be4a5c000d7 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 26 Jun 2023 15:27:05 +0000 Subject: [PATCH 0825/1997] Automatic style fix --- tests/integration/test_tlsv1_3/test.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_tlsv1_3/test.py b/tests/integration/test_tlsv1_3/test.py index a29dcb5fc1c..f5c2be51ed7 100644 --- a/tests/integration/test_tlsv1_3/test.py +++ b/tests/integration/test_tlsv1_3/test.py @@ -204,7 +204,9 @@ def test_https_non_ssl_auth(): err_str = str(err.value) if count < MAX_RETRY and "Broken pipe" in err_str: count = count + 1 - logging.warning(f"Failed attempt with wrong cert, user: peter, err: {err_str}") + logging.warning( + f"Failed attempt with wrong cert, user: peter, err: {err_str}" + ) continue assert "unknown ca" in err_str break @@ -222,7 +224,9 @@ def test_https_non_ssl_auth(): err_str = str(err.value) if count < MAX_RETRY and "Broken pipe" in err_str: count = count + 1 - logging.warning(f"Failed attempt with wrong cert, user: jane, err: {err_str}") + logging.warning( + f"Failed attempt with wrong cert, user: jane, err: {err_str}" + ) continue assert "unknown ca" in err_str break From ed6bfd66fee9aa4c2e06eaf25ca81d02e09f075e Mon Sep 17 00:00:00 2001 From: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> Date: Mon, 26 Jun 2023 17:28:59 +0200 Subject: [PATCH 0826/1997] Publish changes --- docker/packager/binary/build.sh | 4 ++++ docker/packager/packager | 1 + 2 files changed, 5 insertions(+) diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index c0803c74147..08a9b07f3ce 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -15,6 +15,10 @@ if [ "$EXTRACT_TOOLCHAIN_DARWIN" = "1" ]; then mkdir -p /build/cmake/toolchain/darwin-x86_64 tar xJf /MacOSX11.0.sdk.tar.xz -C /build/cmake/toolchain/darwin-x86_64 --strip-components=1 ln -sf darwin-x86_64 /build/cmake/toolchain/darwin-aarch64 + + if [ "$EXPORT_SOURCES_WITH_SUBMODULES" = "1" ]; then + tar -c /build --exclude-vcs-ignores --exclude-vcs --exclude '/build/build' --exclude '/build/build_docker' --exclude '/build/debian' --exclude '/build/.cache' --exclude '/build/docs' --exclude '/build/tests/integration' | pigz -9 > /output/source_sub.tar.gz + fi fi # Uncomment to debug ccache. 
Don't put ccache log in /output right away, or it diff --git a/docker/packager/packager b/docker/packager/packager index 1b3df858cd2..42dc52aa37f 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -168,6 +168,7 @@ def parse_env_variables( "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-x86_64.cmake" ) result.append("EXTRACT_TOOLCHAIN_DARWIN=1") + result.append("EXPORT_SOURCES_WITH_SUBMODULES=1") elif is_cross_darwin_arm: cc = compiler[: -len(DARWIN_ARM_SUFFIX)] cmake_flags.append("-DCMAKE_AR:FILEPATH=/cctools/bin/aarch64-apple-darwin-ar") From 4da82d10d0a7eba0a10fc8f02889d1bd533f7b82 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Mon, 26 Jun 2023 17:57:46 +0200 Subject: [PATCH 0827/1997] Update gtest_lru_file_cache.cpp --- src/Interpreters/tests/gtest_lru_file_cache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/tests/gtest_lru_file_cache.cpp b/src/Interpreters/tests/gtest_lru_file_cache.cpp index 9ff9f92afe4..58b1302a72c 100644 --- a/src/Interpreters/tests/gtest_lru_file_cache.cpp +++ b/src/Interpreters/tests/gtest_lru_file_cache.cpp @@ -893,7 +893,7 @@ TEST_F(FileCacheTest, CachedReadBuffer) settings.boundary_alignment = 1; ReadSettings read_settings; - read_settings.enable_filesystem_cache = 1; + read_settings.enable_filesystem_cache = true; read_settings.local_fs_method = LocalFSReadMethod::pread; std::string file_path = fs::current_path() / "test"; From a1eb27ce3038df0921d6667e65074168776988dc Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 26 Jun 2023 18:12:41 +0200 Subject: [PATCH 0828/1997] Update log message --- .../IO/CachedOnDiskReadBufferFromFile.cpp | 28 ++++++++++++------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp index 76d54f9d27c..b65a368b936 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp @@ -878,10 +878,11 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep() LOG_TEST( log, - "Current read type: {}, read offset: {}, impl offset: {}, file segment: {}", + "Current read type: {}, read offset: {}, impl offset: {}, impl position: {}, file segment: {}", toString(read_type), file_offset_of_buffer_end, implementation_buffer->getFileOffsetOfBufferEnd(), + implementation_buffer->getPosition(), file_segment.getInfoForLog()); chassert(current_read_range.left <= file_offset_of_buffer_end); @@ -940,7 +941,8 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep() // We don't support implementation_buffer implementations that use nextimpl_working_buffer_offset. 
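        // Put differently (illustrative restatement): after next() the read position must still
        // be at buffer().begin(); the chassert below enforces exactly that invariant.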
            chassert(implementation_buffer->position() == implementation_buffer->buffer().begin());
-            size = implementation_buffer->buffer().size();
+            if (result)
+                size = implementation_buffer->buffer().size();
 
             LOG_TEST(
                 log,
@@ -954,15 +956,21 @@
             ProfileEvents::increment(ProfileEvents::CachedReadBufferReadFromCacheBytes, size);
             ProfileEvents::increment(ProfileEvents::CachedReadBufferReadFromCacheMicroseconds, elapsed);
 
-            const size_t new_file_offset = file_offset_of_buffer_end + size;
-            const size_t file_segment_write_offset = file_segment.getCurrentWriteOffset(true);
-            if (new_file_offset > file_segment.range().right + 1 || new_file_offset > file_segment_write_offset)
+            if (result)
             {
-                auto file_segment_path = file_segment.getPathInLocalCache();
-                throw Exception(
-                    ErrorCodes::LOGICAL_ERROR,
-                    "Read unexpected size. File size: {}, file path: {}, file segment info: {}",
-                    fs::file_size(file_segment_path), file_segment_path, file_segment.getInfoForLog());
+                const size_t new_file_offset = file_offset_of_buffer_end + size;
+                const size_t file_segment_write_offset = file_segment.getCurrentWriteOffset(true);
+                if (new_file_offset > file_segment.range().right + 1 || new_file_offset > file_segment_write_offset)
+                {
+                    auto file_segment_path = file_segment.getPathInLocalCache();
+                    throw Exception(
+                        ErrorCodes::LOGICAL_ERROR, "Read unexpected size. "
+                        "File size: {}, file segment path: {}, impl size: {}, impl path: {}, "
+                        "file segment info: {}",
+                        fs::file_size(file_segment_path), file_segment_path,
+                        implementation_buffer->getFileSize(), implementation_buffer->getFileName(),
+                        file_segment.getInfoForLog());
+                }
             }
         }
         else

From d8cee8a34c4f3376b64977e222f46bd6abf0ffd5 Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Mon, 26 Jun 2023 19:00:25 +0200
Subject: [PATCH 0829/1997] Fix style

---
 src/Common/StringSearcher.h | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/src/Common/StringSearcher.h b/src/Common/StringSearcher.h
index 120a79120bb..24b707c97ae 100644
--- a/src/Common/StringSearcher.h
+++ b/src/Common/StringSearcher.h
@@ -21,12 +21,6 @@
 namespace DB
 {
 
-namespace ErrorCodes
-{
-    extern const int BAD_ARGUMENTS;
-}
-
-
 /** Variants for searching a substring in a string.
   * In most cases, performance is less than Volnitsky (see Volnitsky.h).
*/ From 1ed33b193628dd69ccfefb7ca343066608399bbb Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 26 Jun 2023 19:22:46 +0200 Subject: [PATCH 0830/1997] Update src/Common/StringSearcher.h Co-authored-by: ltrk2 <107155950+ltrk2@users.noreply.github.com> --- src/Common/StringSearcher.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Common/StringSearcher.h b/src/Common/StringSearcher.h index 24b707c97ae..ff32218b64b 100644 --- a/src/Common/StringSearcher.h +++ b/src/Common/StringSearcher.h @@ -810,9 +810,7 @@ public: requires (sizeof(CharT) == 1) static bool isValidNeedle(const CharT * needle_, size_t needle_size_) { - if (std::any_of(needle_, needle_ + needle_size_, isTokenSeparator)) - return false; - return true; + return std::none_of(needle_, needle_ + needle_size_, isTokenSeparator)); } template From 61136680e403ae88469f66157e847168996be786 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 26 Jun 2023 20:05:58 +0200 Subject: [PATCH 0831/1997] Update src/Common/StringSearcher.h Co-authored-by: ltrk2 <107155950+ltrk2@users.noreply.github.com> --- src/Common/StringSearcher.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/StringSearcher.h b/src/Common/StringSearcher.h index ff32218b64b..3ed192d05f3 100644 --- a/src/Common/StringSearcher.h +++ b/src/Common/StringSearcher.h @@ -810,7 +810,7 @@ public: requires (sizeof(CharT) == 1) static bool isValidNeedle(const CharT * needle_, size_t needle_size_) { - return std::none_of(needle_, needle_ + needle_size_, isTokenSeparator)); + return std::none_of(needle_, needle_ + needle_size_, isTokenSeparator); } template From 01607f0b32291f75d86e55d17a9baa0ef82c1075 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 26 Jun 2023 20:21:25 +0200 Subject: [PATCH 0832/1997] Add check for intersecting ranges --- src/Interpreters/Cache/FileCache.cpp | 24 +++++++++++++++++++----- src/Interpreters/Cache/FileSegment.h | 2 ++ src/Interpreters/Cache/Metadata.cpp | 22 ++++++++++++++++++++++ src/Interpreters/Cache/Metadata.h | 2 ++ 4 files changed, 45 insertions(+), 5 deletions(-) diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index b601c4029f3..9bd1bc503ff 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -499,12 +499,14 @@ KeyMetadata::iterator FileCache::addFileSegment( chassert(size > 0); /// Empty file segments in cache are not allowed. 
const auto & key = locked_key.getKey(); - if (locked_key.tryGetByOffset(offset)) + const FileSegment::Range range(offset, offset + size - 1); + + if (auto intersecting_range = locked_key.hasIntersectingRange(range)) { throw Exception( ErrorCodes::LOGICAL_ERROR, - "Cache entry already exists for key: `{}`, offset: {}, size: {}.", - key, offset, size); + "Attempt to add intersecting file segment in cache ({} intersects {})", + range.toString(), intersecting_range->toString()); } FileSegment::State result_state; @@ -964,8 +966,20 @@ void FileCache::loadMetadata() if ((main_priority->getSizeLimit() == 0 || main_priority->getSize(lock) + size <= main_priority->getSizeLimit()) && (main_priority->getElementsLimit() == 0 || main_priority->getElementsCount(lock) + 1 <= main_priority->getElementsLimit())) { - auto file_segment_metadata_it = addFileSegment( - *locked_key, offset, size, FileSegment::State::DOWNLOADED, CreateFileSegmentSettings(segment_kind), &lock); + KeyMetadata::iterator file_segment_metadata_it; + try + { + file_segment_metadata_it = addFileSegment( + *locked_key, offset, size, FileSegment::State::DOWNLOADED, CreateFileSegmentSettings(segment_kind), &lock); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + chassert(false); + + fs::remove(offset_it->path()); + continue; + } const auto & file_segment_metadata = file_segment_metadata_it->second; chassert(file_segment_metadata->file_segment->assertCorrectness()); diff --git a/src/Interpreters/Cache/FileSegment.h b/src/Interpreters/Cache/FileSegment.h index 681c0d719e4..75053ff5dad 100644 --- a/src/Interpreters/Cache/FileSegment.h +++ b/src/Interpreters/Cache/FileSegment.h @@ -134,6 +134,8 @@ public: bool operator==(const Range & other) const { return left == other.left && right == other.right; } + bool operator<(const Range & other) const { return right < other.left; } + size_t size() const { return right - left + 1; } String toString() const { return fmt::format("[{}, {}]", std::to_string(left), std::to_string(right)); } diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index f799bae1e10..5be3bb6490a 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -451,6 +451,28 @@ void LockedKey::shrinkFileSegmentToDownloadedSize( chassert(file_segment->assertCorrectnessUnlocked(segment_lock)); } +std::optional LockedKey::hasIntersectingRange(const FileSegment::Range & range) const +{ + if (key_metadata->empty()) + return {}; + + auto it = key_metadata->lower_bound(range.left); + if (it == key_metadata->end()) + { + auto check_range = std::prev(it)->second->file_segment->range(); + return check_range < range ? 
            std::nullopt : std::optional<FileSegment::Range>(check_range);
+    }
+
+    std::optional<FileSegment::Range> check_range;
+    if (range < (check_range = it->second->file_segment->range())
+        && (it == key_metadata->begin() || (check_range = std::prev(it)->second->file_segment->range()) < range))
+    {
+        return {};
+    }
+
+    return check_range;
+}
+
 std::shared_ptr<FileSegment> LockedKey::getByOffset(size_t offset) const
 {
     auto it = key_metadata->find(offset);
diff --git a/src/Interpreters/Cache/Metadata.h b/src/Interpreters/Cache/Metadata.h
index 635594a289a..8ee40aa977f 100644
--- a/src/Interpreters/Cache/Metadata.h
+++ b/src/Interpreters/Cache/Metadata.h
@@ -164,6 +164,8 @@ struct LockedKey : private boost::noncopyable
 
     bool isLastOwnerOfFileSegment(size_t offset) const;
 
+    std::optional<FileSegment::Range> hasIntersectingRange(const FileSegment::Range & range) const;
+
     void removeFromCleanupQueue();
 
     void markAsRemoved();

From 72e407eb06e506a03f3d491e81cf9b8982b469c0 Mon Sep 17 00:00:00 2001
From: kssenii
Date: Mon, 26 Jun 2023 20:40:18 +0200
Subject: [PATCH 0833/1997] Better

---
 src/Interpreters/Cache/Metadata.cpp | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp
index 5be3bb6490a..cde11a071f5 100644
--- a/src/Interpreters/Cache/Metadata.cpp
+++ b/src/Interpreters/Cache/Metadata.cpp
@@ -457,20 +457,21 @@ std::optional<FileSegment::Range> LockedKey::hasIntersectingRange(const FileSegm
         return {};
 
     auto it = key_metadata->lower_bound(range.left);
-    if (it == key_metadata->end())
+    if (it != key_metadata->end()) /// has next range
     {
-        auto check_range = std::prev(it)->second->file_segment->range();
-        return check_range < range ? std::nullopt : std::optional<FileSegment::Range>(check_range);
+        auto next_range = it->second->file_segment->range();
+        if (!(range < next_range))
+            return next_range;
+
+        if (it == key_metadata->begin())
+            return {};
     }
 
-    std::optional<FileSegment::Range> check_range;
-    if (range < (check_range = it->second->file_segment->range())
-        && (it == key_metadata->begin() || (check_range = std::prev(it)->second->file_segment->range()) < range))
-    {
-        return {};
-    }
+    auto prev_range = std::prev(it)->second->file_segment->range();
+    if (!(prev_range < range))
+        return prev_range;
 
-    return check_range;
+    return {};
 }
 
 std::shared_ptr<FileSegment> LockedKey::getByOffset(size_t offset) const
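The refactored hasIntersectingRange above is a standard intersection probe against disjoint intervals kept in a map keyed by their left endpoint: lower_bound(range.left) finds the first interval that starts at or after range.left, and only that interval and its immediate predecessor can possibly overlap the query range. A self-contained sketch of the same idea, with simplified types rather than ClickHouse's actual cache metadata (as in FileSegment::Range, "a < b" here means "a lies entirely to the left of b"):

    #include <map>
    #include <optional>

    struct Range
    {
        size_t left;
        size_t right; /// inclusive
        bool operator<(const Range & other) const { return right < other.left; }
    };

    /// Intervals are disjoint and keyed by their left endpoint.
    std::optional<Range> findIntersecting(const std::map<size_t, Range> & ranges, const Range & range)
    {
        if (ranges.empty())
            return {};

        auto it = ranges.lower_bound(range.left); /// first interval starting at or after range.left
        if (it != ranges.end())
        {
            if (!(range < it->second)) /// range is not entirely left of the next interval
                return it->second;
            if (it == ranges.begin())
                return {};
        }

        const auto & prev = std::prev(it)->second; /// the only other candidate
        if (!(prev < range)) /// previous interval is not entirely left of range
            return prev;

        return {};
    }

This is what lets addFileSegment in the same patch series reject overlapping segments with an O(log n) lookup instead of only checking for an exact offset match.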
From 5521bf3f3570d910d3123f8839f78f99f9292051 Mon Sep 17 00:00:00 2001
From: Dmitry Kardymon
Date: Mon, 26 Jun 2023 20:19:34 +0000
Subject: [PATCH 0834/1997] Fix null constant case

---
 src/Functions/array/range.cpp                    | 16 +++++++++++++++-
 ....reference => 02797_range_nullable.reference} |  4 ++++
 ...nge_nullable.sql => 02797_range_nullable.sql} |  5 ++++-
 3 files changed, 23 insertions(+), 2 deletions(-)
 rename tests/queries/0_stateless/{02790_range_nullable.reference => 02797_range_nullable.reference} (70%)
 rename tests/queries/0_stateless/{02790_range_nullable.sql => 02797_range_nullable.sql} (83%)

diff --git a/src/Functions/array/range.cpp b/src/Functions/array/range.cpp
index b638bc3c5b7..8c524566110 100644
--- a/src/Functions/array/range.cpp
+++ b/src/Functions/array/range.cpp
@@ -3,6 +3,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -59,6 +60,14 @@ private:
                 getName(), arguments.size());
         }
 
+        for (size_t i = 0, size = arguments.size(); i < size; ++i)
+        {
+            if (arguments[i]->onlyNull())
+            {
+                return makeNullable(std::make_shared<DataTypeNothing>());
+            }
+        }
+
         DataTypes arg_types;
         for (size_t i = 0, size = arguments.size(); i < size; ++i)
@@ -382,6 +391,12 @@ private:
     ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
     {
+        NullPresence null_presence = getNullPresense(arguments);
+        if (null_presence.has_null_constant)
+        {
+            return result_type->createColumnConstWithDefaultValue(input_rows_count);
+        }
+
         DataTypePtr elem_type = checkAndGetDataType<DataTypeArray>(result_type.get())->getNestedType();
         WhichDataType which(elem_type);
 
@@ -400,7 +415,6 @@ private:
             }
             const auto & nullable_col = assert_cast<const ColumnNullable &>(*col.column);
             const auto & null_map = nullable_col.getNullMapData();
-
             if (!memoryIsZero(null_map.data(), 0, null_map.size()))
             {
                 throw Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal (null) value column {} of argument of function {}", col.column->getName(), getName());
diff --git a/tests/queries/0_stateless/02790_range_nullable.reference b/tests/queries/0_stateless/02797_range_nullable.reference
similarity index 70%
rename from tests/queries/0_stateless/02790_range_nullable.reference
rename to tests/queries/0_stateless/02797_range_nullable.reference
index 7a98702e98c..a082a71f4ec 100644
--- a/tests/queries/0_stateless/02790_range_nullable.reference
+++ b/tests/queries/0_stateless/02797_range_nullable.reference
@@ -1,3 +1,7 @@
+\N
+\N
+\N
+\N
 [0]
 [0,2,4,6,8]
 [0,2,4,6,8]
diff --git a/tests/queries/0_stateless/02790_range_nullable.sql b/tests/queries/0_stateless/02797_range_nullable.sql
similarity index 83%
rename from tests/queries/0_stateless/02790_range_nullable.sql
rename to tests/queries/0_stateless/02797_range_nullable.sql
index 16e16512fc5..2b0fe69b123 100644
--- a/tests/queries/0_stateless/02790_range_nullable.sql
+++ b/tests/queries/0_stateless/02797_range_nullable.sql
@@ -1,7 +1,10 @@
+SELECT range(null);
+SELECT range(10, null);
+SELECT range(10, 2, null);
+select range('string', Null);
 SELECT range(toNullable(1));
 SELECT range(0::Nullable(UInt64), 10::Nullable(UInt64), 2::Nullable(UInt64));
 SELECT range(0::Nullable(Int64), 10::Nullable(Int64), 2::Nullable(Int64));
-SELECT range(null); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
 SELECT range(Null::Nullable(UInt64), 10::Nullable(UInt64), 2::Nullable(UInt64)); -- { serverError BAD_ARGUMENTS }
 SELECT range(0::Nullable(UInt64), Null::Nullable(UInt64), 2::Nullable(UInt64)); -- { serverError BAD_ARGUMENTS }
 SELECT range(0::Nullable(UInt64), 10::Nullable(UInt64), Null::Nullable(UInt64)); -- { serverError BAD_ARGUMENTS }

From c01d5ed004bbc64f95a7bfbacba1bda3bf2aa273 Mon Sep 17 00:00:00 2001
From: Vitaly Baranov
Date: Tue, 27 Jun 2023 00:27:51 +0200
Subject: [PATCH 0835/1997] Fix after review.
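The diff below replaces a direct getContext() call with locking the weak context pointer first, so building the info snapshot no longer assumes the query's context is still alive. The underlying pattern, reduced to a minimal standalone sketch (Context and QueryInfo here are hypothetical stand-ins, not the real ClickHouse classes):

    #include <memory>
    #include <optional>
    #include <string>

    struct Context { std::string current_database; };

    struct QueryInfo { std::optional<std::string> current_database; };

    struct QueryStatus
    {
        std::weak_ptr<Context> context; /// does not keep the context alive

        QueryInfo getInfo() const
        {
            QueryInfo info;
            /// lock() yields an empty shared_ptr instead of throwing when the
            /// context is already gone, so the snapshot is just less detailed.
            if (auto ctx = context.lock())
                info.current_database = ctx->current_database;
            return info;
        }
    };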
--- src/Interpreters/ProcessList.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index c8e33b477a1..daa8d434ab6 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -593,10 +593,13 @@ QueryStatusInfo QueryStatus::getInfo(bool get_thread_list, bool get_profile_even res.profile_counters = std::make_shared(thread_group->performance_counters.getPartiallyAtomicSnapshot()); } - if (get_settings && getContext()) + if (get_settings) { - res.query_settings = std::make_shared(getContext()->getSettings()); - res.current_database = getContext()->getCurrentDatabase(); + if (auto ctx = context.lock()) + { + res.query_settings = std::make_shared(ctx->getSettings()); + res.current_database = ctx->getCurrentDatabase(); + } } return res; From bcb106e1385b55b046643b55bb6683a666db81e4 Mon Sep 17 00:00:00 2001 From: Dan Roscigno Date: Mon, 26 Jun 2023 19:39:47 -0400 Subject: [PATCH 0836/1997] add missing aggregate functions (#51443) * add missing aggregate functions * add sparkBar --- .../aggregate-functions/reference/index.md | 20 +++++++++++++++++++ .../aspell-ignore/en/aspell-dict.txt | 1 + 2 files changed, 21 insertions(+) diff --git a/docs/en/sql-reference/aggregate-functions/reference/index.md b/docs/en/sql-reference/aggregate-functions/reference/index.md index 17ef494e9ad..6c56aefd51d 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/index.md +++ b/docs/en/sql-reference/aggregate-functions/reference/index.md @@ -19,8 +19,19 @@ Standard aggregate functions: - [stddevSamp](/docs/en/sql-reference/aggregate-functions/reference/stddevsamp.md) - [varPop](/docs/en/sql-reference/aggregate-functions/reference/varpop.md) - [varSamp](/docs/en/sql-reference/aggregate-functions/reference/varsamp.md) +- [corr](./corr.md) - [covarPop](/docs/en/sql-reference/aggregate-functions/reference/covarpop.md) - [covarSamp](/docs/en/sql-reference/aggregate-functions/reference/covarsamp.md) +- [entropy](./entropy.md) +- [exponentialMovingAverage](./exponentialmovingaverage.md) +- [intervalLengthSum](./intervalLengthSum.md) +- [kolmogorovSmirnovTest](./kolmogorovsmirnovtest.md) +- [mannwhitneyutest](./mannwhitneyutest.md) +- [median](./median.md) +- [rankCorr](./rankCorr.md) +- [sumKahan](./sumkahan.md) +- [studentTTest](./studentttest.md) +- [welchTTest](./welchttest.md) ClickHouse-specific aggregate functions: @@ -34,12 +45,15 @@ ClickHouse-specific aggregate functions: - [avgWeighted](/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md) - [topK](/docs/en/sql-reference/aggregate-functions/reference/topk.md) - [topKWeighted](/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md) +- [deltaSum](./deltasum.md) +- [deltaSumTimestamp](./deltasumtimestamp.md) - [groupArray](/docs/en/sql-reference/aggregate-functions/reference/grouparray.md) - [groupArrayLast](/docs/en/sql-reference/aggregate-functions/reference/grouparraylast.md) - [groupUniqArray](/docs/en/sql-reference/aggregate-functions/reference/groupuniqarray.md) - [groupArrayInsertAt](/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md) - [groupArrayMovingAvg](/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md) - [groupArrayMovingSum](/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md) +- [groupArraySample](./grouparraysample.md) - [groupBitAnd](/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md) - 
[groupBitOr](/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md) - [groupBitXor](/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md) @@ -84,3 +98,9 @@ ClickHouse-specific aggregate functions: - [theilsU](./theilsu.md) - [maxIntersections](./maxintersections.md) - [maxIntersectionsPosition](./maxintersectionsposition.md) +- [meanZTest](./meanztest.md) +- [quantileGK](./quantileGK.md) +- [quantileInterpolatedWeighted](./quantileinterpolatedweighted.md) +- [sparkBar](./sparkbar.md) +- [sumCount](./sumcount.md) + diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index bf988295644..00d047121e6 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -2169,6 +2169,7 @@ snowflakeToDateTime socketcache soundex sparkbar +sparkBar sparsehash speedscope splitByChar From 45f40cf2220fd7e37195fbed8d83db7f7cfa75e4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 27 Jun 2023 04:16:26 +0200 Subject: [PATCH 0837/1997] Update system_warnings test --- tests/queries/0_stateless/01945_system_warnings.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01945_system_warnings.sh b/tests/queries/0_stateless/01945_system_warnings.sh index bf11cee2911..c9bd0fd4b8a 100755 --- a/tests/queries/0_stateless/01945_system_warnings.sh +++ b/tests/queries/0_stateless/01945_system_warnings.sh @@ -21,5 +21,4 @@ ${CLICKHOUSE_CLIENT} --multiple_joins_rewriter_version=42 -q "SELECT message FRO ${CLICKHOUSE_CLIENT} -q "SELECT count() = countDistinct(message) FROM system.warnings" # Avoid too many warnings, especially in CI -${CLICKHOUSE_CLIENT} -q "SELECT count() < 5 FROM system.warnings" - +${CLICKHOUSE_CLIENT} -q "SELECT count() < 10 FROM system.warnings" From 8c38d10bb4e838a156a2c1031cf2b60548e8cc42 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 27 Jun 2023 05:49:03 +0200 Subject: [PATCH 0838/1997] A fix for clang-17 --- src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp b/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp index 4ad3cc7373a..188956b34fc 100644 --- a/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp +++ b/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp @@ -144,9 +144,9 @@ Pipe ReadFromMemoryStorageStep::makePipe() storage_snapshot, nullptr /* data */, nullptr /* parallel execution index */, - [storage = storage](std::shared_ptr & data_to_initialize) + [my_storage = storage](std::shared_ptr & data_to_initialize) { - data_to_initialize = static_cast(*storage).data.get(); + data_to_initialize = assert_cast(*my_storage).data.get(); })); } From 9d8b996f684ff5f669db49a616bd3ec3bf0e4c23 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 27 Jun 2023 04:30:56 +0000 Subject: [PATCH 0839/1997] Automatic style fix --- tests/ci/fast_test_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/fast_test_check.py b/tests/ci/fast_test_check.py index 7bb09d30177..2849759a3ee 100644 --- a/tests/ci/fast_test_check.py +++ b/tests/ci/fast_test_check.py @@ -150,7 +150,7 @@ def main(): os.makedirs(logs_path) run_log_path = os.path.join(logs_path, "run.log") - with TeePopen(run_cmd, run_log_path, timeout = 90 * 60) as process: + with TeePopen(run_cmd, run_log_path, timeout=90 * 60) as process: retcode = process.wait() if retcode 
== 0: logging.info("Run successfully") From 76108b955cdab459ba98dd660912f6e90aad57f4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 27 Jun 2023 07:06:50 +0200 Subject: [PATCH 0840/1997] Fix test 01605_adaptive_granularity_block_borders --- .../01605_adaptive_granularity_block_borders.sql | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql b/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql index ca7d0f3c950..4623c456475 100644 --- a/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql +++ b/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql @@ -5,9 +5,9 @@ SET allow_prefetched_read_pool_for_remote_filesystem=0; DROP TABLE IF EXISTS adaptive_table; ---- If granularity of consequent blocks differs a lot, then adaptive ---- granularity will adjust amout of marks correctly. Data for test empirically ---- derived, it's quite hard to get good parameters. +-- If granularity of consequent blocks differs a lot, then adaptive +-- granularity will adjust the amount of marks correctly. +-- Data for test was empirically derived, it's quite hard to get good parameters. CREATE TABLE adaptive_table( key UInt64, @@ -32,6 +32,7 @@ SET enable_filesystem_cache = 0; -- If we have computed granularity incorrectly than we will exceed this limit. SET max_memory_usage='30M'; +SET max_threads = 1; SELECT max(length(value)) FROM adaptive_table; From dd3d2c9aeaa5798467521eaf2fc85f2332a07a6a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 27 Jun 2023 08:01:15 +0200 Subject: [PATCH 0841/1997] Fix syntax error --- tests/integration/test_attach_table_normalizer/test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_attach_table_normalizer/test.py b/tests/integration/test_attach_table_normalizer/test.py index ba0068e9c59..49acefdcd17 100644 --- a/tests/integration/test_attach_table_normalizer/test.py +++ b/tests/integration/test_attach_table_normalizer/test.py @@ -4,7 +4,7 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) node = cluster.add_instance( - 'node', main_configs=["configs/config.xml"], with_zookeeper=True, stay_alive=True + "node", main_configs=["configs/config.xml"], with_zookeeper=True, stay_alive=True ) @@ -18,13 +18,13 @@ def started_cluster(): def replace_substring_to_substr(node): - node.exec_in_container(( + node.exec_in_container( [ "bash", "-c", "sed -i 's/substring/substr/g' /var/lib/clickhouse/metadata/default/file.sql", ], - user="root" + user="root", ) From 5a4a774db7e961133dd124c5d337a402cb2ee9ee Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Tue, 27 Jun 2023 06:06:56 +0000 Subject: [PATCH 0842/1997] Style fix --- src/Functions/array/range.cpp | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/src/Functions/array/range.cpp b/src/Functions/array/range.cpp index 8c524566110..47e90de2e2b 100644 --- a/src/Functions/array/range.cpp +++ b/src/Functions/array/range.cpp @@ -60,13 +60,8 @@ private: getName(), arguments.size()); } - for (size_t i = 0, size = arguments.size(); i < size; ++i) - { - if (arguments[i]->onlyNull()) - { - return makeNullable(std::make_shared()); - } - } + if (std::find_if (arguments.cbegin(), arguments.cend(), [](const auto & arg) { return arg->onlyNull(); }) != arguments.cend()) + return makeNullable(std::make_shared()); DataTypes arg_types; for (size_t i = 0, size = arguments.size(); i 
< size; ++i) @@ -393,9 +388,7 @@ private: { NullPresence null_presence = getNullPresense(arguments); if (null_presence.has_null_constant) - { return result_type->createColumnConstWithDefaultValue(input_rows_count); - } DataTypePtr elem_type = checkAndGetDataType(result_type.get())->getNestedType(); WhichDataType which(elem_type); @@ -410,15 +403,11 @@ private: auto throwIfNullValue = [&](const ColumnWithTypeAndName & col) { if (!col.type->isNullable()) - { return; - } const auto & nullable_col = assert_cast(*col.column); const auto & null_map = nullable_col.getNullMapData(); if (!memoryIsZero(null_map.data(), 0, null_map.size())) - { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal (null) value column {} of argument of function {}", col.column->getName(), getName()); - } }; ColumnPtr res; @@ -429,7 +418,7 @@ private: if (arguments[0].type->isNullable()) { const auto * nullable = checkAndGetColumn(*arguments[0].column); - col= nullable->getNestedColumnPtr().get(); + col = nullable->getNestedColumnPtr().get(); } if (!((res = executeInternal(col)) || (res = executeInternal(col)) || (res = executeInternal(col)) From 8ad1d4b94f2d53c28f974be7ef8f0f4002eb4245 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Tue, 27 Jun 2023 06:31:30 +0000 Subject: [PATCH 0843/1997] Add comment --- src/Functions/formatDateTime.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Functions/formatDateTime.cpp b/src/Functions/formatDateTime.cpp index bdd694c7b94..50772866648 100644 --- a/src/Functions/formatDateTime.cpp +++ b/src/Functions/formatDateTime.cpp @@ -945,6 +945,7 @@ public: { auto c = DecimalUtils::split(vec[i], scale); + // -1.123 splits to -1 / 0.123 if (vec[i].value < 0 && c.fractional) { using F = typename DataType::FieldType; From b4b21c0cf2e0c9c128a1558b543787297059748a Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 27 Jun 2023 10:05:14 +0200 Subject: [PATCH 0844/1997] Try fix flaky 02497_storage_file_reader_selection --- .../queries/0_stateless/02497_storage_file_reader_selection.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02497_storage_file_reader_selection.sh b/tests/queries/0_stateless/02497_storage_file_reader_selection.sh index 20bde68718d..66b894404cf 100755 --- a/tests/queries/0_stateless/02497_storage_file_reader_selection.sh +++ b/tests/queries/0_stateless/02497_storage_file_reader_selection.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) DATA_FILE=test_02497_$CLICKHOUSE_TEST_UNIQUE_NAME.tsv echo -e 'key\nfoo\nbar' > $DATA_FILE -$CLICKHOUSE_LOCAL --storage_file_read_method=mmap --print-profile-events -q "SELECT * FROM file($DATA_FILE) FORMAT Null" 2>&1 | grep -F -c "CreatedReadBufferMMap" +$CLICKHOUSE_LOCAL --storage_file_read_method=mmap --print-profile-events -q "SELECT * FROM file($DATA_FILE) FORMAT Null" 2>&1 | grep -F -c "CreatedReadBufferMMap:" $CLICKHOUSE_LOCAL --storage_file_read_method=mmap --print-profile-events -q "SELECT * FROM file($DATA_FILE) FORMAT Null" 2>&1 | grep -F -c "CreatedReadBufferOrdinary" $CLICKHOUSE_LOCAL --storage_file_read_method=pread --print-profile-events -q "SELECT * FROM file($DATA_FILE) FORMAT Null" 2>&1 | grep -F -c "CreatedReadBufferMMap" From bd0ce5fc0bcaaff9586e9e4549210882cc26aa64 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Tue, 27 Jun 2023 16:34:04 +0800 Subject: [PATCH 0845/1997] wip --- src/Functions/substringIndex.cpp | 245 +++++++++++++++++++++++++++++++ 1 file changed, 245 
 insertions(+)
 create mode 100644 src/Functions/substringIndex.cpp

diff --git a/src/Functions/substringIndex.cpp b/src/Functions/substringIndex.cpp
new file mode 100644
index 00000000000..67699304320
--- /dev/null
+++ b/src/Functions/substringIndex.cpp
@@ -0,0 +1,245 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include "base/find_symbols.h"
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int ILLEGAL_COLUMN;
+    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+    extern const int ZERO_ARRAY_OR_TUPLE_INDEX;
+    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+    extern const int BAD_ARGUMENTS;
+}
+
+namespace
+{
+
+template <bool is_utf8>
+class FunctionSubstringIndex : public IFunction
+{
+public:
+    static constexpr auto name = is_utf8 ? "substringIndexUTF8" : "substringIndex";
+
+
+    static FunctionPtr create(ContextPtr)
+    {
+        return std::make_shared<FunctionSubstringIndex>();
+    }
+
+    String getName() const override
+    {
+        return name;
+    }
+
+    size_t getNumberOfArguments() const override { return 3; }
+
+    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
+
+    bool useDefaultImplementationForConstants() const override { return true; }
+
+    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
+    {
+        if (!isString(arguments[0]))
+            throw Exception(
+                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+                "Illegal type {} of first argument of function {}",
+                arguments[0]->getName(),
+                getName());
+
+        if (!isString(arguments[1]))
+            throw Exception(
+                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+                "Illegal type {} of second argument of function {}",
+                arguments[1]->getName(),
+                getName());
+
+        if (!isNativeNumber(arguments[2]))
+            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of third argument of function {}",
+                arguments[2]->getName(), getName());
+
+        return std::make_shared<DataTypeString>();
+    }
+
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
+    {
+        ColumnPtr column_string = arguments[0].column;
+        ColumnPtr column_delim = arguments[1].column;
+        ColumnPtr column_index = arguments[2].column;
+
+        const ColumnConst * column_delim_const = checkAndGetColumnConst<ColumnString>(column_delim.get());
+        if (!column_delim_const)
+            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument to {} must be a constant String", getName());
+
+        String delim = column_delim_const->getValue<String>();
+        if constexpr (!is_utf8)
+        {
+            if (delim.size() != 1)
+                throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second argument to {} must be a single character", getName());
+        }
+        else
+        {
+            // TODO
+        }
+
+        auto column_res = ColumnString::create();
+        ColumnString::Chars & vec_res = column_res->getChars();
+        ColumnString::Offsets & offsets_res = column_res->getOffsets();
+
+        const ColumnConst * column_string_const = checkAndGetColumnConst<ColumnString>(column_string.get());
+        if (column_string_const)
+        {
+            String str = column_string_const->getValue<String>();
+            constantVector(str, delim[0], column_index.get(), vec_res, offsets_res);
+        }
+        else
+        {
+            const auto * col_str = checkAndGetColumn<ColumnString>(column_string.get());
+            if (!col_str)
+                throw Exception(ErrorCodes::ILLEGAL_COLUMN, "First argument to {} must be a String", getName());
+
+            bool is_index_const = isColumnConst(*column_index);
+            if (is_index_const)
+            {
+                Int64 index = column_index->getInt(0);
+                vectorConstant(col_str->getChars(), col_str->getOffsets(), delim[0], index, vec_res, offsets_res);
+            }
+            else
+                vectorVector(col_str->getChars(), col_str->getOffsets(), delim[0], column_index.get(), vec_res, offsets_res);
+        }
+    }
+
+protected:
+    static void vectorVector(
+        const ColumnString::Chars & str_data,
+        const ColumnString::Offsets & str_offsets,
+        char delim,
+        const IColumn * index_column,
+        ColumnString::Chars & res_data,
+        ColumnString::Offsets & res_offsets)
+    {
+        size_t rows = str_offsets.size();
+        res_data.reserve(str_data.size() / 2);
+        res_offsets.reserve(rows);
+
+        for (size_t i=0; i < rows; ++i)
+        {
+            StringRef str_ref{&str_data[str_offsets[i]], str_offsets[i] - str_offsets[i - 1] - 1};
+            Int64 index = index_column->getInt(i);
+            StringRef res_ref = substringIndex(str_ref, index);
+            appendToResultColumn(res_ref, res_data, res_offsets);
+        }
+    }
+
+    static void vectorConstant(
+        const ColumnString::Chars & str_data,
+        const ColumnString::Offsets & str_offsets,
+        char delim,
+        Int64 index,
+        ColumnString::Chars & res_data,
+        ColumnString::Offsets & res_offsets)
+    {
+        size_t rows = str_offsets.size();
+        res_data.reserve(str_data.size() / 2);
+        res_offsets.reserve(rows);
+
+        for (size_t i = 0; i < rows; ++i)
+        {
+            StringRef str_ref{&str_data[str_offsets[i]], str_offsets[i] - str_offsets[i - 1] - 1};
+            StringRef res_ref = substringIndex(str_ref, index);
+            appendToResultColumn(res_ref, res_data, res_offsets);
+        }
+    }
+
+    static void constantVector(
+        const String & str,
+        char delim,
+        const IColumn * index_column,
+        ColumnString::Chars & res_data,
+        ColumnString::Offsets & res_offsets)
+    {
+        size_t rows = index_column->size();
+        res_data.reserve(str.size() * rows / 2);
+        res_offsets.reserve(rows);
+
+        StringRef str_ref{str.data(), str.size()};
+        for (size_t i=0; i < rows; ++i)
+        {
+            Int64 index = index_column->getInt(i);
+            StringRef res_ref = substringIndex(str_ref, index);
+            appendToResultColumn(res_ref, res_data, res_offsets);
+        }
+    }
+
+    static void appendToResultColumn(
+        const StringRef & res_ref, ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets)
+    {
+        size_t res_offset = res_data.size();
+        res_data.resize(res_offset + res_ref.size + 1);
+        memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], res_ref.data, res_ref.size);
+        res_offset += res_ref.size;
+        res_data[res_offset] = 0;
+        ++res_offset;
+
+        res_offsets.emplace_back(res_offset);
+    }
+
+    template
+    static StringRef substringIndex(
+        const StringRef & str,
+        Int64 index)
+    {
+        if (index == 0)
+            return {str.data, 0};
+
+        if (index > 0)
+        {
+            const auto * end = str.data + str.size;
+            const auto * pos = str.data;
+            Int64 i = 0;
+            while (i < index)
+            {
+                pos = find_first_symbols(pos, end);
+
+                if (pos != end)
+                {
+                    ++pos;
+                    ++i;
+                }
+                else
+                    return str;
+            }
+            return {str.data, static_cast<size_t>(pos - str.data)};
+        }
+        else
+        {
+            const auto * begin = str.data;
+            const auto * pos = str.data + str.size;
+            Int64 i = 0;
+            while (i < index)
+            {
+                const auto * next_pos = detail::find_last_symbols_sse2(begin, pos);
+
+                if (next_pos != pos)
+                {
+                    pos = next_pos;
+                    ++i;
+                }
+                else
+                    return str;
+            }
+
+            return {pos + 1, static_cast<size_t>(str.data + str.size - pos - 1)};
+        }
+    }
+};
+}
+
+}
+

From 0de5fcfbee1d4add8c0a350392163e5f46f23f97 Mon Sep 17 00:00:00 2001
From: taiyang-li <654010905@qq.com>
Date: Tue, 27 Jun 2023 18:13:25 +0800
Subject: [PATCH 0847/1997] finish dev

---
 src/Functions/substringIndex.cpp | 127 +++++++++++++++++++++++++------
 1 file changed, 102 insertions(+), 25 deletions(-)

diff --git a/src/Functions/substringIndex.cpp b/src/Functions/substringIndex.cpp
index 67699304320..0a5dfd00656 100644
--- a/src/Functions/substringIndex.cpp
+++ b/src/Functions/substringIndex.cpp
@@ -1,10 +1,14 @@
 #include
+#include
 #include
+#include
 #include
 #include
+#include
 #include
-#include
-#include "base/find_symbols.h"
+#include
+#include
+#include

 namespace DB
 {
@@ -67,7 +71,7 @@ public:
         return std::make_shared<DataTypeString>();
     }
 
-    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
     {
         ColumnPtr column_string = arguments[0].column;
         ColumnPtr column_delim = arguments[1].column;
@@ -85,7 +89,8 @@ public:
         }
         else
         {
-            // TODO
+            if (UTF8::countCodePoints(reinterpret_cast<const UInt8 *>(delim.data()), delim.size()) != 1)
+                throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second argument to {} must be a single UTF-8 character", getName());
         }
 
         auto column_res = ColumnString::create();
@@ -96,7 +101,7 @@ public:
         if (column_string_const)
         {
             String str = column_string_const->getValue<String>();
-            constantVector(str, delim[0], column_index.get(), vec_res, offsets_res);
+            constantVector(str, delim, column_index.get(), vec_res, offsets_res);
         }
         else
         {
@@ -108,10 +113,10 @@ public:
             if (is_index_const)
             {
                 Int64 index = column_index->getInt(0);
-                vectorConstant(col_str->getChars(), col_str->getOffsets(), delim[0], index, vec_res, offsets_res);
+                vectorConstant(col_str->getChars(), col_str->getOffsets(), delim, index, vec_res, offsets_res);
             }
             else
-                vectorVector(col_str->getChars(), col_str->getOffsets(), delim[0], column_index.get(), vec_res, offsets_res);
+                vectorVector(col_str->getChars(), col_str->getOffsets(), delim, column_index.get(), vec_res, offsets_res);
         }
     }
 
@@ -119,7 +124,7 @@ protected:
     static void vectorVector(
         const ColumnString::Chars & str_data,
         const ColumnString::Offsets & str_offsets,
-        char delim,
+        const String & delim,
         const IColumn * index_column,
         ColumnString::Chars & res_data,
         ColumnString::Offsets & res_offsets)
@@ -128,11 +133,15 @@ protected:
         res_data.reserve(str_data.size() / 2);
         res_offsets.reserve(rows);
 
-        for (size_t i=0; i < rows; ++i)
+        std::unique_ptr<PositionCaseSensitiveUTF8::SearcherInBigHaystack> searcher
+            = !is_utf8 ? nullptr : std::make_unique<PositionCaseSensitiveUTF8::SearcherInBigHaystack>(delim);
+
+        for (size_t i = 0; i < rows; ++i)
         {
             StringRef str_ref{&str_data[str_offsets[i]], str_offsets[i] - str_offsets[i - 1] - 1};
             Int64 index = index_column->getInt(i);
-            StringRef res_ref = substringIndex(str_ref, index);
+            StringRef res_ref
+                = !is_utf8 ? substringIndex(str_ref, index) : substringIndexUTF8(searcher.get(), str_ref, delim, index);
             appendToResultColumn(res_ref, res_data, res_offsets);
         }
     }
@@ -140,7 +149,7 @@ protected:
     static void vectorConstant(
         const ColumnString::Chars & str_data,
         const ColumnString::Offsets & str_offsets,
-        char delim,
+        const String & delim,
         Int64 index,
         ColumnString::Chars & res_data,
         ColumnString::Offsets & res_offsets)
@@ -149,17 +158,21 @@ protected:
         res_data.reserve(str_data.size() / 2);
         res_offsets.reserve(rows);
 
+        std::unique_ptr<PositionCaseSensitiveUTF8::SearcherInBigHaystack> searcher
+            = !is_utf8 ? nullptr : std::make_unique<PositionCaseSensitiveUTF8::SearcherInBigHaystack>(delim);
+
         for (size_t i = 0; i < rows; ++i)
         {
             StringRef str_ref{&str_data[str_offsets[i]], str_offsets[i] - str_offsets[i - 1] - 1};
-            StringRef res_ref = substringIndex(str_ref, index);
+            StringRef res_ref
+                = !is_utf8 ? substringIndex(str_ref, index) : substringIndexUTF8(searcher.get(), str_ref, delim, index);
             appendToResultColumn(res_ref, res_data, res_offsets);
         }
     }
 
     static void constantVector(
         const String & str,
-        char delim,
+        const String & delim,
         const IColumn * index_column,
         ColumnString::Chars & res_data,
         ColumnString::Offsets & res_offsets)
@@ -168,11 +181,15 @@ protected:
         res_data.reserve(str.size() * rows / 2);
         res_offsets.reserve(rows);
 
+        std::unique_ptr<PositionCaseSensitiveUTF8::SearcherInBigHaystack> searcher
+            = !is_utf8 ? nullptr : std::make_unique<PositionCaseSensitiveUTF8::SearcherInBigHaystack>(delim);
+
         StringRef str_ref{str.data(), str.size()};
         for (size_t i=0; i < rows; ++i)
         {
             Int64 index = index_column->getInt(i);
-            StringRef res_ref = substringIndex(str_ref, index);
+            StringRef res_ref
+                = !is_utf8 ? substringIndex(str_ref, index) : substringIndexUTF8(searcher.get(), str_ref, delim, index);
             appendToResultColumn(res_ref, res_data, res_offsets);
         }
     }
@@ -190,18 +207,68 @@ protected:
         res_offsets.emplace_back(res_offset);
     }
 
+    static StringRef substringIndexUTF8(
+        const PositionCaseSensitiveUTF8::SearcherInBigHaystack * searcher, const StringRef & str_ref, const String & delim, Int64 index)
+    {
+        if (index == 0)
+            return {str_ref.data, 0};
+
+        const auto * begin = reinterpret_cast<const UInt8 *>(str_ref.data);
+        const auto * end = reinterpret_cast<const UInt8 *>(str_ref.data + str_ref.size);
+        const auto * pos = begin;
+        if (index > 0)
+        {
+            Int64 i = 0;
+            while (i < index)
+            {
+                pos = searcher->search(pos, end - pos);
+
+                if (pos != end)
+                {
+                    pos += delim.size();
+                    ++i;
+                }
+                else
+                    return str_ref;
+            }
+            return {begin, static_cast<size_t>(pos - begin - delim.size())};
+        }
+        else
+        {
+            Int64 total = 0;
+            while (pos < end && end != (pos = searcher->search(pos, end - pos)))
+            {
+                pos += delim.size();
+                ++total;
+            }
+
+            if (total + index < 0)
+                return str_ref;
+
+            Int64 index_from_left = total + 1 + index;
+            pos = begin;
+            Int64 i = 0;
+            while (pos < end && end != (pos = searcher->search(pos, end - pos)) && i < index_from_left)
+            {
+                pos += delim.size();
+                ++i;
+            }
+            return {pos, static_cast<size_t>(end - pos)};
+        }
+    }
+
     template
     static StringRef substringIndex(
-        const StringRef & str,
+        const StringRef & str_ref,
         Int64 index)
     {
         if (index == 0)
-            return {str.data, 0};
+            return {str_ref.data, 0};
 
         if (index > 0)
         {
-            const auto * end = str.data + str.size;
-            const auto * pos = str.data;
+            const auto * end = str_ref.data + str_ref.size;
+            const auto * pos = str_ref.data;
             Int64 i = 0;
             while (i < index)
             {
@@ -213,18 +280,18 @@
                     ++i;
                 }
                 else
-                    return str;
+                    return str_ref;
             }
-            return {str.data, static_cast<size_t>(pos - str.data)};
+            return {str_ref.data, static_cast<size_t>(pos - str_ref.data - 1)};
         }
         else
         {
-            const auto * begin = str.data;
-            const auto * pos = str.data + str.size;
+            const auto * begin = str_ref.data;
+            const auto * pos = str_ref.data + str_ref.size;
             Int64 i = 0;
             while (i < index)
             {
-                const auto * next_pos = detail::find_last_symbols_sse2(begin, pos);
+                const auto * next_pos = ::detail::find_last_symbols_sse2(begin, pos);
 
                 if (next_pos != pos)
                 {
@@ -232,14 +299,24 @@
                     ++i;
                 }
                 else
-                    return str;
+                    return str_ref;
             }
 
-            return {pos + 1, static_cast<size_t>(str.data + str.size - pos - 1)};
+            return {pos + 1, static_cast<size_t>(str_ref.data + str_ref.size - pos - 1)};
         }
     }
 };
 }
 
+
+REGISTER_FUNCTION(SubstringIndex)
+{
+    factory.registerFunction<FunctionSubstringIndex<false>>(); /// substringIndex
+    factory.registerFunction<FunctionSubstringIndex<true>>(); /// substringIndexUTF8
+
+    factory.registerAlias("SUBSTRING_INDEX", "substringIndex", FunctionFactory::CaseInsensitive);
+}
+
+
 }
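For orientation, substringIndex(s, delim, count) follows MySQL's SUBSTRING_INDEX semantics: a positive count returns everything before the count-th occurrence of the delimiter counting from the left, a negative count everything after the count-th occurrence counting from the right. A few illustrative queries (expected results inferred from those semantics, not output captured from a build of this revision):

    SELECT substringIndex('www.clickhouse.com', '.', 2);   -- 'www.clickhouse'
    SELECT substringIndex('www.clickhouse.com', '.', -2);  -- 'clickhouse.com'
    SELECT substringIndex('www.clickhouse.com', '.', 0);   -- ''
    SELECT SUBSTRING_INDEX('www.clickhouse.com', '.', 1);  -- 'www', via the case-insensitive alias

The substringIndexUTF8 variant accepts a multi-byte delimiter, which is why the "finish dev" commit widens delim from char to String and switches the UTF-8 path to the PositionCaseSensitiveUTF8 searcher.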
From dd3d2c9aeaa5798467521eaf2fc85f2332a07a6a Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Tue, 27 Jun 2023 13:21:34 +0200
Subject: [PATCH 0848/1997] Convert 02003_memory_limit_in_client from expect to
 sh test (to fix flakiness)

I don't see a reason to use expect here; besides, the test is now only 2
lines.

This conversion should also fix the test's flakiness, since sometimes 60
seconds is not enough [1]:

    2023.06.26 17:42:29.582168 [ 13456 ] {8a324061-a504-4843-be65-6abb5db4bc5c} executeQuery: Query span trace_id for opentelemetry log: 00000000-0000-0000-0000-000000000000
    2023.06.26 17:42:29.584019 [ 13456 ] {8a324061-a504-4843-be65-6abb5db4bc5c} executeQuery: (from [::1]:38130) (comment: 02003_memory_limit_in_client.expect) SELECT arrayMap(x -> range(x), range(number)) FROM numbers(1000) (stage: Complete)
    2023.06.26 17:42:29.588131 [ 13456 ] {8a324061-a504-4843-be65-6abb5db4bc5c} InterpreterSelectQuery: FetchColumns -> Complete
    2023.06.26 17:42:29.780980 [ 2566 ] {8a324061-a504-4843-be65-6abb5db4bc5c} MemoryTracker: Current memory usage (for query): 1.24 GiB.
    2023.06.26 17:43:29.179098 [ 13456 ] {8a324061-a504-4843-be65-6abb5db4bc5c} TCPHandler: Client has dropped the connection, cancel the query.
    2023.06.26 17:44:05.523345 [ 13456 ] {8a324061-a504-4843-be65-6abb5db4bc5c} executeQuery: Code: 394. DB::Exception: Query was cancelled or a client has unexpectedly dropped the connection. (QUERY_WAS_CANCELLED) (version 23.6.1.1) (from [::1]:38130) (comment: 02003_memory_limit_in_client.expect) (in query: SELECT arrayMap(x -> range(x), range(number)) FROM numbers(1000))
    2023.06.26 17:44:05.680631 [ 13456 ] {8a324061-a504-4843-be65-6abb5db4bc5c} MemoryTracker: Peak memory usage (for query): 1.25 GiB.
    2023.06.26 17:44:05.680747 [ 13456 ] {8a324061-a504-4843-be65-6abb5db4bc5c} TCPHandler: Processed in 96.108615451 sec.

  [1]: https://s3.amazonaws.com/clickhouse-test-reports/51407/e92035072b7b3367da12089b28041893eb90e636/stateless_tests__tsan__[2_5].html

Either expect is too slow, or the server did not respond for a while;
but this was a TSan build and trace_log is empty, so it is not possible
to tell which of the two was the problem.

Signed-off-by: Azat Khuzhin

---
 .../02003_memory_limit_in_client.expect       | 64 -------------------
 .../02003_memory_limit_in_client.reference    |  1 +
 .../02003_memory_limit_in_client.sh           |  8 +++
 3 files changed, 9 insertions(+), 64 deletions(-)
 delete mode 100755 tests/queries/0_stateless/02003_memory_limit_in_client.expect
 create mode 100755 tests/queries/0_stateless/02003_memory_limit_in_client.sh

diff --git a/tests/queries/0_stateless/02003_memory_limit_in_client.expect b/tests/queries/0_stateless/02003_memory_limit_in_client.expect
deleted file mode 100755
index 377656fa641..00000000000
--- a/tests/queries/0_stateless/02003_memory_limit_in_client.expect
+++ /dev/null
@@ -1,64 +0,0 @@
-#!/usr/bin/expect -f
-
-# This is a test for system.warnings.
Testing in interactive mode is necessary, -# as we want to see certain warnings from client - -set basedir [file dirname $argv0] -set basename [file tail $argv0] -exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0 -set history_file $env(CLICKHOUSE_TMP)/$basename.history - -log_user 0 -set timeout 60 -match_max 100000 - -expect_after { - # Do not ignore eof from expect - -i $any_spawn_id eof { exp_continue } - # A default timeout action is to do nothing, change it to fail - -i $any_spawn_id timeout { exit 1 } -} - -# -# Check that the query will fail in clickhouse-client -# -spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --max_memory_usage_in_client=1 --history_file=$history_file" -expect ":) " - -send -- "SELECT arrayMap(x -> range(x), range(number)) FROM numbers(1000)\r" -expect "Code: 241" - -expect ":) " - -# Exit. -send -- "\4" -expect eof - -# -# Check that the query will fail in clickhouse-client -# -spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --max_memory_usage_in_client=1 --history_file=$history_file" -expect ":) " - -send -- "SELECT arrayMap(x -> range(x), range(number)) FROM numbers(1000)\r" -expect "Code: 241" - -expect ":) " - -# Exit. -send -- "\4" -expect eof - -# -# Check that the query will not fail (due to max_untracked_memory) -# -spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --max_memory_usage_in_client=1 --history_file=$history_file" -expect ":) " - -send -- "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000\r" -expect "60000" -expect ":) " - -# Exit. -send -- "\4" -expect eof diff --git a/tests/queries/0_stateless/02003_memory_limit_in_client.reference b/tests/queries/0_stateless/02003_memory_limit_in_client.reference index e69de29bb2d..541b3a18e90 100644 --- a/tests/queries/0_stateless/02003_memory_limit_in_client.reference +++ b/tests/queries/0_stateless/02003_memory_limit_in_client.reference @@ -0,0 +1 @@ +60000 diff --git a/tests/queries/0_stateless/02003_memory_limit_in_client.sh b/tests/queries/0_stateless/02003_memory_limit_in_client.sh new file mode 100755 index 00000000000..2d2493828c8 --- /dev/null +++ b/tests/queries/0_stateless/02003_memory_limit_in_client.sh @@ -0,0 +1,8 @@ +#!/usr/bin/bash -f + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --max_memory_usage_in_client=1 -n -q "SELECT arrayMap(x -> range(x), range(number)) FROM numbers(1000) -- { clientError MEMORY_LIMIT_EXCEEDED }" +$CLICKHOUSE_CLIENT --max_memory_usage_in_client=0 -n -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" From acea2d66f0c4abad3c700ef2153977a5bcfba819 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 27 Jun 2023 13:40:07 +0200 Subject: [PATCH 0849/1997] Fix --- tests/integration/test_disk_over_web_server/test.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/integration/test_disk_over_web_server/test.py b/tests/integration/test_disk_over_web_server/test.py index 719de5e8bef..d62adfb3343 100644 --- a/tests/integration/test_disk_over_web_server/test.py +++ b/tests/integration/test_disk_over_web_server/test.py @@ -45,6 +45,8 @@ def cluster(): f"CREATE TABLE data{i} (id Int32) ENGINE = MergeTree() ORDER BY id SETTINGS storage_policy = 'def', min_bytes_for_wide_part=1;" ) + node.query("SYSTEM STOP MERGES") + for _ in range(10): node.query( f"INSERT INTO data{i} SELECT number FROM numbers(500000 * {i+1})" From ea1177493917a55e6f5f611f540d41199dbbc145 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Tue, 27 Jun 2023 11:47:24 +0000 Subject: [PATCH 0850/1997] Skip the analysis of headers argument with the new analyzer --- src/TableFunctions/TableFunctionS3.cpp | 20 ++++++++++++++++++++ src/TableFunctions/TableFunctionS3.h | 4 ++++ 2 files changed, 24 insertions(+) diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index c8cc0cddd30..8b519168b4a 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -18,6 +18,8 @@ #include #include #include "registerTableFunctions.h" +#include +#include #include @@ -32,6 +34,24 @@ namespace ErrorCodes } +std::vector TableFunctionS3::skipAnalysisForArguments(const QueryTreeNodePtr & query_node_table_function, ContextPtr) const +{ + auto & table_function_node = query_node_table_function->as(); + auto & table_function_arguments_nodes = table_function_node.getArguments().getNodes(); + size_t table_function_arguments_size = table_function_arguments_nodes.size(); + + std::vector result; + + for (size_t i = 0; i < table_function_arguments_size; ++i) + { + auto * function_node = table_function_arguments_nodes[i]->as(); + if (function_node && function_node->getFunctionName() == "headers") + result.push_back(i); + } + + return result; +} + /// This is needed to avoid copy-pase. 
 /// because s3Cluster arguments differ only in one additional (first) argument: the cluster name
 void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context)
 {
diff --git a/src/TableFunctions/TableFunctionS3.h b/src/TableFunctions/TableFunctionS3.h
index c983bec9bf4..d308f469236 100644
--- a/src/TableFunctions/TableFunctionS3.h
+++ b/src/TableFunctions/TableFunctionS3.h
@@ -73,6 +73,10 @@ protected:
 
     mutable StorageS3::Configuration configuration;
     ColumnsDescription structure_hint;
+
+private:
+
+    std::vector<size_t> skipAnalysisForArguments(const QueryTreeNodePtr & query_node_table_function, ContextPtr context) const override;
 };
 
 }

From d4c8021f8078425ae3e7a74e32477f78307a0999 Mon Sep 17 00:00:00 2001
From: Igor Nikonov
Date: Tue, 27 Jun 2023 12:41:21 +0000
Subject: [PATCH 0851/1997] Extend consistency check for ColumnObject + check
 that there are subcolumns when there are rows

---
 src/Columns/ColumnObject.cpp                      | 14 ++++++++++++--
 .../02789_object_type_invalid_num_of_rows.sql     |  2 +-
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/src/Columns/ColumnObject.cpp b/src/Columns/ColumnObject.cpp
index 07872774559..4fa0c3ee41a 100644
--- a/src/Columns/ColumnObject.cpp
+++ b/src/Columns/ColumnObject.cpp
@@ -626,6 +626,9 @@ ColumnObject::ColumnObject(Subcolumns && subcolumns_, bool is_nullable_)
 
 void ColumnObject::checkConsistency() const
 {
+    if (num_rows && subcolumns.empty())
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "ColumnObject is inconsistent: it has no subcolumns, but has {} rows", num_rows);
+
     if (subcolumns.empty())
         return;
 
@@ -642,9 +645,9 @@ void ColumnObject::checkConsistency() const
 
 size_t ColumnObject::size() const
 {
-#ifndef NDEBUG
+// #ifndef NDEBUG
     checkConsistency();
-#endif
+// #endif
     return num_rows;
 }
 
@@ -711,6 +714,8 @@ void ColumnObject::insert(const Field & field)
     }
 
     ++num_rows;
+
+    checkConsistency();
 }
 
 void ColumnObject::insertDefault()
@@ -719,6 +724,8 @@
         entry->data.insertDefault();
 
     ++num_rows;
+
+    checkConsistency();
 }
 
 Field ColumnObject::operator[](size_t n) const
@@ -776,6 +783,8 @@ void ColumnObject::insertRangeFrom(const IColumn & src, size_t start, size_t len
 
     num_rows += length;
     finalize();
+
+    checkConsistency();
 }
 
 void ColumnObject::popBack(size_t length)
@@ -784,6 +793,7 @@
         entry->data.popBack(length);
 
     num_rows -= length;
+    checkConsistency();
 }
 
 template
diff --git a/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.sql b/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.sql
index 8ced133c8eb..a9c8a844aa0 100644
--- a/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.sql
+++ b/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.sql
@@ -1,2 +1,2 @@
 set allow_experimental_object_type=1;
-SELECT '0.02' GROUP BY GROUPING SETS (('6553.6'), (CAST(CAST('{"x" : 1}', 'Object(Nullable(\'json\'))'), 'Object(\'json\')'))) settings max_threads=1; -- { serverError NOT_IMPLEMENTED }
+SELECT '0.02' GROUP BY GROUPING SETS (('6553.6'), (CAST('{"x" : 1}', 'Object(\'json\')'))) settings max_threads=1; -- { serverError NOT_IMPLEMENTED }

From 46a69829ff5a50867c21013b2ae9d28f2331876e Mon Sep 17 00:00:00 2001
From: kssenii
Date: Tue, 27 Jun 2023 15:04:47 +0200
Subject: [PATCH 0852/1997] Delay shutdown of system and temporary databases

---
 src/Interpreters/DatabaseCatalog.cpp | 14 ++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp
index 8d3fa91a7fe..9a96bd09d82 100644
--- a/src/Interpreters/DatabaseCatalog.cpp
+++ b/src/Interpreters/DatabaseCatalog.cpp
@@ -216,8 +216,22 @@ void DatabaseCatalog::shutdownImpl()
 
     /// We still hold "databases" (instead of std::move) for Buffer tables to flush data correctly.
 
+    /// Delay shutdown of temporary and system databases. They will be shut down last.
+    std::vector<DatabasePtr> databases_with_delayed_shutdown;
     for (auto & database : current_databases)
+    {
+        if (database.first == TEMPORARY_DATABASE || database.first == SYSTEM_DATABASE)
+        {
+            databases_with_delayed_shutdown.push_back(database.second);
+            continue;
+        }
         database.second->shutdown();
+    }
+
+    for (auto & database : databases_with_delayed_shutdown)
+    {
+        database->shutdown();
+    }
 
     {
         std::lock_guard lock(tables_marked_dropped_mutex);

From 89de7a64eb3d36c3cbfed7388338e7352d216c78 Mon Sep 17 00:00:00 2001
From: Vitaly Baranov
Date: Tue, 27 Jun 2023 15:06:31 +0200
Subject: [PATCH 0853/1997] Fix memory leakage in CompressionCodecDeflateQpl.

---
 src/Compression/CompressionCodecDeflateQpl.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/Compression/CompressionCodecDeflateQpl.cpp b/src/Compression/CompressionCodecDeflateQpl.cpp
index 4a2866d152d..859a9c2463a 100644
--- a/src/Compression/CompressionCodecDeflateQpl.cpp
+++ b/src/Compression/CompressionCodecDeflateQpl.cpp
@@ -9,6 +9,7 @@
 #include
 #include "libaccel_config.h"
 #include
+#include
 
 namespace DB
 {
@@ -34,6 +35,7 @@ DeflateQplJobHWPool::DeflateQplJobHWPool()
     // loop all configured workqueue size to get maximum job number.
     accfg_ctx * ctx_ptr = nullptr;
     auto ctx_status = accfg_new(&ctx_ptr);
+    SCOPE_EXIT({ accfg_unref(ctx_ptr); });
     if (ctx_status == 0)
     {
         auto * dev_ptr = accfg_device_get_first(ctx_ptr);
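The SCOPE_EXIT one-liner above registers accfg_unref right after accfg_new, so every path out of the constructor, including any early exit from the device-probing loop, releases the context; that is the whole leak fix. The same guard can be written by hand in a few lines; a generic sketch (the some_ctx_* API is hypothetical and only illustrates the pairing):

    #include <utility>

    /// Runs a callback when the enclosing scope is left,
    /// whether normally, via early return, or via exception.
    template <typename F>
    class ScopeGuard
    {
    public:
        explicit ScopeGuard(F f) : func(std::move(f)) {}
        ~ScopeGuard() { func(); }

        ScopeGuard(const ScopeGuard &) = delete;
        ScopeGuard & operator=(const ScopeGuard &) = delete;

    private:
        F func;
    };

    /// Usage with a hypothetical C-style, reference-counted API:
    ///     some_ctx * ctx = nullptr;
    ///     int status = some_ctx_new(&ctx);
    ///     ScopeGuard guard([&] { some_ctx_unref(ctx); }); /// paired with the acquisition
    ///     if (status != 0)
    ///         return; /// ctx is still released here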
From c475e706d34761d7b5ff94b5f186e6f0e5479436 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Tue, 27 Jun 2023 13:20:00 +0000
Subject: [PATCH 0854/1997] Fix optimization to move functions before sorting.

---
 .../Optimizations/liftUpFunctions.cpp         |  20 +++
 ..._and_columns_with_same_names_bug.reference |   0
 ...orting_and_columns_with_same_names_bug.sql | 133 ++++++++++++++++++
 3 files changed, 153 insertions(+)
 create mode 100644 tests/queries/0_stateless/02789_functions_after_sorting_and_columns_with_same_names_bug.reference
 create mode 100644 tests/queries/0_stateless/02789_functions_after_sorting_and_columns_with_same_names_bug.sql

diff --git a/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp b/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp
index c3b03a5385f..b2c3f3b4a6d 100644
--- a/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp
+++ b/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp
@@ -4,6 +4,7 @@
 #include
 #include
 #include
+#include
 
 namespace DB
 {
@@ -28,6 +29,22 @@ const DB::DataStream & getChildOutputStream(DB::QueryPlan::Node & node)
 namespace DB::QueryPlanOptimizations
 {
 
+/// This is a check that output columns with the same name have the same types.
+/// It is ok to have such a situation in a DAG, but not in a Block.
+/// TODO: we should have a different data structure for headers.
+static bool areOutputsConvertableToBlock(const ActionsDAG::NodeRawConstPtrs & outputs)
+{
+    std::unordered_map<std::string_view, const IDataType *> name_to_type;
+    for (const auto & output : outputs)
+    {
+        auto [it, inserted] = name_to_type.emplace(output->result_name, output->result_type.get());
+        if (!inserted && !it->second->equals(*output->result_type))
+            return false;
+    }
+
+    return true;
+}
+
 size_t tryExecuteFunctionsAfterSorting(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes)
 {
     if (parent_node->children.size() != 1)
@@ -57,6 +74,9 @@ size_t tryExecuteFunctionsAfterSorting(QueryPlan::Node * parent_node, QueryPlan:
     if (unneeded_for_sorting->trivial())
         return 0;
 
+    if (!areOutputsConvertableToBlock(needed_for_sorting->getOutputs()))
+        return 0;
+
     // Sorting (parent_node) -> Expression (child_node)
     auto & node_with_needed = nodes.emplace_back();
     std::swap(node_with_needed.children, child_node->children);
diff --git a/tests/queries/0_stateless/02789_functions_after_sorting_and_columns_with_same_names_bug.reference b/tests/queries/0_stateless/02789_functions_after_sorting_and_columns_with_same_names_bug.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/02789_functions_after_sorting_and_columns_with_same_names_bug.sql b/tests/queries/0_stateless/02789_functions_after_sorting_and_columns_with_same_names_bug.sql
new file mode 100644
index 00000000000..4a9ede36335
--- /dev/null
+++ b/tests/queries/0_stateless/02789_functions_after_sorting_and_columns_with_same_names_bug.sql
@@ -0,0 +1,133 @@
+drop table if exists test;
+drop table if exists test1;
+
+CREATE TABLE test
+(
+    `pt` String,
+    `count_distinct_exposure_uv` AggregateFunction(uniqHLL12, Int64)
+)
+ENGINE = AggregatingMergeTree
+ORDER BY pt;
+
+SELECT *
+FROM
+(
+    SELECT m0.pt AS pt
+          ,m0.`exposure_uv` AS exposure_uv
+          ,round(m2.exposure_uv,4) AS exposure_uv_hb_last_value
+          ,if(m2.exposure_uv IS NULL OR m2.exposure_uv = 0,NULL,round((m0.exposure_uv - m2.exposure_uv) * 1.0 / m2.exposure_uv,4)) AS exposure_uv_hb_diff_percent
+          ,round(m1.exposure_uv,4) AS exposure_uv_tb_last_value
+          ,if(m1.exposure_uv IS NULL OR m1.exposure_uv = 0,NULL,round((m0.exposure_uv - m1.exposure_uv) * 1.0 / m1.exposure_uv,4)) AS exposure_uv_tb_diff_percent
+    FROM
+    (
+        SELECT m0.pt AS pt
+              ,`exposure_uv` AS `exposure_uv`
+        FROM
+        (
+            SELECT pt AS pt
+                  ,CASE WHEN COUNT(`exposure_uv`) > 0 THEN AVG(`exposure_uv`) ELSE 0 END AS `exposure_uv`
+            FROM
+            (
+                SELECT pt AS pt
+                      ,uniqHLL12Merge(count_distinct_exposure_uv) AS `exposure_uv`
+                FROM test
+                GROUP BY pt
+            ) m
+            GROUP BY pt
+        ) m0
+    ) m0
+    LEFT JOIN
+    (
+        SELECT m0.pt AS pt
+              ,`exposure_uv` AS `exposure_uv`
+        FROM
+        (
+            SELECT formatDateTime(addYears(parseDateTimeBestEffort(pt),1),'%Y%m%d') AS pt
+                  ,CASE WHEN COUNT(`exposure_uv`) > 0 THEN AVG(`exposure_uv`) ELSE 0 END AS `exposure_uv`
+            FROM
+            (
+                SELECT pt AS pt
+                      ,uniqHLL12Merge(count_distinct_exposure_uv) AS `exposure_uv`
+                FROM test
+                GROUP BY pt
+            ) m
+            GROUP BY pt
+        ) m0
+    ) m1
+    ON m0.pt = m1.pt
+    LEFT JOIN
+    (
+        SELECT m0.pt AS pt
+              ,`exposure_uv` AS `exposure_uv`
+        FROM
+        (
+            SELECT formatDateTime(addDays(toDate(parseDateTimeBestEffort(pt)),1),'%Y%m%d') AS pt
+                  ,CASE WHEN COUNT(`exposure_uv`) > 0 THEN AVG(`exposure_uv`) ELSE 0 END AS `exposure_uv`
+            FROM
+            (
+                SELECT pt AS pt
+                      ,uniqHLL12Merge(count_distinct_exposure_uv) AS `exposure_uv`
+                FROM test
+                GROUP BY pt
+            ) m
+            GROUP BY pt
+        ) m0
+    ) m2
+    ON m0.pt = m2.pt
+) c0
+ORDER BY pt ASC, exposure_uv DESC
+settings join_use_nulls = 1;
+
+CREATE TABLE test1
+(
+    `pt` String,
`exposure_uv` Float64 +) +ENGINE = Memory; + +SELECT * +FROM +( + SELECT m0.pt + ,m0.exposure_uv AS exposure_uv + ,round(m2.exposure_uv,4) + FROM + ( + SELECT pt + ,exposure_uv + FROM test1 + ) m0 + LEFT JOIN + ( + SELECT pt + ,exposure_uv + FROM test1 + ) m1 + ON m0.pt = m1.pt + LEFT JOIN + ( + SELECT pt + ,exposure_uv + FROM test1 + ) m2 + ON m0.pt = m2.pt +) c0 +ORDER BY exposure_uv +settings join_use_nulls = 1; + +SELECT + pt AS pt, + exposure_uv AS exposure_uv +FROM +( + SELECT + pt + FROM test1 +) AS m0 +FULL OUTER JOIN +( + SELECT + pt, + exposure_uv + FROM test1 +) AS m1 ON m0.pt = m1.pt; From ddca9ece501e7e5554fbf439008f990997c7f2d1 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 27 Jun 2023 15:32:14 +0200 Subject: [PATCH 0855/1997] Increase retries --- tests/integration/test_multiple_disks/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_multiple_disks/test.py b/tests/integration/test_multiple_disks/test.py index 54e7f6dd8ee..b5606ee8bc2 100644 --- a/tests/integration/test_multiple_disks/test.py +++ b/tests/integration/test_multiple_disks/test.py @@ -846,7 +846,7 @@ def test_start_stop_moves(start_cluster, name, engine): node1.query("SYSTEM START MOVES {}".format(name)) # wait sometime until background backoff finishes - retry = 30 + retry = 60 i = 0 while not sum(1 for x in used_disks if x == "jbod1") <= 2 and i < retry: time.sleep(1) From 99c9830737941e133814c75cd811ce4c5576b4fa Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 27 Jun 2023 13:34:58 +0000 Subject: [PATCH 0856/1997] Docs: Add more blog posts to join docs --- docs/en/sql-reference/statements/select/join.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/en/sql-reference/statements/select/join.md b/docs/en/sql-reference/statements/select/join.md index 08ffae838f8..7971b3ba275 100644 --- a/docs/en/sql-reference/statements/select/join.md +++ b/docs/en/sql-reference/statements/select/join.md @@ -21,6 +21,9 @@ Expressions from `ON` clause and columns from `USING` clause are called “join ## Related Content - Blog: [ClickHouse: A Blazingly Fast DBMS with Full SQL Join Support - Part 1](https://clickhouse.com/blog/clickhouse-fully-supports-joins) +- Blog: [ClickHouse: A Blazingly Fast DBMS with Full SQL Join Support - Under the Hood - Part 2](https://clickhouse.com/blog/clickhouse-fully-supports-joins-hash-joins-part2) +- Blog: [ClickHouse: A Blazingly Fast DBMS with Full SQL Join Support - Under the Hood - Part 3](https://clickhouse.com/blog/clickhouse-fully-supports-joins-full-sort-partial-merge-part3) +- Blog: [ClickHouse: A Blazingly Fast DBMS with Full SQL Join Support - Under the Hood - Part 4](https://clickhouse.com/blog/clickhouse-fully-supports-joins-direct-join-part4) ## Supported Types of JOIN From 648b647f5f878c4da0265cfae3dc39e50021170b Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 27 Jun 2023 15:49:26 +0200 Subject: [PATCH 0857/1997] Fix race --- src/IO/BoundedReadBuffer.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/IO/BoundedReadBuffer.h b/src/IO/BoundedReadBuffer.h index 183dbedb78b..eb65857e83a 100644 --- a/src/IO/BoundedReadBuffer.h +++ b/src/IO/BoundedReadBuffer.h @@ -31,7 +31,8 @@ public: private: std::optional read_until_position; - size_t file_offset_of_buffer_end = 0; + /// atomic because can be used in log or exception messages while being updated. 
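+    /// Making the member atomic is enough here: loads and stores of the offset become
+    /// single indivisible operations, so concurrent readers that only format the value
+    /// into a message can no longer race with updates.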
+ std::atomic file_offset_of_buffer_end = 0; }; } From b8866e01286c4e1adeb10554b823eb1a79b9119c Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 27 Jun 2023 16:11:27 +0200 Subject: [PATCH 0858/1997] Fix flaky unit test --- src/Interpreters/tests/gtest_lru_file_cache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/tests/gtest_lru_file_cache.cpp b/src/Interpreters/tests/gtest_lru_file_cache.cpp index 3cba1e48e1e..429f940a7c7 100644 --- a/src/Interpreters/tests/gtest_lru_file_cache.cpp +++ b/src/Interpreters/tests/gtest_lru_file_cache.cpp @@ -475,7 +475,7 @@ TEST_F(FileCacheTest, get) } cv.notify_one(); - file_segment2.wait(file_segment2.range().left); + file_segment2.wait(file_segment2.range().right); file_segment2.complete(); ASSERT_TRUE(file_segment2.state() == State::DOWNLOADED); }); From fe9702192961fe2073713d9b9f033c6177666d22 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 27 Jun 2023 16:54:14 +0200 Subject: [PATCH 0859/1997] add missing finalize calls in buffers --- src/BridgeHelper/ExternalDictionaryLibraryBridgeHelper.cpp | 1 + src/Dictionaries/HTTPDictionarySource.cpp | 2 ++ 2 files changed, 3 insertions(+) diff --git a/src/BridgeHelper/ExternalDictionaryLibraryBridgeHelper.cpp b/src/BridgeHelper/ExternalDictionaryLibraryBridgeHelper.cpp index 284e7740a58..fcb8ebd1f22 100644 --- a/src/BridgeHelper/ExternalDictionaryLibraryBridgeHelper.cpp +++ b/src/BridgeHelper/ExternalDictionaryLibraryBridgeHelper.cpp @@ -239,6 +239,7 @@ QueryPipeline ExternalDictionaryLibraryBridgeHelper::loadKeys(const Block & requ WriteBufferFromOStream out_buffer(os); auto output_format = getContext()->getOutputFormat(ExternalDictionaryLibraryBridgeHelper::DEFAULT_FORMAT, out_buffer, requested_block.cloneEmpty()); formatBlock(output_format, requested_block); + out_buffer.finalize(); }; return QueryPipeline(loadBase(uri, out_stream_callback)); } diff --git a/src/Dictionaries/HTTPDictionarySource.cpp b/src/Dictionaries/HTTPDictionarySource.cpp index 757a3b1819b..55bff868dee 100644 --- a/src/Dictionaries/HTTPDictionarySource.cpp +++ b/src/Dictionaries/HTTPDictionarySource.cpp @@ -135,6 +135,7 @@ QueryPipeline HTTPDictionarySource::loadIds(const std::vector & ids) WriteBufferFromOStream out_buffer(ostr); auto output_format = context->getOutputFormatParallelIfPossible(configuration.format, out_buffer, block.cloneEmpty()); formatBlock(output_format, block); + out_buffer.finalize(); }; Poco::URI uri(configuration.url); @@ -164,6 +165,7 @@ QueryPipeline HTTPDictionarySource::loadKeys(const Columns & key_columns, const WriteBufferFromOStream out_buffer(ostr); auto output_format = context->getOutputFormatParallelIfPossible(configuration.format, out_buffer, block.cloneEmpty()); formatBlock(output_format, block); + out_buffer.finalize(); }; Poco::URI uri(configuration.url); From e34597e43d09c3c164fb516a544f82d347be6afa Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Tue, 27 Jun 2023 18:36:32 +0300 Subject: [PATCH 0860/1997] Fix tabulation --- src/Functions/dateDiff.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/dateDiff.cpp b/src/Functions/dateDiff.cpp index 253ed703bb9..6bfbbb7c735 100644 --- a/src/Functions/dateDiff.cpp +++ b/src/Functions/dateDiff.cpp @@ -399,7 +399,7 @@ public: impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "millisecond" || unit == "ms") impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); - else if (unit == "microsecond" || unit == 
"us" || unit == "u") + else if (unit == "microsecond" || unit == "us" || unit == "u") impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else throw Exception(ErrorCodes::BAD_ARGUMENTS, From 47480de25a4565e520ffc0df1df64d3cd5cf8b6c Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Tue, 27 Jun 2023 16:38:29 +0000 Subject: [PATCH 0861/1997] Fix a crash in s3 and s3Cluster functions --- src/TableFunctions/TableFunctionS3.cpp | 5 +++-- tests/queries/0_stateless/02772_s3_crash.reference | 0 tests/queries/0_stateless/02772_s3_crash.sql | 5 +++++ 3 files changed, 8 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/02772_s3_crash.reference create mode 100644 tests/queries/0_stateless/02772_s3_crash.sql diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index 8b519168b4a..0f3078b1ca6 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -61,13 +61,14 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context } else { - if (args.empty() || args.size() > 6) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "The signature of table function {} shall be the following:\n{}", getName(), getSignature()); auto * header_it = StorageURL::collectHeaders(args, configuration.headers_from_ast, context); if (header_it != args.end()) args.erase(header_it); + if (args.empty() || args.size() > 6) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "The signature of table function {} shall be the following:\n{}", getName(), getSignature()); + for (auto & arg : args) arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); diff --git a/tests/queries/0_stateless/02772_s3_crash.reference b/tests/queries/0_stateless/02772_s3_crash.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02772_s3_crash.sql b/tests/queries/0_stateless/02772_s3_crash.sql new file mode 100644 index 00000000000..5cad83def63 --- /dev/null +++ b/tests/queries/0_stateless/02772_s3_crash.sql @@ -0,0 +1,5 @@ +-- Tags: no-fasttest +-- Tag no-fasttest: Depends on AWS + +SELECT * FROM s3(headers('random_header' = 'value')); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT * FROM s3Cluster('test_cluster_two_shards_localhost', headers('random_header' = 'value')); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } From 98721ca36c87379e6899837b46cf4b9b5810a969 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 27 Jun 2023 16:44:50 +0000 Subject: [PATCH 0862/1997] New epoch From f3f604ace491e35b251d8be928c7110d83978d9f Mon Sep 17 00:00:00 2001 From: yariks5s Date: Tue, 27 Jun 2023 16:45:34 +0000 Subject: [PATCH 0863/1997] added table with pk size --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 17 +++++++++++++++++ src/Storages/MergeTree/IMergeTreeDataPart.h | 1 + src/Storages/System/StorageSystemParts.cpp | 3 +++ 3 files changed, 21 insertions(+) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index c9930e61e98..55db22d6105 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1,5 +1,6 @@ #include "IMergeTreeDataPart.h" #include "Storages/MergeTree/IDataPartStorage.h" +#include "base/types.h" #include #include @@ -1800,6 +1801,22 @@ MutableDataPartStoragePtr IMergeTreeDataPart::makeCloneOnDisk(const DiskPtr & di return getDataPartStorage().clonePart(path_to_clone, 
getDataPartStorage().getPartDirectory(), disk, storage.log); } +UInt64 IMergeTreeDataPart::getIndexSizeFromFile() const +{ + auto metadata_snapshot = storage.getInMemoryMetadataPtr(); + if (parent_part) + metadata_snapshot = metadata_snapshot->projections.get(name).metadata; + const auto & pk = metadata_snapshot->getPrimaryKey(); + if (!pk.column_names.empty()) + { + String file = "primary" + getIndexExtension(false); + if (checksums.files.contains("primary" + getIndexExtension(true))) + file = "primary" + getIndexExtension(true); + return getFileSizeOrZero(file); + } + return 0; +} + void IMergeTreeDataPart::checkConsistencyBase() const { auto metadata_snapshot = storage.getInMemoryMetadataPtr(); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index a36634d2cf9..b3c70c99d2e 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -323,6 +323,7 @@ public: UInt64 getIndexSizeInBytes() const; UInt64 getIndexSizeInAllocatedBytes() const; UInt64 getMarksCount() const; + UInt64 getIndexSizeFromFile() const; UInt64 getBytesOnDisk() const { return bytes_on_disk; } void setBytesOnDisk(UInt64 bytes_on_disk_) { bytes_on_disk = bytes_on_disk_; } diff --git a/src/Storages/System/StorageSystemParts.cpp b/src/Storages/System/StorageSystemParts.cpp index 86ecb336b51..e1e8ba1aa00 100644 --- a/src/Storages/System/StorageSystemParts.cpp +++ b/src/Storages/System/StorageSystemParts.cpp @@ -57,6 +57,7 @@ StorageSystemParts::StorageSystemParts(const StorageID & table_id_) {"bytes_on_disk", std::make_shared()}, {"data_compressed_bytes", std::make_shared()}, {"data_uncompressed_bytes", std::make_shared()}, + {"primary_key_size", std::make_shared()}, {"marks_bytes", std::make_shared()}, {"secondary_indices_compressed_bytes", std::make_shared()}, {"secondary_indices_uncompressed_bytes", std::make_shared()}, @@ -168,6 +169,8 @@ void StorageSystemParts::processNextStorage( columns[res_index++]->insert(columns_size.data_compressed); if (columns_mask[src_index++]) columns[res_index++]->insert(columns_size.data_uncompressed); + if (columns_mask[src_index++]) + columns[res_index++]->insert(part->getIndexSizeFromFile()); if (columns_mask[src_index++]) columns[res_index++]->insert(columns_size.marks); if (columns_mask[src_index++]) From 40f721ae4f290c76d492260d740c1eb37df20e4c Mon Sep 17 00:00:00 2001 From: serxa Date: Tue, 27 Jun 2023 17:14:33 +0000 Subject: [PATCH 0864/1997] fix possible race on shutdown wait --- programs/server/Server.cpp | 4 ++-- src/Server/waitServersToFinish.cpp | 11 +++++++---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index d2d8a0d07fb..41df7a119d1 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1523,7 +1523,7 @@ try LOG_INFO(log, "Closed all listening sockets."); if (current_connections > 0) - current_connections = waitServersToFinish(servers_to_start_before_tables, config().getInt("shutdown_wait_unfinished", 5)); + current_connections = waitServersToFinish(servers_to_start_before_tables, servers_lock, config().getInt("shutdown_wait_unfinished", 5)); if (current_connections) LOG_INFO(log, "Closed connections to servers for tables. But {} remain. 
Probably some tables of other users cannot finish their connections after context shutdown.", current_connections); @@ -1827,7 +1827,7 @@ try global_context->getProcessList().killAllQueries(); if (current_connections) - current_connections = waitServersToFinish(servers, config().getInt("shutdown_wait_unfinished", 5)); + current_connections = waitServersToFinish(servers, servers_lock, config().getInt("shutdown_wait_unfinished", 5)); if (current_connections) LOG_WARNING(log, "Closed connections. But {} remain." diff --git a/src/Server/waitServersToFinish.cpp b/src/Server/waitServersToFinish.cpp index f2e36fae86c..3b07c082067 100644 --- a/src/Server/waitServersToFinish.cpp +++ b/src/Server/waitServersToFinish.cpp @@ -5,7 +5,7 @@ namespace DB { -size_t waitServersToFinish(std::vector & servers, size_t seconds_to_wait) +size_t waitServersToFinish(std::vector & servers, std::mutex & mutex, size_t seconds_to_wait) { const size_t sleep_max_ms = 1000 * seconds_to_wait; const size_t sleep_one_ms = 100; @@ -15,10 +15,13 @@ size_t waitServersToFinish(std::vector & servers, siz { current_connections = 0; - for (auto & server : servers) { - server.stop(); - current_connections += server.currentConnections(); + std::scoped_lock lock{mutex}; + for (auto & server : servers) + { + server.stop(); + current_connections += server.currentConnections(); + } } if (!current_connections) From 0b6d367bdbe73e1ea9b1f179f315cba2185ffe94 Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 27 Jun 2023 17:14:49 +0000 Subject: [PATCH 0865/1997] Parts mover: lock between getActiveContainingPart and swapActivePart --- src/Storages/MergeTree/MergeTreeData.cpp | 9 +++++++-- src/Storages/MergeTree/MergeTreeData.h | 3 ++- src/Storages/MergeTree/MergeTreePartsMover.cpp | 7 +++++-- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index e9c3a7f66ae..5e17559acc0 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -4529,9 +4529,8 @@ MergeTreeData::DataPartPtr MergeTreeData::getActiveContainingPart( } -void MergeTreeData::swapActivePart(MergeTreeData::DataPartPtr part_copy) +void MergeTreeData::swapActivePart(MergeTreeData::DataPartPtr part_copy, DataPartsLock &) { - auto lock = lockParts(); for (auto original_active_part : getDataPartsStateRange(DataPartState::Active)) // NOLINT (copy is intended) { if (part_copy->name == original_active_part->name) @@ -4587,6 +4586,12 @@ MergeTreeData::DataPartPtr MergeTreeData::getActiveContainingPart(const String & return getActiveContainingPart(part_info); } +MergeTreeData::DataPartPtr MergeTreeData::getActiveContainingPart(const String & part_name, DataPartsLock & lock) const +{ + auto part_info = MergeTreePartInfo::fromPartName(part_name, format_version); + return getActiveContainingPart(part_info, DataPartState::Active, lock); +} + MergeTreeData::DataPartsVector MergeTreeData::getVisibleDataPartsVectorInPartition(ContextPtr local_context, const String & partition_id) const { return getVisibleDataPartsVectorInPartition(local_context->getCurrentTransaction().get(), partition_id); diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index b27392b355b..c821a436a76 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -504,12 +504,13 @@ public: /// Returns a part in Active state with the given name or a part containing it. If there is no such part, returns nullptr. 
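+    /// The overload taking a DataPartsLock is for callers that already hold the parts
+    /// lock and need the lookup and a follow-up operation (such as swapActivePart) to
+    /// be atomic with respect to the part set.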
    DataPartPtr getActiveContainingPart(const String & part_name) const;
+    DataPartPtr getActiveContainingPart(const String & part_name, DataPartsLock & lock) const;
     DataPartPtr getActiveContainingPart(const MergeTreePartInfo & part_info) const;
     DataPartPtr getActiveContainingPart(const MergeTreePartInfo & part_info, DataPartState state, DataPartsLock & lock) const;
 
     /// Swap part with its identical copy (possible with another path on another disk).
     /// If the original part is not active or doesn't exist, an exception will be thrown.
-    void swapActivePart(MergeTreeData::DataPartPtr part_copy);
+    void swapActivePart(MergeTreeData::DataPartPtr part_copy, DataPartsLock &);
 
     /// Returns all parts in specified partition
     DataPartsVector getVisibleDataPartsVectorInPartition(MergeTreeTransaction * txn, const String & partition_id, DataPartsLock * acquired_lock = nullptr) const;
diff --git a/src/Storages/MergeTree/MergeTreePartsMover.cpp b/src/Storages/MergeTree/MergeTreePartsMover.cpp
index 8fa4ac6c78a..a8f34ba4cec 100644
--- a/src/Storages/MergeTree/MergeTreePartsMover.cpp
+++ b/src/Storages/MergeTree/MergeTreePartsMover.cpp
@@ -263,7 +263,10 @@ void MergeTreePartsMover::swapClonedPart(TemporaryClonedPart & cloned_part) cons
     if (moves_blocker.isCancelled())
         throw Exception(ErrorCodes::ABORTED, "Cancelled moving parts.");
 
-    auto active_part = data->getActiveContainingPart(cloned_part.part->name);
+    /// `getActiveContainingPart` and `swapActivePart` are called under the same lock
+    /// to prevent the part from becoming inactive between the calls
+    auto part_lock = data->lockParts();
+    auto active_part = data->getActiveContainingPart(cloned_part.part->name, part_lock);
 
     /// It's ok, because we don't block moving parts for merges or mutations
     if (!active_part || active_part->name != cloned_part.part->name)
@@ -284,7 +287,7 @@ void MergeTreePartsMover::swapClonedPart(TemporaryClonedPart & cloned_part) cons
     cloned_part.part->renameTo(active_part->name, false);
 
     /// TODO what happen if server goes down here?
- data->swapActivePart(cloned_part.part); + data->swapActivePart(cloned_part.part, part_lock); LOG_TRACE(log, "Part {} was moved to {}", cloned_part.part->name, cloned_part.part->getDataPartStorage().getFullPath()); From 79b6792548c065d6795c5a167b29da44aa91dae7 Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 27 Jun 2023 17:15:11 +0000 Subject: [PATCH 0866/1997] Enable allow_remove_stale_moving_parts for stateless tests --- tests/config/config.d/merge_tree_old_dirs_cleanup.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/config/config.d/merge_tree_old_dirs_cleanup.xml b/tests/config/config.d/merge_tree_old_dirs_cleanup.xml index 2b8ea63b63d..e6b50724c97 100644 --- a/tests/config/config.d/merge_tree_old_dirs_cleanup.xml +++ b/tests/config/config.d/merge_tree_old_dirs_cleanup.xml @@ -5,4 +5,5 @@ 5 + true From d3b8b454f853c63da4b94ec97afdcb1528ffdc22 Mon Sep 17 00:00:00 2001 From: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> Date: Tue, 27 Jun 2023 19:19:58 +0200 Subject: [PATCH 0867/1997] Fix segfault in MathUnary --- src/Functions/FunctionMathUnary.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Functions/FunctionMathUnary.h b/src/Functions/FunctionMathUnary.h index 6e4bff7122d..9f400932356 100644 --- a/src/Functions/FunctionMathUnary.h +++ b/src/Functions/FunctionMathUnary.h @@ -154,6 +154,8 @@ private: using ColVecType = ColumnVectorOrDecimal; const auto col_vec = checkAndGetColumn(col.column.get()); + if (col_vec == nullptr) + return false; return (res = execute(col_vec)) != nullptr; }; From c59ddf0c668c0a345c88df98b249b79cd58a8fcb Mon Sep 17 00:00:00 2001 From: yariks5s Date: Tue, 27 Jun 2023 17:27:28 +0000 Subject: [PATCH 0868/1997] Resolved style check --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 55db22d6105..eb35fe178c4 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1801,7 +1801,7 @@ MutableDataPartStoragePtr IMergeTreeDataPart::makeCloneOnDisk(const DiskPtr & di return getDataPartStorage().clonePart(path_to_clone, getDataPartStorage().getPartDirectory(), disk, storage.log); } -UInt64 IMergeTreeDataPart::getIndexSizeFromFile() const +UInt64 IMergeTreeDataPart::getIndexSizeFromFile() const { auto metadata_snapshot = storage.getInMemoryMetadataPtr(); if (parent_part) From 5a39960e0374f355b22260bea1095676b50e92c6 Mon Sep 17 00:00:00 2001 From: Nikifor Seriakov Date: Tue, 27 Jun 2023 21:32:39 +0400 Subject: [PATCH 0869/1997] Update docs/en/interfaces/formats.md Fixed RawBLOB comparison lists formatting. --- docs/en/interfaces/formats.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 79baf04d75d..378a1c46d93 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -2454,18 +2454,22 @@ In this format, all input data is read to a single value. It is possible to pars The result is output in binary format without delimiters and escaping. If more than one value is output, the format is ambiguous, and it will be impossible to read the data back. Below is a comparison of the formats `RawBLOB` and [TabSeparatedRaw](#tabseparatedraw). + `RawBLOB`: - data is output in binary format, no escaping; - there are no delimiters between values; - no newline at the end of each value. 
-[TabSeparatedRaw] (#tabseparatedraw): + +`TabSeparatedRaw`: - data is output without escaping; - the rows contain values separated by tabs; - there is a line feed after the last value in every row. The following is a comparison of the `RawBLOB` and [RowBinary](#rowbinary) formats. + `RawBLOB`: - String fields are output without being prefixed by length. + `RowBinary`: - String fields are represented as length in varint format (unsigned [LEB128] (https://en.wikipedia.org/wiki/LEB128)), followed by the bytes of the string. From a013ec1abaccea5599b17e69d7a923addff76e4c Mon Sep 17 00:00:00 2001 From: yariks5s Date: Tue, 27 Jun 2023 17:42:19 +0000 Subject: [PATCH 0870/1997] added field to tests --- .../queries/0_stateless/02117_show_create_table_system.reference | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 09cc62dac00..f2c85a4d0ba 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -466,6 +466,7 @@ CREATE TABLE system.parts `bytes_on_disk` UInt64, `data_compressed_bytes` UInt64, `data_uncompressed_bytes` UInt64, + `primary_key_size` UInt64, `marks_bytes` UInt64, `secondary_indices_compressed_bytes` UInt64, `secondary_indices_uncompressed_bytes` UInt64, From f5327e79bb37c3cc061b8704ffdb85aa4f0b31c4 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 27 Jun 2023 20:18:38 +0200 Subject: [PATCH 0871/1997] fix a logical error on mutation --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 1 + src/Storages/MergeTree/IMergeTreeDataPart.h | 3 +++ src/Storages/MergeTree/MutateTask.cpp | 16 +++++++++------- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index cf2d1b19326..85edba84296 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1362,6 +1362,7 @@ void IMergeTreeDataPart::loadColumns(bool require) else { loaded_metadata_version = metadata_snapshot->getMetadataVersion(); + old_part_with_no_metadata_version_on_disk = true; } setColumns(loaded_columns, infos, loaded_metadata_version); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index fd73d802579..6dd82493398 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -249,6 +249,9 @@ public: /// Flag for keep S3 data when zero-copy replication over S3 turned on. mutable bool force_keep_shared_data = false; + /// Some old parts don't have metadata version, so we set it to the current table's version when loading the part + bool old_part_with_no_metadata_version_on_disk = false; + using TTLInfo = MergeTreeDataPartTTLInfo; using TTLInfos = MergeTreeDataPartTTLInfos; diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index b98b0844ee7..a19b9daca0e 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -153,20 +153,22 @@ static void splitAndModifyMutationCommands( /// But we don't know for sure what happened. 
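            /// The comparison below distinguishes the two cases: a part whose metadata
            /// version is strictly older than the table's (or an old part that never had
            /// a metadata version on disk) means the column was dropped, so the command
            /// is skipped; any other mismatch on a replicated table is a logical error.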
auto part_metadata_version = part->getMetadataVersion(); auto table_metadata_version = metadata_snapshot->getMetadataVersion(); - /// StorageMergeTree does not have metadata version - if (table_metadata_version <= part_metadata_version && part->storage.supportsReplication()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} with metadata version {} contains column {} that is absent " - "in table {} with metadata version {}", - part->name, part_metadata_version, column.name, - part->storage.getStorageID().getNameForLogs(), table_metadata_version); - if (part_metadata_version < table_metadata_version) + bool allow_equal_versions = part_metadata_version == table_metadata_version && part->old_part_with_no_metadata_version_on_disk; + if (part_metadata_version < table_metadata_version || allow_equal_versions) { LOG_WARNING(log, "Ignoring column {} from part {} with metadata version {} because there is no such column " "in table {} with metadata version {}. Assuming the column was dropped", column.name, part->name, part_metadata_version, part->storage.getStorageID().getNameForLogs(), table_metadata_version); continue; } + + /// StorageMergeTree does not have metadata version + if (part->storage.supportsReplication()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} with metadata version {} contains column {} that is absent " + "in table {} with metadata version {}", + part->name, part_metadata_version, column.name, + part->storage.getStorageID().getNameForLogs(), table_metadata_version); } for_interpreter.emplace_back( From 575f3513977a21a8fea5ff30116636f2fc9ac2f1 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Tue, 27 Jun 2023 18:34:12 +0000 Subject: [PATCH 0872/1997] add test --- tests/queries/0_stateless/02807_math_unary_crash.reference | 2 ++ tests/queries/0_stateless/02807_math_unary_crash.sql | 4 ++++ 2 files changed, 6 insertions(+) create mode 100644 tests/queries/0_stateless/02807_math_unary_crash.reference create mode 100644 tests/queries/0_stateless/02807_math_unary_crash.sql diff --git a/tests/queries/0_stateless/02807_math_unary_crash.reference b/tests/queries/0_stateless/02807_math_unary_crash.reference new file mode 100644 index 00000000000..6ed281c757a --- /dev/null +++ b/tests/queries/0_stateless/02807_math_unary_crash.reference @@ -0,0 +1,2 @@ +1 +1 diff --git a/tests/queries/0_stateless/02807_math_unary_crash.sql b/tests/queries/0_stateless/02807_math_unary_crash.sql new file mode 100644 index 00000000000..16c3ba1e0ae --- /dev/null +++ b/tests/queries/0_stateless/02807_math_unary_crash.sql @@ -0,0 +1,4 @@ +CREATE TABLE t10 (`c0` Int32) ENGINE = MergeTree ORDER BY tuple(); +INSERT INTO t10 (c0) FORMAT Values (-1); +SELECT 1 FROM t10 GROUP BY erf(-sign(t10.c0)); +SELECT 1 FROM t10 GROUP BY -sign(t10.c0); From 7583da9b3806850a3ed99e7b93f253c17ddb5aa8 Mon Sep 17 00:00:00 2001 From: serxa Date: Tue, 27 Jun 2023 18:48:54 +0000 Subject: [PATCH 0873/1997] fix --- src/Server/waitServersToFinish.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Server/waitServersToFinish.h b/src/Server/waitServersToFinish.h index 5e90790cefb..b6daa025964 100644 --- a/src/Server/waitServersToFinish.h +++ b/src/Server/waitServersToFinish.h @@ -5,6 +5,6 @@ namespace DB { class ProtocolServerAdapter; -size_t waitServersToFinish(std::vector & servers, size_t seconds_to_wait); +size_t waitServersToFinish(std::vector & servers, std::mutex & mutex, size_t seconds_to_wait); } From 6515d52f6018570560eeb56d93d05ca1b530a892 Mon Sep 17 
00:00:00 2001 From: serxa Date: Tue, 27 Jun 2023 18:50:40 +0000 Subject: [PATCH 0874/1997] fix2 --- programs/keeper/Keeper.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index a1825665188..43c3489bbda 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -465,7 +465,7 @@ try LOG_INFO(log, "Closed all listening sockets."); if (current_connections > 0) - current_connections = waitServersToFinish(*servers, config().getInt("shutdown_wait_unfinished", 5)); + current_connections = waitServersToFinish(*servers, servers_lock, config().getInt("shutdown_wait_unfinished", 5)); if (current_connections) LOG_INFO(log, "Closed connections to Keeper. But {} remain. Probably some users cannot finish their connections after context shutdown.", current_connections); From 13854e5259ee446c7b76be2db619bd22fd6491bb Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 27 Jun 2023 17:23:51 +0200 Subject: [PATCH 0875/1997] impl --- src/Processors/QueryPlan/PartsSplitter.cpp | 6 +- ...nal_block_structure_mismatch_bug.reference | 9 +++ ...791_final_block_structure_mismatch_bug.sql | 66 +++++++++++++++++++ 3 files changed, 78 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.reference create mode 100644 tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.sql diff --git a/src/Processors/QueryPlan/PartsSplitter.cpp b/src/Processors/QueryPlan/PartsSplitter.cpp index 9796e696f6c..e1fc3facf04 100644 --- a/src/Processors/QueryPlan/PartsSplitter.cpp +++ b/src/Processors/QueryPlan/PartsSplitter.cpp @@ -271,6 +271,9 @@ Pipes buildPipesForReadingByPKRanges( for (size_t i = 0; i < result_layers.size(); ++i) { pipes[i] = reading_step_getter(std::move(result_layers[i])); + auto pk_expression = std::make_shared(primary_key.expression->getActionsDAG().clone()); + pipes[i].addSimpleTransform([pk_expression](const Block & header) + { return std::make_shared(header, pk_expression); }); auto & filter_function = filters[i]; if (!filter_function) continue; @@ -279,9 +282,6 @@ Pipes buildPipesForReadingByPKRanges( ExpressionActionsPtr expression_actions = std::make_shared(std::move(actions)); auto description = fmt::format( "filter values in [{}, {})", i ? ::toString(borders[i - 1]) : "-inf", i < borders.size() ? 
::toString(borders[i]) : "+inf"); - auto pk_expression = std::make_shared(primary_key.expression->getActionsDAG().clone()); - pipes[i].addSimpleTransform([pk_expression](const Block & header) - { return std::make_shared(header, pk_expression); }); pipes[i].addSimpleTransform( [&](const Block & header) { diff --git a/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.reference b/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.reference new file mode 100644 index 00000000000..a8401b1cae8 --- /dev/null +++ b/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.reference @@ -0,0 +1,9 @@ +1 +2 +3 +1 +2 +3 +1 +2 +3 diff --git a/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.sql b/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.sql new file mode 100644 index 00000000000..4c7ac50b8d0 --- /dev/null +++ b/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.sql @@ -0,0 +1,66 @@ +SET do_not_merge_across_partitions_select_final=1; + +CREATE TABLE test_block_mismatch +( + a UInt32, + b DateTime +) +ENGINE = ReplacingMergeTree +PARTITION BY toYYYYMM(b) +ORDER BY (toDate(b), a); + +INSERT INTO test_block_mismatch VALUES (1, toDateTime('2023-01-01 12:12:12')); +INSERT INTO test_block_mismatch VALUES (1, toDateTime('2023-01-01 12:12:12')); +SELECT count(*) FROM test_block_mismatch FINAL; + +INSERT INTO test_block_mismatch VALUES (1, toDateTime('2023-02-02 12:12:12')); +INSERT INTO test_block_mismatch VALUES (1, toDateTime('2023-02-02 12:12:12')); +SELECT count(*) FROM test_block_mismatch FINAL; + +INSERT INTO test_block_mismatch VALUES (2, toDateTime('2023-01-01 12:12:12')); +INSERT INTO test_block_mismatch VALUES (2, toDateTime('2023-01-01 12:12:12')); +SELECT count(*) FROM test_block_mismatch FINAL; + +CREATE TABLE test_block_mismatch_sk1 +( + a UInt32, + b DateTime +) +ENGINE = ReplacingMergeTree +PARTITION BY toYYYYMM(b) +PRIMARY KEY (toDate(b)) +ORDER BY (toDate(b), a); + +INSERT INTO test_block_mismatch_sk1 VALUES (1, toDateTime('2023-01-01 12:12:12')); +INSERT INTO test_block_mismatch_sk1 VALUES (1, toDateTime('2023-01-01 12:12:12')); +SELECT count(*) FROM test_block_mismatch_sk1 FINAL; + +INSERT INTO test_block_mismatch_sk1 VALUES (1, toDateTime('2023-02-02 12:12:12')); +INSERT INTO test_block_mismatch_sk1 VALUES (1, toDateTime('2023-02-02 12:12:12')); +SELECT count(*) FROM test_block_mismatch_sk1 FINAL; + +INSERT INTO test_block_mismatch_sk1 VALUES (2, toDateTime('2023-01-01 12:12:12')); +INSERT INTO test_block_mismatch_sk1 VALUES (2, toDateTime('2023-01-01 12:12:12')); +SELECT count(*) FROM test_block_mismatch_sk1 FINAL; + +CREATE TABLE test_block_mismatch_sk2 +( + a UInt32, + b DateTime +) +ENGINE = ReplacingMergeTree +PARTITION BY toYYYYMM(b) +PRIMARY KEY (a) +ORDER BY (a, toDate(b)); + +INSERT INTO test_block_mismatch_sk2 VALUES (1, toDateTime('2023-01-01 12:12:12')); +INSERT INTO test_block_mismatch_sk2 VALUES (1, toDateTime('2023-01-01 12:12:12')); +SELECT count(*) FROM test_block_mismatch_sk2 FINAL; + +INSERT INTO test_block_mismatch_sk2 VALUES (1, toDateTime('2023-02-02 12:12:12')); +INSERT INTO test_block_mismatch_sk2 VALUES (1, toDateTime('2023-02-02 12:12:12')); +SELECT count(*) FROM test_block_mismatch_sk2 FINAL; + +INSERT INTO test_block_mismatch_sk2 VALUES (2, toDateTime('2023-01-01 12:12:12')); +INSERT INTO test_block_mismatch_sk2 VALUES (2, toDateTime('2023-01-01 12:12:12')); +SELECT count(*) FROM test_block_mismatch_sk2 FINAL; From 3e5abbbf48953288d5bcea4fab9f2431bd05873d Mon Sep 
17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 27 Jun 2023 22:47:19 +0300 Subject: [PATCH 0876/1997] Update 02807_math_unary_crash.sql --- tests/queries/0_stateless/02807_math_unary_crash.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/02807_math_unary_crash.sql b/tests/queries/0_stateless/02807_math_unary_crash.sql index 16c3ba1e0ae..fb693ac70f7 100644 --- a/tests/queries/0_stateless/02807_math_unary_crash.sql +++ b/tests/queries/0_stateless/02807_math_unary_crash.sql @@ -1,4 +1,6 @@ +DROP TABLE IF EXISTS t10; CREATE TABLE t10 (`c0` Int32) ENGINE = MergeTree ORDER BY tuple(); INSERT INTO t10 (c0) FORMAT Values (-1); SELECT 1 FROM t10 GROUP BY erf(-sign(t10.c0)); SELECT 1 FROM t10 GROUP BY -sign(t10.c0); +DROP TABLE t10; From 1ed104417a2cb49fc3c677d9193e2b676de95d77 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 27 Jun 2023 19:41:24 +0200 Subject: [PATCH 0877/1997] fix race between executeMetadataAlter and initializeReplication (cherry picked from commit 1d47783e857ed4dd7550de0728913e0144657a52) --- src/Databases/DatabaseReplicated.cpp | 10 ++++++++++ src/Databases/DatabaseReplicated.h | 2 ++ src/Databases/DatabaseReplicatedWorker.cpp | 5 ++++- src/Databases/IDatabase.h | 3 +++ src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp | 9 +++++++++ 5 files changed, 28 insertions(+), 1 deletion(-) diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index de40ee4d82d..661afc6bf1f 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -1293,6 +1293,16 @@ void DatabaseReplicated::commitAlterTable(const StorageID & table_id, assert(checkDigestValid(query_context)); } + +bool DatabaseReplicated::canExecuteReplicatedMetadataAlter() const +{ + /// ReplicatedMergeTree may call commitAlterTable from its background threads when executing ALTER_METADATA entries. + /// It may update the metadata digest (both locally and in ZooKeeper) + /// before DatabaseReplicatedDDLWorker::initializeReplication() has finished. + /// We should not update metadata until the database is initialized. + return ddl_worker && ddl_worker->isCurrentlyActive(); +} + void DatabaseReplicated::detachTablePermanently(ContextPtr local_context, const String & table_name) { auto txn = local_context->getZooKeeperMetadataTransaction(); diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h index 1da181de030..ff1a4aba41c 100644 --- a/src/Databases/DatabaseReplicated.h +++ b/src/Databases/DatabaseReplicated.h @@ -48,6 +48,8 @@ public: /// then it will be executed on all replicas. 
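    /// (the entry is appended to the database's DDL queue in ZooKeeper, from which
    /// every replica's DDL worker picks it up and applies it in order)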
BlockIO tryEnqueueReplicatedDDL(const ASTPtr & query, ContextPtr query_context, bool internal) override; + bool canExecuteReplicatedMetadataAlter() const override; + bool hasReplicationThread() const override { return true; } void stopReplication() override; diff --git a/src/Databases/DatabaseReplicatedWorker.cpp b/src/Databases/DatabaseReplicatedWorker.cpp index ff2675dfd6b..593d0655777 100644 --- a/src/Databases/DatabaseReplicatedWorker.cpp +++ b/src/Databases/DatabaseReplicatedWorker.cpp @@ -91,12 +91,12 @@ void DatabaseReplicatedDDLWorker::initializeReplication() if (zookeeper->tryGet(database->replica_path + "/digest", digest_str)) { digest = parse(digest_str); - LOG_TRACE(log, "Metadata digest in ZooKeeper: {}", digest); std::lock_guard lock{database->metadata_mutex}; local_digest = database->tables_metadata_digest; } else { + LOG_WARNING(log, "Did not find digest in ZooKeeper, creating it"); /// Database was created by old ClickHouse versions, let's create the node std::lock_guard lock{database->metadata_mutex}; digest = local_digest = database->tables_metadata_digest; @@ -104,6 +104,9 @@ void DatabaseReplicatedDDLWorker::initializeReplication() zookeeper->create(database->replica_path + "/digest", digest_str, zkutil::CreateMode::Persistent); } + LOG_TRACE(log, "Trying to initialize replication: our_log_ptr={}, max_log_ptr={}, local_digest={}, zk_digest={}", + our_log_ptr, max_log_ptr, local_digest, digest); + bool is_new_replica = our_log_ptr == 0; bool lost_according_to_log_ptr = our_log_ptr + logs_to_keep < max_log_ptr; bool lost_according_to_digest = database->db_settings.check_consistency && local_digest != digest; diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h index 53a2f372814..aadae3e2491 100644 --- a/src/Databases/IDatabase.h +++ b/src/Databases/IDatabase.h @@ -254,6 +254,9 @@ public: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{}: alterTable() is not supported", getEngineName()); } + /// Special method for ReplicatedMergeTree and DatabaseReplicated + virtual bool canExecuteReplicatedMetadataAlter() const { return true; } + /// Returns time of table's metadata change, 0 if there is no corresponding metadata file. 
virtual time_t getObjectMetadataModificationTime(const String & /*name*/) const { diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 3ba3048b812..792843cbe18 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -1448,6 +1448,15 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry( LOG_TRACE(LogToStr(out_postpone_reason, log), fmt_string, entry.znode_name, entry.alter_version, head_alter); return false; } + + auto database_name = storage.getStorageID().database_name; + auto database = DatabaseCatalog::instance().getDatabase(database_name); + if (!database->canExecuteReplicatedMetadataAlter()) + { + LOG_TRACE(LogToStr(out_postpone_reason, log), "Cannot execute alter metadata {} with version {} " + "because database {} cannot process metadata alters now", entry.znode_name, entry.alter_version, database_name); + return false; + } } /// If this MUTATE_PART is part of alter modify/drop query, than we have to execute them one by one From e2f20ea0e2b012796e05f1e734152609b34167e7 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 28 Jun 2023 00:30:51 +0200 Subject: [PATCH 0878/1997] fix --- src/Processors/QueryPlan/PartsSplitter.cpp | 6 ++--- src/Processors/QueryPlan/PartsSplitter.h | 1 + .../QueryPlan/ReadFromMergeTree.cpp | 22 +++++++++--------- ...nal_block_structure_mismatch_bug.reference | 1 + ...791_final_block_structure_mismatch_bug.sql | 23 +++++++++++++++++++ 5 files changed, 39 insertions(+), 14 deletions(-) diff --git a/src/Processors/QueryPlan/PartsSplitter.cpp b/src/Processors/QueryPlan/PartsSplitter.cpp index e1fc3facf04..533fbde1e13 100644 --- a/src/Processors/QueryPlan/PartsSplitter.cpp +++ b/src/Processors/QueryPlan/PartsSplitter.cpp @@ -256,6 +256,7 @@ namespace ErrorCodes Pipes buildPipesForReadingByPKRanges( const KeyDescription & primary_key, + ExpressionActionsPtr sorting_expr, RangesInDataParts parts, size_t max_layers, ContextPtr context, @@ -271,9 +272,8 @@ Pipes buildPipesForReadingByPKRanges( for (size_t i = 0; i < result_layers.size(); ++i) { pipes[i] = reading_step_getter(std::move(result_layers[i])); - auto pk_expression = std::make_shared(primary_key.expression->getActionsDAG().clone()); - pipes[i].addSimpleTransform([pk_expression](const Block & header) - { return std::make_shared(header, pk_expression); }); + pipes[i].addSimpleTransform([sorting_expr](const Block & header) + { return std::make_shared(header, sorting_expr); }); auto & filter_function = filters[i]; if (!filter_function) continue; diff --git a/src/Processors/QueryPlan/PartsSplitter.h b/src/Processors/QueryPlan/PartsSplitter.h index 56bca688c2d..4ba655a6f6d 100644 --- a/src/Processors/QueryPlan/PartsSplitter.h +++ b/src/Processors/QueryPlan/PartsSplitter.h @@ -18,6 +18,7 @@ using ReadingInOrderStepGetter = std::function; /// Will try to produce exactly max_layer pipes but may return less if data is distributed in not a very parallelizable way. 
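/// The sorting_expr argument is applied to every produced pipe before the per-layer
/// range filters run, so columns computed from the primary key expression exist when
/// the filters read them. Callers build it from the table's sorting key, e.g.:
///     auto sorting_expr = std::make_shared<ExpressionActions>(
///         metadata_for_reading->getSortingKey().expression->getActionsDAG().clone());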
Pipes buildPipesForReadingByPKRanges( const KeyDescription & primary_key, + ExpressionActionsPtr sorting_expr, RangesInDataParts parts, size_t max_layers, ContextPtr context, diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 3c38ecbbd3f..fac8ebd6e1f 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -979,6 +979,8 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( RangesInDataParts lonely_parts; size_t sum_marks_in_lonely_parts = 0; + auto sorting_expr = std::make_shared(metadata_for_reading->getSortingKey().expression->getActionsDAG().clone()); + for (size_t range_index = 0; range_index < parts_to_merge_ranges.size() - 1; ++range_index) { Pipes pipes; @@ -1022,12 +1024,20 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( info.use_uncompressed_cache); }; pipes = buildPipesForReadingByPKRanges( - metadata_for_reading->getPrimaryKey(), std::move(new_parts), num_streams, context, std::move(reading_step_getter)); + metadata_for_reading->getPrimaryKey(), + sorting_expr, + std::move(new_parts), + num_streams, + context, + std::move(reading_step_getter)); } else { pipes.emplace_back(read( std::move(new_parts), column_names, ReadFromMergeTree::ReadType::InOrder, num_streams, 0, info.use_uncompressed_cache)); + + pipes.back().addSimpleTransform([sorting_expr](const Block & header) + { return std::make_shared(header, sorting_expr); }); } /// Drop temporary columns, added by 'sorting_key_expr' @@ -1035,13 +1045,6 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( out_projection = createProjection(pipes.front().getHeader()); } - auto sorting_expr = std::make_shared( - metadata_for_reading->getSortingKey().expression->getActionsDAG().clone()); - - for (auto & pipe : pipes) - pipe.addSimpleTransform([sorting_expr](const Block & header) - { return std::make_shared(header, sorting_expr); }); - /// If do_not_merge_across_partitions_select_final is true and there is only one part in partition /// with level > 0 then we won't postprocess this part if (settings.do_not_merge_across_partitions_select_final && @@ -1098,9 +1101,6 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( if (!out_projection) out_projection = createProjection(pipe.getHeader()); - auto sorting_expr = std::make_shared( - metadata_for_reading->getSortingKey().expression->getActionsDAG().clone()); - pipe.addSimpleTransform([sorting_expr](const Block & header) { return std::make_shared(header, sorting_expr); diff --git a/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.reference b/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.reference index a8401b1cae8..ca810c46a2d 100644 --- a/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.reference +++ b/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.reference @@ -7,3 +7,4 @@ 1 2 3 +2 diff --git a/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.sql b/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.sql index 4c7ac50b8d0..a82e43d81f4 100644 --- a/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.sql +++ b/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.sql @@ -64,3 +64,26 @@ SELECT count(*) FROM test_block_mismatch_sk2 FINAL; INSERT INTO test_block_mismatch_sk2 VALUES (2, toDateTime('2023-01-01 12:12:12')); INSERT INTO test_block_mismatch_sk2 VALUES (2, toDateTime('2023-01-01 12:12:12')); 
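+-- expect 3: one row per distinct (a, toDate(b)) key survives FINAL across both partitions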
+SELECT count(*) FROM test_block_mismatch_sk2 FINAL;
+
+CREATE TABLE test_block_mismatch_magic_row_dist
+(
+    a UInt32,
+    b DateTime
+)
+ENGINE = ReplacingMergeTree
+PARTITION BY toYYYYMM(b)
+ORDER BY (toDate(b), a);
+
+INSERT INTO test_block_mismatch_magic_row_dist VALUES (1, toDateTime('2023-02-02 12:12:12'));
+INSERT INTO test_block_mismatch_magic_row_dist VALUES (1, toDateTime('2023-02-02 12:12:12'));
+INSERT INTO test_block_mismatch_magic_row_dist VALUES (1, toDateTime('2023-02-02 12:12:12'));
+INSERT INTO test_block_mismatch_magic_row_dist VALUES (1, toDateTime('2023-02-02 12:12:12'));
+
+optimize table test_block_mismatch_magic_row_dist final;
+
+system stop merges test_block_mismatch_magic_row_dist;
+
+INSERT INTO test_block_mismatch_magic_row_dist VALUES (1, toDateTime('2023-01-01 12:12:12'));
+INSERT INTO test_block_mismatch_magic_row_dist VALUES (1, toDateTime('2023-01-01 12:12:12'));
+
+SELECT count(*) FROM test_block_mismatch_magic_row_dist FINAL;

From b95e8704d3e0f255cb5a8830b87f4cb4ca3ebe4c Mon Sep 17 00:00:00 2001
From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com>
Date: Wed, 28 Jun 2023 02:27:31 +0200
Subject: [PATCH 0879/1997] Fix test_host_regexp_multiple_ptr_records

---
 .../coredns_config/Corefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration/test_host_regexp_multiple_ptr_records/coredns_config/Corefile b/tests/integration/test_host_regexp_multiple_ptr_records/coredns_config/Corefile
index 0dd198441dc..3edf37dafa5 100644
--- a/tests/integration/test_host_regexp_multiple_ptr_records/coredns_config/Corefile
+++ b/tests/integration/test_host_regexp_multiple_ptr_records/coredns_config/Corefile
@@ -1,6 +1,6 @@
 . {
     hosts /example.com {
-        reload "200ms"
+        reload "20ms"
         fallthrough
     }
     forward . 
127.0.0.11 From 6e769237a736d5f7c9e795451a4dddf675fdf85c Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 28 Jun 2023 01:39:06 +0000 Subject: [PATCH 0881/1997] added setting 'enable_job_stack_trace', default is off --- src/Common/Exception.cpp | 1 + src/Common/Exception.h | 1 + src/Common/ThreadPool.cpp | 10 +++++++--- src/Common/ThreadPool.h | 3 ++- src/Common/ThreadStatus.h | 1 + src/Core/Settings.h | 1 + src/Interpreters/ThreadStatusExt.cpp | 14 ++++++++++++++ 7 files changed, 27 insertions(+), 4 deletions(-) diff --git a/src/Common/Exception.cpp b/src/Common/Exception.cpp index af48ce8fd99..ee268be45f6 100644 --- a/src/Common/Exception.cpp +++ b/src/Common/Exception.cpp @@ -200,6 +200,7 @@ Exception::FramePointers Exception::getStackFramePointers() const return frame_pointers; } +thread_local bool Exception::enable_job_stack_trace = false; thread_local std::vector Exception::thread_frame_pointers = {}; diff --git a/src/Common/Exception.h b/src/Common/Exception.h index 4514df2159b..f80dfe7f0a2 100644 --- a/src/Common/Exception.h +++ b/src/Common/Exception.h @@ -43,6 +43,7 @@ public: } /// Collect call stacks of all previous jobs' schedulings leading to this thread job's execution + static thread_local bool enable_job_stack_trace; static thread_local std::vector thread_frame_pointers; protected: diff --git a/src/Common/ThreadPool.cpp b/src/Common/ThreadPool.cpp index 0bd4fcb0455..979e53a72c0 100644 --- a/src/Common/ThreadPool.cpp +++ b/src/Common/ThreadPool.cpp @@ -191,7 +191,7 @@ ReturnType ThreadPoolImpl::scheduleImpl(Job job, Priority priority, std: /// Tracing context on this thread is used as parent context for the sub-thread that runs the job propagate_opentelemetry_tracing_context ? DB::OpenTelemetry::CurrentContext() : DB::OpenTelemetry::TracingContextOnThread(), /// capture_frame_pointers - true); + DB::Exception::enable_job_stack_trace); ++scheduled_jobs; } @@ -397,7 +397,9 @@ void ThreadPoolImpl::worker(typename std::list::iterator thread_ /// to prevent us from modifying its priority. We have to use const_cast to force move semantics on JobWithPriority::job. job = std::move(const_cast(jobs.top().job)); parent_thread_trace_context = std::move(const_cast(jobs.top().thread_trace_context)); - thread_frame_pointers = std::move(const_cast &>(jobs.top().frame_pointers)); + DB::Exception::enable_job_stack_trace = jobs.top().enable_job_stack_trace; + if (DB::Exception::enable_job_stack_trace) + thread_frame_pointers = std::move(const_cast &>(jobs.top().frame_pointers)); jobs.pop(); /// We don't run jobs after `shutdown` is set, but we have to properly dequeue all jobs and finish them. @@ -416,7 +418,9 @@ void ThreadPoolImpl::worker(typename std::list::iterator thread_ /// Run the job. 
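+            /// Frame pointers captured at schedule() time are moved into the worker
+            /// thread only when enable_job_stack_trace was set, so jobs scheduled with
+            /// the setting off pay nothing extra here.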
try { - DB::Exception::thread_frame_pointers = std::move(thread_frame_pointers); + if (DB::Exception::enable_job_stack_trace) + DB::Exception::thread_frame_pointers = std::move(thread_frame_pointers); + CurrentMetrics::Increment metric_active_pool_threads(metric_active_threads); diff --git a/src/Common/ThreadPool.h b/src/Common/ThreadPool.h index 57188572a9d..f5721146e09 100644 --- a/src/Common/ThreadPool.h +++ b/src/Common/ThreadPool.h @@ -131,9 +131,10 @@ private: /// Call stacks of all jobs' schedulings leading to this one std::vector frame_pointers; + bool enable_job_stack_trace = false; JobWithPriority(Job job_, Priority priority_, const DB::OpenTelemetry::TracingContextOnThread & thread_trace_context_, bool capture_frame_pointers = false) - : job(job_), priority(priority_), thread_trace_context(thread_trace_context_) + : job(job_), priority(priority_), thread_trace_context(thread_trace_context_), enable_job_stack_trace(capture_frame_pointers) { if (!capture_frame_pointers) return; diff --git a/src/Common/ThreadStatus.h b/src/Common/ThreadStatus.h index 061959d9f1f..3b6b947471e 100644 --- a/src/Common/ThreadStatus.h +++ b/src/Common/ThreadStatus.h @@ -290,6 +290,7 @@ public: void flushUntrackedMemory(); private: + void applyGlobalSettings(); void applyQuerySettings(); void initPerformanceCounters(); diff --git a/src/Core/Settings.h b/src/Core/Settings.h index c14bd420c5c..d4003124303 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -465,6 +465,7 @@ class IColumn; M(UInt64, max_fetch_partition_retries_count, 5, "Amount of retries while fetching partition from another host.", 0) \ M(UInt64, http_max_multipart_form_data_size, 1024 * 1024 * 1024, "Limit on size of multipart/form-data content. This setting cannot be parsed from URL parameters and should be set in user profile. Note that content is parsed and external tables are created in memory before start of query execution. And this is the only limit that has effect on that stage (limits on max memory usage and max execution time have no effect while reading HTTP form data).", 0) \ M(Bool, calculate_text_stack_trace, true, "Calculate text stack trace in case of exceptions during query execution. This is the default. It requires symbol lookups that may slow down fuzzing tests when huge amount of wrong queries are executed. In normal cases you should not disable this option.", 0) \ + M(Bool, enable_job_stack_trace, false, "Output stack trace of a job creator when job results in exception", 0) \ M(Bool, allow_ddl, true, "If it is set to true, then a user is allowed to executed DDL queries.", 0) \ M(Bool, parallel_view_processing, false, "Enables pushing to attached views concurrently instead of sequentially.", 0) \ M(Bool, enable_unaligned_array_join, false, "Allow ARRAY JOIN with multiple arrays that have different sizes. 
When this settings is enabled, arrays will be resized to the longest one.", 0) \ diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index 6a4f4576eca..5acfe500b1d 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -158,6 +158,17 @@ void CurrentThread::attachQueryForLog(const String & query_) current_thread->attachQueryForLog(query_); } +void ThreadStatus::applyGlobalSettings() +{ + auto global_context_ptr = global_context.lock(); + if (!global_context_ptr) + return; + + const Settings & settings = global_context_ptr->getSettingsRef(); + + DB::Exception::enable_job_stack_trace = settings.enable_job_stack_trace; +} + void ThreadStatus::applyQuerySettings() { auto query_context_ptr = query_context.lock(); @@ -166,6 +177,8 @@ void ThreadStatus::applyQuerySettings() const Settings & settings = query_context_ptr->getSettingsRef(); + DB::Exception::enable_job_stack_trace = settings.enable_job_stack_trace; + query_id_from_query_context = query_context_ptr->getCurrentQueryId(); initQueryProfiler(); @@ -204,6 +217,7 @@ void ThreadStatus::attachToGroupImpl(const ThreadGroupPtr & thread_group_) local_data = thread_group->getSharedData(); + applyGlobalSettings(); applyQuerySettings(); initPerformanceCounters(); } From ae7a586aea59deb84a7355021b06eb3b35d876f7 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 28 Jun 2023 10:45:52 +0800 Subject: [PATCH 0882/1997] fix bugs and add uts --- src/Functions/substringIndex.cpp | 497 +++++++++--------- .../02798_substring_index.reference | 155 ++++++ .../0_stateless/02798_substring_index.sql | 93 ++++ 3 files changed, 496 insertions(+), 249 deletions(-) create mode 100644 tests/queries/0_stateless/02798_substring_index.reference create mode 100644 tests/queries/0_stateless/02798_substring_index.sql diff --git a/src/Functions/substringIndex.cpp b/src/Functions/substringIndex.cpp index 0a5dfd00656..1fca3bbed14 100644 --- a/src/Functions/substringIndex.cpp +++ b/src/Functions/substringIndex.cpp @@ -25,287 +25,287 @@ namespace ErrorCodes namespace { -template -class FunctionSubstringIndex : public IFunction -{ -public: - static constexpr auto name = is_utf8 ? "substringIndexUTF8" : "substringIndex"; - - - static FunctionPtr create(ContextPtr) + template + class FunctionSubstringIndex : public IFunction { - return std::make_shared(); - } + public: + static constexpr auto name = is_utf8 ? 
"substringIndexUTF8" : "substringIndex"; - String getName() const override - { - return name; - } - size_t getNumberOfArguments() const override { return 3; } + static FunctionPtr create(ContextPtr) { return std::make_shared(); } - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + String getName() const override { return name; } - bool useDefaultImplementationForConstants() const override { return true; } + size_t getNumberOfArguments() const override { return 3; } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - if (!isString(arguments[0])) - throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type {} of first argument of function {}", - arguments[0]->getName(), - getName()); + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - if (!isString(arguments[1])) - throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type {} of second argument of function {}", - arguments[1]->getName(), - getName()); + bool useDefaultImplementationForConstants() const override { return true; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } - if (!isNativeNumber(arguments[2])) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of third argument of function {}", - arguments[2]->getName(), getName()); - - return std::make_shared(); - } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override - { - ColumnPtr column_string = arguments[0].column; - ColumnPtr column_delim = arguments[1].column; - ColumnPtr column_index = arguments[2].column; - - const ColumnConst * column_delim_const = checkAndGetColumnConst(column_delim.get()); - if (!column_delim_const) - throw Exception(ErrorCodes::ILLEGAL_COLUMN , "Second argument to {} must be a constant String", getName()); - - String delim = column_delim_const->getValue(); - if constexpr (!is_utf8) + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { - if (delim.size() != 1) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second argument to {} must be a single character", getName()); - } - else - { - if (UTF8::countCodePoints(reinterpret_cast(delim.data()), delim.size()) != 1) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second argument to {} must be a single UTF-8 character", getName()); + if (!isString(arguments[0])) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of first argument of function {}", + arguments[0]->getName(), + getName()); + + if (!isString(arguments[1])) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of second argument of function {}", + arguments[1]->getName(), + getName()); + + if (!isNativeNumber(arguments[2])) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of third argument of function {}", + arguments[2]->getName(), + getName()); + + return std::make_shared(); } - auto column_res = ColumnString::create(); - ColumnString::Chars & vec_res = column_res->getChars(); - ColumnString::Offsets & offsets_res = column_res->getOffsets(); - - const ColumnConst * column_string_const = checkAndGetColumnConst(column_string.get()); - if (column_string_const) + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { - String str = 
column_string_const->getValue(); - constantVector(str, delim, column_index.get(), vec_res, offsets_res); - } - else - { - const auto * col_str = checkAndGetColumn(column_string.get()); - if (!col_str) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "First argument to {} must be a String", getName()); + ColumnPtr column_string = arguments[0].column; + ColumnPtr column_delim = arguments[1].column; + ColumnPtr column_index = arguments[2].column; - bool is_index_const = isColumnConst(*column_index); - if (is_index_const) + const ColumnConst * column_delim_const = checkAndGetColumnConst(column_delim.get()); + if (!column_delim_const) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument to {} must be a constant String", getName()); + + String delim = column_delim_const->getValue(); + if constexpr (!is_utf8) { - Int64 index = column_index->getInt(0); - vectorConstant(col_str->getChars(), col_str->getOffsets(), delim, index, vec_res, offsets_res); + if (delim.size() != 1) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second argument to {} must be a single character", getName()); } else - vectorVector(col_str->getChars(), col_str->getOffsets(), delim, column_index.get(), vec_res, offsets_res); - } - } - -protected: - static void vectorVector( - const ColumnString::Chars & str_data, - const ColumnString::Offsets & str_offsets, - const String & delim, - const IColumn * index_column, - ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) - { - size_t rows = str_offsets.size(); - res_data.reserve(str_data.size() / 2); - res_offsets.reserve(rows); - - std::unique_ptr searcher - = !is_utf8 ? nullptr : std::make_unique(delim); - - for (size_t i = 0; i < rows; ++i) - { - StringRef str_ref{&str_data[str_offsets[i]], str_offsets[i] - str_offsets[i - 1] - 1}; - Int64 index = index_column->getInt(i); - StringRef res_ref - = !is_utf8 ? substringIndex(str_ref, index) : substringIndexUTF8(searcher.get(), str_ref, delim, index); - appendToResultColumn(res_ref, res_data, res_offsets); - } - } - - static void vectorConstant( - const ColumnString::Chars & str_data, - const ColumnString::Offsets & str_offsets, - const String & delim, - Int64 index, - ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) - { - size_t rows = str_offsets.size(); - res_data.reserve(str_data.size() / 2); - res_offsets.reserve(rows); - - std::unique_ptr searcher - = !is_utf8 ? nullptr : std::make_unique(delim); - - for (size_t i = 0; i(str_ref, index) : substringIndexUTF8(searcher.get(), str_ref, delim, index); - appendToResultColumn(res_ref, res_data, res_offsets); - } - } - - static void constantVector( - const String & str, - const String & delim, - const IColumn * index_column, - ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) - { - size_t rows = index_column->size(); - res_data.reserve(str.size() * rows / 2); - res_offsets.reserve(rows); - - std::unique_ptr searcher - = !is_utf8 ? nullptr : std::make_unique(delim); - - StringRef str_ref{str.data(), str.size()}; - for (size_t i=0; igetInt(i); - StringRef res_ref - = !is_utf8 ? 
substringIndex(str_ref, index) : substringIndexUTF8(searcher.get(), str_ref, delim, index); - appendToResultColumn(res_ref, res_data, res_offsets); - } - } - - static void appendToResultColumn( - const StringRef & res_ref, ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) - { - size_t res_offset = res_data.size(); - res_data.resize(res_offset + res_ref.size + 1); - memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], res_ref.data, res_ref.size); - res_offset += res_ref.size; - res_data[res_offset] = 0; - ++res_offset; - - res_offsets.emplace_back(res_offset); - } - - static StringRef substringIndexUTF8( - const PositionCaseSensitiveUTF8::SearcherInBigHaystack * searcher, const StringRef & str_ref, const String & delim, Int64 index) - { - if (index == 0) - return {str_ref.data, 0}; - - const auto * begin = reinterpret_cast(str_ref.data); - const auto * end = reinterpret_cast(str_ref.data + str_ref.size); - const auto * pos = begin; - if (index > 0) - { - Int64 i = 0; - while (i < index) { - pos = searcher->search(pos, end - pos); + if (UTF8::countCodePoints(reinterpret_cast(delim.data()), delim.size()) != 1) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second argument to {} must be a single UTF-8 character", getName()); + } - if (pos != end) + auto column_res = ColumnString::create(); + ColumnString::Chars & vec_res = column_res->getChars(); + ColumnString::Offsets & offsets_res = column_res->getOffsets(); + + const ColumnConst * column_string_const = checkAndGetColumnConst(column_string.get()); + if (column_string_const) + { + String str = column_string_const->getValue(); + constantVector(str, delim, column_index.get(), vec_res, offsets_res); + } + else + { + const auto * col_str = checkAndGetColumn(column_string.get()); + if (!col_str) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "First argument to {} must be a String", getName()); + + bool is_index_const = isColumnConst(*column_index); + if (is_index_const) + { + Int64 index = column_index->getInt(0); + vectorConstant(col_str, delim, index, vec_res, offsets_res); + } + else + vectorVector(col_str, delim, column_index.get(), vec_res, offsets_res); + } + return column_res; + } + + protected: + static void vectorVector( + const ColumnString * str_column, + const String & delim, + const IColumn * index_column, + ColumnString::Chars & res_data, + ColumnString::Offsets & res_offsets) + { + size_t rows = str_column->size(); + res_data.reserve(str_column->getChars().size() / 2); + res_offsets.reserve(rows); + + std::unique_ptr searcher + = !is_utf8 ? nullptr : std::make_unique(delim.data(), delim.size()); + + for (size_t i = 0; i < rows; ++i) + { + StringRef str_ref = str_column->getDataAt(i); + Int64 index = index_column->getInt(i); + StringRef res_ref + = !is_utf8 ? substringIndex(str_ref, delim[0], index) : substringIndexUTF8(searcher.get(), str_ref, delim, index); + appendToResultColumn(res_ref, res_data, res_offsets); + } + } + + static void vectorConstant( + const ColumnString * str_column, + const String & delim, + Int64 index, + ColumnString::Chars & res_data, + ColumnString::Offsets & res_offsets) + { + size_t rows = str_column->size(); + res_data.reserve(str_column->getChars().size() / 2); + res_offsets.reserve(rows); + + std::unique_ptr searcher + = !is_utf8 ? nullptr : std::make_unique(delim.data(), delim.size()); + + for (size_t i = 0; i < rows; ++i) + { + StringRef str_ref = str_column->getDataAt(i); + StringRef res_ref + = !is_utf8 ? 
substringIndex(str_ref, delim[0], index) : substringIndexUTF8(searcher.get(), str_ref, delim, index); + std::cout << "result:" << res_ref.toString() << std::endl; + appendToResultColumn(res_ref, res_data, res_offsets); + } + } + + static void constantVector( + const String & str, + const String & delim, + const IColumn * index_column, + ColumnString::Chars & res_data, + ColumnString::Offsets & res_offsets) + { + size_t rows = index_column->size(); + res_data.reserve(str.size() * rows / 2); + res_offsets.reserve(rows); + + std::unique_ptr searcher + = !is_utf8 ? nullptr : std::make_unique(delim.data(), delim.size()); + + StringRef str_ref{str.data(), str.size()}; + for (size_t i = 0; i < rows; ++i) + { + Int64 index = index_column->getInt(i); + StringRef res_ref + = !is_utf8 ? substringIndex(str_ref, delim[0], index) : substringIndexUTF8(searcher.get(), str_ref, delim, index); + appendToResultColumn(res_ref, res_data, res_offsets); + } + } + + static void appendToResultColumn(const StringRef & res_ref, ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) + { + size_t res_offset = res_data.size(); + res_data.resize(res_offset + res_ref.size + 1); + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], res_ref.data, res_ref.size); + res_offset += res_ref.size; + res_data[res_offset] = 0; + ++res_offset; + + res_offsets.emplace_back(res_offset); + } + + static StringRef substringIndexUTF8( + const PositionCaseSensitiveUTF8::SearcherInBigHaystack * searcher, const StringRef & str_ref, const String & delim, Int64 index) + { + std::cout << "str:" << str_ref.toString() << ", delim" << delim << ",index:" << index << std::endl; + + if (index == 0) + return {str_ref.data, 0}; + + const auto * begin = reinterpret_cast(str_ref.data); + const auto * end = reinterpret_cast(str_ref.data + str_ref.size); + const auto * pos = begin; + if (index > 0) + { + Int64 i = 0; + while (i < index) + { + pos = searcher->search(pos, end - pos); + + if (pos != end) + { + pos += delim.size(); + ++i; + } + else + return str_ref; + } + return {begin, static_cast(pos - begin - delim.size())}; + } + else + { + Int64 total = 0; + while (pos < end && end != (pos = searcher->search(pos, end - pos))) + { + pos += delim.size(); + ++total; + } + + if (total + index < 0) + return str_ref; + + Int64 index_from_left = total + 1 + index; + std::cout << "total:" << total << ", index_from_left" << index_from_left << std::endl; + pos = begin; + Int64 i = 0; + while (i < index_from_left && pos < end && end != (pos = searcher->search(pos, end - pos))) { pos += delim.size(); ++i; + std::cout << "pos offset:" << pos - begin << ", total size:" << end - begin << std::endl; } - else - return str_ref; + std::cout << "pos offset:" << pos - begin << ", size:" << end - pos << std::endl; + StringRef res = {pos, static_cast(end - pos)}; + std::cout << "result:" << res.toString() << std::endl; + return res; } - return {begin, static_cast(pos - begin - delim.size())}; } - else + + static StringRef substringIndex(const StringRef & str_ref, char delim, Int64 index) { - Int64 total = 0; - while (pos < end && end != (pos = searcher->search(pos, end - pos))) + std::cout << "str:" << str_ref.toString() << ", delim" << delim << ",index:" << index << std::endl; + + if (index == 0) + return {str_ref.data, 0}; + + if (index > 0) { - pos += delim.size(); - ++total; - } - - if (total + index < 0) - return str_ref; - - Int64 index_from_left = total + 1 + index; - pos = begin; - Int64 i = 0; - while (pos < end && end != (pos = 
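// Editor's note on the negative-index branch: the implementation first
// counts all delimiter occurrences (total) and returns the whole string when
// total + index < 0; otherwise it re-scans from the beginning, skipping
// index_from_left = total + 1 + index delimiters. For example, with
// 'www.clickhouse.com', '.', index = -2: total = 2, index_from_left = 1, so
// the scan stops after the first '.' and yields 'clickhouse.com'.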
searcher->search(pos, end - pos)) && i < index_from_left) - { - pos += delim.size(); - ++i; - } - return {pos, static_cast(end - pos)}; - } - } - - template - static StringRef substringIndex( - const StringRef & str_ref, - Int64 index) - { - if (index == 0) - return {str_ref.data, 0}; - - if (index > 0) - { - const auto * end = str_ref.data + str_ref.size; - const auto * pos = str_ref.data; - Int64 i = 0; - while (i < index) - { - pos = find_first_symbols(pos, end); - - if (pos != end) + const auto * end = str_ref.data + str_ref.size; + const auto * pos = str_ref.data; + Int64 i = 0; + while (i < index) { - ++pos; - ++i; + pos = std::find(pos, end, delim); + if (pos != end) + { + ++pos; + ++i; + } + else + return str_ref; } - else - return str_ref; + return {str_ref.data, static_cast(pos - str_ref.data - 1)}; } - return {str_ref.data, static_cast(pos - str_ref.data - 1)}; - } - else - { - const auto * begin = str_ref.data; - const auto * pos = str_ref.data + str_ref.size; - Int64 i = 0; - while (i < index) + else { - const auto * next_pos = ::detail::find_last_symbols_sse2(begin, pos); - - if (next_pos != pos) + const auto * begin = str_ref.data; + const auto * pos = str_ref.data + str_ref.size; + Int64 i = 0; + while (i + index < 0) { - pos = next_pos; - ++i; - } - else - return str_ref; - } + --pos; + while (pos >= begin && *pos != delim) + --pos; - return {pos + 1, static_cast(str_ref.data + str_ref.size - pos - 1)}; + if (pos >= begin) + ++i; + else + return str_ref; + } + return {pos + 1, static_cast(str_ref.data + str_ref.size - pos - 1)}; + } } - } -}; + }; } @@ -319,4 +319,3 @@ REGISTER_FUNCTION(SubstringIndex) } - diff --git a/tests/queries/0_stateless/02798_substring_index.reference b/tests/queries/0_stateless/02798_substring_index.reference new file mode 100644 index 00000000000..a3084509c12 --- /dev/null +++ b/tests/queries/0_stateless/02798_substring_index.reference @@ -0,0 +1,155 @@ +-- { echoOn } +select substringIndex('www.clickhouse.com', '.', -4); +www.clickhouse.com +select substringIndex('www.clickhouse.com', '.', -3); +www.clickhouse.com +select substringIndex('www.clickhouse.com', '.', -2); +clickhouse.com +select substringIndex('www.clickhouse.com', '.', -1); +com +select substringIndex('www.clickhouse.com', '.', 0); + +select substringIndex('www.clickhouse.com', '.', 1); +www +select substringIndex('www.clickhouse.com', '.', 2); +www.clickhouse +select substringIndex('www.clickhouse.com', '.', 3); +www.clickhouse.com +select substringIndex('www.clickhouse.com', '.', 4); +www.clickhouse.com +select substringIndex(materialize('www.clickhouse.com'), '.', -4); +www.clickhouse.com +select substringIndex(materialize('www.clickhouse.com'), '.', -3); +www.clickhouse.com +select substringIndex(materialize('www.clickhouse.com'), '.', -2); +clickhouse.com +select substringIndex(materialize('www.clickhouse.com'), '.', -1); +com +select substringIndex(materialize('www.clickhouse.com'), '.', 0); + +select substringIndex(materialize('www.clickhouse.com'), '.', 1); +www +select substringIndex(materialize('www.clickhouse.com'), '.', 2); +www.clickhouse +select substringIndex(materialize('www.clickhouse.com'), '.', 3); +www.clickhouse.com +select substringIndex(materialize('www.clickhouse.com'), '.', 4); +www.clickhouse.com +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(-4)); +www.clickhouse.com +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(-3)); +www.clickhouse.com +select substringIndex(materialize('www.clickhouse.com'), '.', 
materialize(-2)); +clickhouse.com +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(-1)); +com +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(0)); + +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(1)); +www +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(2)); +www.clickhouse +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(3)); +www.clickhouse.com +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(4)); +www.clickhouse.com +select substringIndex('www.clickhouse.com', '.', materialize(-4)); +www.clickhouse.com +select substringIndex('www.clickhouse.com', '.', materialize(-3)); +www.clickhouse.com +select substringIndex('www.clickhouse.com', '.', materialize(-2)); +clickhouse.com +select substringIndex('www.clickhouse.com', '.', materialize(-1)); +com +select substringIndex('www.clickhouse.com', '.', materialize(0)); + +select substringIndex('www.clickhouse.com', '.', materialize(1)); +www +select substringIndex('www.clickhouse.com', '.', materialize(2)); +www.clickhouse +select substringIndex('www.clickhouse.com', '.', materialize(3)); +www.clickhouse.com +select substringIndex('www.clickhouse.com', '.', materialize(4)); +www.clickhouse.com +select SUBSTRING_INDEX('www.clickhouse.com', '.', 2); +www.clickhouse +select substringIndex('www.clickhouse.com', '..', 2); -- { serverError BAD_ARGUMENTS } +select substringIndex('www.clickhouse.com', '', 2); -- { serverError BAD_ARGUMENTS } +select substringIndex('www.clickhouse.com', materialize('.'), 2); -- { serverError ILLEGAL_COLUMN } +select substringIndex('www.clickhouse.com', '.', cast(2 as Int128)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select substringIndexUTF8('富强,民主,文明', ',', -4); +富强,民主,文明 +select substringIndexUTF8('富强,民主,文明', ',', -3); +富强,民主,文明 +select substringIndexUTF8('富强,民主,文明', ',', -2); +民主,文明 +select substringIndexUTF8('富强,民主,文明', ',', -1); +文明 +select substringIndexUTF8('富强,民主,文明', ',', 0); + +select substringIndexUTF8('富强,民主,文明', ',', 1); +富强 +select substringIndexUTF8('富强,民主,文明', ',', 2); +富强,民主 +select substringIndexUTF8('富强,民主,文明', ',', 3); +富强,民主,文明 +select substringIndexUTF8('富强,民主,文明', ',', 4); +富强,民主,文明 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', -4); +富强,民主,文明 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', -3); +富强,民主,文明 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', -2); +民主,文明 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', -1); +文明 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', 0); + +select substringIndexUTF8(materialize('富强,民主,文明'), ',', 1); +富强 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', 2); +富强,民主 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', 3); +富强,民主,文明 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', 4); +富强,民主,文明 +select substringIndexUTF8('富强,民主,文明', ',', materialize(-4)); +富强,民主,文明 +select substringIndexUTF8('富强,民主,文明', ',', materialize(-3)); +富强,民主,文明 +select substringIndexUTF8('富强,民主,文明', ',', materialize(-2)); +民主,文明 +select substringIndexUTF8('富强,民主,文明', ',', materialize(-1)); +文明 +select substringIndexUTF8('富强,民主,文明', ',', materialize(0)); + +select substringIndexUTF8('富强,民主,文明', ',', materialize(1)); +富强 +select substringIndexUTF8('富强,民主,文明', ',', materialize(2)); +富强,民主 +select substringIndexUTF8('富强,民主,文明', ',', materialize(3)); +富强,民主,文明 +select substringIndexUTF8('富强,民主,文明', ',', materialize(4)); +富强,民主,文明 +select 
substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(-4)); +富强,民主,文明 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(-3)); +富强,民主,文明 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(-2)); +民主,文明 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(-1)); +文明 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(0)); + +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(1)); +富强 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(2)); +富强,民主 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(3)); +富强,民主,文明 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(4)); +富强,民主,文明 +select substringIndexUTF8('富强,民主,文明', ',,', 2); -- { serverError BAD_ARGUMENTS } +select substringIndexUTF8('富强,民主,文明', '', 2); -- { serverError BAD_ARGUMENTS } +select substringIndexUTF8('富强,民主,文明', materialize(','), 2); -- { serverError ILLEGAL_COLUMN } +select substringIndexUTF8('富强,民主,文明', ',', cast(2 as Int128)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } diff --git a/tests/queries/0_stateless/02798_substring_index.sql b/tests/queries/0_stateless/02798_substring_index.sql new file mode 100644 index 00000000000..520775e8970 --- /dev/null +++ b/tests/queries/0_stateless/02798_substring_index.sql @@ -0,0 +1,93 @@ +-- { echoOn } +select substringIndex('www.clickhouse.com', '.', -4); +select substringIndex('www.clickhouse.com', '.', -3); +select substringIndex('www.clickhouse.com', '.', -2); +select substringIndex('www.clickhouse.com', '.', -1); +select substringIndex('www.clickhouse.com', '.', 0); +select substringIndex('www.clickhouse.com', '.', 1); +select substringIndex('www.clickhouse.com', '.', 2); +select substringIndex('www.clickhouse.com', '.', 3); +select substringIndex('www.clickhouse.com', '.', 4); + +select substringIndex(materialize('www.clickhouse.com'), '.', -4); +select substringIndex(materialize('www.clickhouse.com'), '.', -3); +select substringIndex(materialize('www.clickhouse.com'), '.', -2); +select substringIndex(materialize('www.clickhouse.com'), '.', -1); +select substringIndex(materialize('www.clickhouse.com'), '.', 0); +select substringIndex(materialize('www.clickhouse.com'), '.', 1); +select substringIndex(materialize('www.clickhouse.com'), '.', 2); +select substringIndex(materialize('www.clickhouse.com'), '.', 3); +select substringIndex(materialize('www.clickhouse.com'), '.', 4); + +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(-4)); +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(-3)); +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(-2)); +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(-1)); +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(0)); +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(1)); +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(2)); +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(3)); +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(4)); + +select substringIndex('www.clickhouse.com', '.', materialize(-4)); +select substringIndex('www.clickhouse.com', '.', materialize(-3)); +select substringIndex('www.clickhouse.com', '.', materialize(-2)); +select substringIndex('www.clickhouse.com', '.', materialize(-1)); +select substringIndex('www.clickhouse.com', '.', materialize(0)); +select 
substringIndex('www.clickhouse.com', '.', materialize(1)); +select substringIndex('www.clickhouse.com', '.', materialize(2)); +select substringIndex('www.clickhouse.com', '.', materialize(3)); +select substringIndex('www.clickhouse.com', '.', materialize(4)); + +select SUBSTRING_INDEX('www.clickhouse.com', '.', 2); + +select substringIndex('www.clickhouse.com', '..', 2); -- { serverError BAD_ARGUMENTS } +select substringIndex('www.clickhouse.com', '', 2); -- { serverError BAD_ARGUMENTS } +select substringIndex('www.clickhouse.com', materialize('.'), 2); -- { serverError ILLEGAL_COLUMN } +select substringIndex('www.clickhouse.com', '.', cast(2 as Int128)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +select substringIndexUTF8('富强,民主,文明', ',', -4); +select substringIndexUTF8('富强,民主,文明', ',', -3); +select substringIndexUTF8('富强,民主,文明', ',', -2); +select substringIndexUTF8('富强,民主,文明', ',', -1); +select substringIndexUTF8('富强,民主,文明', ',', 0); +select substringIndexUTF8('富强,民主,文明', ',', 1); +select substringIndexUTF8('富强,民主,文明', ',', 2); +select substringIndexUTF8('富强,民主,文明', ',', 3); +select substringIndexUTF8('富强,民主,文明', ',', 4); + +select substringIndexUTF8(materialize('富强,民主,文明'), ',', -4); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', -3); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', -2); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', -1); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', 0); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', 1); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', 2); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', 3); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', 4); + +select substringIndexUTF8('富强,民主,文明', ',', materialize(-4)); +select substringIndexUTF8('富强,民主,文明', ',', materialize(-3)); +select substringIndexUTF8('富强,民主,文明', ',', materialize(-2)); +select substringIndexUTF8('富强,民主,文明', ',', materialize(-1)); +select substringIndexUTF8('富强,民主,文明', ',', materialize(0)); +select substringIndexUTF8('富强,民主,文明', ',', materialize(1)); +select substringIndexUTF8('富强,民主,文明', ',', materialize(2)); +select substringIndexUTF8('富强,民主,文明', ',', materialize(3)); +select substringIndexUTF8('富强,民主,文明', ',', materialize(4)); + +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(-4)); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(-3)); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(-2)); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(-1)); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(0)); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(1)); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(2)); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(3)); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(4)); + +select substringIndexUTF8('富强,民主,文明', ',,', 2); -- { serverError BAD_ARGUMENTS } +select substringIndexUTF8('富强,民主,文明', '', 2); -- { serverError BAD_ARGUMENTS } +select substringIndexUTF8('富强,民主,文明', materialize(','), 2); -- { serverError ILLEGAL_COLUMN } +select substringIndexUTF8('富强,民主,文明', ',', cast(2 as Int128)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +-- { echoOff } From 70e49cb31c0ff80ffc6c8e6ab5687b24af659ad1 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 28 Jun 2023 11:28:20 +0800 Subject: [PATCH 0883/1997] add docs --- .../functions/string-functions.md | 36 +++++++++++++++++++ 
src/Functions/substringIndex.cpp | 16 ++------- ...new_functions_must_be_documented.reference | 1 + 3 files changed, 39 insertions(+), 14 deletions(-) diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 5175bbf0615..5197b786884 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -573,6 +573,42 @@ Alias: Like `substring` but for Unicode code points. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. + +## substringIndex(s, delim, index) + +Returns the substring of `s` before `index` occurrences of the delimiter `delim`, as in Spark or MySQL. + +**Syntax** + +```sql +substringIndex(s, delim, index) +``` +Alias: `SUBSTRING_INDEX` + + +**Arguments** + +- s: The string to extract substring from. [String](../../sql-reference/data-types/string.md). +- delim: The character to split. [String](../../sql-reference/data-types/string.md). +- index: The number of occurrences of the delimiter to count before extracting the substring. If index is positive, everything to the left of the final delimiter (counting from the left) is returned. If index is negative, everything to the right of the final delimiter (counting from the right) is returned. [UInt or Int](../data-types/int-uint.md) + +**Example** + +``` sql +SELECT substringIndex('www.clickhouse.com', '.', 2) +``` + +Result: +``` +┌─substringIndex('www.clickhouse.com', '.', 2)─┐ +│ www.clickhouse │ +└──────────────────────────────────────────────┘ +``` + +## substringIndexUTF8(s, delim, index) + +Like `substringIndex` but for Unicode code points. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. + ## appendTrailingCharIfAbsent Appends character `c` to string `s` if `s` is non-empty and does not end with character `c`. diff --git a/src/Functions/substringIndex.cpp b/src/Functions/substringIndex.cpp index 1fca3bbed14..fbb20b245f6 100644 --- a/src/Functions/substringIndex.cpp +++ b/src/Functions/substringIndex.cpp @@ -17,8 +17,6 @@ namespace ErrorCodes { extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int ZERO_ARRAY_OR_TUPLE_INDEX; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int BAD_ARGUMENTS; } @@ -163,7 +161,6 @@ namespace StringRef str_ref = str_column->getDataAt(i); StringRef res_ref = !is_utf8 ? 
substringIndex(str_ref, delim[0], index) : substringIndexUTF8(searcher.get(), str_ref, delim, index); - std::cout << "result:" << res_ref.toString() << std::endl; appendToResultColumn(res_ref, res_data, res_offsets); } } @@ -207,8 +204,6 @@ namespace static StringRef substringIndexUTF8( const PositionCaseSensitiveUTF8::SearcherInBigHaystack * searcher, const StringRef & str_ref, const String & delim, Int64 index) { - std::cout << "str:" << str_ref.toString() << ", delim" << delim << ",index:" << index << std::endl; - if (index == 0) return {str_ref.data, 0}; @@ -244,27 +239,20 @@ namespace if (total + index < 0) return str_ref; - Int64 index_from_left = total + 1 + index; - std::cout << "total:" << total << ", index_from_left" << index_from_left << std::endl; pos = begin; Int64 i = 0; + Int64 index_from_left = total + 1 + index; while (i < index_from_left && pos < end && end != (pos = searcher->search(pos, end - pos))) { pos += delim.size(); ++i; - std::cout << "pos offset:" << pos - begin << ", total size:" << end - begin << std::endl; } - std::cout << "pos offset:" << pos - begin << ", size:" << end - pos << std::endl; - StringRef res = {pos, static_cast(end - pos)}; - std::cout << "result:" << res.toString() << std::endl; - return res; + return {pos, static_cast(end - pos)}; } } static StringRef substringIndex(const StringRef & str_ref, char delim, Int64 index) { - std::cout << "str:" << str_ref.toString() << ", delim" << delim << ",index:" << index << std::endl; - if (index == 0) return {str_ref.data, 0}; diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index b5c133988e6..6c904d6fc05 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -666,6 +666,7 @@ startsWith subBitmap substring substringUTF8 +substringIndex subtractDays subtractHours subtractMicroseconds From 521137c55d18f956c86cf71b1ca7bca2601f7d70 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 28 Jun 2023 06:28:21 +0300 Subject: [PATCH 0884/1997] Update test.py --- tests/integration/test_attach_table_normalizer/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_attach_table_normalizer/test.py b/tests/integration/test_attach_table_normalizer/test.py index 49acefdcd17..10b400494ab 100644 --- a/tests/integration/test_attach_table_normalizer/test.py +++ b/tests/integration/test_attach_table_normalizer/test.py @@ -4,7 +4,7 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) node = cluster.add_instance( - "node", main_configs=["configs/config.xml"], with_zookeeper=True, stay_alive=True + "node", stay_alive=True ) From 3f73d3f48aa679dc689dea6e49594752461e4d8b Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 28 Jun 2023 11:33:39 +0800 Subject: [PATCH 0885/1997] fix failed check --- .../02415_all_new_functions_must_be_documented.reference | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index 6c904d6fc05..a2621949d0d 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -665,8 
+665,9 @@ sqrt startsWith subBitmap substring -substringUTF8 substringIndex +substringIndexUTF8 +substringUTF8 subtractDays subtractHours subtractMicroseconds From 5df6f3d6e28483a029f3a8859c8bd09fdab008a0 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 28 Jun 2023 03:40:09 +0000 Subject: [PATCH 0886/1997] Automatic style fix --- tests/integration/test_attach_table_normalizer/test.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/integration/test_attach_table_normalizer/test.py b/tests/integration/test_attach_table_normalizer/test.py index 10b400494ab..79093bf4014 100644 --- a/tests/integration/test_attach_table_normalizer/test.py +++ b/tests/integration/test_attach_table_normalizer/test.py @@ -3,9 +3,7 @@ import pytest from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -node = cluster.add_instance( - "node", stay_alive=True -) +node = cluster.add_instance("node", stay_alive=True) @pytest.fixture(scope="module") From 375f7abfeba866ae7956e58e9bd1bf364b972ea5 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 28 Jun 2023 12:27:59 +0800 Subject: [PATCH 0887/1997] fix spelling --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 00d047121e6..9af48417250 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -210,6 +210,7 @@ Decrypted Deduplicate Deduplication DelayedInserts +delim DeliveryTag DeltaLake Denormalize @@ -834,6 +835,8 @@ Subexpression Submodules Subqueries Substrings +substringIndex +substringIndexUTF SummingMergeTree SuperSet Superset From be852d554bb592cb66cacd0feef75b496b2465f1 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 27 Jun 2023 13:33:39 +0000 Subject: [PATCH 0888/1997] Use magic_enum for values --- src/Common/ZooKeeper/IKeeper.h | 2 + src/Common/ZooKeeper/ZooKeeper.h | 2 + src/Common/ZooKeeper/ZooKeeperImpl.h | 2 + src/Coordination/FourLetterCommand.cpp | 11 +++-- src/Coordination/KeeperContext.cpp | 20 +++++++-- src/Coordination/KeeperFeatureFlags.cpp | 17 +++----- src/Coordination/KeeperFeatureFlags.h | 14 +------ .../StorageSystemZooKeeperConnection.cpp | 41 ++++++++++++++++--- .../test_keeper_feature_flags_config/test.py | 4 +- ...2735_system_zookeeper_connection.reference | 4 +- .../02735_system_zookeeper_connection.sql | 2 +- 11 files changed, 80 insertions(+), 39 deletions(-) diff --git a/src/Common/ZooKeeper/IKeeper.h b/src/Common/ZooKeeper/IKeeper.h index 369aacf16c7..2703c1079c0 100644 --- a/src/Common/ZooKeeper/IKeeper.h +++ b/src/Common/ZooKeeper/IKeeper.h @@ -532,6 +532,8 @@ public: virtual bool isFeatureEnabled(DB::KeeperFeatureFlag feature_flag) const = 0; + virtual const DB::KeeperFeatureFlags * getKeeperFeatureFlags() const { return nullptr; } + /// Expire session and finish all pending requests virtual void finalize(const String & reason) = 0; }; diff --git a/src/Common/ZooKeeper/ZooKeeper.h b/src/Common/ZooKeeper/ZooKeeper.h index 03200771e4a..1fcb048add2 100644 --- a/src/Common/ZooKeeper/ZooKeeper.h +++ b/src/Common/ZooKeeper/ZooKeeper.h @@ -529,6 +529,8 @@ public: size_t getConnectedZooKeeperIndex() const { return connected_zk_index; } UInt64 getConnectedTime() const { return connected_time; } + const DB::KeeperFeatureFlags * getKeeperFeatureFlags() const { return impl->getKeeperFeatureFlags(); } + private: void 
init(ZooKeeperArgs args_); diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.h b/src/Common/ZooKeeper/ZooKeeperImpl.h index ae6bef067e3..44ea993947e 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.h +++ b/src/Common/ZooKeeper/ZooKeeperImpl.h @@ -202,6 +202,8 @@ public: void setServerCompletelyStarted(); + const KeeperFeatureFlags * getKeeperFeatureFlags() const override { return &keeper_feature_flags; } + private: ACLs default_acls; Poco::Net::SocketAddress connected_zk_address; diff --git a/src/Coordination/FourLetterCommand.cpp b/src/Coordination/FourLetterCommand.cpp index 10d13657fb0..34540902d47 100644 --- a/src/Coordination/FourLetterCommand.cpp +++ b/src/Coordination/FourLetterCommand.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -545,7 +546,7 @@ String FeatureFlagsCommand::run() StringBuffer ret; - auto append = [&ret] (String key, uint8_t value) -> void + auto append = [&ret] (const String & key, uint8_t value) -> void { writeText(key, ret); writeText('\t', ret); @@ -553,8 +554,12 @@ String FeatureFlagsCommand::run() writeText('\n', ret); }; - for (const auto feature : all_keeper_feature_flags) - append(SettingFieldKeeperFeatureFlagTraits::toString(feature), feature_flags.isEnabled(feature)); + for (const auto & [feature_flag, name] : magic_enum::enum_entries()) + { + std::string feature_flag_string(name); + boost::to_lower(feature_flag_string); + append(feature_flag_string, feature_flags.isEnabled(feature_flag)); + } return ret.str(); diff --git a/src/Coordination/KeeperContext.cpp b/src/Coordination/KeeperContext.cpp index e6f30c81310..e1c3a138646 100644 --- a/src/Coordination/KeeperContext.cpp +++ b/src/Coordination/KeeperContext.cpp @@ -1,10 +1,19 @@ #include #include #include +#include +#include namespace DB { +namespace ErrorCodes +{ + +extern const int BAD_ARGUMENTS; + +} + KeeperContext::KeeperContext() { /// enable by default some feature flags @@ -29,12 +38,17 @@ void KeeperContext::initialize(const Poco::Util::AbstractConfiguration & config) config.keys(feature_flags_key, keys); for (const auto & key : keys) { - auto feature_flag = SettingFieldKeeperFeatureFlagTraits::fromString(key); + auto feature_flag_string = boost::to_upper_copy(key); + auto feature_flag = magic_enum::enum_cast(feature_flag_string); + + if (!feature_flag.has_value()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid feature flag defined in config for Keeper: {}", key); + auto is_enabled = config.getBool(feature_flags_key + "." 
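// Editor's note (a sketch — the enclosing section name is an assumption,
// since feature_flags_key is defined outside the hunk shown): this loop
// reads lowercase flag names as boolean children of the feature_flags
// section, e.g.
//     <feature_flags>
//         <filtered_list>1</filtered_list>
//         <multi_read>1</multi_read>
//         <check_not_exists>0</check_not_exists>
//     </feature_flags>
// Each key is upper-cased and resolved through magic_enum::enum_cast, so a
// misspelled flag name now fails fast with BAD_ARGUMENTS.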
+ key); if (is_enabled) - feature_flags.enableFeatureFlag(feature_flag); + feature_flags.enableFeatureFlag(feature_flag.value()); else - feature_flags.disableFeatureFlag(feature_flag); + feature_flags.disableFeatureFlag(feature_flag.value()); } system_nodes_with_data[keeper_api_feature_flags_path] = feature_flags.getFeatureFlags(); diff --git a/src/Coordination/KeeperFeatureFlags.cpp b/src/Coordination/KeeperFeatureFlags.cpp index 216dca014d4..3aff87bcea9 100644 --- a/src/Coordination/KeeperFeatureFlags.cpp +++ b/src/Coordination/KeeperFeatureFlags.cpp @@ -23,15 +23,10 @@ std::pair getByteAndBitIndex(size_t num) } -IMPLEMENT_SETTING_ENUM(KeeperFeatureFlag, ErrorCodes::BAD_ARGUMENTS, - {{"filtered_list", KeeperFeatureFlag::FILTERED_LIST}, - {"multi_read", KeeperFeatureFlag::MULTI_READ}, - {"check_not_exists", KeeperFeatureFlag::CHECK_NOT_EXISTS}}); - KeeperFeatureFlags::KeeperFeatureFlags() { /// get byte idx of largest value - auto [byte_idx, _] = getByteAndBitIndex(all_keeper_feature_flags.size() - 1); + auto [byte_idx, _] = getByteAndBitIndex(magic_enum::enum_count() - 1); feature_flags = std::string(byte_idx + 1, 0); } @@ -56,7 +51,7 @@ void KeeperFeatureFlags::fromApiVersion(KeeperApiVersion keeper_api_version) bool KeeperFeatureFlags::isEnabled(KeeperFeatureFlag feature_flag) const { - auto [byte_idx, bit_idx] = getByteAndBitIndex(feature_flag); + auto [byte_idx, bit_idx] = getByteAndBitIndex(magic_enum::enum_integer(feature_flag)); if (byte_idx > feature_flags.size()) return false; @@ -71,7 +66,7 @@ void KeeperFeatureFlags::setFeatureFlags(std::string feature_flags_) void KeeperFeatureFlags::enableFeatureFlag(KeeperFeatureFlag feature_flag) { - auto [byte_idx, bit_idx] = getByteAndBitIndex(feature_flag); + auto [byte_idx, bit_idx] = getByteAndBitIndex(magic_enum::enum_integer(feature_flag)); chassert(byte_idx < feature_flags.size()); feature_flags[byte_idx] |= (1 << bit_idx); @@ -79,7 +74,7 @@ void KeeperFeatureFlags::enableFeatureFlag(KeeperFeatureFlag feature_flag) void KeeperFeatureFlags::disableFeatureFlag(KeeperFeatureFlag feature_flag) { - auto [byte_idx, bit_idx] = getByteAndBitIndex(feature_flag); + auto [byte_idx, bit_idx] = getByteAndBitIndex(magic_enum::enum_integer(feature_flag)); chassert(byte_idx < feature_flags.size()); feature_flags[byte_idx] &= ~(1 << bit_idx); @@ -92,10 +87,10 @@ const std::string & KeeperFeatureFlags::getFeatureFlags() const void KeeperFeatureFlags::logFlags(Poco::Logger * log) const { - for (const auto & feature_flag : all_keeper_feature_flags) + for (const auto & [feature_flag, feature_flag_name] : magic_enum::enum_entries()) { auto is_enabled = isEnabled(feature_flag); - LOG_INFO(log, "Keeper feature flag {}: {}", SettingFieldKeeperFeatureFlagTraits::toString(feature_flag), is_enabled ? "enabled" : "disabled"); + LOG_INFO(log, "Keeper feature flag {}: {}", feature_flag_name, is_enabled ? 
"enabled" : "disabled"); } } diff --git a/src/Coordination/KeeperFeatureFlags.h b/src/Coordination/KeeperFeatureFlags.h index cdd4704a7ca..6c48915f60c 100644 --- a/src/Coordination/KeeperFeatureFlags.h +++ b/src/Coordination/KeeperFeatureFlags.h @@ -1,28 +1,18 @@ #pragma once -#include -#include #include namespace DB { -enum KeeperFeatureFlag +/// these values cannot be reordered or removed, only new values can be added +enum class KeeperFeatureFlag : size_t { FILTERED_LIST = 0, MULTI_READ, CHECK_NOT_EXISTS, }; -static inline constexpr std::array all_keeper_feature_flags -{ - KeeperFeatureFlag::FILTERED_LIST, - KeeperFeatureFlag::MULTI_READ, - KeeperFeatureFlag::CHECK_NOT_EXISTS, -}; - -DECLARE_SETTING_ENUM(KeeperFeatureFlag); - class KeeperFeatureFlags { public: diff --git a/src/Storages/System/StorageSystemZooKeeperConnection.cpp b/src/Storages/System/StorageSystemZooKeeperConnection.cpp index cd78ae01457..33268d58358 100644 --- a/src/Storages/System/StorageSystemZooKeeperConnection.cpp +++ b/src/Storages/System/StorageSystemZooKeeperConnection.cpp @@ -1,8 +1,11 @@ #include +#include +#include #include #include #include #include +#include #include namespace DB @@ -10,6 +13,13 @@ namespace DB NamesAndTypesList StorageSystemZooKeeperConnection::getNamesAndTypes() { + DataTypeEnum16::Values feature_flags_enum_values; + feature_flags_enum_values.reserve(magic_enum::enum_count()); + for (const auto & [feature_flag, feature_flag_string] : magic_enum::enum_entries()) + feature_flags_enum_values.push_back(std::pair{std::string{feature_flag_string}, static_cast(feature_flag)}); + + auto feature_flags_enum = std::make_shared(std::move(feature_flags_enum_values)); + return { {"name", std::make_shared()}, {"host", std::make_shared()}, @@ -19,7 +29,8 @@ NamesAndTypesList StorageSystemZooKeeperConnection::getNamesAndTypes() {"session_uptime_elapsed_seconds", std::make_shared()}, {"is_expired", std::make_shared()}, {"keeper_api_version", std::make_shared()}, - {"client_id", std::make_shared()} + {"client_id", std::make_shared()}, + {"enabled_feature_flags", std::make_shared(std::move(feature_flags_enum))} }; } @@ -36,17 +47,37 @@ void StorageSystemZooKeeperConnection::fillData(MutableColumns & res_columns, Co res_columns[7]->insert(0); res_columns[8]->insert(context->getZooKeeper()->getClientID()); + const auto add_enabled_feature_flags = [&](const auto & zookeeper) + { + Array enabled_feature_flags; + const auto * feature_flags = zookeeper->getKeeperFeatureFlags(); + if (feature_flags) + { + for (const auto & feature_flag : magic_enum::enum_values()) + { + if (feature_flags->isEnabled(feature_flag)) + { + enabled_feature_flags.push_back(feature_flag); + } + } + } + res_columns[9]->insert(std::move(enabled_feature_flags)); + }; + + add_enabled_feature_flags(context->getZooKeeper()); + for (const auto & elem : context->getAuxiliaryZooKeepers()) { res_columns[0]->insert(elem.first); res_columns[1]->insert(elem.second->getConnectedZooKeeperHost()); res_columns[2]->insert(elem.second->getConnectedZooKeeperPort()); res_columns[3]->insert(elem.second->getConnectedZooKeeperIndex()); - res_columns[4]->insert(elem.second->getSessionUptime()); - res_columns[5]->insert(elem.second->expired()); - res_columns[6]->insert(0); - res_columns[7]->insert(elem.second->getClientID()); + res_columns[4]->insert(elem.second->getConnectedTime()); + res_columns[5]->insert(elem.second->getSessionUptime()); + res_columns[6]->insert(elem.second->expired()); + res_columns[7]->insert(0); 
res_columns[8]->insert(elem.second->getClientID()); + add_enabled_feature_flags(elem.second); } } diff --git a/tests/integration/test_keeper_feature_flags_config/test.py b/tests/integration/test_keeper_feature_flags_config/test.py index bb7252e9ec8..93ac6cbd3bd 100644 --- a/tests/integration/test_keeper_feature_flags_config/test.py +++ b/tests/integration/test_keeper_feature_flags_config/test.py @@ -69,12 +69,12 @@ def test_keeper_feature_flags(started_cluster): for feature, is_enabled in feature_flags: node.wait_for_log_line( - f"ZooKeeperClient: Keeper feature flag {feature}: {'enabled' if is_enabled else 'disabled'}", + f"ZooKeeperClient: Keeper feature flag {feature.upper()}: {'enabled' if is_enabled else 'disabled'}", look_behind_lines=1000, ) node.wait_for_log_line( - f"KeeperContext: Keeper feature flag {feature}: {'enabled' if is_enabled else 'disabled'}", + f"KeeperContext: Keeper feature flag {feature.upper()}: {'enabled' if is_enabled else 'disabled'}", look_behind_lines=1000, ) diff --git a/tests/queries/0_stateless/02735_system_zookeeper_connection.reference b/tests/queries/0_stateless/02735_system_zookeeper_connection.reference index eddd4829829..380da27cde6 100644 --- a/tests/queries/0_stateless/02735_system_zookeeper_connection.reference +++ b/tests/queries/0_stateless/02735_system_zookeeper_connection.reference @@ -1,2 +1,2 @@ -default ::1 9181 0 0 0 1 1 -zookeeper2 ::1 9181 0 0 0 1 \ No newline at end of file +default ::1 9181 0 0 0 1 1 ['FILTERED_LIST','MULTI_READ','CHECK_NOT_EXISTS'] +zookeeper2 ::1 9181 0 0 0 1 diff --git a/tests/queries/0_stateless/02735_system_zookeeper_connection.sql b/tests/queries/0_stateless/02735_system_zookeeper_connection.sql index 863d90e1654..f999da51225 100644 --- a/tests/queries/0_stateless/02735_system_zookeeper_connection.sql +++ b/tests/queries/0_stateless/02735_system_zookeeper_connection.sql @@ -9,7 +9,7 @@ ENGINE ReplicatedMergeTree('zookeeper2:/clickhouse/{database}/02731_zk_connectio ORDER BY tuple(); select name, host, port, index, is_expired, keeper_api_version, (connected_time between yesterday() and now()), - (abs(session_uptime_elapsed_seconds - zookeeperSessionUptime()) < 10) + (abs(session_uptime_elapsed_seconds - zookeeperSessionUptime()) < 10), enabled_feature_flags from system.zookeeper_connection where name='default'; -- keeper_api_version will by 0 for auxiliary_zookeeper2, because we fail to get /api_version due to chroot From eff1cc0e5df2af76b5687fce03901686e1a8360c Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 28 Jun 2023 10:20:33 +0200 Subject: [PATCH 0889/1997] Remove unused errorcode --- src/Coordination/KeeperFeatureFlags.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/Coordination/KeeperFeatureFlags.cpp b/src/Coordination/KeeperFeatureFlags.cpp index 3aff87bcea9..d0cd1c86b55 100644 --- a/src/Coordination/KeeperFeatureFlags.cpp +++ b/src/Coordination/KeeperFeatureFlags.cpp @@ -6,11 +6,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - namespace { From 68ac4d8cc934d4e9483b5257e9ffbdb84b92c709 Mon Sep 17 00:00:00 2001 From: xuelei Date: Wed, 28 Jun 2023 16:31:57 +0800 Subject: [PATCH 0890/1997] fix storage policy prompt display error --- src/Disks/StoragePolicy.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Disks/StoragePolicy.cpp b/src/Disks/StoragePolicy.cpp index f4be8b8fe86..02789132e55 100644 --- a/src/Disks/StoragePolicy.cpp +++ b/src/Disks/StoragePolicy.cpp @@ -302,7 +302,7 @@ void 
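// Editor's note on the fix below: both error branches previously printed the
// old policy's name (the `name` member), although it is the new policy that
// lacks a volume or disk. A hypothetical reproduction:
//     ALTER TABLE t MODIFY SETTING storage_policy = 'incomplete_policy'
// would now report "New storage policy `incomplete_policy` shall contain
// volumes of old one" instead of naming the policy already in use.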
StoragePolicy::checkCompatibleWith(const StoragePolicyPtr & new_storage_pol for (const auto & volume : getVolumes()) { if (!new_volume_names.contains(volume->getName())) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "New storage policy {} shall contain volumes of old one", backQuote(name)); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "New storage policy {} shall contain volumes of old one", backQuote(new_storage_policy->getName())); std::unordered_set new_disk_names; for (const auto & disk : new_storage_policy->getVolumeByName(volume->getName())->getDisks()) @@ -310,7 +310,7 @@ void StoragePolicy::checkCompatibleWith(const StoragePolicyPtr & new_storage_pol for (const auto & disk : volume->getDisks()) if (!new_disk_names.contains(disk->getName())) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "New storage policy {} shall contain disks of old one", backQuote(name)); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "New storage policy {} shall contain disks of old one", backQuote(new_storage_policy->getName())); } } From 04a08b47f3ca5d250600775adf1c307c09af929b Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Tue, 27 Jun 2023 22:25:56 +0200 Subject: [PATCH 0891/1997] Reorganize installation of clickhouse to have separated layers --- docker/server/Dockerfile.ubuntu | 46 +++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index f393b98cfe6..abb3f387330 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -11,14 +11,15 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list && apt-get update \ && apt-get upgrade -yq \ && apt-get install --yes --no-install-recommends \ - apt-transport-https \ ca-certificates \ - dirmngr \ - gnupg2 \ - wget \ locales \ tzdata \ - && apt-get clean + wget \ + && apt-get clean \ + && rm -rf \ + /var/lib/apt/lists/* \ + /var/cache/debconf \ + /tmp/* ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" @@ -43,7 +44,8 @@ ARG single_binary_location_url="" ARG TARGETARCH -RUN arch=${TARGETARCH:-amd64} \ +# install from a web location with deb packages +RUN arch="${TARGETARCH:-amd64}" \ && if [ -n "${deb_location_url}" ]; then \ echo "installing from custom url with deb packages: ${deb_location_url}" \ rm -rf /tmp/clickhouse_debs \ @@ -54,15 +56,27 @@ RUN arch=${TARGETARCH:-amd64} \ || exit 1 \ ; done \ && dpkg -i /tmp/clickhouse_debs/*.deb ; \ - elif [ -n "${single_binary_location_url}" ]; then \ + fi + +# install from a single binary +RUN if [ -n "${single_binary_location_url}" ]; then \ echo "installing from single binary url: ${single_binary_location_url}" \ && rm -rf /tmp/clickhouse_binary \ && mkdir -p /tmp/clickhouse_binary \ && wget --progress=bar:force:noscroll "${single_binary_location_url}" -O /tmp/clickhouse_binary/clickhouse \ && chmod +x /tmp/clickhouse_binary/clickhouse \ && /tmp/clickhouse_binary/clickhouse install --user "clickhouse" --group "clickhouse" ; \ - else \ - mkdir -p /etc/apt/sources.list.d \ + fi + +# A fallback to installation from ClickHouse repository +RUN if ! 
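# Editor's note: the guard below probes whether one of the optional layers
# above (custom debs or a single static binary) already produced a working
# clickhouse binary; only when `clickhouse local` fails does this layer fall
# back to installing ${PACKAGES} from the official APT repository.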
clickhouse local -q "SELECT ''" > /dev/null; then \ + apt-get update \ + && apt-get install --yes --no-install-recommends \ + apt-transport-https \ + ca-certificates \ + dirmngr \ + gnupg2 \ + && mkdir -p /etc/apt/sources.list.d \ && apt-key adv --keyserver keyserver.ubuntu.com --recv 8919F6BD2B48D754 \ && echo ${REPOSITORY} > /etc/apt/sources.list.d/clickhouse.list \ && echo "installing from repository: ${REPOSITORY}" \ @@ -72,20 +86,20 @@ RUN arch=${TARGETARCH:-amd64} \ packages="${packages} ${package}=${VERSION}" \ ; done \ && apt-get install --allow-unauthenticated --yes --no-install-recommends ${packages} || exit 1 \ - ; fi \ - && clickhouse-local -q 'SELECT * FROM system.build_options' \ && rm -rf \ /var/lib/apt/lists/* \ /var/cache/debconf \ /tmp/* \ - && mkdir -p /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client \ - && chmod ugo+Xrw -R /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client - -RUN apt-get autoremove --purge -yq libksba8 && \ - apt-get autoremove -yq + && apt-get autoremove --purge -yq libksba8 \ + && apt-get autoremove -yq \ + ; fi +# post install # we need to allow "others" access to clickhouse folder, because docker container # can be started with arbitrary uid (openshift usecase) +RUN clickhouse-local -q 'SELECT * FROM system.build_options' \ + && mkdir -p /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client \ + && chmod ugo+Xrw -R /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client RUN locale-gen en_US.UTF-8 ENV LANG en_US.UTF-8 From 6145baade8dad30207935c81a844fadd618cb912 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Tue, 27 Jun 2023 22:27:33 +0200 Subject: [PATCH 0892/1997] Upgrade OS only once The ubuntu images are updated twice a month, it makes sense to do upgrade https://github.com/docker-library/official-images/commits/master/library/ubuntu --- docker/server/Dockerfile.ubuntu | 1 - 1 file changed, 1 deletion(-) diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index abb3f387330..a563149ba8e 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -81,7 +81,6 @@ RUN if ! clickhouse local -q "SELECT ''" > /dev/null; then \ && echo ${REPOSITORY} > /etc/apt/sources.list.d/clickhouse.list \ && echo "installing from repository: ${REPOSITORY}" \ && apt-get update \ - && apt-get --yes -o "Dpkg::Options::=--force-confdef" -o "Dpkg::Options::=--force-confold" upgrade \ && for package in ${PACKAGES}; do \ packages="${packages} ${package}=${VERSION}" \ ; done \ From a2c4546c00f181ddc11a59c9e1b259819432e3c8 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Tue, 27 Jun 2023 22:53:30 +0200 Subject: [PATCH 0893/1997] Downgrade the base image to address issues on old dockerd --- docker/server/Dockerfile.ubuntu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index a563149ba8e..e2347945cf6 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -1,4 +1,4 @@ -FROM ubuntu:22.04 +FROM ubuntu:20.04 # see https://github.com/moby/moby/issues/4032#issuecomment-192327844 ARG DEBIAN_FRONTEND=noninteractive From 6d2b5166b930e77e53c9bb608d56d372c0ecd0b3 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Tue, 27 Jun 2023 23:10:56 +0200 Subject: [PATCH 0894/1997] Update golang version in builder --- docker/packager/binary/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index dd21c8552d3..e824161a688 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -89,7 +89,7 @@ RUN arch=${TARGETARCH:-amd64} \ && dpkg -i /tmp/nfpm.deb \ && rm /tmp/nfpm.deb -ARG GO_VERSION=1.19.5 +ARG GO_VERSION=1.19.10 # We need go for clickhouse-diagnostics RUN arch=${TARGETARCH:-amd64} \ && curl -Lo /tmp/go.tgz "https://go.dev/dl/go${GO_VERSION}.linux-${arch}.tar.gz" \ From 1cd5ae36bd89215d7c1af99d2a8b9193d0f79081 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Tue, 27 Jun 2023 23:20:47 +0200 Subject: [PATCH 0895/1997] Get rid of deprecated apt-key --- docker/server/Dockerfile.ubuntu | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index e2347945cf6..1bb0cfdc700 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -22,7 +22,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list /tmp/* ARG REPO_CHANNEL="stable" -ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" +ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" ARG VERSION="23.5.3.24" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" @@ -77,8 +77,13 @@ RUN if ! clickhouse local -q "SELECT ''" > /dev/null; then \ dirmngr \ gnupg2 \ && mkdir -p /etc/apt/sources.list.d \ - && apt-key adv --keyserver keyserver.ubuntu.com --recv 8919F6BD2B48D754 \ - && echo ${REPOSITORY} > /etc/apt/sources.list.d/clickhouse.list \ + && GNUPGHOME=$(mktemp -d) \ + && GNUPGHOME="$GNUPGHOME" gpg --no-default-keyring \ + --keyring /usr/share/keyrings/clickhouse-keyring.gpg \ + --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 8919F6BD2B48D754 \ + && rm -r "$GNUPGHOME" \ + && chmod +r /usr/share/keyrings/clickhouse-keyring.gpg \ + && echo "${REPOSITORY}" > /etc/apt/sources.list.d/clickhouse.list \ && echo "installing from repository: ${REPOSITORY}" \ && apt-get update \ && for package in ${PACKAGES}; do \ From 0e72c6dcc934a52184909e3c6b32b084678c185a Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 28 Jun 2023 10:33:07 +0200 Subject: [PATCH 0896/1997] fix --- .../0_stateless/01111_create_drop_replicated_db_stress.sh | 2 +- tests/queries/0_stateless/01293_optimize_final_force.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh b/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh index 770a0780ca2..4d341e5b8a3 100755 --- a/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh +++ b/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh @@ -64,7 +64,7 @@ function alter_table() if [ -z "$table" ]; then continue; fi $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=0 -q \ "alter table $table update n = n + (select max(n) from merge(REGEXP('${CLICKHOUSE_DATABASE}.*'), '.*')) where 1 settings allow_nondeterministic_mutations=1" \ - 2>&1| grep -Fa "Exception: " | grep -Fv "Cannot enqueue query" | grep -Fv "ZooKeeper session expired" | grep -Fv UNKNOWN_DATABASE | grep -Fv UNKNOWN_TABLE | grep -Fv 
TABLE_IS_READ_ONLY | grep -Fv TABLE_IS_DROPPED + 2>&1| grep -Fa "Exception: " | grep -Fv "Cannot enqueue query" | grep -Fv "ZooKeeper session expired" | grep -Fv UNKNOWN_DATABASE | grep -Fv UNKNOWN_TABLE | grep -Fv TABLE_IS_READ_ONLY | grep -Fv TABLE_IS_DROPPED | grep -Fv "Error while executing table function merge" sleep 0.$RANDOM done } diff --git a/tests/queries/0_stateless/01293_optimize_final_force.sh b/tests/queries/0_stateless/01293_optimize_final_force.sh index eb3a2756899..9b9ed6272a1 100755 --- a/tests/queries/0_stateless/01293_optimize_final_force.sh +++ b/tests/queries/0_stateless/01293_optimize_final_force.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-debug, no-s3-storage +# Tags: no-fasttest, long, no-debug, no-s3-storage # This test is too slow with S3 storage and debug modes. CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) From 7e6d606b1c6b5277b1420a509cf841d1c1120ffc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 28 Jun 2023 10:41:39 +0200 Subject: [PATCH 0897/1997] Remove bad code and fix a bug --- src/Common/QueryProfiler.cpp | 9 +++ src/Common/StringSearcher.h | 89 +---------------------- src/Common/Volnitsky.h | 3 - src/Functions/HasTokenImpl.h | 45 ++++++++---- src/Functions/hasToken.cpp | 5 +- src/Functions/hasTokenCaseInsensitive.cpp | 5 +- 6 files changed, 48 insertions(+), 108 deletions(-) diff --git a/src/Common/QueryProfiler.cpp b/src/Common/QueryProfiler.cpp index 085c8fb8af4..313d4b77739 100644 --- a/src/Common/QueryProfiler.cpp +++ b/src/Common/QueryProfiler.cpp @@ -120,6 +120,15 @@ void Timer::createIfNecessary(UInt64 thread_id, int clock_type, int pause_signal throw Exception(ErrorCodes::CANNOT_CREATE_TIMER, "Failed to create thread timer. The function " "'timer_create' returned non-zero but didn't set errno. This is bug in your OS."); + /// For example, it cannot be created if the server is run under QEMU: + /// "Failed to create thread timer, errno: 11, strerror: Resource temporarily unavailable." + + /// You could accidentally run the server under QEMU without being aware, + /// if you use Docker image for a different architecture, + /// and you have the "binfmt-misc" kernel module, and "qemu-user" tools. + + /// Also, it cannot be created if the server has too many threads. + throwFromErrno("Failed to create thread timer", ErrorCodes::CANNOT_CREATE_TIMER); } timer_id.emplace(local_timer_id); diff --git a/src/Common/StringSearcher.h b/src/Common/StringSearcher.h index 3ed192d05f3..b3065354f65 100644 --- a/src/Common/StringSearcher.h +++ b/src/Common/StringSearcher.h @@ -793,88 +793,6 @@ public: } }; - -// Searches for needle surrounded by token-separators. -// Separators are anything inside ASCII (0-128) and not alphanum. -// Any value outside of basic ASCII (>=128) is considered a non-separator symbol, hence UTF-8 strings -// should work just fine. But any Unicode whitespace is not considered a token separtor. 
-template -class TokenSearcher : public StringSearcherBase -{ - StringSearcher searcher; - size_t needle_size; - -public: - - template - requires (sizeof(CharT) == 1) - static bool isValidNeedle(const CharT * needle_, size_t needle_size_) - { - return std::none_of(needle_, needle_ + needle_size_, isTokenSeparator); - } - - template - requires (sizeof(CharT) == 1) - TokenSearcher(const CharT * needle_, size_t needle_size_) - : searcher(needle_, needle_size_) - , needle_size(needle_size_) - { - /// The caller is responsible for calling isValidNeedle() - chassert(isValidNeedle(needle_, needle_size_)); - } - - template - requires (sizeof(CharT) == 1) - ALWAYS_INLINE bool compare(const CharT * haystack, const CharT * haystack_end, const CharT * pos) const - { - // use searcher only if pos is in the beginning of token and pos + searcher.needle_size is end of token. - if (isToken(haystack, haystack_end, pos)) - return searcher.compare(haystack, haystack_end, pos); - - return false; - } - - template - requires (sizeof(CharT) == 1) - const CharT * search(const CharT * haystack, const CharT * const haystack_end) const - { - // use searcher.search(), then verify that returned value is a token - // if it is not, skip it and re-run - - const auto * pos = haystack; - while (pos < haystack_end) - { - pos = searcher.search(pos, haystack_end); - if (pos == haystack_end || isToken(haystack, haystack_end, pos)) - return pos; - - // assuming that heendle does not contain any token separators. - pos += needle_size; - } - return haystack_end; - } - - template - requires (sizeof(CharT) == 1) - const CharT * search(const CharT * haystack, size_t haystack_size) const - { - return search(haystack, haystack + haystack_size); - } - - template - requires (sizeof(CharT) == 1) - ALWAYS_INLINE bool isToken(const CharT * haystack, const CharT * const haystack_end, const CharT* p) const - { - return (p == haystack || isTokenSeparator(*(p - 1))) - && (p + needle_size >= haystack_end || isTokenSeparator(*(p + needle_size))); - } - - ALWAYS_INLINE static bool isTokenSeparator(const uint8_t c) - { - return !(isAlphaNumericASCII(c) || !isASCII(c)); - } -}; - } using ASCIICaseSensitiveStringSearcher = impl::StringSearcher; @@ -882,9 +800,6 @@ using ASCIICaseInsensitiveStringSearcher = impl::StringSearcher; using UTF8CaseSensitiveStringSearcher = impl::StringSearcher; using UTF8CaseInsensitiveStringSearcher = impl::StringSearcher; -using ASCIICaseSensitiveTokenSearcher = impl::TokenSearcher; -using ASCIICaseInsensitiveTokenSearcher = impl::TokenSearcher; - /// Use only with short haystacks where cheap initialization is required. 
template struct StdLibASCIIStringSearcher @@ -906,11 +821,11 @@ struct StdLibASCIIStringSearcher if constexpr (CaseInsensitive) return std::search( haystack_start, haystack_end, needle_start, needle_end, - [](char c1, char c2) {return std::toupper(c1) == std::toupper(c2);}); + [](char c1, char c2) { return std::toupper(c1) == std::toupper(c2); }); else return std::search( haystack_start, haystack_end, needle_start, needle_end, - [](char c1, char c2) {return c1 == c2;}); + [](char c1, char c2) { return c1 == c2; }); } template diff --git a/src/Common/Volnitsky.h b/src/Common/Volnitsky.h index 8f9aa23a38a..3360c197984 100644 --- a/src/Common/Volnitsky.h +++ b/src/Common/Volnitsky.h @@ -730,9 +730,6 @@ using VolnitskyUTF8 = VolnitskyBase; /// ignores non-ASCII bytes using VolnitskyCaseInsensitiveUTF8 = VolnitskyBase; -using VolnitskyCaseSensitiveToken = VolnitskyBase; -using VolnitskyCaseInsensitiveToken = VolnitskyBase; - using MultiVolnitsky = MultiVolnitskyBase; using MultiVolnitskyUTF8 = MultiVolnitskyBase; using MultiVolnitskyCaseInsensitive = MultiVolnitskyBase; diff --git a/src/Functions/HasTokenImpl.h b/src/Functions/HasTokenImpl.h index 8cacdfff99d..fdec5fcb0b7 100644 --- a/src/Functions/HasTokenImpl.h +++ b/src/Functions/HasTokenImpl.h @@ -17,7 +17,7 @@ namespace ErrorCodes /** Token search the string, means that needle must be surrounded by some separator chars, like whitespace or puctuation. */ -template +template struct HasTokenImpl { using ResultType = UInt8; @@ -46,7 +46,7 @@ struct HasTokenImpl const UInt8 * const end = haystack_data.data() + haystack_data.size(); const UInt8 * pos = begin; - if (!ASCIICaseSensitiveTokenSearcher::isValidNeedle(pattern.data(), pattern.size())) + if (!std::none_of(pattern.begin(), pattern.end(), isTokenSeparator)) { if (res_null) { @@ -58,7 +58,8 @@ struct HasTokenImpl throw Exception(ErrorCodes::BAD_ARGUMENTS, "Needle must not contain whitespace or separator characters"); } - TokenSearcher searcher(pattern.data(), pattern.size(), end - pos); + size_t pattern_size = pattern.size(); + Searcher searcher(pattern.data(), pattern_size, end - pos); if (res_null) std::ranges::fill(res_null->getData(), false); @@ -67,21 +68,31 @@ struct HasTokenImpl /// We will search for the next occurrence in all rows at once. while (pos < end && end != (pos = searcher.search(pos, end - pos))) { - /// Let's determine which index it refers to. - while (begin + haystack_offsets[i] <= pos) + /// The found substring is a token + if ((pos == begin || isTokenSeparator(pos[-1])) + && (pos + pattern_size == end || isTokenSeparator(pos[pattern_size]))) { - res[i] = negate; + /// Let's determine which index it refers to. + while (begin + haystack_offsets[i] <= pos) + { + res[i] = negate; + ++i; + } + + /// We check that the entry does not pass through the boundaries of strings. + if (pos + pattern.size() < begin + haystack_offsets[i]) + res[i] = !negate; + else + res[i] = negate; + + pos = begin + haystack_offsets[i]; ++i; } - - /// We check that the entry does not pass through the boundaries of strings. - if (pos + pattern.size() < begin + haystack_offsets[i]) - res[i] = !negate; else - res[i] = negate; - - pos = begin + haystack_offsets[i]; - ++i; + { + /// Not a token. Jump over it. + pos += pattern_size; + } } /// Tail, in which there can be no substring. 
@@ -113,6 +124,12 @@ struct HasTokenImpl { throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Function '{}' doesn't support FixedString haystack argument", name); } + +private: + static bool isTokenSeparator(UInt8 c) + { + return isASCII(c) && !isAlphaNumericASCII(c); + } }; } diff --git a/src/Functions/hasToken.cpp b/src/Functions/hasToken.cpp index b90750ea233..fa41abf2641 100644 --- a/src/Functions/hasToken.cpp +++ b/src/Functions/hasToken.cpp @@ -6,6 +6,7 @@ namespace DB { + struct NameHasToken { static constexpr auto name = "hasToken"; @@ -17,9 +18,9 @@ struct NameHasTokenOrNull }; using FunctionHasToken - = FunctionsStringSearch>; + = FunctionsStringSearch>; using FunctionHasTokenOrNull - = FunctionsStringSearch, ExecutionErrorPolicy::Null>; + = FunctionsStringSearch, ExecutionErrorPolicy::Null>; REGISTER_FUNCTION(HasToken) { diff --git a/src/Functions/hasTokenCaseInsensitive.cpp b/src/Functions/hasTokenCaseInsensitive.cpp index d7381e336b5..32675b9384d 100644 --- a/src/Functions/hasTokenCaseInsensitive.cpp +++ b/src/Functions/hasTokenCaseInsensitive.cpp @@ -6,6 +6,7 @@ namespace DB { + struct NameHasTokenCaseInsensitive { static constexpr auto name = "hasTokenCaseInsensitive"; @@ -17,9 +18,9 @@ struct NameHasTokenCaseInsensitiveOrNull }; using FunctionHasTokenCaseInsensitive - = FunctionsStringSearch>; + = FunctionsStringSearch>; using FunctionHasTokenCaseInsensitiveOrNull - = FunctionsStringSearch, ExecutionErrorPolicy::Null>; + = FunctionsStringSearch, ExecutionErrorPolicy::Null>; REGISTER_FUNCTION(HasTokenCaseInsensitive) { From 67a7bdbb3a1c8fa2341e1627261d311cba1308a7 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Wed, 28 Jun 2023 10:45:07 +0200 Subject: [PATCH 0898/1997] Rollback note about 22.04 in #50958 --- docker/server/README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docker/server/README.md b/docker/server/README.md index 18dce492123..67646a262f5 100644 --- a/docker/server/README.md +++ b/docker/server/README.md @@ -20,7 +20,6 @@ For more information and documentation see https://clickhouse.com/. - The amd64 image requires support for [SSE3 instructions](https://en.wikipedia.org/wiki/SSE3). Virtually all x86 CPUs after 2005 support SSE3. - The arm64 image requires support for the [ARMv8.2-A architecture](https://en.wikipedia.org/wiki/AArch64#ARMv8.2-A). Most ARM CPUs after 2017 support ARMv8.2-A. A notable exception is Raspberry Pi 4 from 2019 whose CPU only supports ARMv8.0-A. -- Since the Clickhouse 23.3 Ubuntu image started using `ubuntu:22.04` as its base image, it requires docker version >= `20.10.10`, or use `docker run -- privileged` instead. Alternatively, try the Clickhouse Alpine image. 
## How to use this image From 1596b09dc2888690acafa397936d0d7051e95fba Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 28 Jun 2023 10:51:15 +0200 Subject: [PATCH 0899/1997] add missing finalize calls --- programs/format/Format.cpp | 5 +++-- src/Client/ClientBase.cpp | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/programs/format/Format.cpp b/programs/format/Format.cpp index 392ecefac0f..43c66a32302 100644 --- a/programs/format/Format.cpp +++ b/programs/format/Format.cpp @@ -151,6 +151,7 @@ int mainEntryClickHouseFormat(int argc, char ** argv) WriteBufferFromFileDescriptor out(STDOUT_FILENO); obfuscateQueries(query, out, obfuscated_words_map, used_nouns, hash_func, is_known_identifier); + out.finalize(); } else { @@ -175,7 +176,7 @@ int mainEntryClickHouseFormat(int argc, char ** argv) { WriteBufferFromOStream res_buf(std::cout, 4096); formatAST(*res, res_buf, hilite, oneline); - res_buf.next(); + res_buf.finalize(); if (multiple) std::cout << "\n;\n"; std::cout << std::endl; @@ -199,7 +200,7 @@ int mainEntryClickHouseFormat(int argc, char ** argv) res_cout.write(*s_pos++); } - res_cout.next(); + res_cout.finalize(); if (multiple) std::cout << " \\\n;\n"; std::cout << std::endl; diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 34b3b1e228a..cf1c2ed8779 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -362,7 +362,7 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu std::cout << std::endl; WriteBufferFromOStream res_buf(std::cout, 4096); formatAST(*res, res_buf); - res_buf.next(); + res_buf.finalize(); std::cout << std::endl << std::endl; } From a2c9e26b231d15a96818440c001897daa6e4004d Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Wed, 28 Jun 2023 09:31:44 +0000 Subject: [PATCH 0900/1997] Add peak memory usage (prototype) --- src/Client/ClientBase.cpp | 2 ++ src/Common/MemoryTracker.h | 1 + src/Common/ProgressIndication.cpp | 10 ++++++---- src/Common/ProgressIndication.h | 2 ++ src/Interpreters/ProfileEventsExt.cpp | 12 ++++++++++-- src/Interpreters/ProfileEventsExt.h | 1 + 6 files changed, 22 insertions(+), 6 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 34b3b1e228a..ecc873da068 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1189,6 +1189,8 @@ void ClientBase::onProfileEvents(Block & block) thread_times[host_name].system_ms = value; else if (event_name == MemoryTracker::USAGE_EVENT_NAME) thread_times[host_name].memory_usage = value; + else if (event_name == MemoryTracker::PEAK_USAGE_EVENT_NAME) + thread_times[host_name].peak_memory_usage = value; } progress_indication.updateThreadEventData(thread_times); diff --git a/src/Common/MemoryTracker.h b/src/Common/MemoryTracker.h index 4e29d40c953..3ea1ea8702c 100644 --- a/src/Common/MemoryTracker.h +++ b/src/Common/MemoryTracker.h @@ -95,6 +95,7 @@ private: public: static constexpr auto USAGE_EVENT_NAME = "MemoryTrackerUsage"; + static constexpr auto PEAK_USAGE_EVENT_NAME = "MemoryTrackerPeakUsage"; explicit MemoryTracker(VariableContext level_ = VariableContext::Thread); explicit MemoryTracker(MemoryTracker * parent_, VariableContext level_ = VariableContext::Thread); diff --git a/src/Common/ProgressIndication.cpp b/src/Common/ProgressIndication.cpp index 61b60060430..d9564f0946e 100644 --- a/src/Common/ProgressIndication.cpp +++ b/src/Common/ProgressIndication.cpp @@ -83,7 +83,7 @@ ProgressIndication::MemoryUsage ProgressIndication::getMemoryUsage() const 
[](MemoryUsage const & acc, auto const & host_data) { UInt64 host_usage = host_data.second.memory_usage; - return MemoryUsage{.total = acc.total + host_usage, .max = std::max(acc.max, host_usage)}; + return MemoryUsage{.total = acc.total + host_usage, .max = std::max(acc.max, host_usage), .peak = std::max(acc.peak, host_data.second.peak_memory_usage)}; }); } @@ -99,8 +99,8 @@ void ProgressIndication::writeFinalProgress() if (elapsed_ns) std::cout << " (" << formatReadableQuantity(progress.read_rows * 1000000000.0 / elapsed_ns) << " rows/s., " << formatReadableSizeWithDecimalSuffix(progress.read_bytes * 1000000000.0 / elapsed_ns) << "/s.)"; - else - std::cout << ". "; + auto peak_memory_usage = getMemoryUsage().peak; + std::cout << ".\nPeak memory usage (for query) " << formatReadableSizeWithDecimalSuffix(peak_memory_usage) << "."; } void ProgressIndication::writeProgress(WriteBufferFromFileDescriptor & message) @@ -152,7 +152,7 @@ void ProgressIndication::writeProgress(WriteBufferFromFileDescriptor & message) std::string profiling_msg; double cpu_usage = getCPUUsage(); - auto [memory_usage, max_host_usage] = getMemoryUsage(); + auto [memory_usage, max_host_usage, peak_usage] = getMemoryUsage(); if (cpu_usage > 0 || memory_usage > 0) { @@ -166,6 +166,8 @@ void ProgressIndication::writeProgress(WriteBufferFromFileDescriptor & message) if (memory_usage > 0) profiling_msg_builder << ", " << formatReadableSizeWithDecimalSuffix(memory_usage) << " RAM"; + // profiling_msg_builder << ", " << formatReadableSizeWithDecimalSuffix(memory_usage) << ", " + // << formatReadableSizeWithDecimalSuffix(peak_usage) << " Peak RAM"; if (max_host_usage < memory_usage) profiling_msg_builder << ", " << formatReadableSizeWithDecimalSuffix(max_host_usage) << " max/host"; diff --git a/src/Common/ProgressIndication.h b/src/Common/ProgressIndication.h index af5d69c0255..e3c73e8e7e5 100644 --- a/src/Common/ProgressIndication.h +++ b/src/Common/ProgressIndication.h @@ -22,6 +22,7 @@ struct ThreadEventData UInt64 user_ms = 0; UInt64 system_ms = 0; UInt64 memory_usage = 0; + UInt64 peak_memory_usage = 0; }; using HostToTimesMap = std::unordered_map; @@ -64,6 +65,7 @@ private: { UInt64 total = 0; UInt64 max = 0; + UInt64 peak = 0; }; MemoryUsage getMemoryUsage() const; diff --git a/src/Interpreters/ProfileEventsExt.cpp b/src/Interpreters/ProfileEventsExt.cpp index bf8d060bd3c..bd421ae8e33 100644 --- a/src/Interpreters/ProfileEventsExt.cpp +++ b/src/Interpreters/ProfileEventsExt.cpp @@ -86,9 +86,16 @@ static void dumpMemoryTracker(ProfileEventsSnapshot const & snapshot, DB::Mutabl columns[i++]->insert(static_cast(snapshot.current_time)); columns[i++]->insert(static_cast(snapshot.thread_id)); columns[i++]->insert(Type::GAUGE); - columns[i++]->insertData(MemoryTracker::USAGE_EVENT_NAME, strlen(MemoryTracker::USAGE_EVENT_NAME)); - columns[i++]->insert(snapshot.memory_usage); + columns[i]->insert(snapshot.memory_usage); + + i = 0; + columns[i++]->insertData(host_name.data(), host_name.size()); + columns[i++]->insert(static_cast(snapshot.current_time)); + columns[i++]->insert(static_cast(snapshot.thread_id)); + columns[i++]->insert(Type::GAUGE); + columns[i++]->insertData(MemoryTracker::PEAK_USAGE_EVENT_NAME, strlen(MemoryTracker::PEAK_USAGE_EVENT_NAME)); + columns[i]->insert(snapshot.peak_memory_usage); } void getProfileEvents( @@ -121,6 +128,7 @@ void getProfileEvents( group_snapshot.thread_id = 0; group_snapshot.current_time = time(nullptr); group_snapshot.memory_usage = thread_group->memory_tracker.get(); + 
group_snapshot.peak_memory_usage = thread_group->memory_tracker.getPeak(); auto group_counters = thread_group->performance_counters.getPartiallyAtomicSnapshot(); auto prev_group_snapshot = last_sent_snapshots.find(0); group_snapshot.counters = diff --git a/src/Interpreters/ProfileEventsExt.h b/src/Interpreters/ProfileEventsExt.h index 7d9fc512d15..cc338530510 100644 --- a/src/Interpreters/ProfileEventsExt.h +++ b/src/Interpreters/ProfileEventsExt.h @@ -16,6 +16,7 @@ struct ProfileEventsSnapshot UInt64 thread_id; CountersIncrement counters; Int64 memory_usage; + Int64 peak_memory_usage; time_t current_time; }; From e3189e29f7d087cba591c2dc3dc9ae0cb6ce9df0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 28 Jun 2023 11:56:11 +0200 Subject: [PATCH 0901/1997] Allow running in a stripped chroot --- src/Client/ClientBase.cpp | 4 +++- src/Client/ConnectionParameters.cpp | 10 +++++++++- src/Common/checkStackSize.cpp | 16 ++++++++++++++-- 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 34b3b1e228a..36714ac762d 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -2297,7 +2297,9 @@ void ClientBase::runInteractive() catch (const ErrnoException & e) { if (e.getErrno() != EEXIST) - throw; + { + std::cerr << getCurrentExceptionMessage(false) << '\n'; + } } } diff --git a/src/Client/ConnectionParameters.cpp b/src/Client/ConnectionParameters.cpp index c47d217d432..f6630a06939 100644 --- a/src/Client/ConnectionParameters.cpp +++ b/src/Client/ConnectionParameters.cpp @@ -60,7 +60,15 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati quota_key = config.getString("quota_key", ""); /// By default compression is disabled if address looks like localhost. - compression = config.getBool("compression", !isLocalAddress(DNSResolver::instance().resolveHost(host))) + + /// Avoid DNS request if the host is "localhost". + /// If ClickHouse is run under QEMU-user with a binary for a different architecture, + /// and there are all listed startup dependency shared libraries available, but not the runtime dependencies of glibc, + /// the glibc cannot open "plugins" for DNS resolving, and the DNS resolution does not work. + /// At the same time, I want clickhouse-local to always work, regardless. + /// TODO: get rid of glibc, or replace getaddrinfo to c-ares. + + compression = config.getBool("compression", host != "localhost" && !isLocalAddress(DNSResolver::instance().resolveHost(host))) ? Protocol::Compression::Enable : Protocol::Compression::Disable; timeouts = ConnectionTimeouts( diff --git a/src/Common/checkStackSize.cpp b/src/Common/checkStackSize.cpp index 67d163938b4..8847d37df3a 100644 --- a/src/Common/checkStackSize.cpp +++ b/src/Common/checkStackSize.cpp @@ -27,7 +27,7 @@ static thread_local size_t max_stack_size = 0; * @param out_address - if not nullptr, here the address of the stack will be written. * @return stack size */ -size_t getStackSize(void ** out_address) +static size_t getStackSize(void ** out_address) { using namespace DB; @@ -54,7 +54,15 @@ size_t getStackSize(void ** out_address) throwFromErrno("Cannot pthread_attr_get_np", ErrorCodes::CANNOT_PTHREAD_ATTR); # else if (0 != pthread_getattr_np(pthread_self(), &attr)) - throwFromErrno("Cannot pthread_getattr_np", ErrorCodes::CANNOT_PTHREAD_ATTR); + { + if (errno == ENOENT) + { + /// Most likely procfs is not mounted. 
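+            /// Returning 0 makes the caller, checkStackSize(), skip the check
+            /// entirely: it treats max_stack_size == 0 as "check impossible" (see below).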
+ return 0; + } + else + throwFromErrno("Cannot pthread_getattr_np", ErrorCodes::CANNOT_PTHREAD_ATTR); + } # endif SCOPE_EXIT({ pthread_attr_destroy(&attr); }); @@ -83,6 +91,10 @@ __attribute__((__weak__)) void checkStackSize() if (!stack_address) max_stack_size = getStackSize(&stack_address); + /// The check is impossible. + if (!max_stack_size) + return; + const void * frame_address = __builtin_frame_address(0); uintptr_t int_frame_address = reinterpret_cast(frame_address); uintptr_t int_stack_address = reinterpret_cast(stack_address); From 0c1f24db77354c7982eaac60618da3b15589e4a4 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Wed, 28 Jun 2023 12:13:16 +0200 Subject: [PATCH 0902/1997] Add a comment (#51517) * Add a comment * Update DatabaseCatalog.cpp --- src/Interpreters/DatabaseCatalog.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 40ef2bd4775..4cb2f6e3b3d 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -217,6 +217,8 @@ void DatabaseCatalog::shutdownImpl() /// We still hold "databases" (instead of std::move) for Buffer tables to flush data correctly. /// Delay shutdown of temporary and system databases. They will be shutdown last. + /// Because some databases might use them until their shutdown is called, but calling shutdown + /// on temporary database means clearing its set of tables, which will lead to unnecessary errors like "table not found". std::vector databases_with_delayed_shutdown; for (auto & database : current_databases) { From 0320ab4a7056ac099145648504505cb1c7225858 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 28 Jun 2023 12:16:16 +0200 Subject: [PATCH 0903/1997] Try making Keeper in `DatabaseReplicated` tests more stable (#51473) * Try making Keeper in DatabaseReplicated less flaky * lower log level to debug * Revert back to information --- tests/config/config.d/database_replicated.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/config/config.d/database_replicated.xml b/tests/config/config.d/database_replicated.xml index 9a405f85908..2504a7ca526 100644 --- a/tests/config/config.d/database_replicated.xml +++ b/tests/config/config.d/database_replicated.xml @@ -40,7 +40,7 @@ 10000 30000 1000 - 4000 + 2000 5000 information false From 451694d8b6ed7630842020ca132a9d7548352cad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Wed, 28 Jun 2023 10:24:54 +0000 Subject: [PATCH 0904/1997] Make `test_ssl_cert_authentication` similar to `test_tlvs1_3` --- .../test_ssl_cert_authentication/test.py | 92 +++++++++++++------ 1 file changed, 62 insertions(+), 30 deletions(-) diff --git a/tests/integration/test_ssl_cert_authentication/test.py b/tests/integration/test_ssl_cert_authentication/test.py index b05a6acc16b..91faf8b0ce3 100644 --- a/tests/integration/test_ssl_cert_authentication/test.py +++ b/tests/integration/test_ssl_cert_authentication/test.py @@ -2,10 +2,10 @@ import pytest from helpers.client import Client from helpers.cluster import ClickHouseCluster from helpers.ssl_context import WrapSSLContextWithSNI +import urllib.request, urllib.parse import ssl import os.path from os import remove -import urllib3 # The test cluster is configured with certificate for that host name, see 'server-ext.cnf'. 
@@ -14,6 +14,7 @@ SSL_HOST = "integration-tests.clickhouse.com" HTTPS_PORT = 8443 # It's important for the node to work at this IP because 'server-cert.pem' requires that (see server-ext.cnf). SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +MAX_RETRY = 5 cluster = ClickHouseCluster(__file__) instance = cluster.add_instance( @@ -164,17 +165,19 @@ def get_ssl_context(cert_name): def execute_query_https( query, user, enable_ssl_auth=True, cert_name=None, password=None ): - url = f"https://{instance.ip_address}:{HTTPS_PORT}/?query={query}" - headers = {"X-ClickHouse-User": user} + url = ( + f"https://{instance.ip_address}:{HTTPS_PORT}/?query={urllib.parse.quote(query)}" + ) + request = urllib.request.Request(url) + request.add_header("X-ClickHouse-User", user) if enable_ssl_auth: - headers["X-ClickHouse-SSL-Certificate-Auth"] = "on" + request.add_header("X-ClickHouse-SSL-Certificate-Auth", "on") if password: - headers["X-ClickHouse-Key"] = password - http_client = urllib3.PoolManager(ssl_context=get_ssl_context(cert_name)) - response = http_client.request("GET", url, headers=headers) - if response.status != 200: - raise Exception(response.status) - return response.data.decode("utf-8") + request.add_header("X-ClickHouse-Key", password) + response = urllib.request.urlopen( + request, context=get_ssl_context(cert_name) + ).read() + return response.decode("utf-8") def test_https(): @@ -198,10 +201,18 @@ def test_https_wrong_cert(): execute_query_https("SELECT currentUser()", user="john", cert_name="client2") assert "403" in str(err.value) + count = 0 # Wrong certificate: self-signed certificate. - with pytest.raises(Exception) as err: - execute_query_https("SELECT currentUser()", user="john", cert_name="wrong") - assert "unknown ca" in str(err.value) + while count <= MAX_RETRY: + with pytest.raises(Exception) as err: + execute_query_https("SELECT currentUser()", user="john", cert_name="wrong") + err_str = str(err.value) + if count < MAX_RETRY and "Broken pipe" in err_str: + count = count + 1 + logging.warning(f"Failed attempt with wrong cert, err: {err_str}") + continue + assert "unknown ca" in err_str + break # No certificate. with pytest.raises(Exception) as err: @@ -291,24 +302,45 @@ def test_https_non_ssl_auth(): == "jane\n" ) + count = 0 # However if we send a certificate it must not be wrong. 
- with pytest.raises(Exception) as err: - execute_query_https( - "SELECT currentUser()", - user="peter", - enable_ssl_auth=False, - cert_name="wrong", - ) - assert "unknown ca" in str(err.value) - with pytest.raises(Exception) as err: - execute_query_https( - "SELECT currentUser()", - user="jane", - enable_ssl_auth=False, - password="qwe123", - cert_name="wrong", - ) - assert "unknown ca" in str(err.value) + while count <= MAX_RETRY: + with pytest.raises(Exception) as err: + execute_query_https( + "SELECT currentUser()", + user="peter", + enable_ssl_auth=False, + cert_name="wrong", + ) + err_str = str(err.value) + if count < MAX_RETRY and "Broken pipe" in err_str: + count = count + 1 + logging.warning( + f"Failed attempt with wrong cert, user: peter, err: {err_str}" + ) + continue + assert "unknown ca" in err_str + break + + count = 0 + while count <= MAX_RETRY: + with pytest.raises(Exception) as err: + execute_query_https( + "SELECT currentUser()", + user="jane", + enable_ssl_auth=False, + password="qwe123", + cert_name="wrong", + ) + err_str = str(err.value) + if count < MAX_RETRY and "Broken pipe" in err_str: + count = count + 1 + logging.warning( + f"Failed attempt with wrong cert, user: jane, err: {err_str}" + ) + continue + assert "unknown ca" in err_str + break def test_create_user(): From b8b0fd3abfee08db6b31b393fb828915940ae13d Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Wed, 28 Jun 2023 12:30:15 +0200 Subject: [PATCH 0905/1997] Slightly better code around packets for parallel replicas (#51451) --- src/Client/Connection.cpp | 8 +--- src/Client/IServerConnection.h | 10 +++-- src/QueryPipeline/RemoteQueryExecutor.cpp | 6 ++- src/Storages/MergeTree/MergeTreeReadPool.cpp | 21 ++++++---- src/Storages/MergeTree/MergeTreeReadPool.h | 18 ++++---- .../ParallelReplicasReadingCoordinator.cpp | 2 - src/Storages/MergeTree/RequestResponse.cpp | 26 +++++++++++- src/Storages/MergeTree/RequestResponse.h | 42 +++++++++++++++++-- 8 files changed, 98 insertions(+), 35 deletions(-) diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index be226bfd9dd..cd102f46ffe 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -1175,16 +1175,12 @@ ProfileInfo Connection::receiveProfileInfo() const ParallelReadRequest Connection::receiveParallelReadRequest() const { - ParallelReadRequest request; - request.deserialize(*in); - return request; + return ParallelReadRequest::deserialize(*in); } InitialAllRangesAnnouncement Connection::receiveInitialParallelReadAnnounecement() const { - InitialAllRangesAnnouncement announcement; - announcement.deserialize(*in); - return announcement; + return InitialAllRangesAnnouncement::deserialize(*in); } diff --git a/src/Client/IServerConnection.h b/src/Client/IServerConnection.h index 52382ff9d45..a0c029c79fb 100644 --- a/src/Client/IServerConnection.h +++ b/src/Client/IServerConnection.h @@ -16,6 +16,10 @@ #include +#include +#include +#include +#include namespace DB { @@ -34,9 +38,9 @@ struct Packet ProfileInfo profile_info; std::vector part_uuids; - InitialAllRangesAnnouncement announcement; - ParallelReadRequest request; - ParallelReadResponse response; + /// The part of parallel replicas protocol + std::optional announcement; + std::optional request; std::string server_timezone; diff --git a/src/QueryPipeline/RemoteQueryExecutor.cpp b/src/QueryPipeline/RemoteQueryExecutor.cpp index 233dfe13339..cd6f65b7b43 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.cpp +++ b/src/QueryPipeline/RemoteQueryExecutor.cpp @@ -434,11 +434,13 @@ 
RemoteQueryExecutor::ReadResult RemoteQueryExecutor::processPacket(Packet packet switch (packet.type) { case Protocol::Server::MergeTreeReadTaskRequest: - processMergeTreeReadTaskRequest(packet.request); + chassert(packet.request.has_value()); + processMergeTreeReadTaskRequest(packet.request.value()); return ReadResult(ReadResult::Type::ParallelReplicasToken); case Protocol::Server::MergeTreeAllRangesAnnounecement: - processMergeTreeInitialReadAnnounecement(packet.announcement); + chassert(packet.announcement.has_value()); + processMergeTreeInitialReadAnnounecement(packet.announcement.value()); return ReadResult(ReadResult::Type::ParallelReplicasToken); case Protocol::Server::ReadTaskRequest: diff --git a/src/Storages/MergeTree/MergeTreeReadPool.cpp b/src/Storages/MergeTree/MergeTreeReadPool.cpp index ba8c2c6385f..2ab90189f9d 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -6,6 +6,7 @@ #include #include #include +#include namespace ProfileEvents @@ -433,8 +434,12 @@ MergeTreeReadTaskPtr MergeTreeReadPoolParallelReplicas::getTask(size_t thread) if (buffered_ranges.empty()) { - auto result = extension.callback(ParallelReadRequest{ - .replica_num = extension.number_of_current_replica, .min_number_of_marks = min_marks_for_concurrent_read * threads}); + auto result = extension.callback(ParallelReadRequest( + CoordinationMode::Default, + extension.number_of_current_replica, + min_marks_for_concurrent_read * threads, + /// For Default coordination mode we don't need to pass part names. + RangesInDataPartsDescription{})); if (!result || result->finish) { @@ -529,12 +534,12 @@ MarkRanges MergeTreeInOrderReadPoolParallelReplicas::getNewTask(RangesInDataPart if (no_more_tasks) return {}; - auto response = extension.callback(ParallelReadRequest{ - .mode = mode, - .replica_num = extension.number_of_current_replica, - .min_number_of_marks = min_marks_for_concurrent_read * request.size(), - .description = request, - }); + auto response = extension.callback(ParallelReadRequest( + mode, + extension.number_of_current_replica, + min_marks_for_concurrent_read * request.size(), + request + )); if (!response || response->description.empty() || response->finish) { diff --git a/src/Storages/MergeTree/MergeTreeReadPool.h b/src/Storages/MergeTree/MergeTreeReadPool.h index 21273904e00..68d5438cb3d 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.h +++ b/src/Storages/MergeTree/MergeTreeReadPool.h @@ -193,10 +193,11 @@ public: predict_block_size_bytes, column_names, virtual_column_names, prewhere_info, actions_settings, reader_settings, per_part_params); - extension.all_callback({ - .description = parts_ranges.getDescriptions(), - .replica_num = extension.number_of_current_replica - }); + extension.all_callback(InitialAllRangesAnnouncement( + CoordinationMode::Default, + parts_ranges.getDescriptions(), + extension.number_of_current_replica + )); } ~MergeTreeReadPoolParallelReplicas() override; @@ -253,10 +254,11 @@ public: for (const auto & part : parts_ranges) buffered_tasks.push_back({part.data_part->info, MarkRanges{}}); - extension.all_callback({ - .description = parts_ranges.getDescriptions(), - .replica_num = extension.number_of_current_replica - }); + extension.all_callback(InitialAllRangesAnnouncement( + mode, + parts_ranges.getDescriptions(), + extension.number_of_current_replica + )); } MarkRanges getNewTask(RangesInDataPartDescription description); diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp 
b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp index 2814d13cff0..57cd91cc995 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp @@ -102,7 +102,6 @@ public: explicit DefaultCoordinator(size_t replicas_count_) : ParallelReplicasReadingCoordinator::ImplInterface(replicas_count_) - , announcements(replicas_count_) , reading_state(replicas_count_) { } @@ -119,7 +118,6 @@ public: PartitionToBlockRanges partitions; size_t sent_initial_requests{0}; - std::vector announcements; Parts all_parts_to_read; /// Contains only parts which we haven't started to read from diff --git a/src/Storages/MergeTree/RequestResponse.cpp b/src/Storages/MergeTree/RequestResponse.cpp index 05930d5a4c4..2ce0e20dcd2 100644 --- a/src/Storages/MergeTree/RequestResponse.cpp +++ b/src/Storages/MergeTree/RequestResponse.cpp @@ -51,7 +51,7 @@ String ParallelReadRequest::describe() const return result; } -void ParallelReadRequest::deserialize(ReadBuffer & in) +ParallelReadRequest ParallelReadRequest::deserialize(ReadBuffer & in) { UInt64 version; readIntBinary(version, in); @@ -60,12 +60,24 @@ void ParallelReadRequest::deserialize(ReadBuffer & in) "from replicas differ. Got: {}, supported version: {}", version, DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION); + CoordinationMode mode; + size_t replica_num; + size_t min_number_of_marks; + RangesInDataPartsDescription description; + uint8_t mode_candidate; readIntBinary(mode_candidate, in); mode = validateAndGet(mode_candidate); readIntBinary(replica_num, in); readIntBinary(min_number_of_marks, in); description.deserialize(in); + + return ParallelReadRequest( + mode, + replica_num, + min_number_of_marks, + std::move(description) + ); } void ParallelReadRequest::merge(ParallelReadRequest & other) @@ -125,7 +137,7 @@ String InitialAllRangesAnnouncement::describe() return result; } -void InitialAllRangesAnnouncement::deserialize(ReadBuffer & in) +InitialAllRangesAnnouncement InitialAllRangesAnnouncement::deserialize(ReadBuffer & in) { UInt64 version; readIntBinary(version, in); @@ -134,11 +146,21 @@ void InitialAllRangesAnnouncement::deserialize(ReadBuffer & in) "from replicas differ. Got: {}, supported version: {}", version, DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION); + CoordinationMode mode; + RangesInDataPartsDescription description; + size_t replica_num; + uint8_t mode_candidate; readIntBinary(mode_candidate, in); mode = validateAndGet(mode_candidate); description.deserialize(in); readIntBinary(replica_num, in); + + return InitialAllRangesAnnouncement { + mode, + description, + replica_num + }; } } diff --git a/src/Storages/MergeTree/RequestResponse.h b/src/Storages/MergeTree/RequestResponse.h index 7e5563c0553..3a5bfde6c20 100644 --- a/src/Storages/MergeTree/RequestResponse.h +++ b/src/Storages/MergeTree/RequestResponse.h @@ -40,21 +40,40 @@ struct PartBlockRange } }; +/// ParallelReadRequest is used by remote replicas during parallel read +/// to signal an initiator that they need more marks to read. struct ParallelReadRequest { + /// No default constructor, you must initialize all fields at once. 
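+    /// (A default-constructed request used to be filled in by a member deserialize();
+    /// deserialize() is now a static factory instead, see Connection::receiveParallelReadRequest.)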
+ + ParallelReadRequest( + CoordinationMode mode_, + size_t replica_num_, + size_t min_number_of_marks_, + RangesInDataPartsDescription description_) + : mode(mode_) + , replica_num(replica_num_) + , min_number_of_marks(min_number_of_marks_) + , description(std::move(description_)) + {} + CoordinationMode mode; size_t replica_num; size_t min_number_of_marks; - - /// Extension for ordered mode + /// Extension for Ordered (InOrder or ReverseOrder) mode + /// Contains only data part names without mark ranges. RangesInDataPartsDescription description; void serialize(WriteBuffer & out) const; String describe() const; - void deserialize(ReadBuffer & in); + static ParallelReadRequest deserialize(ReadBuffer & in); void merge(ParallelReadRequest & other); }; +/// ParallelReadResponse is used by an initiator to tell +/// remote replicas about what to read during parallel reading. +/// Additionally contains information whether there are more available +/// marks to read (whether it is the last packet or not). struct ParallelReadResponse { bool finish{false}; @@ -66,15 +85,30 @@ struct ParallelReadResponse }; +/// The set of parts (their names) along with ranges to read which is sent back +/// to the initiator by remote replicas during parallel reading. +/// Additionally contains an identifier (replica_num) plus +/// the reading algorithm chosen (Default, InOrder or ReverseOrder). struct InitialAllRangesAnnouncement { + /// No default constructor, you must initialize all fields at once. + + InitialAllRangesAnnouncement( + CoordinationMode mode_, + RangesInDataPartsDescription description_, + size_t replica_num_) + : mode(mode_) + , description(description_) + , replica_num(replica_num_) + {} + CoordinationMode mode; RangesInDataPartsDescription description; size_t replica_num; void serialize(WriteBuffer & out) const; String describe(); - void deserialize(ReadBuffer & in); + static InitialAllRangesAnnouncement deserialize(ReadBuffer & in); }; From a029105fd51044d692c0b2895dd9d5ce09b28036 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 28 Jun 2023 10:35:19 +0000 Subject: [PATCH 0906/1997] Fix duplicate storage set logical error. 
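
The visitor could register a Set-engine table's set a second time when the same
set key was reached twice while analyzing one query (the added test appears to
exercise this with a PREWHERE plus an IN over a Set table), which could trip a
logical error. The diff guards addFromStorage() with a findStorage() lookup.
A self-contained sketch of that idiom (the types below are stand-ins, not the
real ClickHouse classes; a std::string key stands in for the real set key, and
makeSet() is a reduction of ActionsMatcher::makeSet):

    #include <map>
    #include <memory>
    #include <stdexcept>
    #include <string>

    struct FutureSet { };                                   // stand-in
    using FutureSetPtr = std::shared_ptr<FutureSet>;

    struct PreparedSets                                     // stand-in
    {
        std::map<std::string, FutureSetPtr> sets_from_storage;

        FutureSetPtr findStorage(const std::string & key) const
        {
            auto it = sets_from_storage.find(key);
            return it == sets_from_storage.end() ? nullptr : it->second;
        }

        FutureSetPtr addFromStorage(const std::string & key)
        {
            auto [it, inserted] = sets_from_storage.emplace(key, std::make_shared<FutureSet>());
            if (!inserted)
                /// Models the pre-fix failure: the same storage-backed set registered twice.
                throw std::logic_error("duplicate storage set: " + key);
            return it->second;
        }
    };

    FutureSetPtr makeSet(PreparedSets & prepared_sets, const std::string & set_key)
    {
        if (auto set = prepared_sets.findStorage(set_key))  /// the guard this commit adds
            return set;
        return prepared_sets.addFromStorage(set_key);
    }
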
--- src/Interpreters/ActionsVisitor.cpp | 3 +++ .../02809_storage_set_analysis_bug.reference | 0 .../02809_storage_set_analysis_bug.sql | 15 +++++++++++++++ 3 files changed, 18 insertions(+) create mode 100644 tests/queries/0_stateless/02809_storage_set_analysis_bug.reference create mode 100644 tests/queries/0_stateless/02809_storage_set_analysis_bug.sql diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index cfbe53b5e4d..10502b7e66d 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -1435,6 +1435,9 @@ FutureSetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool if (table) { + if (auto set = data.prepared_sets->findStorage(set_key)) + return set; + if (StorageSet * storage_set = dynamic_cast(table.get())) return data.prepared_sets->addFromStorage(set_key, storage_set->getSet()); } diff --git a/tests/queries/0_stateless/02809_storage_set_analysis_bug.reference b/tests/queries/0_stateless/02809_storage_set_analysis_bug.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02809_storage_set_analysis_bug.sql b/tests/queries/0_stateless/02809_storage_set_analysis_bug.sql new file mode 100644 index 00000000000..212170571f7 --- /dev/null +++ b/tests/queries/0_stateless/02809_storage_set_analysis_bug.sql @@ -0,0 +1,15 @@ +DROP TABLE IF EXISTS test_set; +DROP TABLE IF EXISTS null_in__fuzz_6; + +set allow_suspicious_low_cardinality_types = 1; + + +CREATE TABLE null_in__fuzz_6 (`dt` LowCardinality(UInt16), `idx` Int32, `i` Nullable(Int256), `s` Int32) ENGINE = MergeTree PARTITION BY dt ORDER BY idx; +insert into null_in__fuzz_6 select * from generateRandom() limit 1; + +SET transform_null_in = 0; + +CREATE TABLE test_set (i Nullable(int)) ENGINE = Set(); +INSERT INTO test_set VALUES (1), (NULL); + +SELECT count() = 1 FROM null_in__fuzz_6 PREWHERE 71 WHERE i IN (test_set); -- { serverError CANNOT_CONVERT_TYPE } From d3677adcdf367ec87aba49ec844c607a9d6c987d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Wed, 28 Jun 2023 10:37:38 +0000 Subject: [PATCH 0907/1997] Import missing module --- tests/integration/test_ssl_cert_authentication/test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/test_ssl_cert_authentication/test.py b/tests/integration/test_ssl_cert_authentication/test.py index 91faf8b0ce3..ff2de7491e1 100644 --- a/tests/integration/test_ssl_cert_authentication/test.py +++ b/tests/integration/test_ssl_cert_authentication/test.py @@ -6,6 +6,7 @@ import urllib.request, urllib.parse import ssl import os.path from os import remove +import logging # The test cluster is configured with certificate for that host name, see 'server-ext.cnf'. 
From 82fd863ad4eb1209ca66e660d7cf3b646f88fff7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 28 Jun 2023 13:39:47 +0300 Subject: [PATCH 0908/1997] Update 02809_storage_set_analysis_bug.sql --- tests/queries/0_stateless/02809_storage_set_analysis_bug.sql | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02809_storage_set_analysis_bug.sql b/tests/queries/0_stateless/02809_storage_set_analysis_bug.sql index 212170571f7..531946acda0 100644 --- a/tests/queries/0_stateless/02809_storage_set_analysis_bug.sql +++ b/tests/queries/0_stateless/02809_storage_set_analysis_bug.sql @@ -3,7 +3,6 @@ DROP TABLE IF EXISTS null_in__fuzz_6; set allow_suspicious_low_cardinality_types = 1; - CREATE TABLE null_in__fuzz_6 (`dt` LowCardinality(UInt16), `idx` Int32, `i` Nullable(Int256), `s` Int32) ENGINE = MergeTree PARTITION BY dt ORDER BY idx; insert into null_in__fuzz_6 select * from generateRandom() limit 1; @@ -13,3 +12,6 @@ CREATE TABLE test_set (i Nullable(int)) ENGINE = Set(); INSERT INTO test_set VALUES (1), (NULL); SELECT count() = 1 FROM null_in__fuzz_6 PREWHERE 71 WHERE i IN (test_set); -- { serverError CANNOT_CONVERT_TYPE } + +DROP TABLE test_set; +DROP TABLE null_in__fuzz_6; From 7024527542dd341e32dfe313cc54f8f537b69c98 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> Date: Wed, 28 Jun 2023 12:43:10 +0200 Subject: [PATCH 0909/1997] Fix flaky test 00417_kill_query --- tests/queries/0_stateless/00417_kill_query.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/00417_kill_query.sh b/tests/queries/0_stateless/00417_kill_query.sh index dc690caca39..6eb0505f6bb 100755 --- a/tests/queries/0_stateless/00417_kill_query.sh +++ b/tests/queries/0_stateless/00417_kill_query.sh @@ -9,13 +9,13 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) QUERY_FIELND_NUM=4 -$CLICKHOUSE_CLIENT --max_block_size=1 -q "SELECT sleep(1) FROM system.numbers LIMIT 4" &>/dev/null & +$CLICKHOUSE_CLIENT --max_block_size=1 -q "SELECT sleep(1) FROM system.numbers LIMIT 30" &>/dev/null & sleep 1 $CLICKHOUSE_CLIENT -q "KILL QUERY WHERE current_database = '${CLICKHOUSE_DATABASE}' and query LIKE 'SELECT sleep(%' AND (elapsed >= 0.) 
SYNC" | cut -f $QUERY_FIELND_NUM -$CLICKHOUSE_CLIENT --max_block_size=1 -q "SELECT sleep(1) FROM system.numbers LIMIT 5" &>/dev/null & +$CLICKHOUSE_CLIENT --max_block_size=1 -q "SELECT sleep(1) FROM system.numbers LIMIT 31" &>/dev/null & sleep 1 -$CLICKHOUSE_CLIENT -q "KILL QUERY WHERE current_database = '${CLICKHOUSE_DATABASE}' and query = 'SELECT sleep(1) FROM system.numbers LIMIT 5' ASYNC" | cut -f $QUERY_FIELND_NUM +$CLICKHOUSE_CLIENT -q "KILL QUERY WHERE current_database = '${CLICKHOUSE_DATABASE}' and query = 'SELECT sleep(1) FROM system.numbers LIMIT 31' ASYNC" | cut -f $QUERY_FIELND_NUM $CLICKHOUSE_CLIENT -q "KILL QUERY WHERE 0 ASYNC" $CLICKHOUSE_CLIENT -q "KILL QUERY WHERE 0 FORMAT TabSeparated" From d98776b70850f140494bc5e799219877f50124ca Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> Date: Wed, 28 Jun 2023 12:44:03 +0200 Subject: [PATCH 0910/1997] Fix flaky test 00417_kill_query --- tests/queries/0_stateless/00417_kill_query.reference | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/00417_kill_query.reference b/tests/queries/0_stateless/00417_kill_query.reference index 7e89d9674db..1a3b47964c0 100644 --- a/tests/queries/0_stateless/00417_kill_query.reference +++ b/tests/queries/0_stateless/00417_kill_query.reference @@ -1,2 +1,2 @@ -SELECT sleep(1) FROM system.numbers LIMIT 4 -SELECT sleep(1) FROM system.numbers LIMIT 5 +SELECT sleep(1) FROM system.numbers LIMIT 30 +SELECT sleep(1) FROM system.numbers LIMIT 31 From 3c8c4ce449fb9bfa74de74a0756b85f9c149efbe Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Wed, 28 Jun 2023 13:16:26 +0200 Subject: [PATCH 0911/1997] Update test.py --- tests/integration/test_storage_postgresql/test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_storage_postgresql/test.py b/tests/integration/test_storage_postgresql/test.py index d60a90ed7ce..05d4b1e12ef 100644 --- a/tests/integration/test_storage_postgresql/test.py +++ b/tests/integration/test_storage_postgresql/test.py @@ -335,7 +335,7 @@ def test_concurrent_queries(started_cluster): ) ) print(count) - assert count <= 18 + assert count <= 19 busy_pool = Pool(30) p = busy_pool.map_async(node_insert_select, range(30)) @@ -347,7 +347,7 @@ def test_concurrent_queries(started_cluster): ) ) print(count) - assert count <= 18 + assert count <= 20 node1.query("DROP TABLE test.test_table;") node1.query("DROP TABLE test.stat;") From 112310e98fce282516b633c1b0a193e45b278aec Mon Sep 17 00:00:00 2001 From: xuelei Date: Wed, 28 Jun 2023 19:17:53 +0800 Subject: [PATCH 0912/1997] fix storage policy prompt display error --- src/Disks/StoragePolicy.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Disks/StoragePolicy.cpp b/src/Disks/StoragePolicy.cpp index 02789132e55..6faa7c13c49 100644 --- a/src/Disks/StoragePolicy.cpp +++ b/src/Disks/StoragePolicy.cpp @@ -302,7 +302,7 @@ void StoragePolicy::checkCompatibleWith(const StoragePolicyPtr & new_storage_pol for (const auto & volume : getVolumes()) { if (!new_volume_names.contains(volume->getName())) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "New storage policy {} shall contain volumes of old one", backQuote(new_storage_policy->getName())); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "New storage policy {} shall contain volumes of Old storage policy {}", backQuote(new_storage_policy->getName()),backQuote(name)); std::unordered_set new_disk_names; for 
(const auto & disk : new_storage_policy->getVolumeByName(volume->getName())->getDisks()) @@ -310,7 +310,7 @@ void StoragePolicy::checkCompatibleWith(const StoragePolicyPtr & new_storage_pol for (const auto & disk : volume->getDisks()) if (!new_disk_names.contains(disk->getName())) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "New storage policy {} shall contain disks of old one", backQuote(new_storage_policy->getName())); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "New storage policy {} shall contain disks of Old storage policy {}", backQuote(new_storage_policy->getName()),backQuote(name)); } } From 06206d092d843fd3e6c005609008cd85597f9124 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Wed, 28 Jun 2023 13:18:53 +0200 Subject: [PATCH 0913/1997] Update test.py --- tests/integration/test_storage_postgresql/test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_storage_postgresql/test.py b/tests/integration/test_storage_postgresql/test.py index 05d4b1e12ef..83eb4f6f02a 100644 --- a/tests/integration/test_storage_postgresql/test.py +++ b/tests/integration/test_storage_postgresql/test.py @@ -323,7 +323,7 @@ def test_concurrent_queries(started_cluster): ) ) print(count) - assert count <= 18 + assert count <= 18 # 16 for test.test_table + 1 for conn + 1 for test.stat busy_pool = Pool(30) p = busy_pool.map_async(node_insert, range(30)) @@ -335,7 +335,7 @@ def test_concurrent_queries(started_cluster): ) ) print(count) - assert count <= 19 + assert count <= 19 # 16 for test.test_table + 1 for conn + at most 2 for test.stat busy_pool = Pool(30) p = busy_pool.map_async(node_insert_select, range(30)) @@ -347,7 +347,7 @@ def test_concurrent_queries(started_cluster): ) ) print(count) - assert count <= 20 + assert count <= 20 # 16 for test.test_table + 1 for conn + at most 3 for test.stat node1.query("DROP TABLE test.test_table;") node1.query("DROP TABLE test.stat;") From 5443987113e0417c310eaf7bbfdfdf26de64f142 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 28 Jun 2023 11:33:45 +0000 Subject: [PATCH 0914/1997] Automatic style fix --- tests/integration/test_storage_postgresql/test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_storage_postgresql/test.py b/tests/integration/test_storage_postgresql/test.py index 83eb4f6f02a..9f7c012e66f 100644 --- a/tests/integration/test_storage_postgresql/test.py +++ b/tests/integration/test_storage_postgresql/test.py @@ -323,7 +323,7 @@ def test_concurrent_queries(started_cluster): ) ) print(count) - assert count <= 18 # 16 for test.test_table + 1 for conn + 1 for test.stat + assert count <= 18 # 16 for test.test_table + 1 for conn + 1 for test.stat busy_pool = Pool(30) p = busy_pool.map_async(node_insert, range(30)) @@ -335,7 +335,7 @@ def test_concurrent_queries(started_cluster): ) ) print(count) - assert count <= 19 # 16 for test.test_table + 1 for conn + at most 2 for test.stat + assert count <= 19 # 16 for test.test_table + 1 for conn + at most 2 for test.stat busy_pool = Pool(30) p = busy_pool.map_async(node_insert_select, range(30)) @@ -347,7 +347,7 @@ def test_concurrent_queries(started_cluster): ) ) print(count) - assert count <= 20 # 16 for test.test_table + 1 for conn + at most 3 for test.stat + assert count <= 20 # 16 for test.test_table + 1 for conn + at most 3 for test.stat node1.query("DROP TABLE test.test_table;") node1.query("DROP TABLE test.stat;") From 
5a1bbe5a8d2f07960161100e0a17527e4bda6de2 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Wed, 28 Jun 2023 14:05:14 +0200 Subject: [PATCH 0915/1997] Update CachedOnDiskReadBufferFromFile.cpp --- src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp index 960d2a72410..5b42f41fbf3 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp @@ -852,9 +852,7 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep() else { implementation_buffer = getImplementationBuffer(file_segments->front()); - - if (read_type == ReadType::CACHED) - file_segments->front().use(); + file_segments->front().use(); } chassert(!internal_buffer.empty()); From 15f64a7cb9a07d48e92fd0db6ea1f9b8227b0e5e Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 28 Jun 2023 15:26:17 +0200 Subject: [PATCH 0916/1997] Add some checks --- src/Interpreters/Cache/FileSegment.cpp | 3 ++- src/Interpreters/Cache/IFileCachePriority.h | 2 +- .../Cache/LRUFileCachePriority.cpp | 19 +++++++++++++++---- src/Interpreters/Cache/LRUFileCachePriority.h | 4 +++- src/Interpreters/Cache/Metadata.cpp | 4 ++-- 5 files changed, 23 insertions(+), 9 deletions(-) diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index 95592fc7c12..e97d708ba74 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -872,6 +872,7 @@ void FileSegment::setDetachedState(const FileSegmentGuard::Lock & lock) setDownloadState(State::DETACHED, lock); key_metadata.reset(); cache = nullptr; + queue_iterator = nullptr; } void FileSegment::detach(const FileSegmentGuard::Lock & lock, const LockedKey &) @@ -890,7 +891,7 @@ void FileSegment::use() if (!cache) { - chassert(isCompleted(true)); + chassert(isDetached()); return; } diff --git a/src/Interpreters/Cache/IFileCachePriority.h b/src/Interpreters/Cache/IFileCachePriority.h index 10f63fce21d..34c49653ab8 100644 --- a/src/Interpreters/Cache/IFileCachePriority.h +++ b/src/Interpreters/Cache/IFileCachePriority.h @@ -44,7 +44,7 @@ public: virtual size_t use(const CacheGuard::Lock &) = 0; - virtual std::shared_ptr remove(const CacheGuard::Lock &) = 0; + virtual void remove(const CacheGuard::Lock &) = 0; virtual const Entry & getEntry() const = 0; diff --git a/src/Interpreters/Cache/LRUFileCachePriority.cpp b/src/Interpreters/Cache/LRUFileCachePriority.cpp index 6f142c0cc6d..18862e154da 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.cpp +++ b/src/Interpreters/Cache/LRUFileCachePriority.cpp @@ -166,15 +166,17 @@ void LRUFileCachePriority::iterate(IterateFunc && func, const CacheGuard::Lock & } } -LRUFileCachePriority::Iterator -LRUFileCachePriority::LRUFileCacheIterator::remove(const CacheGuard::Lock &) +void LRUFileCachePriority::LRUFileCacheIterator::remove(const CacheGuard::Lock &) { - return std::make_shared( - cache_priority, cache_priority->remove(queue_iter)); + checkUsable(); + cache_priority->remove(queue_iter); + queue_iter = LRUQueueIterator{}; } void LRUFileCachePriority::LRUFileCacheIterator::invalidate() { + checkUsable(); + LOG_TEST( cache_priority->log, "Invalidating entry in LRU queue. 
Key: {}, offset: {}, previous size: {}", @@ -187,6 +189,8 @@ void LRUFileCachePriority::LRUFileCacheIterator::invalidate() void LRUFileCachePriority::LRUFileCacheIterator::updateSize(int64_t size) { + checkUsable(); + LOG_TEST( cache_priority->log, "Update size with {} in LRU queue for key: {}, offset: {}, previous size: {}", @@ -198,8 +202,15 @@ void LRUFileCachePriority::LRUFileCacheIterator::updateSize(int64_t size) size_t LRUFileCachePriority::LRUFileCacheIterator::use(const CacheGuard::Lock &) { + checkUsable(); cache_priority->queue.splice(cache_priority->queue.end(), cache_priority->queue, queue_iter); return ++queue_iter->hits; } +void LRUFileCachePriority::LRUFileCacheIterator::checkUsable() const +{ + if (queue_iter == LRUQueueIterator{}) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to use invalid iterator"); +} + } diff --git a/src/Interpreters/Cache/LRUFileCachePriority.h b/src/Interpreters/Cache/LRUFileCachePriority.h index 8dc4eb0a016..e0d7d45062a 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.h +++ b/src/Interpreters/Cache/LRUFileCachePriority.h @@ -60,13 +60,15 @@ public: size_t use(const CacheGuard::Lock &) override; - Iterator remove(const CacheGuard::Lock &) override; + void remove(const CacheGuard::Lock &) override; void invalidate() override; void updateSize(int64_t size) override; private: + void checkUsable() const; + LRUFileCachePriority * cache_priority; mutable LRUFileCachePriority::LRUQueueIterator queue_iter; }; diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index 8c8524f7fa7..a77a9072cb6 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -147,7 +147,6 @@ String CacheMetadata::getFileNameForFileSegment(size_t offset, FileSegmentKind s file_suffix = "_temporary"; break; case FileSegmentKind::Regular: - file_suffix = ""; break; } return std::to_string(offset) + file_suffix; @@ -398,6 +397,8 @@ KeyMetadata::iterator LockedKey::removeFileSegment(size_t offset, const FileSegm if (file_segment->queue_iterator) file_segment->queue_iterator->invalidate(); + file_segment->detach(segment_lock, *this); + const auto path = key_metadata->getFileSegmentPath(*file_segment); bool exists = fs::exists(path); if (exists) @@ -408,7 +409,6 @@ KeyMetadata::iterator LockedKey::removeFileSegment(size_t offset, const FileSegm else if (file_segment->downloaded_size) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected path {} to exist", path); - file_segment->detach(segment_lock, *this); return key_metadata->erase(it); } From a3994319776c77576bff2a256aed77265423e279 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 28 Jun 2023 16:40:57 +0300 Subject: [PATCH 0917/1997] Update StoragePolicy.cpp --- src/Disks/StoragePolicy.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Disks/StoragePolicy.cpp b/src/Disks/StoragePolicy.cpp index 6faa7c13c49..a02568f9489 100644 --- a/src/Disks/StoragePolicy.cpp +++ b/src/Disks/StoragePolicy.cpp @@ -302,7 +302,7 @@ void StoragePolicy::checkCompatibleWith(const StoragePolicyPtr & new_storage_pol for (const auto & volume : getVolumes()) { if (!new_volume_names.contains(volume->getName())) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "New storage policy {} shall contain volumes of Old storage policy {}", backQuote(new_storage_policy->getName()),backQuote(name)); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "New storage policy {} shall contain volumes of the old storage policy {}", 
backQuote(new_storage_policy->getName()),backQuote(name)); std::unordered_set new_disk_names; for (const auto & disk : new_storage_policy->getVolumeByName(volume->getName())->getDisks()) @@ -310,7 +310,7 @@ void StoragePolicy::checkCompatibleWith(const StoragePolicyPtr & new_storage_pol for (const auto & disk : volume->getDisks()) if (!new_disk_names.contains(disk->getName())) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "New storage policy {} shall contain disks of Old storage policy {}", backQuote(new_storage_policy->getName()),backQuote(name)); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "New storage policy {} shall contain disks of the old storage policy {}", backQuote(new_storage_policy->getName()),backQuote(name)); } } From a41ec1221e8b1d33560af877ad6711be8f93de9c Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Wed, 28 Jun 2023 13:50:53 +0000 Subject: [PATCH 0918/1997] Http prototype --- src/Common/ProgressIndication.cpp | 4 ++-- src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp | 7 ++++++- src/Server/HTTP/WriteBufferFromHTTPServerResponse.h | 5 +++++ src/Server/HTTPHandler.cpp | 7 ++++++- 4 files changed, 19 insertions(+), 4 deletions(-) diff --git a/src/Common/ProgressIndication.cpp b/src/Common/ProgressIndication.cpp index d9564f0946e..29766fd1a14 100644 --- a/src/Common/ProgressIndication.cpp +++ b/src/Common/ProgressIndication.cpp @@ -100,7 +100,7 @@ void ProgressIndication::writeFinalProgress() std::cout << " (" << formatReadableQuantity(progress.read_rows * 1000000000.0 / elapsed_ns) << " rows/s., " << formatReadableSizeWithDecimalSuffix(progress.read_bytes * 1000000000.0 / elapsed_ns) << "/s.)"; auto peak_memory_usage = getMemoryUsage().peak; - std::cout << ".\nPeak memory usage (for query) " << formatReadableSizeWithDecimalSuffix(peak_memory_usage) << "."; + std::cout << ".\nPeak memory usage (for query) " << formatReadableSizeWithBinarySuffix(peak_memory_usage) << "."; } void ProgressIndication::writeProgress(WriteBufferFromFileDescriptor & message) @@ -166,7 +166,7 @@ void ProgressIndication::writeProgress(WriteBufferFromFileDescriptor & message) if (memory_usage > 0) profiling_msg_builder << ", " << formatReadableSizeWithDecimalSuffix(memory_usage) << " RAM"; - // profiling_msg_builder << ", " << formatReadableSizeWithDecimalSuffix(memory_usage) << ", " + // profiling_msg_builder << ", " << formatReadableSizeWithDecimalSuffix(memory_usage) << ", " // << formatReadableSizeWithDecimalSuffix(peak_usage) << " Peak RAM"; if (max_host_usage < memory_usage) profiling_msg_builder << ", " << formatReadableSizeWithDecimalSuffix(max_host_usage) << " max/host"; diff --git a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp index c8015cfd185..1e6d520de0a 100644 --- a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp +++ b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp @@ -38,7 +38,7 @@ void WriteBufferFromHTTPServerResponse::writeHeaderSummary() accumulated_progress.writeJSON(progress_string_writer); if (response_header_ostr) - *response_header_ostr << "X-ClickHouse-Summary: " << progress_string_writer.str() << "\r\n" << std::flush; + *response_header_ostr << "X-ClickHouse-Summary: " << progress_string_writer.str() << " Mem " << formatReadableSizeWithBinarySuffix(peak_memory_usage) << "\r\n" << std::flush; } void WriteBufferFromHTTPServerResponse::writeHeaderProgress() @@ -169,6 +169,11 @@ void WriteBufferFromHTTPServerResponse::onProgress(const Progress & progress) } } +void 
WriteBufferFromHTTPServerResponse::onMemoryUsage(Int64 usage) +{ + peak_memory_usage = usage; +} + WriteBufferFromHTTPServerResponse::~WriteBufferFromHTTPServerResponse() { finalize(); diff --git a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h index ce677616755..f849a685aec 100644 --- a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h +++ b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h @@ -45,6 +45,9 @@ public: /// Writes progress in repeating HTTP headers. void onProgress(const Progress & progress); + void onMemoryUsage(Int64 peak_memory_usage); + + /// Turn compression on or off. /// The setting has any effect only if HTTP headers haven't been sent yet. void setCompression(bool enable_compression) @@ -126,6 +129,8 @@ private: int exception_code = 0; + Int64 peak_memory_usage = 0; + std::mutex mutex; /// progress callback could be called from different threads. }; diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index fe98ae5f69e..b8c852e2482 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -816,7 +816,12 @@ void HTTPHandler::processQuery( /// While still no data has been sent, we will report about query execution progress by sending HTTP headers. /// Note that we add it unconditionally so the progress is available for `X-ClickHouse-Summary` - append_callback([&used_output](const Progress & progress) { used_output.out->onProgress(progress); }); + append_callback([&used_output, this](const Progress & progress) { + used_output.out->onProgress(progress); + auto thread_group = CurrentThread::getGroup(); + auto peak_memory_usage = thread_group->memory_tracker.getPeak(); + used_output.out->onMemoryUsage(peak_memory_usage); + }); if (settings.readonly > 0 && settings.cancel_http_readonly_queries_on_client_close) { From c42bf37a141decb206e405470c2af6d85145bf3f Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Wed, 28 Jun 2023 09:59:32 -0400 Subject: [PATCH 0919/1997] list the disk types --- .../engines/table-engines/mergetree-family/mergetree.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 42454af6feb..c67ac8fa4ef 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -756,6 +756,14 @@ If you perform the `SELECT` query between merges, you may get expired data. To a - [ttl_only_drop_parts](/docs/en/operations/settings/settings.md/#ttl_only_drop_parts) setting +## Disk types + +In addition to local block devices, ClickHouse supports other device types through table engines. 
These are the types:
+- [S3](#table_engine-mergetree-s3)
+- GCS (also supported using the [S3 table engine](#table_engine-mergetree-s3))
+- [Azure Blob Storage](#table_engine-mergetree-azure-blob-storage)
+- [HDFS](/docs/en/sql-reference/table-functions/hdfs.md)
+
 ## Using Multiple Block Devices for Data Storage {#table_engine-mergetree-multiple-volumes}

 ### Introduction {#introduction}

From b276f450f38247131bac33587482f130b4c02b4b Mon Sep 17 00:00:00 2001
From: Dmitry Kardymon
Date: Wed, 28 Jun 2023 14:12:32 +0000
Subject: [PATCH 0920/1997] Remove whitespace

---
 src/Server/HTTPHandler.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp
index b8c852e2482..562f1e7e93f 100644
--- a/src/Server/HTTPHandler.cpp
+++ b/src/Server/HTTPHandler.cpp
@@ -816,7 +816,7 @@ void HTTPHandler::processQuery(

     /// While still no data has been sent, we will report about query execution progress by sending HTTP headers.
     /// Note that we add it unconditionally so the progress is available for `X-ClickHouse-Summary`
-    append_callback([&used_output, this](const Progress & progress) { 
+    append_callback([&used_output, this](const Progress & progress) {
         used_output.out->onProgress(progress);
         auto thread_group = CurrentThread::getGroup();
         auto peak_memory_usage = thread_group->memory_tracker.getPeak();
         used_output.out->onMemoryUsage(peak_memory_usage);
     });

From f57d91b678226094bc55074b3df1ce8ba630fb5f Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Wed, 28 Jun 2023 17:14:58 +0300
Subject: [PATCH 0921/1997] Update 02803_remote_cannot_clone_block.sql

---
 tests/queries/0_stateless/02803_remote_cannot_clone_block.sql | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/queries/0_stateless/02803_remote_cannot_clone_block.sql b/tests/queries/0_stateless/02803_remote_cannot_clone_block.sql
index 3e14aa6be96..6d79aa76d18 100644
--- a/tests/queries/0_stateless/02803_remote_cannot_clone_block.sql
+++ b/tests/queries/0_stateless/02803_remote_cannot_clone_block.sql
@@ -6,6 +6,8 @@ SELECT *
 FROM system.numbers
 LIMIT 10000;

+SET allow_experimental_analyzer = 0;
+
 SELECT *
 FROM
 (

From 0b19c1832a7afd9015e1872a932b2d28b326117c Mon Sep 17 00:00:00 2001
From: Igor Nikonov
Date: Wed, 28 Jun 2023 14:15:03 +0000
Subject: [PATCH 0922/1997] Fix: detach from thread group

---
 src/Dictionaries/HashedDictionary.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/Dictionaries/HashedDictionary.cpp b/src/Dictionaries/HashedDictionary.cpp
index eb1d98a8f39..798f37cb516 100644
--- a/src/Dictionaries/HashedDictionary.cpp
+++ b/src/Dictionaries/HashedDictionary.cpp
@@ -10,6 +10,7 @@
 #include
 #include
 #include
+#include
 #include
@@ -69,6 +70,11 @@ public:
             shards_queues[shard].emplace(backlog);
             pool.scheduleOrThrowOnError([this, shard, thread_group = CurrentThread::getGroup()]
             {
+                SCOPE_EXIT_SAFE(
+                    if (thread_group)
+                        CurrentThread::detachFromGroupIfNotDetached();
+                );
+
                 /// Do not account memory that was occupied by the dictionaries for the query/user context.
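                /// [Editor's note, not part of the patch] Jobs scheduled on a shared pool run
                /// under the thread group captured above; without the SCOPE_EXIT_SAFE detach,
                /// the pool thread would stay attached after the job finishes and keep charging
                /// its memory and profile events to a query that may already be gone. A minimal
                /// sketch of the usual attach/detach pattern, assuming the CurrentThread API of
                /// this era:
                ///     if (thread_group)
                ///         CurrentThread::attachToGroupIfDetached(thread_group);
                ///     SCOPE_EXIT_SAFE(CurrentThread::detachFromGroupIfNotDetached(););
                /// The MemoryTrackerBlockerInThread below additionally keeps allocations made
                /// while building the dictionary out of that query/user accounting: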
MemoryTrackerBlockerInThread memory_blocker; From 23d0a9e3a83f263f563c0d2b0983bff6aa9a2d90 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 28 Jun 2023 16:20:45 +0200 Subject: [PATCH 0923/1997] fix --- .../01861_explain_pipeline.reference | 18 +++++----- ...inal_streams_data_skipping_index.reference | 36 +++++++++---------- 2 files changed, 24 insertions(+), 30 deletions(-) diff --git a/tests/queries/0_stateless/01861_explain_pipeline.reference b/tests/queries/0_stateless/01861_explain_pipeline.reference index aec3ae06dce..427b3eaefc0 100644 --- a/tests/queries/0_stateless/01861_explain_pipeline.reference +++ b/tests/queries/0_stateless/01861_explain_pipeline.reference @@ -17,14 +17,12 @@ ExpressionTransform × 2 (ReadFromMergeTree) ExpressionTransform × 2 ReplacingSorted - ExpressionTransform - FilterSortedStreamByRange - Description: filter values in [(5), +inf) - ExpressionTransform - MergeTreeInOrder 0 → 1 - ReplacingSorted 2 → 1 + FilterSortedStreamByRange + Description: filter values in [(5), +inf) + ExpressionTransform + MergeTreeInOrder 0 → 1 + ReplacingSorted 2 → 1 + FilterSortedStreamByRange × 2 + Description: filter values in [-inf, (5)) ExpressionTransform × 2 - FilterSortedStreamByRange × 2 - Description: filter values in [-inf, (5)) - ExpressionTransform × 2 - MergeTreeInOrder × 2 0 → 1 + MergeTreeInOrder × 2 0 → 1 diff --git a/tests/queries/0_stateless/02780_final_streams_data_skipping_index.reference b/tests/queries/0_stateless/02780_final_streams_data_skipping_index.reference index d7a540ae479..5242c625325 100644 --- a/tests/queries/0_stateless/02780_final_streams_data_skipping_index.reference +++ b/tests/queries/0_stateless/02780_final_streams_data_skipping_index.reference @@ -9,17 +9,15 @@ ExpressionTransform × 2 (ReadFromMergeTree) ExpressionTransform × 2 AggregatingSortedTransform 2 → 1 - ExpressionTransform × 2 - FilterSortedStreamByRange × 2 - Description: filter values in [(999424), +inf) - ExpressionTransform × 2 - MergeTreeInOrder × 2 0 → 1 - AggregatingSortedTransform + FilterSortedStreamByRange × 2 + Description: filter values in [(999424), +inf) + ExpressionTransform × 2 + MergeTreeInOrder × 2 0 → 1 + AggregatingSortedTransform + FilterSortedStreamByRange + Description: filter values in [-inf, (999424)) ExpressionTransform - FilterSortedStreamByRange - Description: filter values in [-inf, (999424)) - ExpressionTransform - MergeTreeInOrder 0 → 1 + MergeTreeInOrder 0 → 1 EXPLAIN PIPELINE SELECT * FROM data FINAL WHERE v1 >= now() - INTERVAL 180 DAY SETTINGS max_threads=2, max_final_threads=2, force_data_skipping_indices='v1_index', use_skip_indexes_if_final=0 FORMAT LineAsString; @@ -30,14 +28,12 @@ ExpressionTransform × 2 (ReadFromMergeTree) ExpressionTransform × 2 AggregatingSortedTransform 2 → 1 - ExpressionTransform × 2 - FilterSortedStreamByRange × 2 - Description: filter values in [(999424), +inf) - ExpressionTransform × 2 - MergeTreeInOrder × 2 0 → 1 - AggregatingSortedTransform + FilterSortedStreamByRange × 2 + Description: filter values in [(999424), +inf) + ExpressionTransform × 2 + MergeTreeInOrder × 2 0 → 1 + AggregatingSortedTransform + FilterSortedStreamByRange + Description: filter values in [-inf, (999424)) ExpressionTransform - FilterSortedStreamByRange - Description: filter values in [-inf, (999424)) - ExpressionTransform - MergeTreeInOrder 0 → 1 + MergeTreeInOrder 0 → 1 From bfcadabb927e5ea547c29df488d3fe6ea396a178 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Wed, 28 Jun 2023 10:30:49 -0400 Subject: [PATCH 0924/1997] add web disk 
type
---
 docs/en/engines/table-engines/mergetree-family/mergetree.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md
index c67ac8fa4ef..1f084fe075b 100644
--- a/docs/en/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md
@@ -763,6 +763,7 @@ In addition to local block devices, ClickHouse supports other device types throu
 - GCS (also supported using the [S3 table engine](#table_engine-mergetree-s3))
 - [Azure Blob Storage](#table_engine-mergetree-azure-blob-storage)
 - [HDFS](/docs/en/sql-reference/table-functions/hdfs.md)
+- [Web (read-only)](#web-storage)

 ## Using Multiple Block Devices for Data Storage {#table_engine-mergetree-multiple-volumes}

 ### Introduction {#introduction}

From b392127304d8c14ce34bd86d0b8ca561e1559919 Mon Sep 17 00:00:00 2001
From: DanRoscigno
Date: Wed, 28 Jun 2023 11:00:07 -0400
Subject: [PATCH 0925/1997] add example web config

---
 .../mergetree-family/mergetree.md | 53 ++++++++++++++++++-
 1 file changed, 52 insertions(+), 1 deletion(-)

diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md
index 1f084fe075b..b87c4d216cf 100644
--- a/docs/en/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md
@@ -945,7 +945,7 @@ configuration files; all the settings are in the CREATE/ATTACH query.
 The example uses `type=web`, but any disk type can be configured as dynamic, even Local disk. Local disks require a path argument to be inside the server config parameter `custom_local_disks_base_directory`, which has no default, so set that also when using local disk.
 :::

-#### Example dynamic web storage {#web-storage}
+#### Example dynamic web storage

 ```sql
 ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7'
 (
@@ -1249,6 +1249,57 @@ Examples of working configurations can be found in integration tests directory (
 Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use.
 :::

+## Web storage (read-only) {#web-storage}
+
+Web storage can be used for read-only purposes. An example use is for hosting sample
+data, or for migrating data.
+
+:::tip
+Storage can also be configured temporarily within a query. If a web dataset is not expected
+to be used routinely, see [dynamic storage](#dynamic-storage) and skip editing the
+configuration file.
+:::
+
+In this sample configuration:
+- the disk is of type `web`
+- the data is hosted at `http://nginx:80/test1/`
+- a cache on local storage is used
+
+```xml
+<clickhouse>
+    <storage_configuration>
+        <disks>
+            <web>
+                <type>web</type>
+                <endpoint>http://nginx:80/test1/</endpoint>
+            </web>
+            <cached_web>
+                <type>cache</type>
+                <disk>web</disk>
+                <path>cached_web_cache/</path>
+                <max_size>100000000</max_size>
+            </cached_web>
+        </disks>
+        <policies>
+            <web>
+                <volumes>
+                    <main>
+                        <disk>web</disk>
+                    </main>
+                </volumes>
+            </web>
+            <cached_web>
+                <volumes>
+                    <main>
+                        <disk>cached_web</disk>
+                    </main>
+                </volumes>
+            </cached_web>
+        </policies>
+    </storage_configuration>
+</clickhouse>
+```
+
 ## Virtual Columns {#virtual-columns}

 - `_part` — Name of a part.

From dd3a744cef6e736bc68782fd79853a1535bdebb8 Mon Sep 17 00:00:00 2001
From: DanRoscigno
Date: Wed, 28 Jun 2023 11:17:16 -0400
Subject: [PATCH 0926/1997] add HDFS example

---
 .../mergetree-family/mergetree.md | 38 ++++++++++++++++++-
 1 file changed, 37 insertions(+), 1 deletion(-)

diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md
index b87c4d216cf..1b7f3263ab9 100644
--- a/docs/en/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md
@@ -762,7 +762,7 @@ In addition to local block devices, ClickHouse supports other device types throu
 - [S3](#table_engine-mergetree-s3)
 - GCS (also supported using the [S3 table engine](#table_engine-mergetree-s3))
 - [Azure Blob Storage](#table_engine-mergetree-azure-blob-storage)
-- [HDFS](/docs/en/sql-reference/table-functions/hdfs.md)
+- [HDFS](#hdfs-storage)
 - [Web (read-only)](#web-storage)

 ## Using Multiple Block Devices for Data Storage {#table_engine-mergetree-multiple-volumes}
@@ -1249,6 +1249,42 @@ Examples of working configurations can be found in integration tests directory (
 Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use.
 :::

+## HDFS storage {#hdfs-storage}
+
+In this sample configuration:
+- the disk is of type `hdfs`
+- the data is hosted at `hdfs://hdfs1:9000/clickhouse/`
+
+```xml
+<clickhouse>
+    <storage_configuration>
+        <disks>
+            <hdfs>
+                <type>hdfs</type>
+                <endpoint>hdfs://hdfs1:9000/clickhouse/</endpoint>
+                <skip_access_check>true</skip_access_check>
+            </hdfs>
+            <hdd>
+                <type>local</type>
+                <path>/</path>
+            </hdd>
+        </disks>
+        <policies>
+            <hdfs>
+                <volumes>
+                    <main>
+                        <disk>hdfs</disk>
+                    </main>
+                    <external>
+                        <disk>hdd</disk>
+                    </external>
+                </volumes>
+            </hdfs>
+        </policies>
+    </storage_configuration>
+</clickhouse>
+``` + ## Web storage (read-only) {#web-storage} Web storage can be used for read-only purposes. An example use is for hosting sample From 594ec09edf0d63b73e02c2dbbee6cc500eb52d87 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 28 Jun 2023 18:19:16 +0200 Subject: [PATCH 0927/1997] fix missing metadata_version.txt --- .../MergeTree/DataPartStorageOnDiskBase.cpp | 26 ++++---- .../MergeTree/DataPartStorageOnDiskBase.h | 5 +- src/Storages/MergeTree/IDataPartStorage.h | 31 +++++++-- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 20 +++--- src/Storages/MergeTree/MergeTreeData.cpp | 35 +++++------ src/Storages/MergeTree/MergeTreeData.h | 13 +--- src/Storages/MergeTree/MutateTask.cpp | 11 +++- src/Storages/MergeTree/MutateTask.h | 2 +- src/Storages/StorageMergeTree.cpp | 6 +- src/Storages/StorageReplicatedMergeTree.cpp | 63 +++++++++++-------- 10 files changed, 120 insertions(+), 92 deletions(-) diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp index 30776a8bc50..c850ebaa6fd 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp @@ -415,41 +415,41 @@ void DataPartStorageOnDiskBase::backup( MutableDataPartStoragePtr DataPartStorageOnDiskBase::freeze( const std::string & to, const std::string & dir_path, - bool make_source_readonly, std::function save_metadata_callback, - bool copy_instead_of_hardlink, - const NameSet & files_to_copy_instead_of_hardlinks, - DiskTransactionPtr external_transaction) const + const ClonePartParams & params) const { auto disk = volume->getDisk(); - if (external_transaction) - external_transaction->createDirectories(to); + if (params.external_transaction) + params.external_transaction->createDirectories(to); else disk->createDirectories(to); - localBackup(disk, getRelativePath(), fs::path(to) / dir_path, make_source_readonly, {}, copy_instead_of_hardlink, files_to_copy_instead_of_hardlinks, external_transaction); + localBackup(disk, getRelativePath(), fs::path(to) / dir_path, params.make_source_readonly, {}, params.copy_instead_of_hardlink, + params.files_to_copy_instead_of_hardlinks, params.external_transaction); if (save_metadata_callback) save_metadata_callback(disk); - if (external_transaction) + if (params.external_transaction) { - external_transaction->removeFileIfExists(fs::path(to) / dir_path / "delete-on-destroy.txt"); - external_transaction->removeFileIfExists(fs::path(to) / dir_path / "txn_version.txt"); - external_transaction->removeFileIfExists(fs::path(to) / dir_path / IMergeTreeDataPart::METADATA_VERSION_FILE_NAME); + params.external_transaction->removeFileIfExists(fs::path(to) / dir_path / "delete-on-destroy.txt"); + params.external_transaction->removeFileIfExists(fs::path(to) / dir_path / "txn_version.txt"); + if (!params.keep_metadata_version) + params.external_transaction->removeFileIfExists(fs::path(to) / dir_path / IMergeTreeDataPart::METADATA_VERSION_FILE_NAME); } else { disk->removeFileIfExists(fs::path(to) / dir_path / "delete-on-destroy.txt"); disk->removeFileIfExists(fs::path(to) / dir_path / "txn_version.txt"); - disk->removeFileIfExists(fs::path(to) / dir_path / IMergeTreeDataPart::METADATA_VERSION_FILE_NAME); + if (!params.keep_metadata_version) + disk->removeFileIfExists(fs::path(to) / dir_path / IMergeTreeDataPart::METADATA_VERSION_FILE_NAME); } auto single_disk_volume = std::make_shared(disk->getName(), disk, 0); /// Do not initialize storage in case of DETACH because part may be broken. 
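    /// [Editor's note, not part of the patch] Initializing the cloned storage would read
    /// the part's metadata from disk and can throw on a broken part; a DETACH only needs
    /// the files moved aside, so validation is deferred until a later ATTACH.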
bool to_detached = dir_path.starts_with("detached/"); - return create(single_disk_volume, to, dir_path, /*initialize=*/ !to_detached && !external_transaction); + return create(single_disk_volume, to, dir_path, /*initialize=*/ !to_detached && !params.external_transaction); } MutableDataPartStoragePtr DataPartStorageOnDiskBase::clonePart( diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.h b/src/Storages/MergeTree/DataPartStorageOnDiskBase.h index 043953eb20c..5f7dcc3fd32 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.h +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.h @@ -62,11 +62,8 @@ public: MutableDataPartStoragePtr freeze( const std::string & to, const std::string & dir_path, - bool make_source_readonly, std::function save_metadata_callback, - bool copy_instead_of_hardlink, - const NameSet & files_to_copy_instead_of_hardlinks, - DiskTransactionPtr external_transaction) const override; + const ClonePartParams & params) const override; MutableDataPartStoragePtr clonePart( const std::string & to, diff --git a/src/Storages/MergeTree/IDataPartStorage.h b/src/Storages/MergeTree/IDataPartStorage.h index 9d6c5d0dcba..b40a9aa1b46 100644 --- a/src/Storages/MergeTree/IDataPartStorage.h +++ b/src/Storages/MergeTree/IDataPartStorage.h @@ -63,6 +63,9 @@ using DiskPtr = std::shared_ptr; class ISyncGuard; using SyncGuardPtr = std::unique_ptr; +class MergeTreeTransaction; +using MergeTreeTransactionPtr = std::shared_ptr; + class IBackupEntry; using BackupEntryPtr = std::shared_ptr; using BackupEntries = std::vector>; @@ -72,6 +75,17 @@ struct WriteSettings; class TemporaryFileOnDisk; + +struct HardlinkedFiles +{ + /// Shared table uuid where hardlinks live + std::string source_table_shared_id; + /// Hardlinked from part + std::string source_part_name; + /// Hardlinked files list + NameSet hardlinks_from_source_part; +}; + /// This is an abstraction of storage for data part files. /// Ideally, it is assumed to contain read-only methods from IDisk. /// It is not fulfilled now, but let's try our best. @@ -220,14 +234,23 @@ public: /// If `external_transaction` is provided, the disk operations (creating directories, hardlinking, /// etc) won't be applied immediately; instead, they'll be added to external_transaction, which the /// caller then needs to commit. + + struct ClonePartParams + { + MergeTreeTransactionPtr txn = NO_TRANSACTION_PTR; + HardlinkedFiles * hardlinked_files = nullptr; + bool copy_instead_of_hardlink = false; + NameSet files_to_copy_instead_of_hardlinks; + bool keep_metadata_version = false; + bool make_source_readonly = false; + DiskTransactionPtr external_transaction = nullptr; + }; + virtual std::shared_ptr freeze( const std::string & to, const std::string & dir_path, - bool make_source_readonly, std::function save_metadata_callback, - bool copy_instead_of_hardlink, - const NameSet & files_to_copy_instead_of_hardlinks, - DiskTransactionPtr external_transaction = nullptr) const = 0; + const ClonePartParams & params) const = 0; /// Make a full copy of a data part into 'to/dir_path' (possibly to a different disk). 
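    /// [Editor's note, not part of the patch] With ClonePartParams above, call sites can
    /// use designated initializers instead of the old long positional argument list. A
    /// minimal sketch, using only fields declared in the struct and the new freeze()
    /// signature:
    ///     IDataPartStorage::ClonePartParams params
    ///     {
    ///         .copy_instead_of_hardlink = false,
    ///         .keep_metadata_version = true,
    ///         .make_source_readonly = true,
    ///     };
    ///     auto cloned = part_storage->freeze(to, dir_path, /*save_metadata_callback*/ {}, params);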
virtual std::shared_ptr clonePart( diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 85edba84296..1f105951757 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1363,6 +1363,8 @@ void IMergeTreeDataPart::loadColumns(bool require) { loaded_metadata_version = metadata_snapshot->getMetadataVersion(); old_part_with_no_metadata_version_on_disk = true; + LOG_WARNING(storage.log, "Part {} doesn't have metadata version on disk, setting it to {}. " + "It's okay if the part was created by an old version of ClickHouse", name, loaded_metadata_version); } setColumns(loaded_columns, infos, loaded_metadata_version); @@ -1765,12 +1767,6 @@ void IMergeTreeDataPart::renameToDetached(const String & prefix) DataPartStoragePtr IMergeTreeDataPart::makeCloneInDetached(const String & prefix, const StorageMetadataPtr & /*metadata_snapshot*/) const { - auto storage_settings = storage.getSettings(); - - /// In case of zero-copy replication we copy directory instead of hardlinks - /// because hardlinks tracking doesn't work for detached parts. - bool copy_instead_of_hardlink = isStoredOnRemoteDiskWithZeroCopySupport() && storage.supportsReplication() && storage_settings->allow_remote_fs_zero_copy_replication; - /// Avoid unneeded duplicates of broken parts if we try to detach the same broken part multiple times. /// Otherwise it may pollute detached/ with dirs with _tryN suffix and we will fail to remove broken part after 10 attempts. bool broken = !prefix.empty(); @@ -1778,13 +1774,19 @@ DataPartStoragePtr IMergeTreeDataPart::makeCloneInDetached(const String & prefix if (!maybe_path_in_detached) return nullptr; + /// In case of zero-copy replication we copy directory instead of hardlinks + /// because hardlinks tracking doesn't work for detached parts. + auto storage_settings = storage.getSettings(); + IDataPartStorage::ClonePartParams params + { + .copy_instead_of_hardlink = isStoredOnRemoteDiskWithZeroCopySupport() && storage.supportsReplication() && storage_settings->allow_remote_fs_zero_copy_replication, + .make_source_readonly = true + }; return getDataPartStorage().freeze( storage.relative_data_path, *maybe_path_in_detached, - /*make_source_readonly=*/ true, /*save_metadata_callback=*/ {}, - copy_instead_of_hardlink, - /*files_to_copy_instead_of_hardlinks=*/ {}); + params); } MutableDataPartStoragePtr IMergeTreeDataPart::makeCloneOnDisk(const DiskPtr & disk, const String & directory_name) const diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index e9c3a7f66ae..f3cf4a85953 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -7289,10 +7289,7 @@ std::pair MergeTreeData::cloneAn const String & tmp_part_prefix, const MergeTreePartInfo & dst_part_info, const StorageMetadataPtr & metadata_snapshot, - const MergeTreeTransactionPtr & txn, - HardlinkedFiles * hardlinked_files, - bool copy_instead_of_hardlink, - const NameSet & files_to_copy_instead_of_hardlinks) + const IDataPartStorage::ClonePartParams & params) { /// Check that the storage policy contains the disk where the src_part is located. 
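    /// [Editor's note, not part of the patch] The "same disk" requirement exists because
    /// the clone is normally made of hardlinks (unless params.copy_instead_of_hardlink is
    /// set), and hardlinks only work within one filesystem, so the destination table's
    /// storage policy must contain the disk that holds the source part: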
bool does_storage_policy_allow_same_disk = false; @@ -7343,16 +7340,14 @@ std::pair MergeTreeData::cloneAn } String with_copy; - if (copy_instead_of_hardlink) + if (params.copy_instead_of_hardlink) with_copy = " (copying data)"; auto dst_part_storage = src_part_storage->freeze( relative_data_path, tmp_dst_part_name, - /*make_source_readonly=*/ false, /*save_metadata_callback=*/ {}, - copy_instead_of_hardlink, - files_to_copy_instead_of_hardlinks); + params); LOG_DEBUG(log, "Clone{} part {} to {}{}", src_flushed_tmp_part ? " flushed" : "", @@ -7364,18 +7359,18 @@ std::pair MergeTreeData::cloneAn .withPartFormatFromDisk() .build(); - if (!copy_instead_of_hardlink && hardlinked_files) + if (!params.copy_instead_of_hardlink && params.hardlinked_files) { - hardlinked_files->source_part_name = src_part->name; - hardlinked_files->source_table_shared_id = src_part->storage.getTableSharedID(); + params.hardlinked_files->source_part_name = src_part->name; + params.hardlinked_files->source_table_shared_id = src_part->storage.getTableSharedID(); for (auto it = src_part->getDataPartStorage().iterate(); it->isValid(); it->next()) { - if (!files_to_copy_instead_of_hardlinks.contains(it->name()) + if (!params.files_to_copy_instead_of_hardlinks.contains(it->name()) && it->name() != IMergeTreeDataPart::DELETE_ON_DESTROY_MARKER_FILE_NAME_DEPRECATED && it->name() != IMergeTreeDataPart::TXN_VERSION_METADATA_FILE_NAME) { - hardlinked_files->hardlinks_from_source_part.insert(it->name()); + params.hardlinked_files->hardlinks_from_source_part.insert(it->name()); } } @@ -7386,18 +7381,18 @@ std::pair MergeTreeData::cloneAn for (auto it = projection_storage.iterate(); it->isValid(); it->next()) { auto file_name_with_projection_prefix = fs::path(projection_storage.getPartDirectory()) / it->name(); - if (!files_to_copy_instead_of_hardlinks.contains(file_name_with_projection_prefix) + if (!params.files_to_copy_instead_of_hardlinks.contains(file_name_with_projection_prefix) && it->name() != IMergeTreeDataPart::DELETE_ON_DESTROY_MARKER_FILE_NAME_DEPRECATED && it->name() != IMergeTreeDataPart::TXN_VERSION_METADATA_FILE_NAME) { - hardlinked_files->hardlinks_from_source_part.insert(file_name_with_projection_prefix); + params.hardlinked_files->hardlinks_from_source_part.insert(file_name_with_projection_prefix); } } } } /// We should write version metadata on part creation to distinguish it from parts that were created without transaction. - TransactionID tid = txn ? txn->tid : Tx::PrehistoricTID; + TransactionID tid = params.txn ? 
params.txn->tid : Tx::PrehistoricTID; dst_data_part->version.setCreationTID(tid, nullptr); dst_data_part->storeVersionMetadata(); @@ -7579,13 +7574,15 @@ PartitionCommandsResultInfo MergeTreeData::freezePartitionsByMatcher( createAndStoreFreezeMetadata(disk, part, fs::path(backup_part_path) / part->getDataPartStorage().getPartDirectory()); }; + IDataPartStorage::ClonePartParams params + { + .make_source_readonly = true + }; auto new_storage = data_part_storage->freeze( backup_part_path, part->getDataPartStorage().getPartDirectory(), - /*make_source_readonly=*/ true, callback, - /*copy_instead_of_hardlink=*/ false, - /*files_to_copy_instead_of_hardlinks=*/ {}); + params); part->is_frozen.store(true, std::memory_order_relaxed); result.push_back(PartitionCommandResultInfo{ diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index b27392b355b..43e59ccc392 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -828,21 +828,10 @@ public: MergeTreeData & checkStructureAndGetMergeTreeData(const StoragePtr & source_table, const StorageMetadataPtr & src_snapshot, const StorageMetadataPtr & my_snapshot) const; MergeTreeData & checkStructureAndGetMergeTreeData(IStorage & source_table, const StorageMetadataPtr & src_snapshot, const StorageMetadataPtr & my_snapshot) const; - struct HardlinkedFiles - { - /// Shared table uuid where hardlinks live - std::string source_table_shared_id; - /// Hardlinked from part - std::string source_part_name; - /// Hardlinked files list - NameSet hardlinks_from_source_part; - }; - std::pair cloneAndLoadDataPartOnSameDisk( const MergeTreeData::DataPartPtr & src_part, const String & tmp_part_prefix, const MergeTreePartInfo & dst_part_info, const StorageMetadataPtr & metadata_snapshot, - const MergeTreeTransactionPtr & txn, HardlinkedFiles * hardlinked_files, - bool copy_instead_of_hardlink, const NameSet & files_to_copy_instead_of_hardlinks); + const IDataPartStorage::ClonePartParams & params); virtual std::vector getMutationsStatus() const = 0; diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index a19b9daca0e..f4a071b8f27 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -852,7 +852,7 @@ struct MutationContext MergeTreeTransactionPtr txn; - MergeTreeData::HardlinkedFiles hardlinked_files; + HardlinkedFiles hardlinked_files; bool need_prefix = true; @@ -1803,7 +1803,12 @@ bool MutateTask::prepare() if (ctx->need_prefix) prefix = "tmp_clone_"; - auto [part, lock] = ctx->data->cloneAndLoadDataPartOnSameDisk(ctx->source_part, prefix, ctx->future_part->part_info, ctx->metadata_snapshot, ctx->txn, &ctx->hardlinked_files, false, files_to_copy_instead_of_hardlinks); + IDataPartStorage::ClonePartParams clone_params + { + .txn = ctx->txn, .hardlinked_files = &ctx->hardlinked_files, + .files_to_copy_instead_of_hardlinks = std::move(files_to_copy_instead_of_hardlinks), .keep_metadata_version = true + }; + auto [part, lock] = ctx->data->cloneAndLoadDataPartOnSameDisk(ctx->source_part, prefix, ctx->future_part->part_info, ctx->metadata_snapshot, clone_params); part->getDataPartStorage().beginTransaction(); ctx->temporary_directory_lock = std::move(lock); @@ -1932,7 +1937,7 @@ bool MutateTask::prepare() return true; } -const MergeTreeData::HardlinkedFiles & MutateTask::getHardlinkedFiles() const +const HardlinkedFiles & MutateTask::getHardlinkedFiles() const { return ctx->hardlinked_files; } diff --git 
a/src/Storages/MergeTree/MutateTask.h b/src/Storages/MergeTree/MutateTask.h index 54ad996ad4c..dc21df018d7 100644 --- a/src/Storages/MergeTree/MutateTask.h +++ b/src/Storages/MergeTree/MutateTask.h @@ -45,7 +45,7 @@ public: return promise.get_future(); } - const MergeTreeData::HardlinkedFiles & getHardlinkedFiles() const; + const HardlinkedFiles & getHardlinkedFiles() const; private: diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index c02c96f62be..4c0c0c8e3fa 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -1932,7 +1932,8 @@ void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, con Int64 temp_index = insert_increment.get(); MergeTreePartInfo dst_part_info(partition_id, temp_index, temp_index, src_part->info.level); - auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk(src_part, TMP_PREFIX, dst_part_info, my_metadata_snapshot, local_context->getCurrentTransaction(), {}, false, {}); + IDataPartStorage::ClonePartParams clone_params{.txn = local_context->getCurrentTransaction()}; + auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk(src_part, TMP_PREFIX, dst_part_info, my_metadata_snapshot, clone_params); dst_parts.emplace_back(std::move(dst_part)); dst_parts_locks.emplace_back(std::move(part_lock)); } @@ -2030,7 +2031,8 @@ void StorageMergeTree::movePartitionToTable(const StoragePtr & dest_table, const Int64 temp_index = insert_increment.get(); MergeTreePartInfo dst_part_info(partition_id, temp_index, temp_index, src_part->info.level); - auto [dst_part, part_lock] = dest_table_storage->cloneAndLoadDataPartOnSameDisk(src_part, TMP_PREFIX, dst_part_info, dest_metadata_snapshot, local_context->getCurrentTransaction(), {}, false, {}); + IDataPartStorage::ClonePartParams clone_params{.txn = local_context->getCurrentTransaction()}; + auto [dst_part, part_lock] = dest_table_storage->cloneAndLoadDataPartOnSameDisk(src_part, TMP_PREFIX, dst_part_info, dest_metadata_snapshot, clone_params); dst_parts.emplace_back(std::move(dst_part)); dst_parts_locks.emplace_back(std::move(part_lock)); } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index b1ba06c77f9..ecc2537f6ad 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1578,7 +1578,7 @@ void StorageReplicatedMergeTree::checkPartChecksumsAndAddCommitOps(const zkutil: } MergeTreeData::DataPartsVector StorageReplicatedMergeTree::checkPartChecksumsAndCommit(Transaction & transaction, - const MutableDataPartPtr & part, std::optional hardlinked_files, bool replace_zero_copy_lock) + const MutableDataPartPtr & part, std::optional hardlinked_files, bool replace_zero_copy_lock) { auto zookeeper = getZooKeeper(); @@ -2183,7 +2183,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) /// A replica that will be used to fetch part String replica; - MergeTreeData::HardlinkedFiles hardlinked_files; + HardlinkedFiles hardlinked_files; scope_guard temporary_part_lock; }; @@ -2433,8 +2433,6 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) static const String TMP_PREFIX = "tmp_replace_from_"; - std::vector hardlinked_files_for_parts; - auto obtain_part = [&] (PartDescriptionPtr & part_desc) { if (part_desc->src_table_part) @@ -2442,8 +2440,14 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) if (part_desc->checksum_hex != 
part_desc->src_table_part->checksums.getTotalChecksumHex()) throw Exception(ErrorCodes::UNFINISHED, "Checksums of {} is suddenly changed", part_desc->src_table_part->name); + bool zero_copy_enabled = storage_settings_ptr->allow_remote_fs_zero_copy_replication + || dynamic_cast(source_table.get())->getSettings()->allow_remote_fs_zero_copy_replication; + IDataPartStorage::ClonePartParams clone_params + { + .copy_instead_of_hardlink = zero_copy_enabled && part_desc->src_table_part->isStoredOnRemoteDiskWithZeroCopySupport() + }; auto [res_part, temporary_part_lock] = cloneAndLoadDataPartOnSameDisk( - part_desc->src_table_part, TMP_PREFIX + "clone_", part_desc->new_part_info, metadata_snapshot, NO_TRANSACTION_PTR, &part_desc->hardlinked_files, false, {}); + part_desc->src_table_part, TMP_PREFIX + "clone_", part_desc->new_part_info, metadata_snapshot, clone_params); part_desc->res_part = std::move(res_part); part_desc->temporary_part_lock = std::move(temporary_part_lock); } @@ -4270,6 +4274,11 @@ bool StorageReplicatedMergeTree::fetchPart( profile_events_scope.getSnapshot()); }; + auto is_zero_copy_part = [&settings_ptr](const auto & data_part) + { + return settings_ptr->allow_remote_fs_zero_copy_replication && data_part->isStoredOnRemoteDiskWithZeroCopySupport(); + }; + DataPartPtr part_to_clone; { /// If the desired part is a result of a part mutation, try to find the source part and compare @@ -4281,7 +4290,7 @@ bool StorageReplicatedMergeTree::fetchPart( auto source_part = getActiveContainingPart(covered_part_info); /// Fetch for zero-copy replication is cheap and straightforward, so we don't use local clone here - if (source_part && (!settings_ptr->allow_remote_fs_zero_copy_replication || !source_part->getDataPartStorage().supportZeroCopyReplication())) + if (source_part && !is_zero_copy_part(source_part)) { auto source_part_header = ReplicatedMergeTreePartHeader::fromColumnsAndChecksums( source_part->getColumns(), source_part->checksums); @@ -4330,14 +4339,15 @@ bool StorageReplicatedMergeTree::fetchPart( InterserverCredentialsPtr credentials; std::optional tagger_ptr; std::function get_part; - MergeTreeData::HardlinkedFiles hardlinked_files; scope_guard part_directory_lock; if (part_to_clone) { get_part = [&, part_to_clone]() { - auto [cloned_part, lock] = cloneAndLoadDataPartOnSameDisk(part_to_clone, "tmp_clone_", part_info, metadata_snapshot, NO_TRANSACTION_PTR, &hardlinked_files, false, {}); + chassert(!is_zero_copy_part(part_to_clone)); + IDataPartStorage::ClonePartParams clone_params{ .keep_metadata_version = true }; + auto [cloned_part, lock] = cloneAndLoadDataPartOnSameDisk(part_to_clone, "tmp_clone_", part_info, metadata_snapshot, clone_params); part_directory_lock = std::move(lock); return cloned_part; }; @@ -4387,7 +4397,8 @@ bool StorageReplicatedMergeTree::fetchPart( Transaction transaction(*this, NO_TRANSACTION_RAW); renameTempPartAndReplace(part, transaction); - replaced_parts = checkPartChecksumsAndCommit(transaction, part, hardlinked_files, !part_to_clone); + chassert(!part_to_clone || !is_zero_copy_part(part)); + replaced_parts = checkPartChecksumsAndCommit(transaction, part, /*hardlinked_files*/ {}, /*replace_zero_copy_lock*/ true); /** If a quorum is tracked for this part, you must update it. * If you do not have time, in case of losing the session, when you restart the server - see the `ReplicatedMergeTreeRestartingThread::updateQuorumIfWeHavePart` method. 
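      * [Editor's note, not part of the original comment] This quorum bookkeeping is what
      * lets INSERTs made with the insert_quorum setting survive replica restarts: a
      * fetched part that belongs to an unsatisfied quorum has to be re-registered when
      * the replica comes back.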
@@ -7255,7 +7266,6 @@ void StorageReplicatedMergeTree::replacePartitionFrom( assert(replace == !LogEntry::ReplaceRangeEntry::isMovePartitionOrAttachFrom(drop_range)); String drop_range_fake_part_name = getPartNamePossiblyFake(format_version, drop_range); - std::vector hardlinked_files_for_parts; for (const auto & src_part : src_all_parts) { @@ -7286,19 +7296,21 @@ void StorageReplicatedMergeTree::replacePartitionFrom( UInt64 index = lock->getNumber(); MergeTreePartInfo dst_part_info(partition_id, index, index, src_part->info.level); - MergeTreeData::HardlinkedFiles hardlinked_files; + HardlinkedFiles hardlinked_files; - bool copy_instead_of_hardlink = storage_settings_ptr->allow_remote_fs_zero_copy_replication - && src_part->isStoredOnRemoteDiskWithZeroCopySupport(); - - auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk(src_part, TMP_PREFIX, dst_part_info, metadata_snapshot, NO_TRANSACTION_PTR, &hardlinked_files, copy_instead_of_hardlink, {}); + bool zero_copy_enabled = storage_settings_ptr->allow_remote_fs_zero_copy_replication + || dynamic_cast(source_table.get())->getSettings()->allow_remote_fs_zero_copy_replication; + IDataPartStorage::ClonePartParams clone_params + { + .copy_instead_of_hardlink = zero_copy_enabled && src_part->isStoredOnRemoteDiskWithZeroCopySupport() + }; + auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk(src_part, TMP_PREFIX, dst_part_info, metadata_snapshot, clone_params); src_parts.emplace_back(src_part); dst_parts.emplace_back(dst_part); dst_parts_locks.emplace_back(std::move(part_lock)); ephemeral_locks.emplace_back(std::move(*lock)); block_id_paths.emplace_back(block_id_path); part_checksums.emplace_back(hash_hex); - hardlinked_files_for_parts.emplace_back(hardlinked_files); } ReplicatedMergeTreeLogEntryData entry; @@ -7360,7 +7372,7 @@ void StorageReplicatedMergeTree::replacePartitionFrom( } for (size_t i = 0; i < dst_parts.size(); ++i) - lockSharedData(*dst_parts[i], false, hardlinked_files_for_parts[i]); + lockSharedData(*dst_parts[i], false, /*hardlinked_files*/ {}); Coordination::Error code = zookeeper->tryMulti(ops, op_results); if (code == Coordination::Error::ZOK) @@ -7501,7 +7513,6 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta String dest_alter_partition_version_path = dest_table_storage->zookeeper_path + "/alter_partition_version"; Coordination::Stat dest_alter_partition_version_stat; zookeeper->get(dest_alter_partition_version_path, &dest_alter_partition_version_stat); - std::vector hardlinked_files_for_parts; std::vector temporary_parts_locks; for (const auto & src_part : src_all_parts) @@ -7524,12 +7535,15 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta UInt64 index = lock->getNumber(); MergeTreePartInfo dst_part_info(partition_id, index, index, src_part->info.level); - MergeTreeData::HardlinkedFiles hardlinked_files; + HardlinkedFiles hardlinked_files; - bool copy_instead_of_hardlink = storage_settings_ptr->allow_remote_fs_zero_copy_replication - && src_part->isStoredOnRemoteDiskWithZeroCopySupport(); - - auto [dst_part, dst_part_lock] = dest_table_storage->cloneAndLoadDataPartOnSameDisk(src_part, TMP_PREFIX, dst_part_info, dest_metadata_snapshot, NO_TRANSACTION_PTR, &hardlinked_files, copy_instead_of_hardlink, {}); + bool zero_copy_enabled = storage_settings_ptr->allow_remote_fs_zero_copy_replication + || dynamic_cast(dest_table.get())->getSettings()->allow_remote_fs_zero_copy_replication; + IDataPartStorage::ClonePartParams clone_params + { + 
.copy_instead_of_hardlink = zero_copy_enabled && src_part->isStoredOnRemoteDiskWithZeroCopySupport() + }; + auto [dst_part, dst_part_lock] = dest_table_storage->cloneAndLoadDataPartOnSameDisk(src_part, TMP_PREFIX, dst_part_info, dest_metadata_snapshot, clone_params); src_parts.emplace_back(src_part); dst_parts.emplace_back(dst_part); @@ -7537,7 +7551,6 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta ephemeral_locks.emplace_back(std::move(*lock)); block_id_paths.emplace_back(block_id_path); part_checksums.emplace_back(hash_hex); - hardlinked_files_for_parts.emplace_back(hardlinked_files); } ReplicatedMergeTreeLogEntryData entry_delete; @@ -7606,7 +7619,7 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta dest_table_storage->renameTempPartAndReplaceUnlocked(part, transaction, dest_data_parts_lock); for (size_t i = 0; i < dst_parts.size(); ++i) - dest_table_storage->lockSharedData(*dst_parts[i], false, hardlinked_files_for_parts[i]); + dest_table_storage->lockSharedData(*dst_parts[i], false, /*hardlinked_files*/ {}); Coordination::Error code = zookeeper->tryMulti(ops, op_results); if (code == Coordination::Error::ZBADVERSION) From 9c2a9a60ea9a1f474495d74c399df5ffe5dddb45 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 28 Jun 2023 16:35:41 +0000 Subject: [PATCH 0928/1997] Update version_date.tsv and changelogs after v23.3.6.7-lts --- docs/changelogs/v23.3.6.7-lts.md | 19 +++++++++++++++++++ utils/list-versions/version_date.tsv | 1 + 2 files changed, 20 insertions(+) create mode 100644 docs/changelogs/v23.3.6.7-lts.md diff --git a/docs/changelogs/v23.3.6.7-lts.md b/docs/changelogs/v23.3.6.7-lts.md new file mode 100644 index 00000000000..387cc126aba --- /dev/null +++ b/docs/changelogs/v23.3.6.7-lts.md @@ -0,0 +1,19 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.3.6.7-lts (7e3f0a271b7) FIXME as compared to v23.3.5.9-lts (f5fbc2fd2b3) + +#### Improvement +* Backported in [#51240](https://github.com/ClickHouse/ClickHouse/issues/51240): Improve the progress bar for file/s3/hdfs/url table functions by using chunk size from source data and using incremental total size counting in each thread. Fix the progress bar for *Cluster functions. This closes [#47250](https://github.com/ClickHouse/ClickHouse/issues/47250). [#51088](https://github.com/ClickHouse/ClickHouse/pull/51088) ([Kruglov Pavel](https://github.com/Avogar)). + +#### Build/Testing/Packaging Improvement +* Backported in [#51529](https://github.com/ClickHouse/ClickHouse/issues/51529): Split huge `RUN` in Dockerfile into smaller conditional. Install the necessary tools on demand in the same `RUN` layer, and remove them after that. Upgrade the OS only once at the beginning. Use a modern way to check the signed repository. Downgrade the base repo to ubuntu:20.04 to address the issues on older docker versions. Upgrade golang version to address golang vulnerabilities. [#51504](https://github.com/ClickHouse/ClickHouse/pull/51504) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix type of LDAP server params hash in cache entry [#50865](https://github.com/ClickHouse/ClickHouse/pull/50865) ([Julian Maicher](https://github.com/jmaicher)). 
+ diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index dd21cc7e953..307ed97068f 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -5,6 +5,7 @@ v23.4.4.16-stable 2023-06-17 v23.4.3.48-stable 2023-06-12 v23.4.2.11-stable 2023-05-02 v23.4.1.1943-stable 2023-04-27 +v23.3.6.7-lts 2023-06-28 v23.3.5.9-lts 2023-06-22 v23.3.4.17-lts 2023-06-17 v23.3.3.52-lts 2023-06-12 From c9fad7b1410740d7ada64b65dfda5fefbe4a45ff Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 28 Jun 2023 18:40:48 +0200 Subject: [PATCH 0929/1997] Don't run 02782_uniq_exact_parallel_merging_bug in parallel with other tests --- .../0_stateless/02782_uniq_exact_parallel_merging_bug.sh | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/02782_uniq_exact_parallel_merging_bug.sh b/tests/queries/0_stateless/02782_uniq_exact_parallel_merging_bug.sh index d84ffd21b87..a7f71eacf0f 100755 --- a/tests/queries/0_stateless/02782_uniq_exact_parallel_merging_bug.sh +++ b/tests/queries/0_stateless/02782_uniq_exact_parallel_merging_bug.sh @@ -1,10 +1,8 @@ #!/usr/bin/env bash -# Tags: long, no-random-settings, no-tsan, no-asan, no-ubsan, no-msan +# Tags: long, no-random-settings, no-tsan, no-asan, no-ubsan, no-msan, no-parallel # shellcheck disable=SC2154 -unset CLICKHOUSE_LOG_COMMENT - CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh From 1d78bafa82886fed17a8f1eb84cd18122dc1ce6f Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 28 Jun 2023 18:26:33 +0200 Subject: [PATCH 0930/1997] Flexible drop cache --- .../IO/CachedOnDiskReadBufferFromFile.cpp | 13 ++-- src/Disks/IO/CachedOnDiskReadBufferFromFile.h | 2 +- src/Disks/IO/ReadBufferFromRemoteFSGather.cpp | 2 + src/Disks/getDiskConfigurationFromAST.cpp | 1 + src/Disks/getOrCreateDiskFromAST.cpp | 29 +++++--- src/Interpreters/Cache/FileCache.cpp | 24 ++++++- src/Interpreters/Cache/FileCache.h | 12 +++- src/Interpreters/Cache/FileCacheKey.cpp | 5 ++ src/Interpreters/Cache/FileCacheKey.h | 2 + src/Interpreters/Cache/Metadata.cpp | 19 +++++- src/Interpreters/Cache/Metadata.h | 3 + src/Interpreters/FilesystemCacheLog.cpp | 4 ++ src/Interpreters/FilesystemCacheLog.h | 2 + src/Interpreters/InterpreterSystemQuery.cpp | 13 +++- src/Parsers/ASTSystemQuery.cpp | 8 +++ src/Parsers/ASTSystemQuery.h | 2 + src/Parsers/ParserSetQuery.cpp | 4 +- src/Parsers/ParserSystemQuery.cpp | 8 +++ src/Parsers/isDiskFunction.cpp | 2 +- .../02808_filesystem_cache_drop_query.sh | 66 +++++++++++++++++++ 20 files changed, 196 insertions(+), 25 deletions(-) create mode 100755 tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp index 202f40bfdb2..b27a62e0e4b 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp @@ -74,19 +74,22 @@ CachedOnDiskReadBufferFromFile::CachedOnDiskReadBufferFromFile( } void CachedOnDiskReadBufferFromFile::appendFilesystemCacheLog( - const FileSegment::Range & file_segment_range, CachedOnDiskReadBufferFromFile::ReadType type) + const FileSegment & file_segment, CachedOnDiskReadBufferFromFile::ReadType type) { if (!cache_log) return; + const auto range = file_segment.range(); FilesystemCacheLogElement elem { .event_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()), .query_id = query_id, 
.source_file_path = source_file_path, - .file_segment_range = { file_segment_range.left, file_segment_range.right }, + .file_segment_range = { range.left, range.right }, .requested_range = { first_offset, read_until_position }, - .file_segment_size = file_segment_range.size(), + .file_segment_key = file_segment.key().toString(), + .file_segment_offset = file_segment.offset(), + .file_segment_size = range.size(), .read_from_cache_attempted = true, .read_buffer_id = current_buffer_id, .profile_counters = std::make_shared( @@ -495,7 +498,7 @@ bool CachedOnDiskReadBufferFromFile::completeFileSegmentAndGetNext() auto completed_range = current_file_segment->range(); if (cache_log) - appendFilesystemCacheLog(completed_range, read_type); + appendFilesystemCacheLog(*current_file_segment, read_type); chassert(file_offset_of_buffer_end > completed_range.right); @@ -521,7 +524,7 @@ CachedOnDiskReadBufferFromFile::~CachedOnDiskReadBufferFromFile() { if (cache_log && file_segments && !file_segments->empty()) { - appendFilesystemCacheLog(file_segments->front().range(), read_type); + appendFilesystemCacheLog(file_segments->front(), read_type); } } diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.h b/src/Disks/IO/CachedOnDiskReadBufferFromFile.h index b4e7701de75..36cf8a54183 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.h +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.h @@ -90,7 +90,7 @@ private: bool completeFileSegmentAndGetNext(); - void appendFilesystemCacheLog(const FileSegment::Range & file_segment_range, ReadType read_type); + void appendFilesystemCacheLog(const FileSegment & file_segment, ReadType read_type); bool writeCache(char * data, size_t size, size_t offset, FileSegment & file_segment); diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp index eb9c509e459..ee5934c01be 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp @@ -88,6 +88,8 @@ void ReadBufferFromRemoteFSGather::appendUncachedReadInfo() .source_file_path = current_object.remote_path, .file_segment_range = { 0, current_object.bytes_size }, .cache_type = FilesystemCacheLogElement::CacheType::READ_FROM_FS_BYPASSING_CACHE, + .file_segment_key = {}, + .file_segment_offset = {}, .file_segment_size = current_object.bytes_size, .read_from_cache_attempted = false, }; diff --git a/src/Disks/getDiskConfigurationFromAST.cpp b/src/Disks/getDiskConfigurationFromAST.cpp index 4b1323b4db8..89dda978f6a 100644 --- a/src/Disks/getDiskConfigurationFromAST.cpp +++ b/src/Disks/getDiskConfigurationFromAST.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Disks/getOrCreateDiskFromAST.cpp b/src/Disks/getOrCreateDiskFromAST.cpp index 637acff7b95..6c1b0a966b2 100644 --- a/src/Disks/getOrCreateDiskFromAST.cpp +++ b/src/Disks/getOrCreateDiskFromAST.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -9,7 +10,6 @@ #include #include #include -#include #include #include #include @@ -26,12 +26,20 @@ namespace { std::string getOrCreateDiskFromDiskAST(const ASTFunction & function, ContextPtr context) { - /// We need a unique name for a created custom disk, but it needs to be the same - /// after table is reattached or server is restarted, so take a hash of the disk - /// configuration serialized ast as a disk name suffix. 
- auto disk_setting_string = serializeAST(function, true); - auto disk_name = DiskSelector::TMP_INTERNAL_DISK_PREFIX - + toString(sipHash128(disk_setting_string.data(), disk_setting_string.size())); + std::string disk_name; + if (function.name == "disk") + { + /// We need a unique name for a created custom disk, but it needs to be the same + /// after table is reattached or server is restarted, so take a hash of the disk + /// configuration serialized ast as a disk name suffix. + auto disk_setting_string = serializeAST(function, true); + disk_name = DiskSelector::TMP_INTERNAL_DISK_PREFIX + + toString(sipHash128(disk_setting_string.data(), disk_setting_string.size())); + } + else + { + disk_name = function.name.substr(std::strlen("disk_")); + } auto result_disk = context->getOrCreateDisk(disk_name, [&](const DisksMap & disks_map) -> DiskPtr { const auto * function_args_expr = assert_cast(function.arguments.get()); @@ -43,6 +51,9 @@ namespace return disk; }); + if (!result_disk->isCustomDisk()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Disk with name `{}` already exist", disk_name); + if (!result_disk->isRemote()) { static constexpr auto custom_disks_base_dir_in_config = "custom_local_disks_base_directory"; @@ -91,8 +102,8 @@ namespace std::string getOrCreateDiskFromDiskAST(const ASTPtr & disk_function, ContextPtr context) { - if (!isDiskFunction(disk_function)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected a disk function"); + if (!disk_function->as()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected a function"); auto ast = disk_function->clone(); diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index eb5b59a447d..58690ac4cb5 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -807,6 +807,17 @@ bool FileCache::tryReserve(FileSegment & file_segment, const size_t size) return true; } +void FileCache::removeKey(const Key & key) +{ + assertInitialized(); + + auto locked_key = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::THROW); + if (!locked_key) + return; + + locked_key->removeAllReleasable(); +} + void FileCache::removeKeyIfExists(const Key & key) { assertInitialized(); @@ -822,6 +833,17 @@ void FileCache::removeKeyIfExists(const Key & key) locked_key->removeAllReleasable(); } +void FileCache::removeFileSegment(const Key & key, size_t offset) +{ + assertInitialized(); + + auto locked_key = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::RETURN_NULL); + if (!locked_key) + return; + + locked_key->removeFileSegment(offset); +} + void FileCache::removePathIfExists(const String & path) { removeKeyIfExists(createKeyForPath(path)); @@ -916,7 +938,7 @@ void FileCache::loadMetadata() continue; } - const auto key = Key(unhexUInt(key_directory.filename().string().data())); + const auto key = Key::fromKeyString(key_directory.filename().string()); auto locked_key = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::CREATE_EMPTY, /* is_initial_load */true); for (fs::directory_iterator offset_it{key_directory}; offset_it != fs::directory_iterator(); ++offset_it) diff --git a/src/Interpreters/Cache/FileCache.h b/src/Interpreters/Cache/FileCache.h index a93ef669898..d7fcbbe701e 100644 --- a/src/Interpreters/Cache/FileCache.h +++ b/src/Interpreters/Cache/FileCache.h @@ -83,13 +83,19 @@ public: FileSegmentsHolderPtr set(const Key & key, size_t offset, size_t size, const CreateFileSegmentSettings & settings); - /// Remove files by `key`. 
Removes files which might be used at the moment. + /// Remove file segment by `key` and `offset`. Throws if file segment does not exist. + void removeFileSegment(const Key & key, size_t offset); + + /// Remove files by `key`. Throws if key does not exist. + void removeKey(const Key & key); + + /// Remove files by `key`. void removeKeyIfExists(const Key & key); - /// Removes files by `path`. Removes files which might be used at the moment. + /// Removes files by `path`. void removePathIfExists(const String & path); - /// Remove files by `key`. Will not remove files which are used at the moment. + /// Remove files by `key`. void removeAllReleasable(); std::vector tryGetCachePaths(const Key & key); diff --git a/src/Interpreters/Cache/FileCacheKey.cpp b/src/Interpreters/Cache/FileCacheKey.cpp index f97cdc058aa..772fcd600bf 100644 --- a/src/Interpreters/Cache/FileCacheKey.cpp +++ b/src/Interpreters/Cache/FileCacheKey.cpp @@ -28,4 +28,9 @@ FileCacheKey FileCacheKey::random() return FileCacheKey(UUIDHelpers::generateV4().toUnderType()); } +FileCacheKey FileCacheKey::fromKeyString(const std::string & key_str) +{ + return FileCacheKey(unhexUInt(key_str.data())); +} + } diff --git a/src/Interpreters/Cache/FileCacheKey.h b/src/Interpreters/Cache/FileCacheKey.h index bab8359732c..e788cd5e7cd 100644 --- a/src/Interpreters/Cache/FileCacheKey.h +++ b/src/Interpreters/Cache/FileCacheKey.h @@ -21,6 +21,8 @@ struct FileCacheKey static FileCacheKey random(); bool operator==(const FileCacheKey & other) const { return key == other.key; } + + static FileCacheKey fromKeyString(const std::string & key_str); }; using FileCacheKeyAndOffset = std::pair; diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index 8c8524f7fa7..ce0207ce613 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -381,17 +381,32 @@ void LockedKey::removeAllReleasable() } } -KeyMetadata::iterator LockedKey::removeFileSegment(size_t offset, const FileSegmentGuard::Lock & segment_lock) +KeyMetadata::iterator LockedKey::removeFileSegment(size_t offset) { auto it = key_metadata->find(offset); if (it == key_metadata->end()) throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no offset {}", offset); auto file_segment = it->second->file_segment; + return removeFileSegmentImpl(it, file_segment->lock()); +} + +KeyMetadata::iterator LockedKey::removeFileSegment(size_t offset, const FileSegmentGuard::Lock & segment_lock) +{ + auto it = key_metadata->find(offset); + if (it == key_metadata->end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no offset {}", offset); + + return removeFileSegmentImpl(it, segment_lock); +} + +KeyMetadata::iterator LockedKey::removeFileSegmentImpl(KeyMetadata::iterator it, const FileSegmentGuard::Lock & segment_lock) +{ + auto file_segment = it->second->file_segment; LOG_DEBUG( key_metadata->log, "Remove from cache. 
Key: {}, offset: {}, size: {}", - getKey(), offset, file_segment->reserved_size); + getKey(), file_segment->offset(), file_segment->reserved_size); chassert(file_segment->assertCorrectnessUnlocked(segment_lock)); diff --git a/src/Interpreters/Cache/Metadata.h b/src/Interpreters/Cache/Metadata.h index 8ee40aa977f..9f2c5f278f9 100644 --- a/src/Interpreters/Cache/Metadata.h +++ b/src/Interpreters/Cache/Metadata.h @@ -159,6 +159,7 @@ struct LockedKey : private boost::noncopyable void removeAllReleasable(); KeyMetadata::iterator removeFileSegment(size_t offset, const FileSegmentGuard::Lock &); + KeyMetadata::iterator removeFileSegment(size_t offset); void shrinkFileSegmentToDownloadedSize(size_t offset, const FileSegmentGuard::Lock &); @@ -173,6 +174,8 @@ struct LockedKey : private boost::noncopyable std::string toString() const; private: + KeyMetadata::iterator removeFileSegmentImpl(KeyMetadata::iterator it, const FileSegmentGuard::Lock &); + const std::shared_ptr key_metadata; KeyGuard::Lock lock; /// `lock` must be destructed before `key_metadata`. }; diff --git a/src/Interpreters/FilesystemCacheLog.cpp b/src/Interpreters/FilesystemCacheLog.cpp index 17f0fda71ec..b660db064d1 100644 --- a/src/Interpreters/FilesystemCacheLog.cpp +++ b/src/Interpreters/FilesystemCacheLog.cpp @@ -40,6 +40,8 @@ NamesAndTypesList FilesystemCacheLogElement::getNamesAndTypes() {"source_file_path", std::make_shared()}, {"file_segment_range", std::make_shared(types)}, {"total_requested_range", std::make_shared(types)}, + {"key", std::make_shared()}, + {"offset", std::make_shared()}, {"size", std::make_shared()}, {"read_type", std::make_shared()}, {"read_from_cache_attempted", std::make_shared()}, @@ -60,6 +62,8 @@ void FilesystemCacheLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(source_file_path); columns[i++]->insert(Tuple{file_segment_range.first, file_segment_range.second}); columns[i++]->insert(Tuple{requested_range.first, requested_range.second}); + columns[i++]->insert(file_segment_key); + columns[i++]->insert(file_segment_offset); columns[i++]->insert(file_segment_size); columns[i++]->insert(typeToString(cache_type)); columns[i++]->insert(read_from_cache_attempted); diff --git a/src/Interpreters/FilesystemCacheLog.h b/src/Interpreters/FilesystemCacheLog.h index 1b22d561c51..d6dd00e5463 100644 --- a/src/Interpreters/FilesystemCacheLog.h +++ b/src/Interpreters/FilesystemCacheLog.h @@ -39,6 +39,8 @@ struct FilesystemCacheLogElement std::pair file_segment_range{}; std::pair requested_range{}; CacheType cache_type{}; + std::string file_segment_key; + size_t file_segment_offset; size_t file_segment_size; bool read_from_cache_attempted; String read_buffer_id; diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index f2d011b12d1..e1ff8676bc7 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -370,7 +370,18 @@ BlockIO InterpreterSystemQuery::execute() else { auto cache = FileCacheFactory::instance().getByName(query.filesystem_cache_name).cache; - cache->removeAllReleasable(); + if (query.delete_key.empty()) + { + cache->removeAllReleasable(); + } + else + { + auto key = FileCacheKey::fromKeyString(query.delete_key); + if (query.delete_offset.has_value()) + cache->removeFileSegment(key, query.delete_offset.value()); + else + cache->removeKey(key); + } } break; } diff --git a/src/Parsers/ASTSystemQuery.cpp b/src/Parsers/ASTSystemQuery.cpp index a91449ff035..9c5e7bff61e 100644 --- 
a/src/Parsers/ASTSystemQuery.cpp +++ b/src/Parsers/ASTSystemQuery.cpp @@ -210,7 +210,15 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &, else if (type == Type::DROP_FILESYSTEM_CACHE) { if (!filesystem_cache_name.empty()) + { settings.ostr << (settings.hilite ? hilite_none : "") << " " << filesystem_cache_name; + if (!delete_key.empty()) + { + settings.ostr << (settings.hilite ? hilite_none : "") << " KEY " << delete_key; + if (delete_offset.has_value()) + settings.ostr << (settings.hilite ? hilite_none : "") << " OFFSET " << delete_offset.value(); + } + } } else if (type == Type::UNFREEZE) { diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index ca4802d9a9b..ebc3e9cd430 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -107,6 +107,8 @@ public: UInt64 seconds{}; String filesystem_cache_name; + std::string delete_key; + std::optional delete_offset; String backup_name; diff --git a/src/Parsers/ParserSetQuery.cpp b/src/Parsers/ParserSetQuery.cpp index 4df74c2dd82..727d037112f 100644 --- a/src/Parsers/ParserSetQuery.cpp +++ b/src/Parsers/ParserSetQuery.cpp @@ -215,7 +215,7 @@ bool ParserSetQuery::parseNameValuePair(SettingChange & change, IParser::Pos & p else if (ParserKeyword("FALSE").ignore(pos, expected)) value = std::make_shared(Field(static_cast(0))); /// for SETTINGS disk=disk(type='s3', path='', ...) - else if (function_p.parse(pos, function_ast, expected) && function_ast->as()->name == "disk") + else if (function_p.parse(pos, function_ast, expected) && function_ast->as()->name.starts_with("disk")) { tryGetIdentifierNameInto(name, change.name); change.value = createFieldFromAST(function_ast); @@ -280,7 +280,7 @@ bool ParserSetQuery::parseNameValuePairWithParameterOrDefault( node = std::make_shared(Field(static_cast(1))); else if (ParserKeyword("FALSE").ignore(pos, expected)) node = std::make_shared(Field(static_cast(0))); - else if (function_p.parse(pos, function_ast, expected) && function_ast->as()->name == "disk") + else if (function_p.parse(pos, function_ast, expected) && function_ast->as()->name.starts_with("disk")) { change.name = name; change.value = createFieldFromAST(function_ast); diff --git a/src/Parsers/ParserSystemQuery.cpp b/src/Parsers/ParserSystemQuery.cpp index 48dbe60e241..ef71e994d56 100644 --- a/src/Parsers/ParserSystemQuery.cpp +++ b/src/Parsers/ParserSystemQuery.cpp @@ -405,7 +405,15 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & ParserLiteral path_parser; ASTPtr ast; if (path_parser.parse(pos, ast, expected)) + { res->filesystem_cache_name = ast->as()->value.safeGet(); + if (ParserKeyword{"KEY"}.ignore(pos, expected) && ParserIdentifier().parse(pos, ast, expected)) + { + res->delete_key = ast->as()->name(); + if (ParserKeyword{"OFFSET"}.ignore(pos, expected) && ParserLiteral().parse(pos, ast, expected)) + res->delete_offset = ast->as()->value.safeGet(); + } + } if (!parseQueryWithOnCluster(res, pos, expected)) return false; break; diff --git a/src/Parsers/isDiskFunction.cpp b/src/Parsers/isDiskFunction.cpp index e60229cb3f7..5ba626a8b2c 100644 --- a/src/Parsers/isDiskFunction.cpp +++ b/src/Parsers/isDiskFunction.cpp @@ -10,7 +10,7 @@ bool isDiskFunction(ASTPtr ast) return false; const auto * function = ast->as(); - return function && function->name == "disk" && function->arguments->as(); + return function && function->name.starts_with("disk") && function->arguments->as(); } } diff --git 
a/tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh b/tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh new file mode 100755 index 00000000000..6388bf5ee0c --- /dev/null +++ b/tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh @@ -0,0 +1,66 @@ +# Tags: no-fasttest, no-parallel, no-s3-storage, no-random-settings + +# set -x + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + + +disk_name="${CLICKHOUSE_TEST_UNIQUE_NAME}" +$CLICKHOUSE_CLIENT -nm --query """ +DROP TABLE IF EXISTS test; +CREATE TABLE test (a Int32, b String) +ENGINE = MergeTree() ORDER BY tuple() +SETTINGS disk = disk_$disk_name(type = cache, max_size = '100Ki', path = ${CLICKHOUSE_TEST_UNIQUE_NAME}, disk = s3disk); + +INSERT INTO test SELECT 1, 'test'; +""" + +query_id=$RANDOM + +$CLICKHOUSE_CLIENT --query_id "$query_id" --query "SELECT * FROM test FORMAT Null SETTINGS enable_filesystem_cache_log = 1" + +${CLICKHOUSE_CLIENT} -q " system flush logs" + +key=$($CLICKHOUSE_CLIENT -nm --query """ +SELECT key FROM system.filesystem_cache_log WHERE query_id = '$query_id' ORDER BY size DESC LIMIT 1; +""") + +offset=$($CLICKHOUSE_CLIENT -nm --query """ +SELECT offset FROM system.filesystem_cache_log WHERE query_id = '$query_id' ORDER BY size DESC LIMIT 1; +""") + +$CLICKHOUSE_CLIENT -nm --query """ +SELECT count() FROM system.filesystem_cache WHERE key = '$key' AND file_segment_range_begin = $offset; +""" + +$CLICKHOUSE_CLIENT -nm --query """ +SYSTEM DROP FILESYSTEM CACHE '$disk_name' KEY $key OFFSET $offset; +""" + +$CLICKHOUSE_CLIENT -nm --query """ +SELECT count() FROM system.filesystem_cache WHERE key = '$key' AND file_segment_range_begin = $offset; +""" + +query_id=$RANDOM$RANDOM + +$CLICKHOUSE_CLIENT --query_id "$query_id" --query "SELECT * FROM test FORMAT Null SETTINGS enable_filesystem_cache_log = 1" + +${CLICKHOUSE_CLIENT} -q " system flush logs" + +key=$($CLICKHOUSE_CLIENT -nm --query """ +SELECT key FROM system.filesystem_cache_log WHERE query_id = '$query_id' ORDER BY size DESC LIMIT 1; +""") + +$CLICKHOUSE_CLIENT -nm --query """ +SELECT count() FROM system.filesystem_cache WHERE key = '$key'; +""" + +$CLICKHOUSE_CLIENT -nm --query """ +SYSTEM DROP FILESYSTEM CACHE '$disk_name' KEY $key +""" + +$CLICKHOUSE_CLIENT -nm --query """ +SELECT count() FROM system.filesystem_cache WHERE key = '$key'; +""" From 372f5786c42fd3e96bbcafb80012c04fdaa96bbc Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 28 Jun 2023 18:44:54 +0200 Subject: [PATCH 0931/1997] more optimal REPLACE_RANGE with zero-copy --- src/Storages/StorageReplicatedMergeTree.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index ecc2537f6ad..b9d48fc75f3 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -2435,13 +2435,17 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) auto obtain_part = [&] (PartDescriptionPtr & part_desc) { - if (part_desc->src_table_part) + /// Fetches with zero-copy-replication are cheap, but cloneAndLoadDataPartOnSameDisk will do full copy. + /// It's okay to check the setting for current table and disk for the source table, because src and dst part are on the same disk. 
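// The condition introduced below can be read as a small predicate: prefer
// fetching from another replica whenever the fetch degenerates into a zero-copy
// metadata operation, since a local clone would pay for a full copy of the data.
// The struct fields are simplified stand-ins for the real part/settings state.

struct PartSource
{
    bool has_replica_to_fetch_from = false; // part_desc->replica is non-empty
    bool has_local_source_part = false;     // src_table_part is available
    bool on_zero_copy_remote_disk = false;  // isStoredOnRemoteDiskWithZeroCopySupport()
};

bool preferFetchOverClone(const PartSource & src, bool zero_copy_replication_enabled)
{
    return src.has_replica_to_fetch_from
        && zero_copy_replication_enabled
        && src.has_local_source_part
        && src.on_zero_copy_remote_disk;
}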
+ bool prefer_fetch_from_other_replica = !part_desc->replica.empty() && storage_settings_ptr->allow_remote_fs_zero_copy_replication + && part_desc->src_table_part && part_desc->src_table_part->isStoredOnRemoteDiskWithZeroCopySupport(); + + if (part_desc->src_table_part && !prefer_fetch_from_other_replica) { if (part_desc->checksum_hex != part_desc->src_table_part->checksums.getTotalChecksumHex()) throw Exception(ErrorCodes::UNFINISHED, "Checksums of {} is suddenly changed", part_desc->src_table_part->name); - bool zero_copy_enabled = storage_settings_ptr->allow_remote_fs_zero_copy_replication - || dynamic_cast(source_table.get())->getSettings()->allow_remote_fs_zero_copy_replication; + bool zero_copy_enabled = dynamic_cast(source_table.get())->getSettings()->allow_remote_fs_zero_copy_replication; IDataPartStorage::ClonePartParams clone_params { .copy_instead_of_hardlink = zero_copy_enabled && part_desc->src_table_part->isStoredOnRemoteDiskWithZeroCopySupport() From 56354b72514e79dfcd9a322c4cbf2aa2e81e6892 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Wed, 28 Jun 2023 16:55:22 +0000 Subject: [PATCH 0932/1997] Fix yet another place --- src/Dictionaries/HashedDictionary.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Dictionaries/HashedDictionary.cpp b/src/Dictionaries/HashedDictionary.cpp index 798f37cb516..5f25600db8f 100644 --- a/src/Dictionaries/HashedDictionary.cpp +++ b/src/Dictionaries/HashedDictionary.cpp @@ -236,6 +236,11 @@ HashedDictionary::~HashedDictionary() pool.trySchedule([&container, thread_group = CurrentThread::getGroup()] { + SCOPE_EXIT_SAFE( + if (thread_group) + CurrentThread::detachFromGroupIfNotDetached(); + ); + /// Do not account memory that was occupied by the dictionaries for the query/user context. MemoryTrackerBlockerInThread memory_blocker; From 39f8b92e24ea8e695583469dfc42d2c0238e6356 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 28 Jun 2023 19:05:35 +0200 Subject: [PATCH 0933/1997] Allow to add disk name for custom disk --- src/Disks/getOrCreateDiskFromAST.cpp | 23 ++++++++++---- src/Parsers/ParserSetQuery.cpp | 4 +-- src/Parsers/isDiskFunction.cpp | 2 +- ...stom_disk_with_user_defined_name.reference | 3 ++ ...2808_custom_disk_with_user_defined_name.sh | 31 +++++++++++++++++++ 5 files changed, 54 insertions(+), 9 deletions(-) create mode 100644 tests/queries/0_stateless/02808_custom_disk_with_user_defined_name.reference create mode 100755 tests/queries/0_stateless/02808_custom_disk_with_user_defined_name.sh diff --git a/src/Disks/getOrCreateDiskFromAST.cpp b/src/Disks/getOrCreateDiskFromAST.cpp index 637acff7b95..c5ec0f5d91b 100644 --- a/src/Disks/getOrCreateDiskFromAST.cpp +++ b/src/Disks/getOrCreateDiskFromAST.cpp @@ -26,12 +26,20 @@ namespace { std::string getOrCreateDiskFromDiskAST(const ASTFunction & function, ContextPtr context) { - /// We need a unique name for a created custom disk, but it needs to be the same - /// after table is reattached or server is restarted, so take a hash of the disk - /// configuration serialized ast as a disk name suffix. - auto disk_setting_string = serializeAST(function, true); - auto disk_name = DiskSelector::TMP_INTERNAL_DISK_PREFIX - + toString(sipHash128(disk_setting_string.data(), disk_setting_string.size())); + std::string disk_name; + if (function.name == "disk") + { + /// We need a unique name for a created custom disk, but it needs to be the same + /// after table is reattached or server is restarted, so take a hash of the disk + /// configuration serialized ast as a disk name suffix. 
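// The change below lets the disk function carry a user-defined name: it is
// recovered by stripping the "disk_" prefix from the function name, while the
// plain disk(...) form keeps the hash-based naming described above. A simplified
// resolver sketch; serialized_ast_hash is assumed precomputed, and the real code
// additionally rejects names colliding with disks predefined in the server config.

#include <cstring>
#include <string>

std::string resolveDiskName(const std::string & function_name,
                            const std::string & serialized_ast_hash)
{
    if (function_name == "disk")
        return "__tmp_internal_" + serialized_ast_hash; // config-derived, stable
    return function_name.substr(std::strlen("disk_"));  // "disk_mycache" -> "mycache"
}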
+ auto disk_setting_string = serializeAST(function, true); + disk_name = DiskSelector::TMP_INTERNAL_DISK_PREFIX + + toString(sipHash128(disk_setting_string.data(), disk_setting_string.size())); + } + else + { + disk_name = function.name.substr(std::strlen("disk_")); + } auto result_disk = context->getOrCreateDisk(disk_name, [&](const DisksMap & disks_map) -> DiskPtr { const auto * function_args_expr = assert_cast(function.arguments.get()); @@ -43,6 +51,9 @@ namespace return disk; }); + if (!result_disk->isCustomDisk()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Disk with name `{}` already exist", disk_name); + if (!result_disk->isRemote()) { static constexpr auto custom_disks_base_dir_in_config = "custom_local_disks_base_directory"; diff --git a/src/Parsers/ParserSetQuery.cpp b/src/Parsers/ParserSetQuery.cpp index 4df74c2dd82..727d037112f 100644 --- a/src/Parsers/ParserSetQuery.cpp +++ b/src/Parsers/ParserSetQuery.cpp @@ -215,7 +215,7 @@ bool ParserSetQuery::parseNameValuePair(SettingChange & change, IParser::Pos & p else if (ParserKeyword("FALSE").ignore(pos, expected)) value = std::make_shared(Field(static_cast(0))); /// for SETTINGS disk=disk(type='s3', path='', ...) - else if (function_p.parse(pos, function_ast, expected) && function_ast->as()->name == "disk") + else if (function_p.parse(pos, function_ast, expected) && function_ast->as()->name.starts_with("disk")) { tryGetIdentifierNameInto(name, change.name); change.value = createFieldFromAST(function_ast); @@ -280,7 +280,7 @@ bool ParserSetQuery::parseNameValuePairWithParameterOrDefault( node = std::make_shared(Field(static_cast(1))); else if (ParserKeyword("FALSE").ignore(pos, expected)) node = std::make_shared(Field(static_cast(0))); - else if (function_p.parse(pos, function_ast, expected) && function_ast->as()->name == "disk") + else if (function_p.parse(pos, function_ast, expected) && function_ast->as()->name.starts_with("disk")) { change.name = name; change.value = createFieldFromAST(function_ast); diff --git a/src/Parsers/isDiskFunction.cpp b/src/Parsers/isDiskFunction.cpp index e60229cb3f7..5ba626a8b2c 100644 --- a/src/Parsers/isDiskFunction.cpp +++ b/src/Parsers/isDiskFunction.cpp @@ -10,7 +10,7 @@ bool isDiskFunction(ASTPtr ast) return false; const auto * function = ast->as(); - return function && function->name == "disk" && function->arguments->as(); + return function && function->name.starts_with("disk") && function->arguments->as(); } } diff --git a/tests/queries/0_stateless/02808_custom_disk_with_user_defined_name.reference b/tests/queries/0_stateless/02808_custom_disk_with_user_defined_name.reference new file mode 100644 index 00000000000..713dde3527d --- /dev/null +++ b/tests/queries/0_stateless/02808_custom_disk_with_user_defined_name.reference @@ -0,0 +1,3 @@ +OK +0 +1 diff --git a/tests/queries/0_stateless/02808_custom_disk_with_user_defined_name.sh b/tests/queries/0_stateless/02808_custom_disk_with_user_defined_name.sh new file mode 100755 index 00000000000..99b9a0ed7b3 --- /dev/null +++ b/tests/queries/0_stateless/02808_custom_disk_with_user_defined_name.sh @@ -0,0 +1,31 @@ +# Tags: no-fasttest, no-parallel + +# set -x + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + + +$CLICKHOUSE_CLIENT -nm --query """ +DROP TABLE IF EXISTS test; +CREATE TABLE test (a Int32, b String) +ENGINE = MergeTree() ORDER BY tuple() +SETTINGS disk = disk_s3disk(type = cache, max_size = '100Ki', path = ${CLICKHOUSE_TEST_UNIQUE_NAME}, disk = s3disk); +""" 2>&1 | grep -q "Disk with name \`s3disk\` already exist" && echo 'OK' || echo 'FAIL' + +$CLICKHOUSE_CLIENT -nm --query """ +SELECT count() FROM system.disks WHERE name = '$disk_name' +""" + +disk_name="${CLICKHOUSE_TEST_UNIQUE_NAME}" +$CLICKHOUSE_CLIENT -nm --query """ +DROP TABLE IF EXISTS test; +CREATE TABLE test (a Int32, b String) +ENGINE = MergeTree() ORDER BY tuple() +SETTINGS disk = disk_$disk_name(type = cache, max_size = '100Ki', path = ${CLICKHOUSE_TEST_UNIQUE_NAME}, disk = s3disk); +""" + +$CLICKHOUSE_CLIENT -nm --query """ +SELECT count() FROM system.disks WHERE name = '$disk_name' +""" From 7f8ad3d5cbab240a5ef4d75b55f55478ceed22e0 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 28 Jun 2023 17:48:54 +0200 Subject: [PATCH 0934/1997] Convert assert to LOGICAL_ERROR in createBlockSelector() for zero weight Signed-off-by: Azat Khuzhin --- src/Interpreters/createBlockSelector.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/createBlockSelector.cpp b/src/Interpreters/createBlockSelector.cpp index 659fc483373..a8eb39e6c9d 100644 --- a/src/Interpreters/createBlockSelector.cpp +++ b/src/Interpreters/createBlockSelector.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include @@ -12,13 +13,19 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + template IColumn::Selector createBlockSelector( const IColumn & column, const std::vector & slots) { const auto total_weight = slots.size(); - assert(total_weight != 0); + if (total_weight == 0) + throw Exception(ErrorCodes::LOGICAL_ERROR, "weight is zero"); size_t num_rows = column.size(); IColumn::Selector selector(num_rows); From c9adfe1efd9aa0210185eecfbc9d446f4060077f Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 28 Jun 2023 17:53:14 +0200 Subject: [PATCH 0935/1997] Prohibit cluster with zero weight across all shards Before it leads to SIGSEGV, due to either divizion by zero or an a check in libdivide. Signed-off-by: Azat Khuzhin --- src/Interpreters/Cluster.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/Interpreters/Cluster.cpp b/src/Interpreters/Cluster.cpp index edbef77ef02..89bfb70f7c5 100644 --- a/src/Interpreters/Cluster.cpp +++ b/src/Interpreters/Cluster.cpp @@ -30,6 +30,7 @@ namespace ErrorCodes extern const int SYNTAX_ERROR; extern const int INVALID_SHARD_ID; extern const int NO_SUCH_REPLICA; + extern const int BAD_ARGUMENTS; } namespace @@ -614,6 +615,12 @@ Poco::Timespan Cluster::saturate(Poco::Timespan v, Poco::Timespan limit) void Cluster::initMisc() { + /// NOTE: It is possible to have cluster w/o shards for + /// optimize_skip_unused_shards (i.e. WHERE 0 expression), so check the + /// slots only if shards is not empty. 
+ if (!shards_info.empty() && slot_to_shard.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cluster with zero weight on all shards is prohibited"); + for (const auto & shard_info : shards_info) { if (!shard_info.isLocal() && !shard_info.hasRemoteConnections()) From 2a12fb42461f0916455a9efd8fd9b5ada4edca69 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 28 Jun 2023 17:57:53 +0200 Subject: [PATCH 0936/1997] Initialize weight/slot_to_shards for cluster not from xml correcty This is: - clusterAllReplicas - copier - some distributed cases Signed-off-by: Azat Khuzhin --- src/Interpreters/Cluster.cpp | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/Cluster.cpp b/src/Interpreters/Cluster.cpp index 89bfb70f7c5..891586d88b6 100644 --- a/src/Interpreters/Cluster.cpp +++ b/src/Interpreters/Cluster.cpp @@ -525,7 +525,7 @@ Cluster::Cluster( addresses_with_failover.emplace_back(current); - addShard(settings, std::move(current), params.treat_local_as_remote, current_shard_num); + addShard(settings, std::move(current), params.treat_local_as_remote, current_shard_num, /* insert_paths= */ {}, /* weight= */ 1); ++current_shard_num; } @@ -553,7 +553,7 @@ Cluster::Cluster( addresses_with_failover.emplace_back(current); - addShard(settings, std::move(current), params.treat_local_as_remote, current_shard_num); + addShard(settings, std::move(current), params.treat_local_as_remote, current_shard_num, /* insert_paths= */ {}, /* weight= */ 1); ++current_shard_num; } @@ -715,6 +715,7 @@ Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Setti ShardInfo info; info.shard_num = ++shard_num; + info.weight = 1; if (address.is_local) info.local_addresses.push_back(address); @@ -740,6 +741,8 @@ Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Setti info.per_replica_pools = {std::move(pool)}; addresses_with_failover.emplace_back(Addresses{address}); + + slot_to_shard.insert(std::end(slot_to_shard), info.weight, shards_info.size()); shards_info.emplace_back(std::move(info)); } }; @@ -769,7 +772,11 @@ Cluster::Cluster(Cluster::SubclusterTag, const Cluster & from, const std::vector { for (size_t index : indices) { - shards_info.emplace_back(from.shards_info.at(index)); + const auto & from_shard = from.shards_info.at(index); + + if (from_shard.weight) + slot_to_shard.insert(std::end(slot_to_shard), from_shard.weight, shards_info.size()); + shards_info.emplace_back(from_shard); if (!from.addresses_with_failover.empty()) addresses_with_failover.emplace_back(from.addresses_with_failover.at(index)); From 006d05c6a7aacc6f1c321822725389778b8c299c Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 28 Jun 2023 18:03:04 +0200 Subject: [PATCH 0937/1997] Add test for INSERT INTO clusterAllReplicas() (leads to SIGSEGV before) Signed-off-by: Azat Khuzhin --- .../0_stateless/02804_clusterAllReplicas_insert.reference | 1 + .../queries/0_stateless/02804_clusterAllReplicas_insert.sql | 5 +++++ 2 files changed, 6 insertions(+) create mode 100644 tests/queries/0_stateless/02804_clusterAllReplicas_insert.reference create mode 100644 tests/queries/0_stateless/02804_clusterAllReplicas_insert.sql diff --git a/tests/queries/0_stateless/02804_clusterAllReplicas_insert.reference b/tests/queries/0_stateless/02804_clusterAllReplicas_insert.reference new file mode 100644 index 00000000000..0cfbf08886f --- /dev/null +++ b/tests/queries/0_stateless/02804_clusterAllReplicas_insert.reference @@ -0,0 +1 @@ +2 diff --git 
a/tests/queries/0_stateless/02804_clusterAllReplicas_insert.sql b/tests/queries/0_stateless/02804_clusterAllReplicas_insert.sql new file mode 100644 index 00000000000..05bda19eb9e --- /dev/null +++ b/tests/queries/0_stateless/02804_clusterAllReplicas_insert.sql @@ -0,0 +1,5 @@ +drop table if exists data; +create table data (key Int) engine=Memory(); +-- NOTE: internal_replication is false, so INSERT will be done only into one shard +insert into function clusterAllReplicas(test_cluster_two_shards, currentDatabase(), data, rand()) values (2); +select * from data order by key; From eea3c39959876a78639d53d4a5f84354cc53135b Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 28 Jun 2023 10:49:46 +0000 Subject: [PATCH 0938/1997] Cosmetics --- .../functions/tuple-functions.md | 7 +- src/Common/assert_cast.h | 2 +- src/Functions/tupleElement.cpp | 140 +++++++----------- .../0_stateless/02116_tuple_element.sql | 22 +-- .../02354_tuple_element_with_default.sql | 10 +- 5 files changed, 73 insertions(+), 108 deletions(-) diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md index 1739920c9f0..7ed2deaeda6 100644 --- a/docs/en/sql-reference/functions/tuple-functions.md +++ b/docs/en/sql-reference/functions/tuple-functions.md @@ -22,14 +22,15 @@ tuple(x, y, …) A function that allows getting a column from a tuple. -If the second argument is a number `n`, it is the column index, starting from 1. If the second argument is a string `s`, it represents the name of the element. Besides, we can provide the third optional argument, such that when index out of bounds or element for such name does not exist, the default value returned instead of throw exception. The second and third arguments if provided are always must be constant. There is no cost to execute the function. +If the second argument is a number `index`, it is the column index, starting from 1. If the second argument is a string `name`, it represents the name of the element. Besides, we can provide the third optional argument, such that when index out of bounds or no element exist for the name, the default value returned instead of throwing an exception. The second and third arguments, if provided, must be constants. There is no cost to execute the function. -The function implements the operator `x.n` and `x.s`. +The function implements operators `x.index` and `x.name`. **Syntax** ``` sql -tupleElement(tuple, n/s [, default_value]) +tupleElement(tuple, index, [, default_value]) +tupleElement(tuple, name, [, default_value]) ``` ## untuple diff --git a/src/Common/assert_cast.h b/src/Common/assert_cast.h index 604cfaed6e2..0b73ba1cc12 100644 --- a/src/Common/assert_cast.h +++ b/src/Common/assert_cast.h @@ -23,7 +23,7 @@ namespace DB * The exact match of the type is checked. That is, cast to the ancestor will be unsuccessful. 
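* A short usage sketch of the exact-match rule stated above; getSomeColumn() is
* a hypothetical accessor that actually returns a ColumnUInt8:
*
*     const IColumn & col = getSomeColumn();
*     const auto & ok  = assert_cast<const ColumnUInt8 &>(col);  // exact type: fine
*     const auto & bad = assert_cast<const ColumnString &>(col); // debug build: throws
*
* In release builds assert_cast degrades to a plain static_cast, so the check
* costs nothing in production.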
*/ template -To assert_cast(From && from) +inline To assert_cast(From && from) { #ifndef NDEBUG try diff --git a/src/Functions/tupleElement.cpp b/src/Functions/tupleElement.cpp index b1fd200f5cd..fb8f1d3b48d 100644 --- a/src/Functions/tupleElement.cpp +++ b/src/Functions/tupleElement.cpp @@ -34,32 +34,14 @@ class FunctionTupleElement : public IFunction { public: static constexpr auto name = "tupleElement"; - static FunctionPtr create(ContextPtr) - { - return std::make_shared(); - } - - String getName() const override - { - return name; - } + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + String getName() const override { return name; } bool isVariadic() const override { return true; } - - size_t getNumberOfArguments() const override - { - return 0; - } - - bool useDefaultImplementationForConstants() const override - { - return true; - } - + size_t getNumberOfArguments() const override { return 0; } + bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } - bool useDefaultImplementationForNulls() const override { return false; } - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override @@ -72,107 +54,98 @@ public: getName(), number_of_arguments); size_t count_arrays = 0; - const IDataType * tuple_col = arguments[0].type.get(); - while (const DataTypeArray * array = checkAndGetDataType(tuple_col)) + const IDataType * input_type = arguments[0].type.get(); + while (const DataTypeArray * array = checkAndGetDataType(input_type)) { - tuple_col = array->getNestedType().get(); + input_type = array->getNestedType().get(); ++count_arrays; } - const DataTypeTuple * tuple = checkAndGetDataType(tuple_col); + const DataTypeTuple * tuple = checkAndGetDataType(input_type); if (!tuple) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be tuple or array of tuple. Actual {}", getName(), arguments[0].type->getName()); - auto index = getElementNum(arguments[1].column, *tuple, number_of_arguments); + std::optional index = getElementIndex(arguments[1].column, *tuple, number_of_arguments); if (index.has_value()) { - DataTypePtr out_return_type = tuple->getElements()[index.value()]; + DataTypePtr return_type = tuple->getElements()[index.value()]; for (; count_arrays; --count_arrays) - out_return_type = std::make_shared(out_return_type); + return_type = std::make_shared(return_type); - return out_return_type; + return return_type; } else { - const IDataType * default_col = arguments[2].type.get(); - size_t default_argument_count_arrays = 0; - if (const DataTypeArray * array = checkAndGetDataType(default_col)) - { - default_argument_count_arrays = array->getNumberOfDimensions(); - } + const IDataType * default_type = arguments[2].type.get(); + size_t default_count_arrays = 0; - if (count_arrays != default_argument_count_arrays) - { + if (const DataTypeArray * default_type_as_array = checkAndGetDataType(default_type)) + default_count_arrays = default_type_as_array->getNumberOfDimensions(); + + if (count_arrays != default_count_arrays) throw Exception(ErrorCodes::NUMBER_OF_DIMENSIONS_MISMATCHED, "Dimension of types mismatched between first argument and third argument. " "Dimension of 1st argument: {}. 
" - "Dimension of 3rd argument: {}.",count_arrays, default_argument_count_arrays); - } + "Dimension of 3rd argument: {}", count_arrays, default_count_arrays); + return arguments[2].type; } } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - Columns array_offsets; + const auto & input_arg = arguments[0]; + const IDataType * input_type = input_arg.type.get(); + const IColumn * input_col = input_arg.column.get(); - const auto & first_arg = arguments[0]; - - const IDataType * tuple_type = first_arg.type.get(); - const IColumn * tuple_col = first_arg.column.get(); - bool first_arg_is_const = false; - if (typeid_cast(tuple_col)) + bool input_arg_is_const = false; + if (typeid_cast(input_col)) { - tuple_col = assert_cast(tuple_col)->getDataColumnPtr().get(); - first_arg_is_const = true; + input_col = assert_cast(input_col)->getDataColumnPtr().get(); + input_arg_is_const = true; } - while (const DataTypeArray * array_type = checkAndGetDataType(tuple_type)) - { - const ColumnArray * array_col = assert_cast(tuple_col); - tuple_type = array_type->getNestedType().get(); - tuple_col = &array_col->getData(); + Columns array_offsets; + while (const DataTypeArray * array_type = checkAndGetDataType(input_type)) + { + const ColumnArray * array_col = assert_cast(input_col); + + input_type = array_type->getNestedType().get(); + input_col = &array_col->getData(); array_offsets.push_back(array_col->getOffsetsPtr()); } - const DataTypeTuple * tuple_type_concrete = checkAndGetDataType(tuple_type); - const ColumnTuple * tuple_col_concrete = checkAndGetColumn(tuple_col); - if (!tuple_type_concrete || !tuple_col_concrete) + const DataTypeTuple * input_type_as_tuple = checkAndGetDataType(input_type); + const ColumnTuple * input_col_as_tuple = checkAndGetColumn(input_col); + if (!input_type_as_tuple || !input_col_as_tuple) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "First argument for function {} must be tuple or array of tuple. Actual {}", - getName(), - first_arg.type->getName()); + "First argument for function {} must be tuple or array of tuple. 
Actual {}", getName(), input_arg.type->getName()); - auto index = getElementNum(arguments[1].column, *tuple_type_concrete, arguments.size()); + std::optional index = getElementIndex(arguments[1].column, *input_type_as_tuple, arguments.size()); if (!index.has_value()) { if (!array_offsets.empty()) - { recursiveCheckArrayOffsets(arguments[0].column, arguments[2].column, array_offsets.size()); - } return arguments[2].column; } - ColumnPtr res = tuple_col_concrete->getColumns()[index.value()]; + ColumnPtr res = input_col_as_tuple->getColumns()[index.value()]; /// Wrap into Arrays for (auto it = array_offsets.rbegin(); it != array_offsets.rend(); ++it) res = ColumnArray::create(res, *it); - if (first_arg_is_const) - { + if (input_arg_is_const) res = ColumnConst::create(res, input_rows_count); - } return res; } private: - void recursiveCheckArrayOffsets(ColumnPtr col_x, ColumnPtr col_y, size_t depth) const { for (size_t i = 1; i < depth; ++i) @@ -187,22 +160,16 @@ private: void checkArrayOffsets(ColumnPtr col_x, ColumnPtr col_y) const { if (isColumnConst(*col_x)) - { checkArrayOffsetsWithFirstArgConst(col_x, col_y); - } else if (isColumnConst(*col_y)) - { checkArrayOffsetsWithFirstArgConst(col_y, col_x); - } else { const auto & array_x = *assert_cast(col_x.get()); const auto & array_y = *assert_cast(col_y.get()); if (!array_x.hasEqualOffsets(array_y)) - { throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "The argument 1 and argument 3 of function {} have different array sizes", getName()); - } } } @@ -220,23 +187,21 @@ private: size_t row_size = offsets_y.size(); for (size_t row = 0; row < row_size; ++row) { - if (unlikely(offsets_x[0] != offsets_y[row] - prev_offset)) - { + if (offsets_x[0] != offsets_y[row] - prev_offset) throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "The argument 1 and argument 3 of function {} have different array sizes", getName()); - } prev_offset = offsets_y[row]; } } - std::optional getElementNum(const ColumnPtr & index_column, const DataTypeTuple & tuple, const size_t argument_size) const + std::optional getElementIndex(const ColumnPtr & index_column, const DataTypeTuple & tuple, size_t argument_size) const { if (checkAndGetColumnConst(index_column.get()) || checkAndGetColumnConst(index_column.get()) || checkAndGetColumnConst(index_column.get()) || checkAndGetColumnConst(index_column.get())) { - size_t index = index_column->getUInt(0); + const size_t index = index_column->getUInt(0); if (index == 0) throw Exception(ErrorCodes::ILLEGAL_INDEX, "Indices in tuples are 1-based."); @@ -244,21 +209,20 @@ private: if (index > tuple.getElements().size()) throw Exception(ErrorCodes::ILLEGAL_INDEX, "Index for tuple element is out of range."); - return std::optional(index - 1); + return {index - 1}; } else if (const auto * name_col = checkAndGetColumnConst(index_column.get())) { - auto index = tuple.tryGetPositionByName(name_col->getValue()); - if (index.has_value()) - { - return index; - } + std::optional index = tuple.tryGetPositionByName(name_col->getValue()); - if (argument_size == 2) + if (index.has_value()) + return index; + else { - throw Exception(ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK, "Tuple doesn't have element with name '{}'", name_col->getValue()); + if (argument_size == 2) + throw Exception(ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK, "Tuple doesn't have element with name '{}'", name_col->getValue()); + return std::nullopt; } - return std::nullopt; } else throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, diff --git 
a/tests/queries/0_stateless/02116_tuple_element.sql b/tests/queries/0_stateless/02116_tuple_element.sql index c911712684d..bedfedd7c2d 100644 --- a/tests/queries/0_stateless/02116_tuple_element.sql +++ b/tests/queries/0_stateless/02116_tuple_element.sql @@ -14,12 +14,12 @@ EXPLAIN SYNTAX SELECT tupleElement(t1, 2) FROM t_tuple_element; SELECT tupleElement(t1, 'a') FROM t_tuple_element; EXPLAIN SYNTAX SELECT tupleElement(t1, 'a') FROM t_tuple_element; -SELECT tupleElement(number, 1) FROM numbers(1); -- { serverError 43 } -SELECT tupleElement(t1) FROM t_tuple_element; -- { serverError 42 } -SELECT tupleElement(t1, 'b') FROM t_tuple_element; -- { serverError 10, 47 } -SELECT tupleElement(t1, 0) FROM t_tuple_element; -- { serverError 127 } -SELECT tupleElement(t1, 3) FROM t_tuple_element; -- { serverError 127 } -SELECT tupleElement(t1, materialize('a')) FROM t_tuple_element; -- { serverError 43 } +SELECT tupleElement(number, 1) FROM numbers(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT tupleElement(t1) FROM t_tuple_element; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT tupleElement(t1, 'b') FROM t_tuple_element; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK, UNKNOWN_IDENTIFIER } +SELECT tupleElement(t1, 0) FROM t_tuple_element; -- { serverError ILLEGAL_INDEX } +SELECT tupleElement(t1, 3) FROM t_tuple_element; -- { serverError ILLEGAL_INDEX } +SELECT tupleElement(t1, materialize('a')) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT t2.1 FROM t_tuple_element; EXPLAIN SYNTAX SELECT t2.1 FROM t_tuple_element; @@ -27,11 +27,11 @@ EXPLAIN SYNTAX SELECT t2.1 FROM t_tuple_element; SELECT tupleElement(t2, 1) FROM t_tuple_element; EXPLAIN SYNTAX SELECT tupleElement(t2, 1) FROM t_tuple_element; -SELECT tupleElement(t2) FROM t_tuple_element; -- { serverError 42 } -SELECT tupleElement(t2, 'a') FROM t_tuple_element; -- { serverError 10, 47 } -SELECT tupleElement(t2, 0) FROM t_tuple_element; -- { serverError 127 } -SELECT tupleElement(t2, 3) FROM t_tuple_element; -- { serverError 127 } -SELECT tupleElement(t2, materialize(1)) FROM t_tuple_element; -- { serverError 43 } +SELECT tupleElement(t2) FROM t_tuple_element; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT tupleElement(t2, 'a') FROM t_tuple_element; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK, UNKNOWN_IDENTIFIER } +SELECT tupleElement(t2, 0) FROM t_tuple_element; -- { serverError ILLEGAL_INDEX } +SELECT tupleElement(t2, 3) FROM t_tuple_element; -- { serverError ILLEGAL_INDEX } +SELECT tupleElement(t2, materialize(1)) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } DROP TABLE t_tuple_element; diff --git a/tests/queries/0_stateless/02354_tuple_element_with_default.sql b/tests/queries/0_stateless/02354_tuple_element_with_default.sql index 908a869885b..ba1388cfa57 100644 --- a/tests/queries/0_stateless/02354_tuple_element_with_default.sql +++ b/tests/queries/0_stateless/02354_tuple_element_with_default.sql @@ -10,16 +10,16 @@ EXPLAIN SYNTAX SELECT tupleElement(t1, 'z', 0) FROM t_tuple_element_default; SELECT tupleElement(t2, 'z', 'z') FROM t_tuple_element_default; EXPLAIN SYNTAX SELECT tupleElement(t2, 'z', 'z') FROM t_tuple_element_default; -SELECT tupleElement(t1, 3, 'z') FROM t_tuple_element_default; -- { serverError 127 } -SELECT tupleElement(t1, 0, 'z') FROM t_tuple_element_default; -- { serverError 127 } +SELECT tupleElement(t1, 3, 'z') FROM t_tuple_element_default; -- { serverError ILLEGAL_INDEX } +SELECT tupleElement(t1, 0, 'z') FROM t_tuple_element_default; -- { serverError 
ILLEGAL_INDEX } DROP TABLE t_tuple_element_default; SELECT '--------------------'; -SELECT tupleElement(array(tuple(1, 2)), 'a', 0); -- { serverError 645 } -SELECT tupleElement(array(tuple(1, 2)), 'a', array(tuple(1, 2), tuple(3, 4))); -- { serverError 190 } -SELECT tupleElement(array(array(tuple(1))), 'a', array(array(1, 2, 3))); -- { serverError 190 } +SELECT tupleElement(array(tuple(1, 2)), 'a', 0); -- { serverError NUMBER_OF_DIMENSIONS_MISMATCHED } +SELECT tupleElement(array(tuple(1, 2)), 'a', array(tuple(1, 2), tuple(3, 4))); -- { serverError SIZES_OF_ARRAYS_DONT_MATCH } +SELECT tupleElement(array(array(tuple(1))), 'a', array(array(1, 2, 3))); -- { serverError SIZES_OF_ARRAYS_DONT_MATCH } SELECT tupleElement(array(tuple(1, 2)), 'a', array(tuple(3, 4))); EXPLAIN SYNTAX SELECT tupleElement(array(tuple(1, 2)), 'a', array(tuple(3, 4))); From bf54fb4caad774bb1efe1fd39e7b7abaef1a9869 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 28 Jun 2023 11:06:41 +0000 Subject: [PATCH 0939/1997] tupleElement(): Return default value for out-of-bounds-index Makes the actual and the documented behavior consistent. --- src/Functions/tupleElement.cpp | 14 ++++++++------ .../02354_tuple_element_with_default.reference | 2 ++ .../02354_tuple_element_with_default.sql | 4 ++-- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/Functions/tupleElement.cpp b/src/Functions/tupleElement.cpp index fb8f1d3b48d..8689a095809 100644 --- a/src/Functions/tupleElement.cpp +++ b/src/Functions/tupleElement.cpp @@ -203,13 +203,15 @@ private: { const size_t index = index_column->getUInt(0); - if (index == 0) - throw Exception(ErrorCodes::ILLEGAL_INDEX, "Indices in tuples are 1-based."); + if (index > 0 && index <= tuple.getElements().size()) + return {index - 1}; + else + { + if (argument_size == 2) + throw Exception(ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK, "Tuple doesn't have element with index '{}'", index); + return std::nullopt; + } - if (index > tuple.getElements().size()) - throw Exception(ErrorCodes::ILLEGAL_INDEX, "Index for tuple element is out of range."); - - return {index - 1}; } else if (const auto * name_col = checkAndGetColumnConst(index_column.get())) { diff --git a/tests/queries/0_stateless/02354_tuple_element_with_default.reference b/tests/queries/0_stateless/02354_tuple_element_with_default.reference index d5dfff17ef1..08a1f60e163 100644 --- a/tests/queries/0_stateless/02354_tuple_element_with_default.reference +++ b/tests/queries/0_stateless/02354_tuple_element_with_default.reference @@ -7,6 +7,8 @@ FROM t_tuple_element_default z SELECT tupleElement(t2, \'z\', \'z\') FROM t_tuple_element_default +z +z -------------------- [(3,4)] SELECT tupleElement([(1, 2)], \'a\', [(3, 4)]) diff --git a/tests/queries/0_stateless/02354_tuple_element_with_default.sql b/tests/queries/0_stateless/02354_tuple_element_with_default.sql index ba1388cfa57..de281c0a868 100644 --- a/tests/queries/0_stateless/02354_tuple_element_with_default.sql +++ b/tests/queries/0_stateless/02354_tuple_element_with_default.sql @@ -10,8 +10,8 @@ EXPLAIN SYNTAX SELECT tupleElement(t1, 'z', 0) FROM t_tuple_element_default; SELECT tupleElement(t2, 'z', 'z') FROM t_tuple_element_default; EXPLAIN SYNTAX SELECT tupleElement(t2, 'z', 'z') FROM t_tuple_element_default; -SELECT tupleElement(t1, 3, 'z') FROM t_tuple_element_default; -- { serverError ILLEGAL_INDEX } -SELECT tupleElement(t1, 0, 'z') FROM t_tuple_element_default; -- { serverError ILLEGAL_INDEX } +SELECT tupleElement(t1, 3, 'z') FROM t_tuple_element_default; +SELECT 
tupleElement(t1, 0, 'z') FROM t_tuple_element_default; DROP TABLE t_tuple_element_default; From 5b78b3903b468575dc36af84bbb90f673d50a7c0 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 28 Jun 2023 12:26:34 +0000 Subject: [PATCH 0940/1997] Fix logical error in tupleElement() --- src/Functions/tupleElement.cpp | 69 ------------------- .../02286_tuple_numeric_identifier.sql | 4 +- ...02354_tuple_element_with_default.reference | 43 ++++-------- .../02354_tuple_element_with_default.sql | 61 +++++----------- 4 files changed, 34 insertions(+), 143 deletions(-) diff --git a/src/Functions/tupleElement.cpp b/src/Functions/tupleElement.cpp index 8689a095809..96b5a047419 100644 --- a/src/Functions/tupleElement.cpp +++ b/src/Functions/tupleElement.cpp @@ -17,11 +17,8 @@ namespace DB namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int ILLEGAL_INDEX; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int NOT_FOUND_COLUMN_IN_BLOCK; - extern const int NUMBER_OF_DIMENSIONS_MISMATCHED; - extern const int SIZES_OF_ARRAYS_DONT_MATCH; } namespace @@ -79,21 +76,7 @@ public: return return_type; } else - { - const IDataType * default_type = arguments[2].type.get(); - size_t default_count_arrays = 0; - - if (const DataTypeArray * default_type_as_array = checkAndGetDataType(default_type)) - default_count_arrays = default_type_as_array->getNumberOfDimensions(); - - if (count_arrays != default_count_arrays) - throw Exception(ErrorCodes::NUMBER_OF_DIMENSIONS_MISMATCHED, - "Dimension of types mismatched between first argument and third argument. " - "Dimension of 1st argument: {}. " - "Dimension of 3rd argument: {}", count_arrays, default_count_arrays); - return arguments[2].type; - } } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override @@ -128,11 +111,7 @@ public: std::optional index = getElementIndex(arguments[1].column, *input_type_as_tuple, arguments.size()); if (!index.has_value()) - { - if (!array_offsets.empty()) - recursiveCheckArrayOffsets(arguments[0].column, arguments[2].column, array_offsets.size()); return arguments[2].column; - } ColumnPtr res = input_col_as_tuple->getColumns()[index.value()]; @@ -146,54 +125,6 @@ public: } private: - void recursiveCheckArrayOffsets(ColumnPtr col_x, ColumnPtr col_y, size_t depth) const - { - for (size_t i = 1; i < depth; ++i) - { - checkArrayOffsets(col_x, col_y); - col_x = assert_cast(col_x.get())->getDataPtr(); - col_y = assert_cast(col_y.get())->getDataPtr(); - } - checkArrayOffsets(col_x, col_y); - } - - void checkArrayOffsets(ColumnPtr col_x, ColumnPtr col_y) const - { - if (isColumnConst(*col_x)) - checkArrayOffsetsWithFirstArgConst(col_x, col_y); - else if (isColumnConst(*col_y)) - checkArrayOffsetsWithFirstArgConst(col_y, col_x); - else - { - const auto & array_x = *assert_cast(col_x.get()); - const auto & array_y = *assert_cast(col_y.get()); - if (!array_x.hasEqualOffsets(array_y)) - throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, - "The argument 1 and argument 3 of function {} have different array sizes", getName()); - } - } - - void checkArrayOffsetsWithFirstArgConst(ColumnPtr col_x, ColumnPtr col_y) const - { - col_x = assert_cast(col_x.get())->getDataColumnPtr(); - col_y = col_y->convertToFullColumnIfConst(); - const auto & array_x = *assert_cast(col_x.get()); - const auto & array_y = *assert_cast(col_y.get()); - - const auto & offsets_x = array_x.getOffsets(); - const auto & offsets_y = array_y.getOffsets(); - - 
ColumnArray::Offset prev_offset = 0; - size_t row_size = offsets_y.size(); - for (size_t row = 0; row < row_size; ++row) - { - if (offsets_x[0] != offsets_y[row] - prev_offset) - throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, - "The argument 1 and argument 3 of function {} have different array sizes", getName()); - prev_offset = offsets_y[row]; - } - } - std::optional getElementIndex(const ColumnPtr & index_column, const DataTypeTuple & tuple, size_t argument_size) const { if (checkAndGetColumnConst(index_column.get()) diff --git a/tests/queries/0_stateless/02286_tuple_numeric_identifier.sql b/tests/queries/0_stateless/02286_tuple_numeric_identifier.sql index a5fd8e57ad5..f723284ad61 100644 --- a/tests/queries/0_stateless/02286_tuple_numeric_identifier.sql +++ b/tests/queries/0_stateless/02286_tuple_numeric_identifier.sql @@ -12,8 +12,8 @@ SELECT * FROM t_tuple_numeric FORMAT JSONEachRow; SELECT `t`.`1`.`2`, `t`.`1`.`3`, `t`.`4` FROM t_tuple_numeric; SELECT t.1.1, t.1.2, t.2 FROM t_tuple_numeric; -SELECT t.1.3 FROM t_tuple_numeric; -- {serverError ILLEGAL_INDEX} -SELECT t.4 FROM t_tuple_numeric; -- {serverError ILLEGAL_INDEX} +SELECT t.1.3 FROM t_tuple_numeric; -- {serverError NOT_FOUND_COLUMN_IN_BLOCK} +SELECT t.4 FROM t_tuple_numeric; -- {serverError NOT_FOUND_COLUMN_IN_BLOCK} SELECT `t`.`1`.`1`, `t`.`1`.`2`, `t`.`2` FROM t_tuple_numeric; -- {serverError UNKNOWN_IDENTIFIER} DROP TABLE t_tuple_numeric; diff --git a/tests/queries/0_stateless/02354_tuple_element_with_default.reference b/tests/queries/0_stateless/02354_tuple_element_with_default.reference index 08a1f60e163..499b4c36a10 100644 --- a/tests/queries/0_stateless/02354_tuple_element_with_default.reference +++ b/tests/queries/0_stateless/02354_tuple_element_with_default.reference @@ -1,28 +1,15 @@ -z -SELECT tupleElement(t1, \'z\', \'z\') -FROM t_tuple_element_default -0 -SELECT tupleElement(t1, \'z\', 0) -FROM t_tuple_element_default -z -SELECT tupleElement(t2, \'z\', \'z\') -FROM t_tuple_element_default -z -z --------------------- -[(3,4)] -SELECT tupleElement([(1, 2)], \'a\', [(3, 4)]) --------------------- -SELECT tupleElement(t1, \'a\', [tuple(1)]) -FROM t_tuple_element_default --------------------- -[(0)] -SELECT tupleElement(t1, \'a\', [tuple(0)]) -FROM t_tuple_element_default -[0] -SELECT tupleElement(t1, \'a\', [0]) -FROM t_tuple_element_default -[0] -[0] -SELECT tupleElement(t1, \'a\', [0]) -FROM t_tuple_element_default +hello +world +default +default +[(['a'],1)] +[1,3] +[2,4] +default +-------- +hello +world +default +default +[(['a'],1)] +[[1,2,3]] diff --git a/tests/queries/0_stateless/02354_tuple_element_with_default.sql b/tests/queries/0_stateless/02354_tuple_element_with_default.sql index de281c0a868..89320f4d210 100644 --- a/tests/queries/0_stateless/02354_tuple_element_with_default.sql +++ b/tests/queries/0_stateless/02354_tuple_element_with_default.sql @@ -1,50 +1,23 @@ -DROP TABLE IF EXISTS t_tuple_element_default; +-- const tuple argument -CREATE TABLE t_tuple_element_default(t1 Tuple(a UInt32, s String), t2 Tuple(UInt32, String)) ENGINE = Memory; -INSERT INTO t_tuple_element_default VALUES ((1, 'a'), (2, 'b')); +SELECT tupleElement(('hello', 'world'), 1, 'default'); +SELECT tupleElement(('hello', 'world'), 2, 'default'); +SELECT tupleElement(('hello', 'world'), 3, 'default'); +SELECT tupleElement(('hello', 'world'), 'xyz', 'default'); +SELECT tupleElement(('hello', 'world'), 3, [([('a')], 1)]); -- arbitrary default value -SELECT tupleElement(t1, 'z', 'z') FROM t_tuple_element_default; -EXPLAIN SYNTAX 
SELECT tupleElement(t1, 'z', 'z') FROM t_tuple_element_default; -SELECT tupleElement(t1, 'z', 0) FROM t_tuple_element_default; -EXPLAIN SYNTAX SELECT tupleElement(t1, 'z', 0) FROM t_tuple_element_default; -SELECT tupleElement(t2, 'z', 'z') FROM t_tuple_element_default; -EXPLAIN SYNTAX SELECT tupleElement(t2, 'z', 'z') FROM t_tuple_element_default; +SELECT tupleElement([(1, 2), (3, 4)], 1, 'default'); +SELECT tupleElement([(1, 2), (3, 4)], 2, 'default'); +SELECT tupleElement([(1, 2), (3, 4)], 3, 'default'); -SELECT tupleElement(t1, 3, 'z') FROM t_tuple_element_default; -SELECT tupleElement(t1, 0, 'z') FROM t_tuple_element_default; +SELECT '--------'; -DROP TABLE t_tuple_element_default; +-- non-const tuple argument -SELECT '--------------------'; - -SELECT tupleElement(array(tuple(1, 2)), 'a', 0); -- { serverError NUMBER_OF_DIMENSIONS_MISMATCHED } -SELECT tupleElement(array(tuple(1, 2)), 'a', array(tuple(1, 2), tuple(3, 4))); -- { serverError SIZES_OF_ARRAYS_DONT_MATCH } -SELECT tupleElement(array(array(tuple(1))), 'a', array(array(1, 2, 3))); -- { serverError SIZES_OF_ARRAYS_DONT_MATCH } - -SELECT tupleElement(array(tuple(1, 2)), 'a', array(tuple(3, 4))); -EXPLAIN SYNTAX SELECT tupleElement(array(tuple(1, 2)), 'a', array(tuple(3, 4))); - -SELECT '--------------------'; - -CREATE TABLE t_tuple_element_default(t1 Array(Tuple(UInt32)), t2 UInt32) ENGINE = Memory; - -SELECT tupleElement(t1, 'a', array(tuple(1))) FROM t_tuple_element_default; -EXPLAIN SYNTAX SELECT tupleElement(t1, 'a', array(tuple(1))) FROM t_tuple_element_default; - -SELECT '--------------------'; - -INSERT INTO t_tuple_element_default VALUES ([(1)], 100); - -SELECT tupleElement(t1, 'a', array(tuple(0))) FROM t_tuple_element_default; -EXPLAIN SYNTAX SELECT tupleElement(t1, 'a', array(tuple(0))) FROM t_tuple_element_default; - -SELECT tupleElement(t1, 'a', array(0)) FROM t_tuple_element_default; -EXPLAIN SYNTAX SELECT tupleElement(t1, 'a', array(0)) FROM t_tuple_element_default; - -INSERT INTO t_tuple_element_default VALUES ([(2)], 200); - -SELECT tupleElement(t1, 'a', array(0)) FROM t_tuple_element_default; -EXPLAIN SYNTAX SELECT tupleElement(t1, 'a', array(0)) FROM t_tuple_element_default; - -DROP TABLE t_tuple_element_default; +SELECT tupleElement(materialize(('hello', 'world')), 1, 'default'); +SELECT tupleElement(materialize(('hello', 'world')), 2, 'default'); +SELECT tupleElement(materialize(('hello', 'world')), 3, 'default'); +SELECT tupleElement(materialize(('hello', 'world')), 'xzy', 'default'); +SELECT tupleElement(materialize(('hello', 'world')), 'xzy', [([('a')], 1)]); -- arbitrary default value +SELECT tupleElement([[(count('2147483646'), 1)]], 'aaaa', [[1, 2, 3]]) -- bug #51525 From 53a30bee8d66c06f69f53b8850a11b8fbde2e1d4 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 28 Jun 2023 19:16:10 +0000 Subject: [PATCH 0941/1997] 00900_orc_load: kill less aggressively The test sporadically produces wrong results because the INSERT takes longer than 3 seconds, likely due to infrastructure latency. Removing the timeout to give it more headroom. Tests are afaik auto-killed after 10 min, so if there is a true issue in ClickHouse (e.g. deadlock), we would notice anyways. 
--- tests/queries/0_stateless/00900_orc_load.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00900_orc_load.sh b/tests/queries/0_stateless/00900_orc_load.sh index 62149fa554e..5dc5dfb87a7 100755 --- a/tests/queries/0_stateless/00900_orc_load.sh +++ b/tests/queries/0_stateless/00900_orc_load.sh @@ -12,6 +12,6 @@ ${CLICKHOUSE_CLIENT} --query="select * from orc_load FORMAT ORC" > "${CLICKHOUSE ${CLICKHOUSE_CLIENT} --query="truncate table orc_load" cat "${CLICKHOUSE_TMP}"/test.orc | ${CLICKHOUSE_CLIENT} -q "insert into orc_load format ORC" -timeout 3 ${CLICKHOUSE_CLIENT} -q "insert into orc_load format ORC" < "${CLICKHOUSE_TMP}"/test.orc +${CLICKHOUSE_CLIENT} -q "insert into orc_load format ORC" < "${CLICKHOUSE_TMP}"/test.orc ${CLICKHOUSE_CLIENT} --query="select * from orc_load" ${CLICKHOUSE_CLIENT} --query="drop table orc_load" From 71c144530081549c776e6432a48bebbca9f9f135 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> Date: Wed, 28 Jun 2023 21:45:56 +0200 Subject: [PATCH 0942/1997] Update 00417_kill_query.sh --- tests/queries/0_stateless/00417_kill_query.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/00417_kill_query.sh b/tests/queries/0_stateless/00417_kill_query.sh index 6eb0505f6bb..cd5b788a147 100755 --- a/tests/queries/0_stateless/00417_kill_query.sh +++ b/tests/queries/0_stateless/00417_kill_query.sh @@ -13,6 +13,7 @@ $CLICKHOUSE_CLIENT --max_block_size=1 -q "SELECT sleep(1) FROM system.numbers LI sleep 1 $CLICKHOUSE_CLIENT -q "KILL QUERY WHERE current_database = '${CLICKHOUSE_DATABASE}' and query LIKE 'SELECT sleep(%' AND (elapsed >= 0.) SYNC" | cut -f $QUERY_FIELND_NUM +# 31 is for the query to be different from the previous one $CLICKHOUSE_CLIENT --max_block_size=1 -q "SELECT sleep(1) FROM system.numbers LIMIT 31" &>/dev/null & sleep 1 $CLICKHOUSE_CLIENT -q "KILL QUERY WHERE current_database = '${CLICKHOUSE_DATABASE}' and query = 'SELECT sleep(1) FROM system.numbers LIMIT 31' ASYNC" | cut -f $QUERY_FIELND_NUM From 8854f05a7534fc844b5a7b00af72ad0ae7c0ac1b Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 28 Jun 2023 19:53:49 +0000 Subject: [PATCH 0943/1997] style fix --- src/Common/ThreadPool.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/ThreadPool.cpp b/src/Common/ThreadPool.cpp index 979e53a72c0..cc88594d84f 100644 --- a/src/Common/ThreadPool.cpp +++ b/src/Common/ThreadPool.cpp @@ -420,7 +420,7 @@ void ThreadPoolImpl::worker(typename std::list::iterator thread_ { if (DB::Exception::enable_job_stack_trace) DB::Exception::thread_frame_pointers = std::move(thread_frame_pointers); - + CurrentMetrics::Increment metric_active_pool_threads(metric_active_threads); From 45cd7f35cdfb3c1ed17cd2451468761aa25a6bfb Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Thu, 22 Jun 2023 02:16:56 +0200 Subject: [PATCH 0944/1997] Use clickhouse/integration-helper from changed images --- docker/test/integration/runner/dockerd-entrypoint.sh | 2 ++ tests/integration/helpers/network.py | 11 +++++++---- tests/integration/runner | 2 ++ 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/docker/test/integration/runner/dockerd-entrypoint.sh b/docker/test/integration/runner/dockerd-entrypoint.sh index fe47fc90951..347d904d5c0 100755 --- a/docker/test/integration/runner/dockerd-entrypoint.sh +++ b/docker/test/integration/runner/dockerd-entrypoint.sh @@ -52,6 +52,8 @@ export CLICKHOUSE_TESTS_BASE_CONFIG_DIR=/clickhouse-config export CLICKHOUSE_ODBC_BRIDGE_BINARY_PATH=/clickhouse-odbc-bridge export CLICKHOUSE_LIBRARY_BRIDGE_BINARY_PATH=/clickhouse-library-bridge +export DOCKER_BASE_TAG=${DOCKER_BASE_TAG:=latest} +export DOCKER_HELPER_TAG=${DOCKER_HELPER_TAG:=latest} export DOCKER_MYSQL_GOLANG_CLIENT_TAG=${DOCKER_MYSQL_GOLANG_CLIENT_TAG:=latest} export DOCKER_DOTNET_CLIENT_TAG=${DOCKER_DOTNET_CLIENT_TAG:=latest} export DOCKER_MYSQL_JAVA_CLIENT_TAG=${DOCKER_MYSQL_JAVA_CLIENT_TAG:=latest} diff --git a/tests/integration/helpers/network.py b/tests/integration/helpers/network.py index 471aa2bdc2e..4859a8c5946 100644 --- a/tests/integration/helpers/network.py +++ b/tests/integration/helpers/network.py @@ -231,6 +231,9 @@ class _NetworkManager: def _ensure_container(self): if self._container is None or self._container_expire_time <= time.time(): + image_name = "clickhouse/integration-helper:" + os.getenv( + "DOCKER_HELPER_TAG", "latest" + ) for i in range(5): if self._container is not None: try: @@ -247,7 +250,7 @@ class _NetworkManager: time.sleep(i) image = subprocess.check_output( - "docker images -q clickhouse/integration-helper 2>/dev/null", shell=True + f"docker images -q {image_name} 2>/dev/null", shell=True ) if not image.strip(): print("No network image helper, will try download") @@ -256,16 +259,16 @@ class _NetworkManager: for i in range(5): try: subprocess.check_call( # STYLE_CHECK_ALLOW_SUBPROCESS_CHECK_CALL - "docker pull clickhouse/integration-helper", shell=True + f"docker pull {image_name}", shell=True ) break except: time.sleep(i) else: - raise Exception("Cannot pull clickhouse/integration-helper image") + raise Exception(f"Cannot pull {image_name} image") self._container = self._docker_client.containers.run( - "clickhouse/integration-helper", + image_name, auto_remove=True, command=("sleep %s" % self.container_exit_timeout), # /run/xtables.lock passed inside for correct iptables --wait diff --git a/tests/integration/runner b/tests/integration/runner index f658bac412b..301a707a78d 100755 --- a/tests/integration/runner +++ b/tests/integration/runner @@ -336,6 +336,8 @@ if __name__ == "__main__": env_tags += "-e {}={} ".format("DOCKER_MYSQL_PHP_CLIENT_TAG", tag) elif image == "clickhouse/postgresql-java-client": env_tags += "-e {}={} ".format("DOCKER_POSTGRESQL_JAVA_CLIENT_TAG", tag) + elif image == "clickhouse/integration-helper": + env_tags += "-e {}={} ".format("DOCKER_HELPER_TAG", tag) elif image == "clickhouse/integration-test": env_tags += "-e {}={} ".format("DOCKER_BASE_TAG", tag) elif image == "clickhouse/kerberized-hadoop": From eeb8cdbc19e69aab64b0da2de0b569d6e31f438f Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Thu, 22 Jun 2023 02:36:30 +0200 Subject: [PATCH 0945/1997] Add way to define additional urls in test reports --- tests/ci/upload_result_helper.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/ci/upload_result_helper.py b/tests/ci/upload_result_helper.py index 150af7aff4a..fbb89ef8078 100644 --- a/tests/ci/upload_result_helper.py +++ b/tests/ci/upload_result_helper.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import Dict, List +from typing import Dict, List, Optional import os import logging @@ -58,14 +58,19 @@ def upload_results( test_results: TestResults, additional_files: List[str], check_name: str, + additional_urls: Optional[List[str]] = None, ) -> str: normalized_check_name = check_name.lower() for r in ((" ", "_"), ("(", "_"), (")", "_"), (",", "_"), ("/", "_")): normalized_check_name = normalized_check_name.replace(*r) + + # Preserve additional_urls to not modify the original one + original_additional_urls = additional_urls or [] s3_path_prefix = f"{pr_number}/{commit_sha}/{normalized_check_name}" additional_urls = process_logs( s3_client, additional_files, s3_path_prefix, test_results ) + additional_urls.extend(original_additional_urls) branch_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/commits/master" branch_name = "master" From 7b4e6faece3b172763f3237bf8a42282a289f059 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Mon, 19 Jun 2023 18:32:31 +0200 Subject: [PATCH 0946/1997] Speedup the submodules cloning --- docker/test/fasttest/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index dab873377ce..be9d569e65c 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -151,7 +151,7 @@ function clone_submodules ) git submodule sync - git submodule update --jobs=16 --depth 1 --init "${SUBMODULES_TO_UPDATE[@]}" + git submodule update --jobs=16 --depth 1 --single-branch --init "${SUBMODULES_TO_UPDATE[@]}" git submodule foreach git reset --hard git submodule foreach git checkout @ -f git submodule foreach git clean -xfd From b68d8fa76e7be0ef1ae58ce41c6a326027c3afe6 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Mon, 19 Jun 2023 18:49:54 +0200 Subject: [PATCH 0947/1997] Update the OS version for builder and testers docker images --- docker/test/fasttest/Dockerfile | 1 + docker/test/integration/base/Dockerfile | 2 +- docker/test/stateful/Dockerfile | 5 +++-- docker/test/stateless/Dockerfile | 7 ++++--- docker/test/util/Dockerfile | 2 +- 5 files changed, 10 insertions(+), 7 deletions(-) diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index ffb13fc774d..da4baa8c687 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -9,6 +9,7 @@ RUN apt-get update \ expect \ file \ lsof \ + odbcinst \ psmisc \ python3 \ python3-lxml \ diff --git a/docker/test/integration/base/Dockerfile b/docker/test/integration/base/Dockerfile index de8efa20af4..ff50626b6aa 100644 --- a/docker/test/integration/base/Dockerfile +++ b/docker/test/integration/base/Dockerfile @@ -49,7 +49,7 @@ RUN arch=${TARGETARCH:-amd64} \ && curl -o mysql-odbc.rpm "https://cdn.mysql.com/archives/mysql-connector-odbc-8.0/mysql-connector-odbc-8.0.27-1.el8.${rarch}.rpm" \ && rpm2archive mysql-odbc.rpm \ && tar xf mysql-odbc.rpm.tgz -C / ./usr/lib64/ \ - && LINK_DIR=$(dpkg -L libodbc1 | rg '^/usr/lib/.*-linux-gnu/odbc$') \ + && LINK_DIR=$(dpkg -L odbc-postgresql | rg '^/usr/lib/.*-linux-gnu/odbc$') \ && ln -s /usr/lib64/libmyodbc8a.so "$LINK_DIR" \ && ln -s /usr/lib64/libmyodbc8a.so "$LINK_DIR"/libmyodbc.so diff --git a/docker/test/stateful/Dockerfile b/docker/test/stateful/Dockerfile index 71a2e92e3a8..f513735a2d0 100644 --- a/docker/test/stateful/Dockerfile +++ b/docker/test/stateful/Dockerfile @@ -16,8 +16,9 @@ COPY s3downloader /s3downloader ENV S3_URL="https://clickhouse-datasets.s3.amazonaws.com" ENV DATASETS="hits visits" -RUN npm install -g azurite -RUN npm install tslib +# The following is already done in clickhouse/stateless-test +# RUN npm install -g azurite +# RUN npm install tslib COPY run.sh / CMD ["/bin/bash", "/run.sh"] diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index 40109255a7e..32996140521 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -20,6 +20,7 @@ RUN apt-get update -y \ netcat-openbsd \ nodejs \ npm \ + odbcinst \ openjdk-11-jre-headless \ openssl \ postgresql-client \ @@ -71,7 +72,7 @@ RUN arch=${TARGETARCH:-amd64} \ && chmod +x ./mc ./minio -RUN wget 'https://dlcdn.apache.org/hadoop/common/hadoop-3.3.1/hadoop-3.3.1.tar.gz' \ +RUN wget --no-verbose 'https://dlcdn.apache.org/hadoop/common/hadoop-3.3.1/hadoop-3.3.1.tar.gz' \ && tar -xvf hadoop-3.3.1.tar.gz \ && rm -rf hadoop-3.3.1.tar.gz @@ -79,8 +80,8 @@ ENV MINIO_ROOT_USER="clickhouse" ENV MINIO_ROOT_PASSWORD="clickhouse" ENV EXPORT_S3_STORAGE_POLICIES=1 -RUN npm install -g azurite -RUN npm install tslib +RUN npm install -g azurite \ + && npm install -g tslib COPY run.sh / COPY setup_minio.sh / diff --git a/docker/test/util/Dockerfile b/docker/test/util/Dockerfile index a49278e960b..85e888f1df7 100644 --- a/docker/test/util/Dockerfile +++ b/docker/test/util/Dockerfile @@ -1,5 +1,5 @@ # docker build -t clickhouse/test-util . -FROM ubuntu:20.04 +FROM ubuntu:22.04 # ARG for quick switch to a given ubuntu mirror ARG apt_archive="http://archive.ubuntu.com" From 5a8ce1f2fe5a7048066e9883ec49828e27ab7bd8 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Mon, 19 Jun 2023 22:26:27 +0200 Subject: [PATCH 0948/1997] Do not upload the binaries for fast-tests to tests-reports --- docker/test/fasttest/run.sh | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index be9d569e65c..989ed9d2fbb 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -80,7 +80,7 @@ function start_server function clone_root { - git config --global --add safe.directory "$FASTTEST_SOURCE" + [ "$UID" -eq 0 ] && git config --global --add safe.directory "$FASTTEST_SOURCE" git clone --depth 1 https://github.com/ClickHouse/ClickHouse.git -- "$FASTTEST_SOURCE" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/clone_log.txt" ( @@ -202,10 +202,11 @@ function build | ts '%Y-%m-%d %H:%M:%S' \ | tee "$FASTTEST_OUTPUT/test_result.txt" if [ "$COPY_CLICKHOUSE_BINARY_TO_OUTPUT" -eq "1" ]; then - cp programs/clickhouse "$FASTTEST_OUTPUT/clickhouse" + mkdir -p "$FASTTEST_OUTPUT/binaries/" + cp programs/clickhouse "$FASTTEST_OUTPUT/binaries/clickhouse" - strip programs/clickhouse -o "$FASTTEST_OUTPUT/clickhouse-stripped" - zstd --threads=0 "$FASTTEST_OUTPUT/clickhouse-stripped" + strip programs/clickhouse -o programs/clickhouse-stripped + zstd --threads=0 programs/clickhouse-stripped -o "$FASTTEST_OUTPUT/binaries/clickhouse-stripped.zst" fi ccache_status ccache --evict-older-than 1d ||: From edcf981c4837468bf4d08dd86183b38c120913c2 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Tue, 20 Jun 2023 13:11:11 +0200 Subject: [PATCH 0949/1997] Update mysql odbc connector --- docker/test/integration/base/Dockerfile | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/docker/test/integration/base/Dockerfile b/docker/test/integration/base/Dockerfile index ff50626b6aa..270b40e23a6 100644 --- a/docker/test/integration/base/Dockerfile +++ b/docker/test/integration/base/Dockerfile @@ -46,12 +46,13 @@ RUN arch=${TARGETARCH:-amd64} \ arm64) rarch=aarch64 ;; \ esac \ && cd /tmp \ - && curl -o mysql-odbc.rpm "https://cdn.mysql.com/archives/mysql-connector-odbc-8.0/mysql-connector-odbc-8.0.27-1.el8.${rarch}.rpm" \ + && curl -o mysql-odbc.rpm "https://cdn.mysql.com/archives/mysql-connector-odbc-8.0/mysql-connector-odbc-8.0.32-1.el9.${rarch}.rpm" \ && rpm2archive mysql-odbc.rpm \ && tar xf mysql-odbc.rpm.tgz -C / ./usr/lib64/ \ - && LINK_DIR=$(dpkg -L odbc-postgresql | rg '^/usr/lib/.*-linux-gnu/odbc$') \ - && ln -s /usr/lib64/libmyodbc8a.so "$LINK_DIR" \ - && ln -s /usr/lib64/libmyodbc8a.so "$LINK_DIR"/libmyodbc.so + && rm mysql-odbc.rpm mysql-odbc.rpm.tgz \ + && ODBC_DIR=$(dpkg -L odbc-postgresql | rg '^/usr/lib/.*-linux-gnu/odbc$') \ + && ln -s /usr/lib64/libmyodbc8a.so "$ODBC_DIR" \ + && ln -s /usr/lib64/libmyodbc8a.so "$ODBC_DIR"/libmyodbc.so # Unfortunately this is required for a single test for conversion data from zookeeper to clickhouse-keeper. # ZooKeeper is not started by default, but consumes some space in containers. From ead81879b43d8b86f9b73f97fc0ac4277a79f047 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Wed, 21 Jun 2023 16:33:43 +0200 Subject: [PATCH 0950/1997] Update integration tests runner too --- .../integration/helper_container/Dockerfile | 5 +- docker/test/integration/runner/Dockerfile | 50 ++++++++++--------- tests/integration/helpers/cluster.py | 2 - tests/integration/helpers/network.py | 4 -- tests/integration/pytest.ini | 3 ++ tests/integration/runner | 46 +++++++---------- tests/integration/test_storage_hudi/test.py | 2 +- 7 files changed, 51 insertions(+), 61 deletions(-) diff --git a/docker/test/integration/helper_container/Dockerfile b/docker/test/integration/helper_container/Dockerfile index 6a093081bf2..60adaea1796 100644 --- a/docker/test/integration/helper_container/Dockerfile +++ b/docker/test/integration/helper_container/Dockerfile @@ -2,4 +2,7 @@ # Helper docker container to run iptables without sudo FROM alpine -RUN apk add -U iproute2 +RUN apk add --no-cache -U iproute2 \ + && for bin in iptables iptables-restore iptables-save; \ + do ln -sf xtables-nft-multi "/sbin/$bin"; \ + done diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index 14c97e479f6..40627354f70 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -1,5 +1,5 @@ # docker build -t clickhouse/integration-tests-runner . -FROM ubuntu:20.04 +FROM ubuntu:22.04 # ARG for quick switch to a given ubuntu mirror ARG apt_archive="http://archive.ubuntu.com" @@ -56,17 +56,19 @@ RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add - \ /var/lib/apt/lists/* \ /var/cache/debconf \ /tmp/* \ - && apt-get clean + && apt-get clean \ + && dockerd --version; docker --version -RUN dockerd --version; docker --version RUN python3 -m pip install --no-cache-dir \ PyMySQL \ - aerospike==4.0.0 \ - avro==1.10.2 \ + aerospike==11.1.0 \ asyncio \ + avro==1.10.2 \ + azure-storage-blob \ cassandra-driver \ - confluent-kafka==1.5.0 \ + confluent-kafka==1.9.2 \ + delta-spark==2.3.0 \ dict2xml \ dicttoxml \ docker \ @@ -76,47 +78,47 @@ RUN python3 -m pip install --no-cache-dir \ kafka-python \ kazoo \ lz4 \ + meilisearch==0.18.3 \ minio \ nats-py \ protobuf \ - psycopg2-binary==2.8.6 \ + psycopg2-binary==2.9.6 \ + pyhdfs \ pymongo==3.11.0 \ + pyspark==3.3.2 \ pytest \ pytest-order==1.0.0 \ - pytest-timeout \ pytest-random \ - pytest-xdist \ pytest-repeat \ + pytest-timeout \ + pytest-xdist \ pytz \ redis \ - tzlocal==2.1 \ - urllib3 \ requests-kerberos \ - pyspark==3.3.2 \ - delta-spark==2.2.0 \ - pyhdfs \ - azure-storage-blob \ - meilisearch==0.18.3 - -COPY modprobe.sh /usr/local/bin/modprobe -COPY dockerd-entrypoint.sh /usr/local/bin/ -COPY compose/ /compose/ -COPY misc/ /misc/ + tzlocal==2.1 \ + urllib3 +# Hudi supports only spark 3.3.*, not 3.4 RUN curl -fsSL -O https://dlcdn.apache.org/spark/spark-3.3.2/spark-3.3.2-bin-hadoop3.tgz \ && tar xzvf spark-3.3.2-bin-hadoop3.tgz -C / \ && rm spark-3.3.2-bin-hadoop3.tgz # download spark and packages # if you change packages, don't forget to update them in tests/integration/helpers/cluster.py -RUN echo ":quit" | /spark-3.3.2-bin-hadoop3/bin/spark-shell --packages "org.apache.hudi:hudi-spark3.3-bundle_2.12:0.13.0,io.delta:delta-core_2.12:2.2.0,org.apache.iceberg:iceberg-spark-runtime-3.3_2.12:1.1.0" > /dev/null +RUN echo ":quit" | /spark-3.3.2-bin-hadoop3/bin/spark-shell --packages "org.apache.hudi:hudi-spark3.3-bundle_2.12:0.13.0,io.delta:delta-core_2.12:2.3.0,org.apache.iceberg:iceberg-spark-runtime-3.3_2.12:1.1.0" > /dev/null RUN set -x \ && addgroup 
--system dockremap \ - && adduser --system dockremap \ + && adduser --system dockremap \ && adduser dockremap dockremap \ && echo 'dockremap:165536:65536' >> /etc/subuid \ - && echo 'dockremap:165536:65536' >> /etc/subgid + && echo 'dockremap:165536:65536' >> /etc/subgid + +COPY modprobe.sh /usr/local/bin/modprobe +COPY dockerd-entrypoint.sh /usr/local/bin/ +COPY compose/ /compose/ +COPY misc/ /misc/ + # Same options as in test/base/Dockerfile # (in case you need to override them in tests) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index c77e67062a1..967eaaa78a5 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -3054,7 +3054,6 @@ CLICKHOUSE_STAY_ALIVE_COMMAND = "bash -c \"trap 'pkill tail' INT TERM; {} --daem CLICKHOUSE_START_COMMAND ) -# /run/xtables.lock passed inside for correct iptables --wait DOCKER_COMPOSE_TEMPLATE = """ version: '2.3' services: @@ -3066,7 +3065,6 @@ services: - {db_dir}:/var/lib/clickhouse/ - {logs_dir}:/var/log/clickhouse-server/ - /etc/passwd:/etc/passwd:ro - - /run/xtables.lock:/run/xtables.lock:ro {binary_volume} {odbc_bridge_volume} {library_bridge_volume} diff --git a/tests/integration/helpers/network.py b/tests/integration/helpers/network.py index 4859a8c5946..2df560708e0 100644 --- a/tests/integration/helpers/network.py +++ b/tests/integration/helpers/network.py @@ -271,10 +271,6 @@ class _NetworkManager: image_name, auto_remove=True, command=("sleep %s" % self.container_exit_timeout), - # /run/xtables.lock passed inside for correct iptables --wait - volumes={ - "/run/xtables.lock": {"bind": "/run/xtables.lock", "mode": "ro"} - }, detach=True, network_mode="host", ) diff --git a/tests/integration/pytest.ini b/tests/integration/pytest.ini index 772c96f7361..e40959bd37b 100644 --- a/tests/integration/pytest.ini +++ b/tests/integration/pytest.ini @@ -19,3 +19,6 @@ markers = long_run: marks tests which run for a long time addopts = -m 'not long_run' +; 'The asyncore module is deprecated' comes from casandra driver +filterwarnings = + ignore:The asyncore module is deprecated:DeprecationWarning diff --git a/tests/integration/runner b/tests/integration/runner index 301a707a78d..df52f587eee 100755 --- a/tests/integration/runner +++ b/tests/integration/runner @@ -395,39 +395,27 @@ if __name__ == "__main__": if args.keyword_expression: args.pytest_args += ["-k", args.keyword_expression] - cmd_base = "docker run {net} {tty} --rm --name {name} --privileged \ - --volume={odbc_bridge_bin}:/clickhouse-odbc-bridge --volume={bin}:/clickhouse \ - --volume={library_bridge_bin}:/clickhouse-library-bridge \ - --volume={base_cfg}:/clickhouse-config --volume={cases_dir}:/ClickHouse/tests/integration \ - --volume={src_dir}/Server/grpc_protos:/ClickHouse/src/Server/grpc_protos \ - --volume=/run:/run/host:ro \ - {dockerd_internal_volume} -e DOCKER_CLIENT_TIMEOUT=300 -e COMPOSE_HTTP_TIMEOUT=600 \ - -e XTABLES_LOCKFILE=/run/host/xtables.lock \ - -e PYTHONUNBUFFERED=1 \ - {env_tags} {env_cleanup} -e PYTEST_OPTS='{parallel} {opts} {tests_list} {rand} -vvv' {img}".format( - net=net, - tty=tty, - bin=args.binary, - odbc_bridge_bin=args.odbc_bridge_binary, - library_bridge_bin=args.library_bridge_binary, - base_cfg=args.base_configs_dir, - cases_dir=args.cases_dir, - src_dir=args.src_dir, - env_tags=env_tags, - env_cleanup=env_cleanup, - parallel=parallel_args, - rand=rand_args, - opts=" ".join(args.pytest_args).replace("'", "\\'"), - tests_list=" ".join(args.tests_list), - 
dockerd_internal_volume=dockerd_internal_volume, - img=DIND_INTEGRATION_TESTS_IMAGE_NAME + ":" + args.docker_image_version, - name=CONTAINER_NAME, + pytest_opts = " ".join(args.pytest_args).replace("'", "\\'") + tests_list = " ".join(args.tests_list) + cmd_base = ( + f"docker run {net} {tty} --rm --name {CONTAINER_NAME} " + "--privileged --dns-search='.' " # since recent dns search leaks from host + f"--volume={args.odbc_bridge_binary}:/clickhouse-odbc-bridge " + f"--volume={args.binary}:/clickhouse " + f"--volume={args.library_bridge_binary}:/clickhouse-library-bridge " + f"--volume={args.base_configs_dir}:/clickhouse-config " + f"--volume={args.cases_dir}:/ClickHouse/tests/integration " + f"--volume={args.src_dir}/Server/grpc_protos:/ClickHouse/src/Server/grpc_protos " + f"--volume=/run:/run/host:ro {dockerd_internal_volume} {env_tags} {env_cleanup} " + "-e DOCKER_CLIENT_TIMEOUT=300 -e COMPOSE_HTTP_TIMEOUT=600 -e PYTHONUNBUFFERED=1 " + f"-e PYTEST_OPTS='{parallel_args} {pytest_opts} {tests_list} {rand_args} -vvv'" + f" {DIND_INTEGRATION_TESTS_IMAGE_NAME}:{args.docker_image_version}" ) cmd = cmd_base + " " + args.command cmd_pre_pull = ( - cmd_base - + " find /compose -name docker_compose_*.yml -exec docker-compose -f '{}' pull \;" + f"{cmd_base} find /compose -name docker_compose_*.yml " + r"-exec docker-compose -f '{}' pull \;" ) containers = subprocess.check_output( diff --git a/tests/integration/test_storage_hudi/test.py b/tests/integration/test_storage_hudi/test.py index de9cde43609..2b77f4d6d61 100644 --- a/tests/integration/test_storage_hudi/test.py +++ b/tests/integration/test_storage_hudi/test.py @@ -79,7 +79,7 @@ def run_query(instance, query, stdin=None, settings=None): def write_hudi_from_df(spark, table_name, df, result_path, mode="overwrite"): - if mode is "overwrite": + if mode == "overwrite": hudi_write_mode = "insert_overwrite" else: hudi_write_mode = "upsert" From 953f1c78855500bdd2a6a603749a65082e0a4067 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 22 Jun 2023 02:36:30 +0200 Subject: [PATCH 0951/1997] Upload fast test binaries to builds bucket --- tests/ci/fast_test_check.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/ci/fast_test_check.py b/tests/ci/fast_test_check.py index 2849759a3ee..d5198e5c3d7 100644 --- a/tests/ci/fast_test_check.py +++ b/tests/ci/fast_test_check.py @@ -11,6 +11,7 @@ from typing import List, Tuple from github import Github +from build_check import get_release_or_pr from clickhouse_helper import ( ClickHouseHelper, mark_flaky_tests, @@ -31,6 +32,7 @@ from s3_helper import S3Helper from stopwatch import Stopwatch from tee_popen import TeePopen from upload_result_helper import upload_results +from version_helper import get_version_from_repo NAME = "Fast test" @@ -189,6 +191,17 @@ def main(): ch_helper = ClickHouseHelper() mark_flaky_tests(ch_helper, NAME, test_results) + s3_path_prefix = os.path.join( + get_release_or_pr(pr_info, get_version_from_repo())[0], + pr_info.sha, + "fast_tests", + ) + build_urls = s3_helper.upload_build_folder_to_s3( + os.path.join(output_path, "binaries"), + s3_path_prefix, + keep_dirs_in_s3_path=False, + upload_symlinks=False, + ) report_url = upload_results( s3_helper, @@ -197,6 +210,7 @@ def main(): test_results, [run_log_path] + additional_logs, NAME, + build_urls, ) print(f"::notice ::Report url: {report_url}") post_commit_status(commit, state, report_url, description, NAME, pr_info) From 91dc6dfe34f1239b936506324d552f578cf39fa3 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Thu, 22 Jun 2023 18:20:23 +0200 Subject: [PATCH 0952/1997] Update mysql-php-client to the recent rolling version --- docker/test/integration/mysql_php_client/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/integration/mysql_php_client/Dockerfile b/docker/test/integration/mysql_php_client/Dockerfile index 55db4d15a7f..0e11ae023e6 100644 --- a/docker/test/integration/mysql_php_client/Dockerfile +++ b/docker/test/integration/mysql_php_client/Dockerfile @@ -1,7 +1,7 @@ # docker build -t clickhouse/mysql-php-client . # MySQL PHP client docker container -FROM php:8.0.18-cli +FROM php:8-cli-alpine COPY ./client.crt client.crt COPY ./client.key client.key From b2bfe2eb87e30e217839ec9b6c063a2e234c6f1f Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 22 Jun 2023 19:10:44 +0200 Subject: [PATCH 0953/1997] The error is changed in psycopg2=2.9.6 --- tests/integration/test_postgresql_protocol/test.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_postgresql_protocol/test.py b/tests/integration/test_postgresql_protocol/test.py index e1d8cbf9bcc..de01bba6862 100644 --- a/tests/integration/test_postgresql_protocol/test.py +++ b/tests/integration/test_postgresql_protocol/test.py @@ -111,7 +111,7 @@ def test_psql_client(started_cluster): def test_python_client(started_cluster): node = cluster.instances["node"] - with pytest.raises(py_psql.InternalError) as exc_info: + with pytest.raises(py_psql.OperationalError) as exc_info: ch = py_psql.connect( host=node.ip_address, port=server_port, @@ -122,9 +122,7 @@ def test_python_client(started_cluster): cur = ch.cursor() cur.execute("select name from tables;") - assert exc_info.value.args == ( - "Query execution failed.\nDB::Exception: Table default.tables doesn't exist\nSSL connection has been closed unexpectedly\n", - ) + assert exc_info.value.args == ("SSL connection has been closed unexpectedly\n",) ch = py_psql.connect( host=node.ip_address, From 22c8f1c0be46f5cc5115d6e8c55a963fa466b12e Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Fri, 23 Jun 2023 01:46:29 +0200 Subject: [PATCH 0954/1997] Fix test_jemalloc_percpu_arena after upgrade --- tests/integration/test_jemalloc_percpu_arena/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_jemalloc_percpu_arena/test.py b/tests/integration/test_jemalloc_percpu_arena/test.py index 0dccde3776e..8de3dcd7ea2 100755 --- a/tests/integration/test_jemalloc_percpu_arena/test.py +++ b/tests/integration/test_jemalloc_percpu_arena/test.py @@ -26,7 +26,7 @@ def run_command_in_container(cmd, *args): "run", "--rm", *args, - "ubuntu:20.04", + "ubuntu:22.04", "sh", "-c", cmd, From 406896f9a95658a27f298c4c8b154467f8d5cbb8 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Fri, 23 Jun 2023 02:10:47 +0200 Subject: [PATCH 0955/1997] Create cgroupsv2 subtree for docker-in-docker --- docker/test/integration/runner/dockerd-entrypoint.sh | 11 +++++++++++ tests/integration/test_cgroup_limit/test.py | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/docker/test/integration/runner/dockerd-entrypoint.sh b/docker/test/integration/runner/dockerd-entrypoint.sh index 347d904d5c0..3c4ff522b36 100755 --- a/docker/test/integration/runner/dockerd-entrypoint.sh +++ b/docker/test/integration/runner/dockerd-entrypoint.sh @@ -12,6 +12,17 @@ echo '{ "registry-mirrors" : ["http://dockerhub-proxy.dockerhub-proxy-zone:5000"] }' | dd of=/etc/docker/daemon.json 2>/dev/null +if [ -f /sys/fs/cgroup/cgroup.controllers ]; then + # move the processes from the root group to the /init group, + # otherwise writing subtree_control fails with EBUSY. + # An error during moving non-existent process (i.e., "cat") is ignored. + mkdir -p /sys/fs/cgroup/init + xargs -rn1 < /sys/fs/cgroup/cgroup.procs > /sys/fs/cgroup/init/cgroup.procs || : + # enable controllers + sed -e 's/ / +/g' -e 's/^/+/' < /sys/fs/cgroup/cgroup.controllers \ + > /sys/fs/cgroup/cgroup.subtree_control +fi + # In case of test hung it is convenient to use pytest --pdb to debug it, # and on hung you can simply press Ctrl-C and it will spawn a python pdb, # but on SIGINT dockerd will exit, so ignore it to preserve the daemon. diff --git a/tests/integration/test_cgroup_limit/test.py b/tests/integration/test_cgroup_limit/test.py index f6392eca4d7..4ab12436dc9 100644 --- a/tests/integration/test_cgroup_limit/test.py +++ b/tests/integration/test_cgroup_limit/test.py @@ -22,7 +22,7 @@ def run_command_in_container(cmd, *args): "run", "--rm", *args, - "ubuntu:20.04", + "ubuntu:22.04", "sh", "-c", cmd, From a18300c497a051762f1ffcb5f366d50591a812f1 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Fri, 23 Jun 2023 02:28:01 +0200 Subject: [PATCH 0956/1997] Improve logging in integration/test_cgroup_limit --- tests/integration/test_cgroup_limit/test.py | 26 +++++++++++---------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/tests/integration/test_cgroup_limit/test.py b/tests/integration/test_cgroup_limit/test.py index 4ab12436dc9..e77b0f70960 100644 --- a/tests/integration/test_cgroup_limit/test.py +++ b/tests/integration/test_cgroup_limit/test.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 +import logging import os import math import subprocess @@ -16,18 +17,19 @@ def run_command_in_container(cmd, *args): f"{alternative_binary}:/usr/bin/clickhouse", ) - return subprocess.check_output( - [ - "docker", - "run", - "--rm", - *args, - "ubuntu:22.04", - "sh", - "-c", - cmd, - ] - ) + command = [ + "docker", + "run", + "--rm", + *args, + "ubuntu:22.04", + "sh", + "-c", + cmd, + ] + + logging.debug("Command: %s", " ".join(command)) + return subprocess.check_output(command) def run_with_cpu_limit(cmd, num_cpus, *args): From 4f1982e25e9313442f312feb70bc25bd88dfae2c Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Fri, 23 Jun 2023 02:51:18 +0200 Subject: [PATCH 0957/1997] API of pika for rabbitmq has changed --- tests/integration/test_storage_rabbitmq/test.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index b4dcf86e0ba..943e7742018 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -858,7 +858,7 @@ def test_rabbitmq_insert(rabbitmq_cluster): if len(insert_messages) == 50: channel.stop_consuming() - consumer.basic_consume(onReceived, queue_name) + consumer.basic_consume(queue_name, onReceived) consumer.start_consuming() consumer_connection.close() @@ -921,7 +921,7 @@ def test_rabbitmq_insert_headers_exchange(rabbitmq_cluster): if len(insert_messages) == 50: channel.stop_consuming() - consumer.basic_consume(onReceived, queue_name) + consumer.basic_consume(queue_name, onReceived) consumer.start_consuming() consumer_connection.close() @@ -2991,7 +2991,7 @@ def test_format_with_prefix_and_suffix(rabbitmq_cluster): if len(insert_messages) == 2: channel.stop_consuming() - consumer.basic_consume(onReceived, queue_name) + consumer.basic_consume(queue_name, onReceived) consumer.start_consuming() consumer_connection.close() @@ -3050,7 +3050,7 @@ def test_max_rows_per_message(rabbitmq_cluster): if len(insert_messages) == 2: channel.stop_consuming() - consumer.basic_consume(onReceived, queue_name) + consumer.basic_consume(queue_name, onReceived) consumer.start_consuming() consumer_connection.close() @@ -3148,7 +3148,7 @@ def test_row_based_formats(rabbitmq_cluster): if insert_messages == 2: channel.stop_consuming() - consumer.basic_consume(onReceived, queue_name) + consumer.basic_consume(queue_name, onReceived) consumer.start_consuming() consumer_connection.close() @@ -3211,7 +3211,7 @@ def test_block_based_formats_1(rabbitmq_cluster): if len(insert_messages) == 3: channel.stop_consuming() - consumer.basic_consume(onReceived, queue_name) + consumer.basic_consume(queue_name, onReceived) consumer.start_consuming() consumer_connection.close() @@ -3296,7 +3296,7 @@ def test_block_based_formats_2(rabbitmq_cluster): if insert_messages == 9: channel.stop_consuming() - consumer.basic_consume(onReceived, queue_name) + consumer.basic_consume(queue_name, onReceived) consumer.start_consuming() consumer_connection.close() From f85460d8ef6e5996f9ec56296b3a155344772dd8 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Fri, 23 Jun 2023 02:56:06 +0200 Subject: [PATCH 0958/1997] Enable tests with broken channel.start_consuming --- tests/integration/test_storage_rabbitmq/test.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 943e7742018..14764dd3835 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -2953,7 +2953,6 @@ def test_rabbitmq_address(rabbitmq_cluster): instance2.query("drop table rabbit_out sync") -@pytest.mark.skip(reason="FIXME: flaky (something with channel.start_consuming()") def test_format_with_prefix_and_suffix(rabbitmq_cluster): instance.query( """ @@ -3002,7 +3001,6 @@ def test_format_with_prefix_and_suffix(rabbitmq_cluster): ) -@pytest.mark.skip(reason="FIXME: flaky (something with channel.start_consuming()") def test_max_rows_per_message(rabbitmq_cluster): num_rows = 5 @@ -3075,7 +3073,6 @@ def test_max_rows_per_message(rabbitmq_cluster): assert result == "0\t0\n10\t100\n20\t200\n30\t300\n40\t400\n" -@pytest.mark.skip(reason="FIXME: flaky (something with channel.start_consuming()") def test_row_based_formats(rabbitmq_cluster): num_rows = 10 @@ -3172,7 +3169,6 @@ def test_row_based_formats(rabbitmq_cluster): assert result == expected -@pytest.mark.skip(reason="FIXME: flaky (something with channel.start_consuming()") def test_block_based_formats_1(rabbitmq_cluster): instance.query( """ @@ -3234,7 +3230,6 @@ def test_block_based_formats_1(rabbitmq_cluster): ] -@pytest.mark.skip(reason="FIXME: flaky (something with channel.start_consuming()") def test_block_based_formats_2(rabbitmq_cluster): num_rows = 100 From 0680f0988c906a01567fae54b2097f5ad60f749d Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Fri, 23 Jun 2023 03:02:03 +0200 Subject: [PATCH 0959/1997] Enable other working tests --- tests/integration/test_storage_rabbitmq/test.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 14764dd3835..751279f5e5a 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -1020,7 +1020,6 @@ def test_rabbitmq_many_inserts(rabbitmq_cluster): ), "ClickHouse lost some messages: {}".format(result) -@pytest.mark.skip(reason="Flaky") def test_rabbitmq_overloaded_insert(rabbitmq_cluster): instance.query( """ @@ -2050,7 +2049,6 @@ def test_rabbitmq_restore_failed_connection_without_losses_1(rabbitmq_cluster): ) -@pytest.mark.skip(reason="Timeout: FIXME") def test_rabbitmq_restore_failed_connection_without_losses_2(rabbitmq_cluster): logging.getLogger("pika").propagate = False instance.query( From f728f9735048d5cea37bb422fefc72ffcab649ce Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Tue, 27 Jun 2023 13:54:01 +0200 Subject: [PATCH 0960/1997] Add cgroupsv2 for getCGroupLimitedCPUCores --- src/Common/getNumberOfPhysicalCPUCores.cpp | 86 +++++++++++++++++++++- 1 file changed, 83 insertions(+), 3 deletions(-) diff --git a/src/Common/getNumberOfPhysicalCPUCores.cpp b/src/Common/getNumberOfPhysicalCPUCores.cpp index ed82c59140d..8fbb32e911f 100644 --- a/src/Common/getNumberOfPhysicalCPUCores.cpp +++ b/src/Common/getNumberOfPhysicalCPUCores.cpp @@ -1,4 +1,5 @@ #include "getNumberOfPhysicalCPUCores.h" +#include #include "config.h" #if defined(OS_LINUX) @@ -7,6 +8,8 @@ #endif #include +#include +#include #include #include @@ -15,7 +18,7 @@ namespace { #if defined(OS_LINUX) -int32_t readFrom(const char * filename, int default_value) +int32_t readFrom(const std::filesystem::path & filename, int default_value) { std::ifstream infile(filename); if (!infile.is_open()) @@ -31,10 +34,87 @@ int32_t readFrom(const char * filename, int default_value) uint32_t getCGroupLimitedCPUCores(unsigned default_cpu_count) { uint32_t quota_count = default_cpu_count; + std::filesystem::path prefix = "/sys/fs/cgroup"; + /// cgroupsv2 + std::ifstream contr_file(prefix / "cgroup.controllers"); + if (contr_file.is_open()) + { + /// First, we identify the cgroup the process belongs + std::ifstream cgroup_name_file("/proc/self/cgroup"); + if (!cgroup_name_file.is_open()) + return default_cpu_count; + + // cgroup_name_file always starts with '0::/' for v2 + cgroup_name_file.ignore(4); + std::string cgroup_name; + cgroup_name_file >> cgroup_name; + + std::filesystem::path current_cgroup; + if (cgroup_name.empty()) + current_cgroup = prefix; + else + current_cgroup = prefix / cgroup_name; + + // Looking for cpu.max in directories from the current cgroup to the top level + // It does not stop on the first time since the child could have a greater value than parent + while (current_cgroup != prefix.parent_path()) + { + std::ifstream cpu_max_file(current_cgroup / "cpu.max"); + current_cgroup = current_cgroup.parent_path(); + if (cpu_max_file.is_open()) + { + std::string cpu_limit_str; + float cpu_period; + cpu_max_file >> cpu_limit_str >> cpu_period; + if (cpu_limit_str != "max" && cpu_period != 0) + { + float cpu_limit = std::stof(cpu_limit_str); + quota_count = std::min(static_cast(ceil(cpu_limit / cpu_period)), quota_count); + } + } + } + current_cgroup = prefix / cgroup_name; + // Looking for cpuset.cpus.effective in directories from the current cgroup to the top level + while (current_cgroup != prefix.parent_path()) + { + std::ifstream cpuset_cpus_file(current_cgroup / "cpuset.cpus.effective"); + current_cgroup = current_cgroup.parent_path(); + if (cpuset_cpus_file.is_open()) + { + // The line in the file is "0,2-4,6,9-14" cpu numbers + // It's always grouped and ordered + std::vector cpu_ranges; + std::string cpuset_line; + cpuset_cpus_file >> cpuset_line; + if (cpuset_line.empty()) + continue; + boost::split(cpu_ranges, cpuset_line, boost::is_any_of(",")); + uint32_t cpus_count = 0; + for (const std::string& cpu_number_or_range : cpu_ranges) + { + std::vector cpu_range; + boost::split(cpu_range, cpu_number_or_range, boost::is_any_of("-")); + + if (cpu_range.size() == 2) + { + int start = std::stoi(cpu_range[0]); + int end = std::stoi(cpu_range[1]); + cpus_count += (end - start) + 1; + } + else + cpus_count++; + } + quota_count = std::min(cpus_count, quota_count); + break; + } + } + return quota_count; + } + /// cgroupsv1 /// Return the number of milliseconds per period process is 
guaranteed to run. /// -1 for no quota - int cgroup_quota = readFrom("/sys/fs/cgroup/cpu/cpu.cfs_quota_us", -1); - int cgroup_period = readFrom("/sys/fs/cgroup/cpu/cpu.cfs_period_us", -1); + int cgroup_quota = readFrom(prefix / "cpu/cpu.cfs_quota_us", -1); + int cgroup_period = readFrom(prefix / "cpu/cpu.cfs_period_us", -1); if (cgroup_quota > -1 && cgroup_period > 0) quota_count = static_cast(ceil(static_cast(cgroup_quota) / static_cast(cgroup_period))); From 29a0220e63420a60d1aef9cfec0f9c6c0bba8160 Mon Sep 17 00:00:00 2001 From: Thom O'Connor Date: Wed, 28 Jun 2023 14:42:01 -0600 Subject: [PATCH 0961/1997] Update functions-for-nulls.md Fixing broken link to HTML anchor on page --- docs/en/sql-reference/functions/functions-for-nulls.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/functions-for-nulls.md b/docs/en/sql-reference/functions/functions-for-nulls.md index 6f82fedaab7..d57b799e94c 100644 --- a/docs/en/sql-reference/functions/functions-for-nulls.md +++ b/docs/en/sql-reference/functions/functions-for-nulls.md @@ -8,7 +8,7 @@ sidebar_label: Nullable ## isNull -Returns whether the argument is [NULL](../../sql-reference/syntax.md#null-literal). +Returns whether the argument is [NULL](../../sql-reference/syntax.md#null). ``` sql isNull(x) From 296f9968c04f6ca49599e281b4092f6cffef5bfd Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 28 Jun 2023 23:20:02 +0200 Subject: [PATCH 0962/1997] fix --- docker/test/upgrade/run.sh | 1 + src/Storages/MergeTree/IDataPartStorage.h | 1 + src/Storages/MergeTree/IMergeTreeDataPart.cpp | 3 ++- src/Storages/MergeTree/MergeTreeData.cpp | 10 ++++++++++ src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp | 9 +++++++++ src/Storages/StorageReplicatedMergeTree.cpp | 9 ++++++--- 6 files changed, 29 insertions(+), 4 deletions(-) diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index e72c28b7167..8fd514eaa93 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -189,6 +189,7 @@ rg -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \ -e "Authentication failed" \ -e "Cannot flush" \ -e "Container already exists" \ + -e "doesn't have metadata version on disk" \ clickhouse-server.upgrade.log \ | grep -av -e "_repl_01111_.*Mapping for table with UUID" \ | zgrep -Fa "" > /test_output/upgrade_error_messages.txt \ diff --git a/src/Storages/MergeTree/IDataPartStorage.h b/src/Storages/MergeTree/IDataPartStorage.h index b40a9aa1b46..a2ca30bf73a 100644 --- a/src/Storages/MergeTree/IDataPartStorage.h +++ b/src/Storages/MergeTree/IDataPartStorage.h @@ -244,6 +244,7 @@ public: bool keep_metadata_version = false; bool make_source_readonly = false; DiskTransactionPtr external_transaction = nullptr; + std::optional metadata_version_to_write = std::nullopt; }; virtual std::shared_ptr freeze( diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 1f105951757..b9591864869 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1363,7 +1363,8 @@ void IMergeTreeDataPart::loadColumns(bool require) { loaded_metadata_version = metadata_snapshot->getMetadataVersion(); old_part_with_no_metadata_version_on_disk = true; - LOG_WARNING(storage.log, "Part {} doesn't have metadata version on disk, setting it to {}. " + if (storage.supportsReplication()) + LOG_WARNING(storage.log, "Part {} doesn't have metadata version on disk, setting it to {}. 
" "It's okay if the part was created by an old version of ClickHouse", name, loaded_metadata_version); } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index f3cf4a85953..9f2fa6c0434 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -7349,6 +7349,16 @@ std::pair MergeTreeData::cloneAn /*save_metadata_callback=*/ {}, params); + if (params.metadata_version_to_write.has_value()) + { + chassert(!params.keep_metadata_version); + auto out_metadata = dst_part_storage->writeFile(IMergeTreeDataPart::METADATA_VERSION_FILE_NAME, 4096, getContext()->getWriteSettings()); + writeText(metadata_snapshot->getMetadataVersion(), *out_metadata); + out_metadata->finalize(); + if (getSettings()->fsync_after_insert) + out_metadata->sync(); + } + LOG_DEBUG(log, "Clone{} part {} to {}{}", src_flushed_tmp_part ? " flushed" : "", src_part_storage->getFullPath(), diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index c93077fb4fb..1c896c4e2a6 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -604,6 +604,15 @@ void ReplicatedMergeTreeSinkImpl::writeExistingPart(MergeTreeData: { /// NOTE: No delay in this case. That's Ok. + part->getDataPartStorage().removeFileIfExists(IMergeTreeDataPart::METADATA_VERSION_FILE_NAME); + { + auto out_metadata = part->getDataPartStorage().writeFile(IMergeTreeDataPart::METADATA_VERSION_FILE_NAME, 4096, context->getWriteSettings()); + writeText(metadata_snapshot->getMetadataVersion(), *out_metadata); + out_metadata->finalize(); + if (storage.getSettings()->fsync_after_insert) + out_metadata->sync(); + } + auto origin_zookeeper = storage.getZooKeeper(); assertSessionIsNotExpired(origin_zookeeper); auto zookeeper = std::make_shared(origin_zookeeper); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index b9d48fc75f3..0361d46612b 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -2448,7 +2448,8 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) bool zero_copy_enabled = dynamic_cast(source_table.get())->getSettings()->allow_remote_fs_zero_copy_replication; IDataPartStorage::ClonePartParams clone_params { - .copy_instead_of_hardlink = zero_copy_enabled && part_desc->src_table_part->isStoredOnRemoteDiskWithZeroCopySupport() + .copy_instead_of_hardlink = zero_copy_enabled && part_desc->src_table_part->isStoredOnRemoteDiskWithZeroCopySupport(), + .metadata_version_to_write = metadata_snapshot->getMetadataVersion() }; auto [res_part, temporary_part_lock] = cloneAndLoadDataPartOnSameDisk( part_desc->src_table_part, TMP_PREFIX + "clone_", part_desc->new_part_info, metadata_snapshot, clone_params); @@ -7306,7 +7307,8 @@ void StorageReplicatedMergeTree::replacePartitionFrom( || dynamic_cast(source_table.get())->getSettings()->allow_remote_fs_zero_copy_replication; IDataPartStorage::ClonePartParams clone_params { - .copy_instead_of_hardlink = zero_copy_enabled && src_part->isStoredOnRemoteDiskWithZeroCopySupport() + .copy_instead_of_hardlink = zero_copy_enabled && src_part->isStoredOnRemoteDiskWithZeroCopySupport(), + .metadata_version_to_write = metadata_snapshot->getMetadataVersion() }; auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk(src_part, TMP_PREFIX, dst_part_info, metadata_snapshot, clone_params); 
src_parts.emplace_back(src_part); @@ -7545,7 +7547,8 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta || dynamic_cast(dest_table.get())->getSettings()->allow_remote_fs_zero_copy_replication; IDataPartStorage::ClonePartParams clone_params { - .copy_instead_of_hardlink = zero_copy_enabled && src_part->isStoredOnRemoteDiskWithZeroCopySupport() + .copy_instead_of_hardlink = zero_copy_enabled && src_part->isStoredOnRemoteDiskWithZeroCopySupport(), + .metadata_version_to_write = dest_metadata_snapshot->getMetadataVersion() }; auto [dst_part, dst_part_lock] = dest_table_storage->cloneAndLoadDataPartOnSameDisk(src_part, TMP_PREFIX, dst_part_info, dest_metadata_snapshot, clone_params); From 92d7b067b0ae9eff9b28fb41c8ffbeebc8b2c31c Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Wed, 28 Jun 2023 23:36:15 +0200 Subject: [PATCH 0963/1997] Bring back lost /tmp cleanup in clickhouse-server docker image --- docker/server/Dockerfile.ubuntu | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 1bb0cfdc700..42ae81655d2 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -48,14 +48,15 @@ ARG TARGETARCH RUN arch="${TARGETARCH:-amd64}" \ && if [ -n "${deb_location_url}" ]; then \ echo "installing from custom url with deb packages: ${deb_location_url}" \ - rm -rf /tmp/clickhouse_debs \ + && rm -rf /tmp/clickhouse_debs \ && mkdir -p /tmp/clickhouse_debs \ && for package in ${PACKAGES}; do \ { wget --progress=bar:force:noscroll "${deb_location_url}/${package}_${VERSION}_${arch}.deb" -P /tmp/clickhouse_debs || \ wget --progress=bar:force:noscroll "${deb_location_url}/${package}_${VERSION}_all.deb" -P /tmp/clickhouse_debs ; } \ || exit 1 \ ; done \ - && dpkg -i /tmp/clickhouse_debs/*.deb ; \ + && dpkg -i /tmp/clickhouse_debs/*.deb \ + && rm -rf /tmp/* ; \ fi # install from a single binary @@ -65,11 +66,12 @@ RUN if [ -n "${single_binary_location_url}" ]; then \ && mkdir -p /tmp/clickhouse_binary \ && wget --progress=bar:force:noscroll "${single_binary_location_url}" -O /tmp/clickhouse_binary/clickhouse \ && chmod +x /tmp/clickhouse_binary/clickhouse \ - && /tmp/clickhouse_binary/clickhouse install --user "clickhouse" --group "clickhouse" ; \ + && /tmp/clickhouse_binary/clickhouse install --user "clickhouse" --group "clickhouse" \ + && rm -rf /tmp/* ; \ fi # A fallback to installation from ClickHouse repository -RUN if ! clickhouse local -q "SELECT ''" > /dev/null; then \ +RUN if ! clickhouse local -q "SELECT ''" > /dev/null 2>&1; then \ apt-get update \ && apt-get install --yes --no-install-recommends \ apt-transport-https \ @@ -90,12 +92,12 @@ RUN if ! 
clickhouse local -q "SELECT ''" > /dev/null; then \ packages="${packages} ${package}=${VERSION}" \ ; done \ && apt-get install --allow-unauthenticated --yes --no-install-recommends ${packages} || exit 1 \ - && rm -rf \ - /var/lib/apt/lists/* \ - /var/cache/debconf \ - /tmp/* \ - && apt-get autoremove --purge -yq libksba8 \ - && apt-get autoremove -yq \ + && rm -rf \ + /var/lib/apt/lists/* \ + /var/cache/debconf \ + /tmp/* \ + && apt-get autoremove --purge -yq libksba8 \ + && apt-get autoremove -yq \ ; fi # post install From 45232770e06079b2b346209ae7b412597ff5996f Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 29 Jun 2023 00:41:23 +0300 Subject: [PATCH 0964/1997] Try to fix deadlock in ZooKeeper client --- src/Common/ZooKeeper/ZooKeeperImpl.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index 14b31c6a411..dd555e39529 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -362,6 +362,16 @@ ZooKeeper::ZooKeeper( { tryLogCurrentException(log, "Failed to connect to ZooKeeper"); + try + { + requests_queue.finish(); + socket.shutdown(); + } + catch (...) + { + tryLogCurrentException(log); + } + send_thread.join(); receive_thread.join(); From b27cf4e7ba0d9d579722202f693f820777c6bfc7 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 29 Jun 2023 00:15:45 +0200 Subject: [PATCH 0965/1997] fix --- src/Storages/MergeTree/MergeTreeData.cpp | 11 +++++++++-- src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp | 10 ---------- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 9f2fa6c0434..add48e2cf03 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -4707,12 +4707,19 @@ MergeTreeData::DataPartPtr MergeTreeData::getPartIfExistsUnlocked(const MergeTre return nullptr; } -static void loadPartAndFixMetadataImpl(MergeTreeData::MutableDataPartPtr part) +static void loadPartAndFixMetadataImpl(MergeTreeData::MutableDataPartPtr part, ContextPtr local_context, int32_t metadata_version, bool sync) { /// Remove metadata version file and take it from table. /// Currently we cannot attach parts with different schema, so /// we can assume that it's equal to table's current schema. 
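/// Note: the block added just below by this patch rewrites the metadata-version
/// file with the table's current value instead of merely dropping it, so an
/// attached part ends up with an up-to-date version persisted on disk; the
/// `sync` flag mirrors the caller's fsync_after_insert setting.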
part->removeMetadataVersion();
+    {
+        auto out_metadata = part->getDataPartStorage().writeFile(IMergeTreeDataPart::METADATA_VERSION_FILE_NAME, 4096, local_context->getWriteSettings());
+        writeText(metadata_version, *out_metadata);
+        out_metadata->finalize();
+        if (sync)
+            out_metadata->sync();
+    }
 part->loadColumnsChecksumsIndexes(false, true);
 part->modification_time = part->getDataPartStorage().getLastModified().epochTime();
@@ -5844,7 +5851,7 @@ MergeTreeData::MutableDataPartsVector MergeTreeData::tryLoadPartsToAttach(const
         .withPartFormatFromDisk()
         .build();

-        loadPartAndFixMetadataImpl(part);
+        loadPartAndFixMetadataImpl(part, local_context, getInMemoryMetadataPtr()->getMetadataVersion(), getSettings()->fsync_after_insert);
         loaded_parts.push_back(part);
     }

diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp
index 1c896c4e2a6..9c5890383dc 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp
+++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp
@@ -603,16 +603,6 @@ template void ReplicatedMergeTreeSinkImpl::writeExistingPart(MergeTreeData::MutableDataPartPtr & part)
 {
     /// NOTE: No delay in this case. That's Ok.
-
-    part->getDataPartStorage().removeFileIfExists(IMergeTreeDataPart::METADATA_VERSION_FILE_NAME);
-    {
-        auto out_metadata = part->getDataPartStorage().writeFile(IMergeTreeDataPart::METADATA_VERSION_FILE_NAME, 4096, context->getWriteSettings());
-        writeText(metadata_snapshot->getMetadataVersion(), *out_metadata);
-        out_metadata->finalize();
-        if (storage.getSettings()->fsync_after_insert)
-            out_metadata->sync();
-    }
-
     auto origin_zookeeper = storage.getZooKeeper();
     assertSessionIsNotExpired(origin_zookeeper);
     auto zookeeper = std::make_shared(origin_zookeeper);

From 37ad50bd637b8bfd6612e0b80b8f6dd388a19595 Mon Sep 17 00:00:00 2001
From: Denny Crane
Date: Wed, 28 Jun 2023 19:40:40 -0300
Subject: [PATCH 0966/1997] Update settings.md

---
 .../server-configuration-parameters/settings.md | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md
index 40c1b8d64a1..bad7e388377 100644
--- a/docs/en/operations/server-configuration-parameters/settings.md
+++ b/docs/en/operations/server-configuration-parameters/settings.md
@@ -2120,7 +2120,13 @@ This section contains the following parameters:
 - `operation_timeout_ms` — Maximum timeout for one operation in milliseconds.
 - `root` — The [znode](http://zookeeper.apache.org/doc/r3.5.5/zookeeperOver.html#Nodes+and+ephemeral+nodes) that is used as the root for znodes used by the ClickHouse server. Optional.
 - `identity` — User and password, that can be required by ZooKeeper to give access to requested znodes. Optional.
-
+- zookeeper_load_balancing - Specifies the algorithm of ZooKeeper node selection.
+  * random - randomly selects one of the ZooKeeper nodes.
+  * in_order - selects the first ZooKeeper node, and if it's not available then the second, and so on.
+  * nearest_hostname - selects a ZooKeeper node with a hostname that is most similar to the server’s hostname (see the sketch after this list).
+  * first_or_random - selects the first ZooKeeper node, and if it's not available then randomly selects one of the remaining ZooKeeper nodes.
+  * round_robin - selects the first ZooKeeper node; if reconnection happens, selects the next.
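To make the `nearest_hostname` option above concrete, here is a rough sketch of
one plausible similarity heuristic, counting the positions at which two
hostnames differ (Python, for illustration only; the metric actually used by
ClickHouse may differ):

def hostname_distance(local: str, candidate: str) -> int:
    # Count byte positions where the two names differ; extra length also counts.
    differing = sum(1 for a, b in zip(local, candidate) if a != b)
    return differing + abs(len(local) - len(candidate))

def pick_nearest_hostname(local: str, nodes: list[str]) -> str:
    # Prefer the ZooKeeper host whose name differs least from the local one.
    return min(nodes, key=lambda node: hostname_distance(local, node))

print(pick_nearest_hostname("ch-host-1", ["zk-host-1", "ch-host-2", "db-host-9"]))  # -> ch-host-2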
+ **Example configuration** ``` xml @@ -2139,6 +2145,8 @@ This section contains the following parameters: /path/to/zookeeper/node user:password + + random ``` From fdd8a0a3966028a5c72e7ce5e07410f68ce50da5 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> Date: Thu, 29 Jun 2023 02:35:07 +0200 Subject: [PATCH 0967/1997] Fix flaky test 00416_pocopatch_progress_in_http_headers --- ...0416_pocopatch_progress_in_http_headers.sh | 31 +++++++++++++++++-- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh b/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh index b2189ab0cc2..7e954db2c86 100755 --- a/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh +++ b/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh @@ -4,9 +4,28 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=5&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0" -d 'SELECT max(number) FROM numbers(10)' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Progress|^[0-9]' +RETRIES=5 + +result="" +lines_expected=4 +counter=0 +while [ $counter -lt $RETRIES ] && [ $(echo "$result" | wc -l) != "$lines_expected" ]; do + result=$(${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=5&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0" -d 'SELECT max(number) FROM numbers(10)' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Progress|^[0-9]') + let counter=counter+1 + # echo "$result" | wc -l +done +echo "$result" + +result="" +lines_expected=12 +counter=0 +while [ $counter -lt $RETRIES ] && [ $(echo "$result" | wc -l) != "$lines_expected" ]; do + result=$(${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0&output_format_parallel_formatting=0" -d 'SELECT number FROM numbers(10)' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Progress|^[0-9]') + let counter=counter+1 + # echo "$result" | wc -l +done +echo "$result" -${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0&output_format_parallel_formatting=0" -d 'SELECT number FROM numbers(10)' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Progress|^[0-9]' ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&max_block_size=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0&enable_http_compression=1" -H 'Accept-Encoding: gzip' -d 'SELECT number FROM system.numbers LIMIT 10' | gzip -d # 'send_progress_in_http_headers' is false by default @@ -26,7 +45,13 @@ ${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}" -H 'Accept-Encoding: gzip' -d 'DROP ${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}" -H 'Accept-Encoding: gzip' -d 'CREATE TABLE insert_number_query (record UInt32) Engine = Memory' > /dev/null 2>&1 ${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}" -H 'Accept-Encoding: gzip' -d 'CREATE TABLE insert_number_query_2 (record UInt32) Engine = Memory' > /dev/null 2>&1 -${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=1&http_headers_progress_interval_ms=0&send_progress_in_http_headers=1" -d 'INSERT INTO insert_number_query (record) SELECT number FROM system.numbers LIMIT 10' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Summary|^[0-9]' +result="" +counter=0 +while [ $counter -lt $RETRIES ] && [ -z "$result" ]; do + 
result=$(${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=1&http_headers_progress_interval_ms=0&send_progress_in_http_headers=1" -d 'INSERT INTO insert_number_query (record) SELECT number FROM system.numbers LIMIT 10' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Summary|^[0-9]') + let counter=counter+1 +done +echo "$result" ${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}" -H 'Accept-Encoding: gzip' -d 'DROP TABLE insert_number_query' > /dev/null 2>&1 ${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}" -H 'Accept-Encoding: gzip' -d 'DROP TABLE insert_number_query_2' > /dev/null 2>&1 From 58581ce5f6bdfe0df9135a95c0df14404af91e2a Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> Date: Thu, 29 Jun 2023 02:37:09 +0200 Subject: [PATCH 0968/1997] Update 00416_pocopatch_progress_in_http_headers.sh --- .../0_stateless/00416_pocopatch_progress_in_http_headers.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh b/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh index 7e954db2c86..ad7e89a7357 100755 --- a/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh +++ b/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh @@ -12,7 +12,6 @@ counter=0 while [ $counter -lt $RETRIES ] && [ $(echo "$result" | wc -l) != "$lines_expected" ]; do result=$(${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=5&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0" -d 'SELECT max(number) FROM numbers(10)' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Progress|^[0-9]') let counter=counter+1 - # echo "$result" | wc -l done echo "$result" @@ -22,7 +21,6 @@ counter=0 while [ $counter -lt $RETRIES ] && [ $(echo "$result" | wc -l) != "$lines_expected" ]; do result=$(${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0&output_format_parallel_formatting=0" -d 'SELECT number FROM numbers(10)' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Progress|^[0-9]') let counter=counter+1 - # echo "$result" | wc -l done echo "$result" From f23bf9c5acdd885d61ec7f68bdf8be1b1fa79dee Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Thu, 29 Jun 2023 10:04:36 +0800 Subject: [PATCH 0969/1997] Update redis.md --- .../engines/table-engines/integrations/redis.md | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/docs/en/engines/table-engines/integrations/redis.md b/docs/en/engines/table-engines/integrations/redis.md index 568179eb690..7ef87927bfd 100644 --- a/docs/en/engines/table-engines/integrations/redis.md +++ b/docs/en/engines/table-engines/integrations/redis.md @@ -44,9 +44,10 @@ Create a table in ClickHouse which allows to read data from Redis: ``` sql CREATE TABLE redis_table ( - `k` String, - `m` String, - `n` UInt32 + `key` String, + `v1` UInt32, + `v2` String, + `v3` Float32 ) ENGINE = Redis('redis1:6379') PRIMARY KEY(k); ``` @@ -111,9 +112,16 @@ Flush Redis db asynchronously. Also `Truncate` support SYNC mode. TRUNCATE TABLE redis_table SYNC; ``` +Join: + +Join with other tables. + +``` +SELECT * FROM redis_table JOIN merge_tree_table ON redis_table.key=merge_tree_table.key; +``` ## Limitations {#limitations} Redis engine also supports scanning queries, such as `where k > xx`, but it has some limitations: -1. Scanning query may produce some duplicated keys in a very rare case when it is rehashing. 
See details in [Redis Scan](https://github.com/redis/redis/blob/e4d183afd33e0b2e6e8d1c79a832f678a04a7886/src/dict.c#L1186-L1269) +1. Scanning query may produce some duplicated keys in a very rare case when it is rehashing. See details in [Redis Scan](https://github.com/redis/redis/blob/e4d183afd33e0b2e6e8d1c79a832f678a04a7886/src/dict.c#L1186-L1269). 2. During the scanning, keys could be created and deleted, so the resulting dataset can not represent a valid point in time. From f8f0b7d086d23f70a49be88233b19d152e99e3f4 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Thu, 29 Jun 2023 10:09:27 +0800 Subject: [PATCH 0970/1997] fix typo --- docs/en/engines/table-engines/integrations/redis.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/engines/table-engines/integrations/redis.md b/docs/en/engines/table-engines/integrations/redis.md index 7ef87927bfd..2697abcf30e 100644 --- a/docs/en/engines/table-engines/integrations/redis.md +++ b/docs/en/engines/table-engines/integrations/redis.md @@ -49,7 +49,7 @@ CREATE TABLE redis_table `v2` String, `v3` Float32 ) -ENGINE = Redis('redis1:6379') PRIMARY KEY(k); +ENGINE = Redis('redis1:6379') PRIMARY KEY(key); ``` Insert: From 98966796d0e003d618aade919f7f3e52788ce7e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=89=AC?= <654010905@qq.com> Date: Thu, 29 Jun 2023 10:18:47 +0800 Subject: [PATCH 0971/1997] Update docs/en/sql-reference/functions/string-functions.md Co-authored-by: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> --- docs/en/sql-reference/functions/string-functions.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 5197b786884..f6b629f1179 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -574,9 +574,9 @@ Alias: Like `substring` but for Unicode code points. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. -## substringIndex(s, delim, index) +## substringIndex(s, delim, count) -Returns the substring of `s` before `index` occurrences of the delimiter `delim`, as in Spark or MySQL. +Returns the substring of `s` before `count` occurrences of the delimiter `delim`, as in Spark or MySQL. **Syntax** From e2236384d1795ac8f95cb1281b7e5199f8844e8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=89=AC?= <654010905@qq.com> Date: Thu, 29 Jun 2023 10:18:54 +0800 Subject: [PATCH 0972/1997] Update docs/en/sql-reference/functions/string-functions.md Co-authored-by: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> --- docs/en/sql-reference/functions/string-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index f6b629f1179..f3bcc99d83d 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -605,7 +605,7 @@ Result: └──────────────────────────────────────────────┘ ``` -## substringIndexUTF8(s, delim, index) +## substringIndexUTF8(s, delim, count) Like `substringIndex` but for Unicode code points. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. 
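For readers following the `substringIndex` documentation patches above, here is a minimal usage sketch of the semantics they describe (MySQL/Spark-compatible behavior; the example values are illustrative and not taken from the patches):

```sql
-- count > 0: everything to the left of the count-th delimiter, counting from the left
SELECT substringIndex('www.clickhouse.com', '.', 2);   -- returns 'www.clickhouse'

-- count < 0: everything to the right of the count-th delimiter, counting from the right
SELECT substringIndex('www.clickhouse.com', '.', -2);  -- returns 'clickhouse.com'

-- the UTF8 variant behaves the same but counts in Unicode code points
SELECT substringIndexUTF8('www.clickhouse.com', '.', 2);  -- returns 'www.clickhouse'
```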
From e9bac152e1a5f08845c2d40e608ed293bd5c0384 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=89=AC?= <654010905@qq.com> Date: Thu, 29 Jun 2023 10:19:04 +0800 Subject: [PATCH 0973/1997] Update docs/en/sql-reference/functions/string-functions.md Co-authored-by: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> --- docs/en/sql-reference/functions/string-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index f3bcc99d83d..3ed60434834 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -581,7 +581,7 @@ Returns the substring of `s` before `count` occurrences of the delimiter `delim` **Syntax** ```sql -substringIndex(s, delim, index) +substringIndex(s, delim, count) ``` Alias: `SUBSTRING_INDEX` From 40ded2eca001ecc145358d4ab4c3a5e43738d2e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=89=AC?= <654010905@qq.com> Date: Thu, 29 Jun 2023 10:19:13 +0800 Subject: [PATCH 0974/1997] Update docs/en/sql-reference/functions/string-functions.md Co-authored-by: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> --- docs/en/sql-reference/functions/string-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 3ed60434834..12aa8d2c076 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -590,7 +590,7 @@ Alias: `SUBSTRING_INDEX` - s: The string to extract substring from. [String](../../sql-reference/data-types/string.md). - delim: The character to split. [String](../../sql-reference/data-types/string.md). -- index: The number of occurrences of the delimiter to count before extracting the substring. If index is positive, everything to the left of the final delimiter (counting from the left) is returned. If index is negative, everything to the right of the final delimiter (counting from the right) is returned. [UInt or Int](../data-types/int-uint.md) +- count: The number of occurrences of the delimiter to count before extracting the substring. If count is positive, everything to the left of the final delimiter (counting from the left) is returned. If count is negative, everything to the right of the final delimiter (counting from the right) is returned. 
[UInt or Int](../data-types/int-uint.md) **Example** From f049914c2f47d3e202a936464f4fd04390c1669f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=89=AC?= <654010905@qq.com> Date: Thu, 29 Jun 2023 10:19:19 +0800 Subject: [PATCH 0975/1997] Update src/Functions/substringIndex.cpp Co-authored-by: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> --- src/Functions/substringIndex.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/substringIndex.cpp b/src/Functions/substringIndex.cpp index fbb20b245f6..34d3ab4b3fb 100644 --- a/src/Functions/substringIndex.cpp +++ b/src/Functions/substringIndex.cpp @@ -57,7 +57,7 @@ namespace arguments[1]->getName(), getName()); - if (!isNativeNumber(arguments[2])) + if (!isNativeInteger(arguments[2])) throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of third argument of function {}", From 21ff69772caf01c03512c8076bcf0e95e7805588 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=89=AC?= <654010905@qq.com> Date: Thu, 29 Jun 2023 10:19:28 +0800 Subject: [PATCH 0976/1997] Update src/Functions/substringIndex.cpp Co-authored-by: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> --- src/Functions/substringIndex.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/substringIndex.cpp b/src/Functions/substringIndex.cpp index 34d3ab4b3fb..fb74936b0bc 100644 --- a/src/Functions/substringIndex.cpp +++ b/src/Functions/substringIndex.cpp @@ -46,7 +46,7 @@ namespace if (!isString(arguments[0])) throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type {} of first argument of function {}", + "Illegal type {} of first argument of function {}, String expected", arguments[0]->getName(), getName()); From a005b5d0c8aaefb8d1c4b74fffefe464e882d329 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=89=AC?= <654010905@qq.com> Date: Thu, 29 Jun 2023 10:19:39 +0800 Subject: [PATCH 0977/1997] Update src/Functions/substringIndex.cpp Co-authored-by: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> --- src/Functions/substringIndex.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/substringIndex.cpp b/src/Functions/substringIndex.cpp index fb74936b0bc..653ef9e509a 100644 --- a/src/Functions/substringIndex.cpp +++ b/src/Functions/substringIndex.cpp @@ -53,7 +53,7 @@ namespace if (!isString(arguments[1])) throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type {} of second argument of function {}", + "Illegal type {} of second argument of function {}, String expected", arguments[1]->getName(), getName()); From a35476ee13573b37ad0ae667c3c4b2405f681d01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=89=AC?= <654010905@qq.com> Date: Thu, 29 Jun 2023 10:19:48 +0800 Subject: [PATCH 0978/1997] Update src/Functions/substringIndex.cpp Co-authored-by: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> --- src/Functions/substringIndex.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/substringIndex.cpp b/src/Functions/substringIndex.cpp index 653ef9e509a..963420a4fee 100644 --- a/src/Functions/substringIndex.cpp +++ b/src/Functions/substringIndex.cpp @@ -60,7 +60,7 @@ namespace if (!isNativeInteger(arguments[2])) throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type {} of third argument of function {}", + "Illegal type {} of third argument of function {}, Integer expected", arguments[2]->getName(), getName()); 
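The implementation patches that follow also pin down the edge cases: a count of zero yields an empty string, and if the input contains fewer delimiters than `abs(count)`, the whole input is returned. A hedged sketch of those cases (illustrative values, assuming the behavior of the code below):

```sql
SELECT substringIndex('a.b.c', '.', 0);   -- returns ''      (zero count yields an empty string)
SELECT substringIndex('a.b.c', '.', 10);  -- returns 'a.b.c' (fewer than 10 delimiters: whole input)
SELECT substringIndex('a.b.c', '.', -1);  -- returns 'c'     (everything after the last delimiter)
```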
From 9e34227fe96a2c37d8895663c7fecdd3688037fd Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Thu, 29 Jun 2023 10:39:20 +0800 Subject: [PATCH 0979/1997] change as requested --- src/Functions/substringIndex.cpp | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/src/Functions/substringIndex.cpp b/src/Functions/substringIndex.cpp index 963420a4fee..903edfe5031 100644 --- a/src/Functions/substringIndex.cpp +++ b/src/Functions/substringIndex.cpp @@ -136,8 +136,13 @@ namespace { StringRef str_ref = str_column->getDataAt(i); Int64 index = index_column->getInt(i); - StringRef res_ref - = !is_utf8 ? substringIndex(str_ref, delim[0], index) : substringIndexUTF8(searcher.get(), str_ref, delim, index); + + StringRef res_ref; + if constexpr (!is_utf8) + res_ref = substringIndex(str_ref, delim[0], index); + else + res_ref = substringIndexUTF8(searcher.get(), str_ref, delim, index); + appendToResultColumn(res_ref, res_data, res_offsets); } } @@ -159,8 +164,13 @@ namespace for (size_t i = 0; i < rows; ++i) { StringRef str_ref = str_column->getDataAt(i); - StringRef res_ref - = !is_utf8 ? substringIndex(str_ref, delim[0], index) : substringIndexUTF8(searcher.get(), str_ref, delim, index); + + StringRef res_ref; + if constexpr (!is_utf8) + res_ref = substringIndex(str_ref, delim[0], index); + else + res_ref = substringIndexUTF8(searcher.get(), str_ref, delim, index); + appendToResultColumn(res_ref, res_data, res_offsets); } } @@ -183,8 +193,13 @@ namespace for (size_t i = 0; i < rows; ++i) { Int64 index = index_column->getInt(i); - StringRef res_ref - = !is_utf8 ? substringIndex(str_ref, delim[0], index) : substringIndexUTF8(searcher.get(), str_ref, delim, index); + + StringRef res_ref; + if constexpr (!is_utf8) + res_ref = substringIndex(str_ref, delim[0], index); + else + res_ref = substringIndexUTF8(searcher.get(), str_ref, delim, index); + appendToResultColumn(res_ref, res_data, res_offsets); } } From 3a01a859d9aaef5fc6bba54cc0a40c7c30c4a23f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=89=AC?= <654010905@qq.com> Date: Thu, 29 Jun 2023 10:39:25 +0800 Subject: [PATCH 0980/1997] Update src/Functions/substringIndex.cpp Co-authored-by: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> --- src/Functions/substringIndex.cpp | 46 +++++++++----------------------- 1 file changed, 12 insertions(+), 34 deletions(-) diff --git a/src/Functions/substringIndex.cpp b/src/Functions/substringIndex.cpp index 963420a4fee..41b46af284a 100644 --- a/src/Functions/substringIndex.cpp +++ b/src/Functions/substringIndex.cpp @@ -256,41 +256,19 @@ namespace if (index == 0) return {str_ref.data, 0}; - if (index > 0) - { - const auto * end = str_ref.data + str_ref.size; - const auto * pos = str_ref.data; - Int64 i = 0; - while (i < index) - { - pos = std::find(pos, end, delim); - if (pos != end) - { - ++pos; - ++i; - } - else - return str_ref; - } - return {str_ref.data, static_cast(pos - str_ref.data - 1)}; - } - else - { - const auto * begin = str_ref.data; - const auto * pos = str_ref.data + str_ref.size; - Int64 i = 0; - while (i + index < 0) - { - --pos; - while (pos >= begin && *pos != delim) - --pos; + const auto pos = index > 0 ? str_ref.data : str_ref.data + str_ref.size - 1; + const auto end = index > 0 ? str_ref.data + str_ref.size : str_ref.data - 1; + int d = index > 0 ? 
1 : -1; - if (pos >= begin) - ++i; - else - return str_ref; - } - return {pos + 1, static_cast(str_ref.data + str_ref.size - pos - 1)}; + for (; index; pos += d) + { + if (pos == end) + return str_ref; + if (*pos == delim) + index -= d; + } + pos -= d; + return {d > 0 ? str_ref.data : pos + 1, static_cast(d > 0 ? pos - str_ref.data : str_ref.data + str_ref.size - pos - 1)} ; } } }; From 95a9270b747322bb376dbfacde8aa58ce0835930 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Thu, 29 Jun 2023 10:54:40 +0800 Subject: [PATCH 0981/1997] change as request --- src/Functions/substringIndex.cpp | 70 ++++++++++++++++---------------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/src/Functions/substringIndex.cpp b/src/Functions/substringIndex.cpp index ade8d0ce504..d1791c9696b 100644 --- a/src/Functions/substringIndex.cpp +++ b/src/Functions/substringIndex.cpp @@ -71,7 +71,7 @@ namespace { ColumnPtr column_string = arguments[0].column; ColumnPtr column_delim = arguments[1].column; - ColumnPtr column_index = arguments[2].column; + ColumnPtr column_count = arguments[2].column; const ColumnConst * column_delim_const = checkAndGetColumnConst(column_delim.get()); if (!column_delim_const) @@ -97,7 +97,7 @@ namespace if (column_string_const) { String str = column_string_const->getValue(); - constantVector(str, delim, column_index.get(), vec_res, offsets_res); + constantVector(str, delim, column_count.get(), vec_res, offsets_res); } else { @@ -105,14 +105,14 @@ namespace if (!col_str) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "First argument to {} must be a String", getName()); - bool is_index_const = isColumnConst(*column_index); - if (is_index_const) + bool is_count_const = isColumnConst(*column_count); + if (is_count_const) { - Int64 index = column_index->getInt(0); - vectorConstant(col_str, delim, index, vec_res, offsets_res); + Int64 count = column_count->getInt(0); + vectorConstant(col_str, delim, count, vec_res, offsets_res); } else - vectorVector(col_str, delim, column_index.get(), vec_res, offsets_res); + vectorVector(col_str, delim, column_count.get(), vec_res, offsets_res); } return column_res; } @@ -121,7 +121,7 @@ namespace static void vectorVector( const ColumnString * str_column, const String & delim, - const IColumn * index_column, + const IColumn * count_column, ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) { @@ -135,13 +135,13 @@ namespace for (size_t i = 0; i < rows; ++i) { StringRef str_ref = str_column->getDataAt(i); - Int64 index = index_column->getInt(i); + Int64 count = count_column->getInt(i); StringRef res_ref; if constexpr (!is_utf8) - res_ref = substringIndex(str_ref, delim[0], index); + res_ref = substringIndex(str_ref, delim[0], count); else - res_ref = substringIndexUTF8(searcher.get(), str_ref, delim, index); + res_ref = substringIndexUTF8(searcher.get(), str_ref, delim, count); appendToResultColumn(res_ref, res_data, res_offsets); } @@ -150,7 +150,7 @@ namespace static void vectorConstant( const ColumnString * str_column, const String & delim, - Int64 index, + Int64 count, ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) { @@ -167,9 +167,9 @@ namespace StringRef res_ref; if constexpr (!is_utf8) - res_ref = substringIndex(str_ref, delim[0], index); + res_ref = substringIndex(str_ref, delim[0], count); else - res_ref = substringIndexUTF8(searcher.get(), str_ref, delim, index); + res_ref = substringIndexUTF8(searcher.get(), str_ref, delim, count); appendToResultColumn(res_ref, res_data, res_offsets); 
} @@ -178,11 +178,11 @@ namespace static void constantVector( const String & str, const String & delim, - const IColumn * index_column, + const IColumn * count_column, ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) { - size_t rows = index_column->size(); + size_t rows = count_column->size(); res_data.reserve(str.size() * rows / 2); res_offsets.reserve(rows); @@ -192,13 +192,13 @@ namespace StringRef str_ref{str.data(), str.size()}; for (size_t i = 0; i < rows; ++i) { - Int64 index = index_column->getInt(i); + Int64 count = count_column->getInt(i); StringRef res_ref; if constexpr (!is_utf8) - res_ref = substringIndex(str_ref, delim[0], index); + res_ref = substringIndex(str_ref, delim[0], count); else - res_ref = substringIndexUTF8(searcher.get(), str_ref, delim, index); + res_ref = substringIndexUTF8(searcher.get(), str_ref, delim, count); appendToResultColumn(res_ref, res_data, res_offsets); } @@ -217,18 +217,18 @@ namespace } static StringRef substringIndexUTF8( - const PositionCaseSensitiveUTF8::SearcherInBigHaystack * searcher, const StringRef & str_ref, const String & delim, Int64 index) + const PositionCaseSensitiveUTF8::SearcherInBigHaystack * searcher, const StringRef & str_ref, const String & delim, Int64 count) { - if (index == 0) + if (count == 0) return {str_ref.data, 0}; const auto * begin = reinterpret_cast(str_ref.data); const auto * end = reinterpret_cast(str_ref.data + str_ref.size); const auto * pos = begin; - if (index > 0) + if (count > 0) { Int64 i = 0; - while (i < index) + while (i < count) { pos = searcher->search(pos, end - pos); @@ -251,13 +251,13 @@ namespace ++total; } - if (total + index < 0) + if (total + count < 0) return str_ref; pos = begin; Int64 i = 0; - Int64 index_from_left = total + 1 + index; - while (i < index_from_left && pos < end && end != (pos = searcher->search(pos, end - pos))) + Int64 count_from_left = total + 1 + count; + while (i < count_from_left && pos < end && end != (pos = searcher->search(pos, end - pos))) { pos += delim.size(); ++i; @@ -266,25 +266,25 @@ namespace } } - static StringRef substringIndex(const StringRef & str_ref, char delim, Int64 index) + static StringRef substringIndex(const StringRef & str_ref, char delim, Int64 count) { - if (index == 0) + if (count == 0) return {str_ref.data, 0}; - const auto pos = index > 0 ? str_ref.data : str_ref.data + str_ref.size - 1; - const auto end = index > 0 ? str_ref.data + str_ref.size : str_ref.data - 1; - int d = index > 0 ? 1 : -1; + const auto * pos = count > 0 ? str_ref.data : str_ref.data + str_ref.size - 1; + const auto * end = count > 0 ? str_ref.data + str_ref.size : str_ref.data - 1; + int d = count > 0 ? 1 : -1; - for (; index; pos += d) + for (; count; pos += d) { if (pos == end) return str_ref; if (*pos == delim) - index -= d; + count -= d; } pos -= d; - return {d > 0 ? str_ref.data : pos + 1, static_cast(d > 0 ? pos - str_ref.data : str_ref.data + str_ref.size - pos - 1)} ; - } + return { + d > 0 ? str_ref.data : pos + 1, static_cast(d > 0 ? 
pos - str_ref.data : str_ref.data + str_ref.size - pos - 1)}; } }; } From 635ab9f9af3894c42b69ef093cc34e64cefce219 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Thu, 29 Jun 2023 12:53:23 +0800 Subject: [PATCH 0982/1997] move redis_table to right --- docs/en/engines/table-engines/integrations/redis.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/engines/table-engines/integrations/redis.md b/docs/en/engines/table-engines/integrations/redis.md index 2697abcf30e..8086a6503b8 100644 --- a/docs/en/engines/table-engines/integrations/redis.md +++ b/docs/en/engines/table-engines/integrations/redis.md @@ -117,7 +117,7 @@ Join: Join with other tables. ``` -SELECT * FROM redis_table JOIN merge_tree_table ON redis_table.key=merge_tree_table.key; +SELECT * FROM redis_table JOIN merge_tree_table ON merge_tree_table.key=redis_table.key; ``` ## Limitations {#limitations} From 0c697b920fa529fcd10ab2802828c35c44878c46 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 29 Jun 2023 06:58:53 +0200 Subject: [PATCH 0983/1997] Remove the usage of Analyzer setting in the client --- src/Client/Suggest.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Client/Suggest.cpp b/src/Client/Suggest.cpp index 6e989e10f76..1723f85dc16 100644 --- a/src/Client/Suggest.cpp +++ b/src/Client/Suggest.cpp @@ -101,9 +101,8 @@ static String getLoadSuggestionQuery(Int32 suggestion_limit, bool basic_suggesti add_column("name", "columns", true, suggestion_limit); } - /// FIXME: Forbid this query using new analyzer because of bug https://github.com/ClickHouse/ClickHouse/issues/50669 - /// We should remove this restriction after resolving this bug. - query = "SELECT DISTINCT arrayJoin(extractAll(name, '[\\\\w_]{2,}')) AS res FROM (" + query + ") WHERE notEmpty(res) SETTINGS allow_experimental_analyzer=0"; + /// FIXME: This query does not work with the new analyzer because of bug https://github.com/ClickHouse/ClickHouse/issues/50669 + query = "SELECT DISTINCT arrayJoin(extractAll(name, '[\\\\w_]{2,}')) AS res FROM (" + query + ") WHERE notEmpty(res)"; return query; } From ffa4f37c9af8feb2acdc4d41c4e22cf30e10986c Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Thu, 29 Jun 2023 06:59:42 +0000 Subject: [PATCH 0984/1997] Try to fix style --- src/IO/Progress.cpp | 6 ++++-- src/Server/HTTPHandler.cpp | 6 +++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/IO/Progress.cpp b/src/IO/Progress.cpp index bf42cdf91d6..c652a62574d 100644 --- a/src/IO/Progress.cpp +++ b/src/IO/Progress.cpp @@ -74,7 +74,8 @@ void ProgressValues::writeJSON(WriteBuffer & out) const /// Numbers are written in double quotes (as strings) to avoid loss of precision /// of 64-bit integers after interpretation by JavaScript. 
- writeCString("{\"read_rows\":\"", out); + //writeCString("{\"read_rows\":\"", out); + writeCString("\"read_rows\":\"", out); writeText(read_rows, out); writeCString("\",\"read_bytes\":\"", out); writeText(read_bytes, out); @@ -88,7 +89,8 @@ void ProgressValues::writeJSON(WriteBuffer & out) const writeText(result_rows, out); writeCString("\",\"result_bytes\":\"", out); writeText(result_bytes, out); - writeCString("\"}", out); + //writeCString("\"}", out); + writeCString("\"", out); } bool Progress::incrementPiecewiseAtomically(const Progress & rhs) diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index 562f1e7e93f..c7ec4a848e9 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -816,12 +816,12 @@ void HTTPHandler::processQuery( /// While still no data has been sent, we will report about query execution progress by sending HTTP headers. /// Note that we add it unconditionally so the progress is available for `X-ClickHouse-Summary` - append_callback([&used_output, this](const Progress & progress) { + append_callback([&used_output](const Progress & progress) { used_output.out->onProgress(progress); auto thread_group = CurrentThread::getGroup(); auto peak_memory_usage = thread_group->memory_tracker.getPeak(); - used_output.out->onMemoryUsage(peak_memory_usage); - }); + used_output.out->onMemoryUsage(peak_memory_usage); + }); if (settings.readonly > 0 && settings.cancel_http_readonly_queries_on_client_close) { From 2392dda4126f00ed968f1496e6b61ab8089832f9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 29 Jun 2023 09:38:59 +0200 Subject: [PATCH 0985/1997] Changelog for 23.6 --- CHANGELOG.md | 102 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 72372c8fac4..a2e7b021081 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,5 @@ ### Table of Contents +**[ClickHouse release v23.6, 2023-06-30](#236)**
**[ClickHouse release v23.5, 2023-06-08](#235)**<br/>
**[ClickHouse release v23.4, 2023-04-26](#234)**<br/>
**[ClickHouse release v23.3 LTS, 2023-03-30](#233)**<br/>
@@ -8,6 +9,107 @@ # 2023 Changelog +### ClickHouse release 23.6, 2023-06-29 + +#### Backward Incompatible Change +* Delete feature `do_not_evict_index_and_mark_files` in the fs cache. This feature was only making things worse. [#51253](https://github.com/ClickHouse/ClickHouse/pull/51253) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Remove ALTER support for experimental LIVE VIEW. [#51287](https://github.com/ClickHouse/ClickHouse/pull/51287) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Decrease the default values for `http_max_field_value_size` and `http_max_field_name_size` to 128 KiB. [#51163](https://github.com/ClickHouse/ClickHouse/pull/51163) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* CGroups metrics related to CPU are replaced with one metric, `CGroupMaxCPU` for better usability. The `Normalized` CPU usage metrics will be normalized to CGroups limits instead of the total number of CPUs when they are set. This closes [#50836](https://github.com/ClickHouse/ClickHouse/issues/50836). [#50835](https://github.com/ClickHouse/ClickHouse/pull/50835) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### New Feature +* Added `Overlay` database engine to combine multiple databases into one. Added `Filesystem` database engine to represent a directory in the filesystem as a set of implicitly available tables with auto-detected formats and structures. A new `S3` database engine allows to read-only interact with s3 storage by representing a prefix as a set of tables. A new `HDFS` database engine allows to interact with HDFS storage in the same way. [#48821](https://github.com/ClickHouse/ClickHouse/pull/48821) ([alekseygolub](https://github.com/alekseygolub)). +* The function `transform` as well as `CASE` with value matching started to support all data types. This closes [#29730](https://github.com/ClickHouse/ClickHouse/issues/29730). This closes [#32387](https://github.com/ClickHouse/ClickHouse/issues/32387). This closes [#50827](https://github.com/ClickHouse/ClickHouse/issues/50827). This closes [#31336](https://github.com/ClickHouse/ClickHouse/issues/31336). This closes [#40493](https://github.com/ClickHouse/ClickHouse/issues/40493). [#51351](https://github.com/ClickHouse/ClickHouse/pull/51351) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added option `--rename_files_after_processing `. This closes [#34207](https://github.com/ClickHouse/ClickHouse/issues/34207). [#49626](https://github.com/ClickHouse/ClickHouse/pull/49626) ([alekseygolub](https://github.com/alekseygolub)). +* Add support for `APPEND` modifier in `INTO OUTFILE` clause. Suggest using `APPEND` or `TRUNCATE` for `INTO OUTFILE` when file exists. [#50950](https://github.com/ClickHouse/ClickHouse/pull/50950) ([alekar](https://github.com/alekar)). +* Add table engine `Redis` and table function `redis`. It allows querying external Redis servers. [#50150](https://github.com/ClickHouse/ClickHouse/pull/50150) ([JackyWoo](https://github.com/JackyWoo)). +* Allow to skip empty files in file/s3/url/hdfs table functions using settings `s3_skip_empty_files`, `hdfs_skip_empty_files`, `engine_file_skip_empty_files`, `engine_url_skip_empty_files`. [#50364](https://github.com/ClickHouse/ClickHouse/pull/50364) ([Kruglov Pavel](https://github.com/Avogar)). +* Add a new setting named `use_mysql_types_in_show_columns` to alter the `SHOW COLUMNS` SQL statement to display MySQL equivalent types when a client is connected via the MySQL compatibility port. 
[#49577](https://github.com/ClickHouse/ClickHouse/pull/49577) ([Thomas Panetti](https://github.com/tpanetti)). +* Clickhouse-client can now be called with a connection string instead of "--host", "--port", "--user" etc. [#50689](https://github.com/ClickHouse/ClickHouse/pull/50689) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Add setting `session_timezone`, it is used as default timezone for session when not explicitly specified. [#44149](https://github.com/ClickHouse/ClickHouse/pull/44149) ([Andrey Zvonov](https://github.com/zvonand)). +* Codec DEFLATE_QPL is now controlled via server setting "enable_deflate_qpl_codec" (default: false) instead of setting "allow_experimental_codecs". This marks DEFLATE_QPL non-experimental. [#50775](https://github.com/ClickHouse/ClickHouse/pull/50775) ([Robert Schulze](https://github.com/rschu1ze)). + +#### Performance Improvement +* Improved scheduling of merge selecting and cleanup tasks in `ReplicatedMergeTree`. The tasks will not be executed too frequently when there's nothing to merge or cleanup. Added settings `max_merge_selecting_sleep_ms`, `merge_selecting_sleep_slowdown_factor`, `max_cleanup_delay_period` and `cleanup_thread_preferred_points_per_iteration`. It should close [#31919](https://github.com/ClickHouse/ClickHouse/issues/31919). [#50107](https://github.com/ClickHouse/ClickHouse/pull/50107) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Make filter push down through cross join. [#50605](https://github.com/ClickHouse/ClickHouse/pull/50605) ([Han Fei](https://github.com/hanfei1991)). +* Improve performance with enabled QueryProfiler using thread-local timer_id instead of global object. [#48778](https://github.com/ClickHouse/ClickHouse/pull/48778) ([Jiebin Sun](https://github.com/jiebinn)). +* Rewrite CapnProto input/output format to improve its performance. Map column names and CapnProto fields case insensitive, fix reading/writing of nested structure fields. [#49752](https://github.com/ClickHouse/ClickHouse/pull/49752) ([Kruglov Pavel](https://github.com/Avogar)). +* Optimize parquet write performance for parallel threads. [#50102](https://github.com/ClickHouse/ClickHouse/pull/50102) ([Hongbin Ma](https://github.com/binmahone)). +* Disable `parallelize_output_from_storages` for processing MATERIALIZED VIEWs and storages with one block only. [#50214](https://github.com/ClickHouse/ClickHouse/pull/50214) ([Azat Khuzhin](https://github.com/azat)). +* Merge PR [#46558](https://github.com/ClickHouse/ClickHouse/pull/46558). Avoid block permutation during sort if the block is already sorted. [#50697](https://github.com/ClickHouse/ClickHouse/pull/50697) ([Alexey Milovidov](https://github.com/alexey-milovidov), [Maksim Kita](https://github.com/kitaisreal)). +* Make multiple list requests to ZooKeeper in parallel to speed up reading from system.zookeeper table. [#51042](https://github.com/ClickHouse/ClickHouse/pull/51042) ([Alexander Gololobov](https://github.com/davenger)). +* Speedup initialization of DateTime lookup tables for time zones. This should reduce startup/connect time of clickhouse-client especially in debug build as it is rather heavy. [#51347](https://github.com/ClickHouse/ClickHouse/pull/51347) ([Alexander Gololobov](https://github.com/davenger)). +* Fix data lakes slowness because of synchronous head requests. (Related to Iceberg/Deltalake/Hudi being slow with a lot of files). [#50976](https://github.com/ClickHouse/ClickHouse/pull/50976) ([Kseniia Sumarokova](https://github.com/kssenii)). 
+* Do not replicate `ALTER PARTITION` queries and mutations through `Replicated` database if it has only one shard and the underlying table is `ReplicatedMergeTree`. [#51049](https://github.com/ClickHouse/ClickHouse/pull/51049) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Do not read all the columns from right GLOBAL JOIN table. [#50721](https://github.com/ClickHouse/ClickHouse/pull/50721) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + +#### Experimental Feature +* Support parallel replicas with the analyzer. [#50441](https://github.com/ClickHouse/ClickHouse/pull/50441) ([Raúl Marín](https://github.com/Algunenano)). +* Add random sleep before large merges/mutations execution to split load more evenly between replicas in case of zero-copy replication. [#51282](https://github.com/ClickHouse/ClickHouse/pull/51282) ([alesapin](https://github.com/alesapin)). + +#### Improvement +* Relax the thresholds for "too many parts" to be more modern. Return the backpressure during long-running insert queries. [#50856](https://github.com/ClickHouse/ClickHouse/pull/50856) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow to cast IPv6 to IPv4 address for CIDR ::ffff:0:0/96 (IPv4-mapped addresses). [#49759](https://github.com/ClickHouse/ClickHouse/pull/49759) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Update MongoDB protocol to support MongoDB 5.1 version and newer. Support for the versions with the old protocol (<3.6) is preserved. Closes [#45621](https://github.com/ClickHouse/ClickHouse/issues/45621), [#49879](https://github.com/ClickHouse/ClickHouse/issues/49879). [#50061](https://github.com/ClickHouse/ClickHouse/pull/50061) ([Nikolay Degterinsky](https://github.com/evillique)). +* Add setting `input_format_max_bytes_to_read_for_schema_inference` to limit the number of bytes to read in schema inference. Closes [#50577](https://github.com/ClickHouse/ClickHouse/issues/50577). [#50592](https://github.com/ClickHouse/ClickHouse/pull/50592) ([Kruglov Pavel](https://github.com/Avogar)). +* Respect setting `input_format_null_as_default` in schema inference. [#50602](https://github.com/ClickHouse/ClickHouse/pull/50602) ([Kruglov Pavel](https://github.com/Avogar)). +* Allow to skip trailing empty lines in CSV/TSV/CustomSeparated formats via settings `input_format_csv_skip_trailing_empty_lines`, `input_format_tsv_skip_trailing_empty_lines` and `input_format_custom_skip_trailing_empty_lines` (disabled by default). Closes [#49315](https://github.com/ClickHouse/ClickHouse/issues/49315). [#50635](https://github.com/ClickHouse/ClickHouse/pull/50635) ([Kruglov Pavel](https://github.com/Avogar)). +* Functions "toDateOrDefault|OrNull" and "accuateCast[OrDefault|OrNull]" now correctly parse numeric arguments. [#50709](https://github.com/ClickHouse/ClickHouse/pull/50709) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Support CSV with whitespace or `\t` field delimiters, and these delimiters are supported in Spark. [#50712](https://github.com/ClickHouse/ClickHouse/pull/50712) ([KevinyhZou](https://github.com/KevinyhZou)). +* Settings `number_of_mutations_to_delay` and `number_of_mutations_to_throw` are enabled by default now with values 500 and 1000 respectively. [#50726](https://github.com/ClickHouse/ClickHouse/pull/50726) ([Anton Popov](https://github.com/CurtizJ)). +* The dashboard correctly shows missing values. This closes [#50831](https://github.com/ClickHouse/ClickHouse/issues/50831). 
[#50832](https://github.com/ClickHouse/ClickHouse/pull/50832) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added the possibility to use date and time arguments in the syslog timestamp format in functions `parseDateTimeBestEffort*` and `parseDateTime64BestEffort*`. [#50925](https://github.com/ClickHouse/ClickHouse/pull/50925) ([Victor Krasnov](https://github.com/sirvickr)). +* Command line parameter "--password" in clickhouse-client can now be specified only once. [#50966](https://github.com/ClickHouse/ClickHouse/pull/50966) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Use `hash_of_all_files` from `system.parts` to check identity of parts during on-cluster backups. [#50997](https://github.com/ClickHouse/ClickHouse/pull/50997) ([Vitaly Baranov](https://github.com/vitlibar)). +* The system table zookeeper_connection connected_time identifies the time when the connection is established (standard format), and session_uptime_elapsed_seconds is added, which labels the duration of the established connection session (in seconds). [#51026](https://github.com/ClickHouse/ClickHouse/pull/51026) ([郭小龙](https://github.com/guoxiaolongzte)). +* Improve the progress bar for file/s3/hdfs/url table functions by using chunk size from source data and using incremental total size counting in each thread. Fix the progress bar for *Cluster functions. This closes [#47250](https://github.com/ClickHouse/ClickHouse/issues/47250). [#51088](https://github.com/ClickHouse/ClickHouse/pull/51088) ([Kruglov Pavel](https://github.com/Avogar)). +* Add total_bytes_to_read to the Progress packet in TCP protocol for better Progress bar. [#51158](https://github.com/ClickHouse/ClickHouse/pull/51158) ([Kruglov Pavel](https://github.com/Avogar)). +* Better checking of data parts on disks with filesystem cache. [#51164](https://github.com/ClickHouse/ClickHouse/pull/51164) ([Anton Popov](https://github.com/CurtizJ)). +* Fix sometimes not correct current_elements_num in fs cache. [#51242](https://github.com/ClickHouse/ClickHouse/pull/51242) ([Kseniia Sumarokova](https://github.com/kssenii)). + +#### Build/Testing/Packaging Improvement +* Add embedded keeper-client to standalone keeper binary. [#50964](https://github.com/ClickHouse/ClickHouse/pull/50964) ([pufit](https://github.com/pufit)). +* Actual LZ4 version is used now. [#50621](https://github.com/ClickHouse/ClickHouse/pull/50621) ([Nikita Taranov](https://github.com/nickitat)). +* ClickHouse server will print the list of changed settings on fatal errors. This closes [#51137](https://github.com/ClickHouse/ClickHouse/issues/51137). [#51138](https://github.com/ClickHouse/ClickHouse/pull/51138) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow building ClickHouse with clang-17. [#51300](https://github.com/ClickHouse/ClickHouse/pull/51300) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* [SQLancer](https://github.com/sqlancer/sqlancer) check is considered stable as bugs that were triggered by it are fixed. Now failures of SQLancer check will be reported as failed check status. [#51340](https://github.com/ClickHouse/ClickHouse/pull/51340) ([Ilya Yatsishin](https://github.com/qoega)). +* Split huge `RUN` in Dockerfile into smaller conditional. Install the necessary tools on demand in the same `RUN` layer, and remove them after that. Upgrade the OS only once at the beginning. Use a modern way to check the signed repository. Downgrade the base repo to ubuntu:20.04 to address the issues on older docker versions. 
Upgrade golang version to address golang vulnerabilities. [#51504](https://github.com/ClickHouse/ClickHouse/pull/51504) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Report loading status for executable dictionaries correctly [#48775](https://github.com/ClickHouse/ClickHouse/pull/48775) ([Anton Kozlov](https://github.com/tonickkozlov)). +* Proper mutation of skip indices and projections [#50104](https://github.com/ClickHouse/ClickHouse/pull/50104) ([Amos Bird](https://github.com/amosbird)). +* Cleanup moving parts [#50489](https://github.com/ClickHouse/ClickHouse/pull/50489) ([vdimir](https://github.com/vdimir)). +* Fix backward compatibility for IP types hashing in aggregate functions [#50551](https://github.com/ClickHouse/ClickHouse/pull/50551) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix Log family table return wrong rows count after truncate [#50585](https://github.com/ClickHouse/ClickHouse/pull/50585) ([flynn](https://github.com/ucasfl)). +* Fix bug in `uniqExact` parallel merging [#50590](https://github.com/ClickHouse/ClickHouse/pull/50590) ([Nikita Taranov](https://github.com/nickitat)). +* Revert recent grace hash join changes [#50699](https://github.com/ClickHouse/ClickHouse/pull/50699) ([vdimir](https://github.com/vdimir)). +* Query Cache: Try to fix bad cast from `ColumnConst` to `ColumnVector` [#50704](https://github.com/ClickHouse/ClickHouse/pull/50704) ([Robert Schulze](https://github.com/rschu1ze)). +* Avoid storing logs in Keeper containing unknown operation [#50751](https://github.com/ClickHouse/ClickHouse/pull/50751) ([Antonio Andelic](https://github.com/antonio2368)). +* SummingMergeTree support for DateTime64 [#50797](https://github.com/ClickHouse/ClickHouse/pull/50797) ([Jordi Villar](https://github.com/jrdi)). +* Add compatibility setting for non-const timezones [#50834](https://github.com/ClickHouse/ClickHouse/pull/50834) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix hashing of LDAP params in the cache entries [#50865](https://github.com/ClickHouse/ClickHouse/pull/50865) ([Julian Maicher](https://github.com/jmaicher)). +* Fallback to parsing big integer from String instead of exception in Parquet format [#50873](https://github.com/ClickHouse/ClickHouse/pull/50873) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix checking the lock file too often while writing a backup [#50889](https://github.com/ClickHouse/ClickHouse/pull/50889) ([Vitaly Baranov](https://github.com/vitlibar)). +* Do not apply projection if read-in-order was enabled. [#50923](https://github.com/ClickHouse/ClickHouse/pull/50923) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix race in the Azure blob storage iterator [#50936](https://github.com/ClickHouse/ClickHouse/pull/50936) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix erroneous `sort_description` propagation in `CreatingSets` [#50955](https://github.com/ClickHouse/ClickHouse/pull/50955) ([Nikita Taranov](https://github.com/nickitat)). +* Fix Iceberg v2 optional metadata parsing [#50974](https://github.com/ClickHouse/ClickHouse/pull/50974) ([Kseniia Sumarokova](https://github.com/kssenii)). +* MaterializedMySQL: Keep parentheses for empty table overrides [#50977](https://github.com/ClickHouse/ClickHouse/pull/50977) ([Val Doroshchuk](https://github.com/valbok)). 
+* Fix crash in BackupCoordinationStageSync::setError() [#51012](https://github.com/ClickHouse/ClickHouse/pull/51012) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix subtly broken copy-on-write of ColumnLowCardinality dictionary [#51064](https://github.com/ClickHouse/ClickHouse/pull/51064) ([Michael Kolupaev](https://github.com/al13n321)). +* Generate safe IVs [#51086](https://github.com/ClickHouse/ClickHouse/pull/51086) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Fix ineffective query cache for SELECTs with subqueries [#51132](https://github.com/ClickHouse/ClickHouse/pull/51132) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix Set index with constant nullable comparison. [#51205](https://github.com/ClickHouse/ClickHouse/pull/51205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix a crash in s3 and s3Cluster functions [#51209](https://github.com/ClickHouse/ClickHouse/pull/51209) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix a crash with compiled expressions [#51231](https://github.com/ClickHouse/ClickHouse/pull/51231) ([LiuNeng](https://github.com/liuneng1994)). +* Fix use-after-free in StorageURL when switching URLs [#51260](https://github.com/ClickHouse/ClickHouse/pull/51260) ([Michael Kolupaev](https://github.com/al13n321)). +* Updated check for parameterized view [#51272](https://github.com/ClickHouse/ClickHouse/pull/51272) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix multiple writing of same file to backup [#51299](https://github.com/ClickHouse/ClickHouse/pull/51299) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix fuzzer failure in ActionsDAG [#51301](https://github.com/ClickHouse/ClickHouse/pull/51301) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove garbage from function `transform` [#51350](https://github.com/ClickHouse/ClickHouse/pull/51350) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+ + ### ClickHouse release 23.5, 2023-06-08 #### Upgrade Notes From 7e992955df72f27e59d0da2bd2630b9715a9a8a7 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Thu, 29 Jun 2023 08:12:33 +0000 Subject: [PATCH 0986/1997] Next attempt to fix style --- src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp | 10 ++++++++-- src/Server/HTTPHandler.cpp | 7 ++++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp index 1e6d520de0a..62c7b6e7346 100644 --- a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp +++ b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp @@ -3,7 +3,7 @@ #include #include #include - +#include namespace DB { @@ -35,10 +35,16 @@ void WriteBufferFromHTTPServerResponse::writeHeaderSummary() return; WriteBufferFromOwnString progress_string_writer; + + writeCString("{", progress_string_writer); accumulated_progress.writeJSON(progress_string_writer); + writeCString(",\"peak_memory_usage\":\"", progress_string_writer); + writeText(peak_memory_usage, progress_string_writer); + writeCString("\"}", progress_string_writer); + if (response_header_ostr) - *response_header_ostr << "X-ClickHouse-Summary: " << progress_string_writer.str() << " Mem " << formatReadableSizeWithBinarySuffix(peak_memory_usage) << "\r\n" << std::flush; + *response_header_ostr << "X-ClickHouse-Summary: " << progress_string_writer.str() << "\r\n" << std::flush; } void WriteBufferFromHTTPServerResponse::writeHeaderProgress() diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index c7ec4a848e9..2c4e3bacbd8 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -816,12 +816,13 @@ void HTTPHandler::processQuery( /// While still no data has been sent, we will report about query execution progress by sending HTTP headers. /// Note that we add it unconditionally so the progress is available for `X-ClickHouse-Summary` - append_callback([&used_output](const Progress & progress) { + append_callback([&used_output](const Progress & progress) + { used_output.out->onProgress(progress); auto thread_group = CurrentThread::getGroup(); auto peak_memory_usage = thread_group->memory_tracker.getPeak(); - used_output.out->onMemoryUsage(peak_memory_usage); - }); + used_output.out->onMemoryUsage(peak_memory_usage); + }); if (settings.readonly > 0 && settings.cancel_http_readonly_queries_on_client_close) { From 6f4d6fd8d6e54b947e7f4634f308354694926715 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Thu, 29 Jun 2023 08:29:16 +0000 Subject: [PATCH 0987/1997] Remove whitespace --- src/Server/HTTPHandler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index 2c4e3bacbd8..e3ff43ba41a 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -816,7 +816,7 @@ void HTTPHandler::processQuery( /// While still no data has been sent, we will report about query execution progress by sending HTTP headers. /// Note that we add it unconditionally so the progress is available for `X-ClickHouse-Summary` - append_callback([&used_output](const Progress & progress) + append_callback([&used_output](const Progress & progress) { used_output.out->onProgress(progress); auto thread_group = CurrentThread::getGroup(); From 340262814a721ca7be1523ff1194c404b0f03de8 Mon Sep 17 00:00:00 2001 From: "Mikhail f.
Shiryaev" Date: Thu, 29 Jun 2023 12:24:19 +0200 Subject: [PATCH 0988/1997] Avoid additional maven requests for spark tests --- docker/test/integration/runner/Dockerfile | 6 +++++- tests/integration/helpers/cluster.py | 10 ++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index 40627354f70..38d8ed5f223 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -105,7 +105,11 @@ RUN curl -fsSL -O https://dlcdn.apache.org/spark/spark-3.3.2/spark-3.3.2-bin-had # download spark and packages # if you change packages, don't forget to update them in tests/integration/helpers/cluster.py -RUN echo ":quit" | /spark-3.3.2-bin-hadoop3/bin/spark-shell --packages "org.apache.hudi:hudi-spark3.3-bundle_2.12:0.13.0,io.delta:delta-core_2.12:2.3.0,org.apache.iceberg:iceberg-spark-runtime-3.3_2.12:1.1.0" > /dev/null +RUN packages="org.apache.hudi:hudi-spark3.3-bundle_2.12:0.13.0,\ +io.delta:delta-core_2.12:2.3.0,\ +org.apache.iceberg:iceberg-spark-runtime-3.3_2.12:1.1.0" \ + && /spark-3.3.2-bin-hadoop3/bin/spark-shell --packages "$packages" > /dev/null \ + && find /root/.ivy2/ -name '*.jar' -exec ln -sf {} /spark-3.3.2-bin-hadoop3/jars/ \; RUN set -x \ && addgroup --system dockremap \ diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 967eaaa78a5..21398790be3 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -624,10 +624,12 @@ class ClickHouseCluster: # if you change packages, don't forget to update them in docker/test/integration/runner/dockerd-entrypoint.sh ( pyspark.sql.SparkSession.builder.appName("spark_test") - .config( - "spark.jars.packages", - "org.apache.hudi:hudi-spark3.3-bundle_2.12:0.13.0,io.delta:delta-core_2.12:2.2.0,org.apache.iceberg:iceberg-spark-runtime-3.3_2.12:1.1.0", - ) + # The jars are now linked to "$SPARK_HOME/jars" and we don't + # need packages to be downloaded once and once again + # .config( + # "spark.jars.packages", + # "org.apache.hudi:hudi-spark3.3-bundle_2.12:0.13.0,io.delta:delta-core_2.12:2.2.0,org.apache.iceberg:iceberg-spark-runtime-3.3_2.12:1.1.0", + # ) .master("local") .getOrCreate() .stop() From 919bf5429478261cd9d0329129191323e77263f2 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 29 Jun 2023 12:38:46 +0200 Subject: [PATCH 0989/1997] fix race condition --- src/IO/WriteBufferFromS3.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index 8714282f7a8..a72fac138b3 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -223,8 +223,8 @@ String WriteBufferFromS3::getShortLogDetails() const multipart_upload_details = fmt::format(", upload id {}" , multipart_upload_id); - return fmt::format("Details: bucket {}, key {}, total size {}{}", - bucket, key, total_size, multipart_upload_details); + return fmt::format("Details: bucket {}, key {}{}", + bucket, key, multipart_upload_details); } void WriteBufferFromS3::tryToAbortMultipartUpload() From 60ce9773e0310992fd900e2cbc7a0f0f2f858c10 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Thu, 29 Jun 2023 10:42:33 +0000 Subject: [PATCH 0990/1997] Added docs for primary_key_size --- docs/en/operations/system-tables/parts.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/en/operations/system-tables/parts.md b/docs/en/operations/system-tables/parts.md index 
e61c6ed2ba4..861104ff236 100644 --- a/docs/en/operations/system-tables/parts.md +++ b/docs/en/operations/system-tables/parts.md @@ -39,6 +39,8 @@ Columns: - `data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Total size of uncompressed data in the data part. All the auxiliary files (for example, files with marks) are not included. +- `primary_key_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The amount of memory (in bytes) used by primary key values in the primary.idx/cidx file on disk. + - `marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The size of the file with marks. - `secondary_indices_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Total size of compressed data for secondary indices in the data part. All the auxiliary files (for example, files with marks) are not included. From 4ba334c98360bb24e7d21eb12453ac5cc765d9d8 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 29 Jun 2023 13:01:31 +0200 Subject: [PATCH 0991/1997] Review fix --- src/Disks/getOrCreateDiskFromAST.cpp | 7 ++++++- .../02808_custom_disk_with_user_defined_name.sh | 4 +++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/Disks/getOrCreateDiskFromAST.cpp b/src/Disks/getOrCreateDiskFromAST.cpp index c5ec0f5d91b..93d70738a82 100644 --- a/src/Disks/getOrCreateDiskFromAST.cpp +++ b/src/Disks/getOrCreateDiskFromAST.cpp @@ -38,7 +38,12 @@ namespace } else { - disk_name = function.name.substr(std::strlen("disk_")); + static constexpr std::string_view custom_disk_prefix = "disk_"; + + if (disk_name.size() <= custom_disk_prefix.size()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid disk name: {}", disk_name); + + disk_name = function.name.substr(custom_disk_prefix.size()); } auto result_disk = context->getOrCreateDisk(disk_name, [&](const DisksMap & disks_map) -> DiskPtr { diff --git a/tests/queries/0_stateless/02808_custom_disk_with_user_defined_name.sh b/tests/queries/0_stateless/02808_custom_disk_with_user_defined_name.sh index 99b9a0ed7b3..615da606519 100755 --- a/tests/queries/0_stateless/02808_custom_disk_with_user_defined_name.sh +++ b/tests/queries/0_stateless/02808_custom_disk_with_user_defined_name.sh @@ -1,3 +1,4 @@ +#!/usr/bin/env bash # Tags: no-fasttest, no-parallel # set -x @@ -14,11 +15,12 @@ ENGINE = MergeTree() ORDER BY tuple() SETTINGS disk = disk_s3disk(type = cache, max_size = '100Ki', path = ${CLICKHOUSE_TEST_UNIQUE_NAME}, disk = s3disk); """ 2>&1 | grep -q "Disk with name \`s3disk\` already exist" && echo 'OK' || echo 'FAIL' +disk_name="${CLICKHOUSE_TEST_UNIQUE_NAME}" + $CLICKHOUSE_CLIENT -nm --query """ SELECT count() FROM system.disks WHERE name = '$disk_name' """ -disk_name="${CLICKHOUSE_TEST_UNIQUE_NAME}" $CLICKHOUSE_CLIENT -nm --query """ DROP TABLE IF EXISTS test; CREATE TABLE test (a Int32, b String) From 306ca66eb4d3e9ae8ee74004a09ab1d530f46658 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 29 Jun 2023 13:11:22 +0200 Subject: [PATCH 0992/1997] Fix --- src/Interpreters/Cache/FileCache.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index eb5b59a447d..e165c0914b6 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -757,12 +757,14 @@ bool FileCache::tryReserve(FileSegment & file_segment, const size_t size) chassert(candidate->releasable()); const auto * segment = candidate->file_segment.get(); + auto queue_it = 
segment->getQueueIterator(); + chassert(queue_it); ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictedFileSegments); ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictedBytes, segment->range().size()); locked_key->removeFileSegment(segment->offset(), segment->lock()); - segment->getQueueIterator()->remove(cache_lock); + queue_it->remove(cache_lock); if (query_context) query_context->remove(current_key, segment->offset(), cache_lock); From ff25b72e6da94acb2af9214150131b0c4a8aad88 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Thu, 29 Jun 2023 13:30:53 +0200 Subject: [PATCH 0993/1997] Update 02808_custom_disk_with_user_defined_name.sh --- .../0_stateless/02808_custom_disk_with_user_defined_name.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02808_custom_disk_with_user_defined_name.sh b/tests/queries/0_stateless/02808_custom_disk_with_user_defined_name.sh index 615da606519..50dee04f6a6 100755 --- a/tests/queries/0_stateless/02808_custom_disk_with_user_defined_name.sh +++ b/tests/queries/0_stateless/02808_custom_disk_with_user_defined_name.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel +# Tags: no-fasttest # set -x From f73d8786329237ad24f06505dcde23485a83a534 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Thu, 29 Jun 2023 13:31:41 +0200 Subject: [PATCH 0994/1997] Update src/Disks/getOrCreateDiskFromAST.cpp Co-authored-by: Antonio Andelic --- src/Disks/getOrCreateDiskFromAST.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/getOrCreateDiskFromAST.cpp b/src/Disks/getOrCreateDiskFromAST.cpp index 93d70738a82..691a51d8b48 100644 --- a/src/Disks/getOrCreateDiskFromAST.cpp +++ b/src/Disks/getOrCreateDiskFromAST.cpp @@ -40,7 +40,7 @@ namespace { static constexpr std::string_view custom_disk_prefix = "disk_"; - if (disk_name.size() <= custom_disk_prefix.size()) + if (disk_name.size() <= custom_disk_prefix.size() || !disk_name.starts_with(custom_disk_prefix)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid disk name: {}", disk_name); disk_name = function.name.substr(custom_disk_prefix.size()); From 1d9d712984df7d63bbcaf232b73dde562619ba55 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 29 Jun 2023 13:32:17 +0200 Subject: [PATCH 0995/1997] use timeout instead of trap in 01443_merge_truncate_long.sh --- .../0_stateless/01443_merge_truncate_long.sh | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/tests/queries/0_stateless/01443_merge_truncate_long.sh b/tests/queries/0_stateless/01443_merge_truncate_long.sh index ce867d32c1a..65b9bcd366e 100755 --- a/tests/queries/0_stateless/01443_merge_truncate_long.sh +++ b/tests/queries/0_stateless/01443_merge_truncate_long.sh @@ -13,20 +13,20 @@ ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS t" ${CLICKHOUSE_CLIENT} --query="CREATE TABLE t (x Int8) ENGINE = MergeTree ORDER BY ()" -function thread() +function thread_optimize() { - trap 'BREAK=1' 2 - - while [[ -z "${BREAK}" ]] + while true; do ${CLICKHOUSE_CLIENT} --query="OPTIMIZE TABLE t FINAL;" 2>&1 | tr -d '\n' | rg -v 'Cancelled merging parts' ||: done } -thread & -pid=$!
+TIMEOUT=15 +export -f thread_optimize +timeout $TIMEOUT bash -c thread_optimize 2> /dev/null & -for i in {1..100}; do +for i in {1..100}; +do echo " INSERT INTO t VALUES (0); INSERT INTO t VALUES (0); @@ -36,7 +36,6 @@ for i in {1..100}; do " done | ${CLICKHOUSE_CLIENT} --multiquery -kill -2 "$pid" wait $CLICKHOUSE_CLIENT -q "DROP TABLE t" From 351f2757f06d995ba8b27ac7c9cd6ffbb64e4aee Mon Sep 17 00:00:00 2001 From: taojiatao <245915794@qq.com> Date: Thu, 29 Jun 2023 19:34:04 +0800 Subject: [PATCH 0996/1997] translation correction, about assumeNotNull with null value --- docs/zh/sql-reference/functions/functions-for-nulls.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/zh/sql-reference/functions/functions-for-nulls.md b/docs/zh/sql-reference/functions/functions-for-nulls.md index 4dd30970923..b3dca3ac549 100644 --- a/docs/zh/sql-reference/functions/functions-for-nulls.md +++ b/docs/zh/sql-reference/functions/functions-for-nulls.md @@ -192,7 +192,7 @@ SELECT coalesce(mail, phone, CAST(icq,'Nullable(String)')) FROM aBook **返回值** - 如果`x`不为`NULL`,返回非`Nullable`类型的原始值。 -- 如果`x`为`NULL`,返回对应非`Nullable`类型的默认值。 +- 如果`x`为`NULL`,则返回任意值。 **示例** From f88c1fdf44e4f1a6da7bf0db743416f4c020718f Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 29 Jun 2023 11:59:09 +0000 Subject: [PATCH 0997/1997] Fixes --- src/Storages/HDFS/HDFSCommon.h | 18 ++++++++++++++++-- src/Storages/StorageAzureBlob.cpp | 2 +- src/Storages/StorageAzureBlob.h | 12 +++--------- src/Storages/StorageFile.cpp | 6 ++++-- 4 files changed, 24 insertions(+), 14 deletions(-) diff --git a/src/Storages/HDFS/HDFSCommon.h b/src/Storages/HDFS/HDFSCommon.h index 5eb687695f2..23f9e4d8f12 100644 --- a/src/Storages/HDFS/HDFSCommon.h +++ b/src/Storages/HDFS/HDFSCommon.h @@ -58,8 +58,22 @@ public: HDFSBuilderWrapper(const HDFSBuilderWrapper &) = delete; HDFSBuilderWrapper & operator=(const HDFSBuilderWrapper &) = delete; - HDFSBuilderWrapper(HDFSBuilderWrapper &&) = default; - HDFSBuilderWrapper & operator=(HDFSBuilderWrapper &&) = default; + + HDFSBuilderWrapper(HDFSBuilderWrapper && other) noexcept + { + *this = std::move(other); + } + + HDFSBuilderWrapper & operator=(HDFSBuilderWrapper && other) noexcept + { + std::swap(hdfs_builder, other.hdfs_builder); + config_stor = std::move(other.config_stor); + hadoop_kerberos_keytab = std::move(other.hadoop_kerberos_keytab); + hadoop_kerberos_principal = std::move(other.hadoop_kerberos_principal); + hadoop_security_kerberos_ticket_cache_path = std::move(other.hadoop_security_kerberos_ticket_cache_path); + need_kinit = std::move(other.need_kinit); + return *this; + } hdfsBuilder * get() { return hdfs_builder; } diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index 95ce8ce3552..365de2611ce 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -940,7 +940,7 @@ void StorageAzureBlobSource::GlobIterator::createFilterAST(const String & any_ke StorageAzureBlobSource::KeysIterator::KeysIterator( AzureObjectStorage * object_storage_, const std::string & container_, - Strings keys_, + const Strings & keys_, ASTPtr query_, const Block & virtual_header_, ContextPtr context_, diff --git a/src/Storages/StorageAzureBlob.h b/src/Storages/StorageAzureBlob.h index fdd4bd1afb7..ad87da1f61a 100644 --- a/src/Storages/StorageAzureBlob.h +++ b/src/Storages/StorageAzureBlob.h @@ -148,7 +148,6 @@ public: IIterator(ContextPtr context_):WithContext(context_) {} virtual ~IIterator() = default; virtual RelativePathWithMetadata next() = 0; - 
virtual size_t getTotalSize() const = 0; RelativePathWithMetadata operator ()() { return next(); } }; @@ -167,7 +166,6 @@ public: std::function file_progress_callback_ = {}); RelativePathWithMetadata next() override; - size_t getTotalSize() const override; ~GlobIterator() override = default; private: @@ -179,7 +177,6 @@ public: Block virtual_header; size_t index = 0; - std::atomic total_size = 0; RelativePathsWithMetadata blobs_with_metadata; RelativePathsWithMetadata * outer_blobs; @@ -202,14 +199,14 @@ public: KeysIterator( AzureObjectStorage * object_storage_, const std::string & container_, - Strings keys_, + const Strings & keys_, ASTPtr query_, const Block & virtual_header_, ContextPtr context_, - RelativePathsWithMetadata * outer_blobs_); + RelativePathsWithMetadata * outer_blobs, + std::function file_progress_callback = {}); RelativePathWithMetadata next() override; - size_t getTotalSize() const override; ~KeysIterator() override = default; private: @@ -222,9 +219,6 @@ public: Block virtual_header; std::atomic index = 0; - std::atomic total_size = 0; - - RelativePathsWithMetadata * outer_blobs; }; StorageAzureBlobSource( diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index e9faa5e112a..d87aac50deb 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -739,8 +739,10 @@ public: if (reader->pull(chunk)) { UInt64 num_rows = chunk.getNumRows(); - size_t chunk_size = input_format->getApproxBytesReadForChunk(); - progress(num_rows, chunk_size); + size_t chunk_size = 0; + if (storage->format_name != "Distributed") + chunk_size = input_format->getApproxBytesReadForChunk(); + progress(num_rows, chunk_size ? chunk_size : chunk.bytes()); /// Enrich with virtual columns. if (files_info->need_path_column) From bdfaffb6e241d8507c3b127f0288d09125ae0fb0 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Thu, 29 Jun 2023 12:08:08 +0000 Subject: [PATCH 0998/1997] Add/fix tests --- src/IO/Progress.cpp | 12 +++++++----- src/IO/Progress.h | 4 ++-- .../HTTP/WriteBufferFromHTTPServerResponse.cpp | 3 +-- .../00416_pocopatch_progress_in_http_headers.sh | 2 +- tests/queries/0_stateless/01921_test_progress_bar.py | 7 ++++--- tests/queries/0_stateless/02136_scalar_progress.sh | 2 +- .../0_stateless/02373_progress_contain_result.sh | 2 +- .../0_stateless/02423_insert_summary_behaviour.sh | 12 ++++++------ .../0_stateless/02457_insert_select_progress_http.sh | 2 +- 9 files changed, 24 insertions(+), 22 deletions(-) diff --git a/src/IO/Progress.cpp b/src/IO/Progress.cpp index c652a62574d..6a78a453d8a 100644 --- a/src/IO/Progress.cpp +++ b/src/IO/Progress.cpp @@ -69,12 +69,13 @@ void ProgressValues::write(WriteBuffer & out, UInt64 client_revision) const } } -void ProgressValues::writeJSON(WriteBuffer & out) const +void ProgressValues::writeJSON(WriteBuffer & out, bool add_braces) const { /// Numbers are written in double quotes (as strings) to avoid loss of precision /// of 64-bit integers after interpretation by JavaScript. 
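    /// With add_braces=true this emits a complete object, roughly (values illustrative):
    ///   {"read_rows":"100","read_bytes":"800",...,"result_rows":"100","result_bytes":"800"}
    /// With add_braces=false only the comma-separated fields are written, so a caller
    /// can open the braces itself and append extra fields (such as peak_memory_usage)
    /// to the same object.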
- //writeCString("{\"read_rows\":\"", out); + if (add_braces) + writeCString("{", out); writeCString("\"read_rows\":\"", out); writeText(read_rows, out); writeCString("\",\"read_bytes\":\"", out); @@ -89,8 +90,9 @@ void ProgressValues::writeJSON(WriteBuffer & out) const writeText(result_rows, out); writeCString("\",\"result_bytes\":\"", out); writeText(result_bytes, out); - //writeCString("\"}", out); writeCString("\"", out); + if (add_braces) + writeCString("}", out); } bool Progress::incrementPiecewiseAtomically(const Progress & rhs) @@ -232,9 +234,9 @@ void Progress::write(WriteBuffer & out, UInt64 client_revision) const getValues().write(out, client_revision); } -void Progress::writeJSON(WriteBuffer & out) const +void Progress::writeJSON(WriteBuffer & out, bool add_braces) const { - getValues().writeJSON(out); + getValues().writeJSON(out, add_braces); } } diff --git a/src/IO/Progress.h b/src/IO/Progress.h index c21b1b854b0..7cf67a0a432 100644 --- a/src/IO/Progress.h +++ b/src/IO/Progress.h @@ -32,7 +32,7 @@ struct ProgressValues void read(ReadBuffer & in, UInt64 server_revision); void write(WriteBuffer & out, UInt64 client_revision) const; - void writeJSON(WriteBuffer & out) const; + void writeJSON(WriteBuffer & out, bool add_braces = true) const; }; struct ReadProgress @@ -118,7 +118,7 @@ struct Progress void write(WriteBuffer & out, UInt64 client_revision) const; /// Progress in JSON format (single line, without whitespaces) is used in HTTP headers. - void writeJSON(WriteBuffer & out) const; + void writeJSON(WriteBuffer & out, bool add_braces = true) const; /// Each value separately is changed atomically (but not whole object). bool incrementPiecewiseAtomically(const Progress & rhs); diff --git a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp index 62c7b6e7346..544442c4c05 100644 --- a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp +++ b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp @@ -37,11 +37,10 @@ void WriteBufferFromHTTPServerResponse::writeHeaderSummary() WriteBufferFromOwnString progress_string_writer; writeCString("{", progress_string_writer); - accumulated_progress.writeJSON(progress_string_writer); + accumulated_progress.writeJSON(progress_string_writer, false); writeCString(",\"peak_memory_usage\":\"", progress_string_writer); writeText(peak_memory_usage, progress_string_writer); writeCString("\"}", progress_string_writer); - if (response_header_ostr) *response_header_ostr << "X-ClickHouse-Summary: " << progress_string_writer.str() << "\r\n" << std::flush; diff --git a/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh b/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh index b2189ab0cc2..d830debdf55 100755 --- a/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh +++ b/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh @@ -26,7 +26,7 @@ ${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}" -H 'Accept-Encoding: gzip' -d 'DROP ${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}" -H 'Accept-Encoding: gzip' -d 'CREATE TABLE insert_number_query (record UInt32) Engine = Memory' > /dev/null 2>&1 ${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}" -H 'Accept-Encoding: gzip' -d 'CREATE TABLE insert_number_query_2 (record UInt32) Engine = Memory' > /dev/null 2>&1 -${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=1&http_headers_progress_interval_ms=0&send_progress_in_http_headers=1" -d 'INSERT INTO insert_number_query (record) SELECT number FROM 
system.numbers LIMIT 10' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Summary|^[0-9]' +${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=1&http_headers_progress_interval_ms=0&send_progress_in_http_headers=1" -d 'INSERT INTO insert_number_query (record) SELECT number FROM system.numbers LIMIT 10' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Summary|^[0-9]' | sed 's/,\"peak_mem[^}]*//' ${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}" -H 'Accept-Encoding: gzip' -d 'DROP TABLE insert_number_query' > /dev/null 2>&1 ${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}" -H 'Accept-Encoding: gzip' -d 'DROP TABLE insert_number_query_2' > /dev/null 2>&1 diff --git a/tests/queries/0_stateless/01921_test_progress_bar.py b/tests/queries/0_stateless/01921_test_progress_bar.py index 3b0b429d396..9ce2168e2ae 100755 --- a/tests/queries/0_stateless/01921_test_progress_bar.py +++ b/tests/queries/0_stateless/01921_test_progress_bar.py @@ -14,6 +14,7 @@ log = None with client(name="client1>", log=log) as client1: client1.expect(prompt) - client1.send("SELECT number FROM numbers(100) FORMAT Null") - client1.expect("Progress: 100\.00 rows, 800\.00 B.*" + end_of_block) - client1.expect("0 rows in set. Elapsed: [\\w]{1}\.[\\w]{3} sec." + end_of_block) + client1.send("SELECT number FROM numbers(1000) FORMAT Null") + client1.expect("Progress: 1\.00 thousand rows, 8\.00 KB .*" + end_of_block) + client1.expect("0 rows in set. Elapsed: [\\w]{1}\.[\\w]{3} sec.") + client1.expect("Peak memory usage \(for query\) .*B" + end_of_block) diff --git a/tests/queries/0_stateless/02136_scalar_progress.sh b/tests/queries/0_stateless/02136_scalar_progress.sh index 9f4429b0caa..517fe19ecd4 100755 --- a/tests/queries/0_stateless/02136_scalar_progress.sh +++ b/tests/queries/0_stateless/02136_scalar_progress.sh @@ -4,4 +4,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -$CLICKHOUSE_CURL -sS "${CLICKHOUSE_URL}&wait_end_of_query=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0" -d "SELECT (SELECT max(number), count(number) FROM numbers(100000) settings max_block_size=65505);" -v 2>&1 | grep -E "X-ClickHouse-Summary|X-ClickHouse-Progress" +$CLICKHOUSE_CURL -sS "${CLICKHOUSE_URL}&wait_end_of_query=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0" -d "SELECT (SELECT max(number), count(number) FROM numbers(100000) settings max_block_size=65505);" -v 2>&1 | grep -E "X-ClickHouse-Summary|X-ClickHouse-Progress" | sed 's/,\"peak_mem[^}]*//' diff --git a/tests/queries/0_stateless/02373_progress_contain_result.sh b/tests/queries/0_stateless/02373_progress_contain_result.sh index 1b257b699f5..c84af0ee269 100755 --- a/tests/queries/0_stateless/02373_progress_contain_result.sh +++ b/tests/queries/0_stateless/02373_progress_contain_result.sh @@ -6,4 +6,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) echo 'SELECT 1 FROM numbers(100)' | ${CLICKHOUSE_CURL_COMMAND} -v "${CLICKHOUSE_URL}&wait_end_of_query=1&send_progress_in_http_headers=0" --data-binary @- 2>&1 | - grep 'X-ClickHouse-Summary' + grep 'X-ClickHouse-Summary' | sed 's/,\"peak_mem[^}]*//' diff --git a/tests/queries/0_stateless/02423_insert_summary_behaviour.sh b/tests/queries/0_stateless/02423_insert_summary_behaviour.sh index 6c75efa5150..a0b3c519806 100755 --- a/tests/queries/0_stateless/02423_insert_summary_behaviour.sh +++ b/tests/queries/0_stateless/02423_insert_summary_behaviour.sh @@ -11,11 +11,11 @@ $CLICKHOUSE_CLIENT -q "CREATE MATERIALIZED VIEW floats_to_target TO target_1 AS $CLICKHOUSE_CLIENT -q "CREATE MATERIALIZED VIEW floats_to_target_2 TO target_2 AS SELECT * FROM floats, numbers(2) n" echo "No materialized views" -${CLICKHOUSE_CURL} "${CLICKHOUSE_URL}&wait_end_of_query=1&query=INSERT+INTO+target_1" -d "VALUES(1.0)" -v 2>&1 | grep 'X-ClickHouse-Summary' -$CLICKHOUSE_LOCAL -q "SELECT number::Float64 AS v FROM numbers(10)" --format Native | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&wait_end_of_query=1&query=INSERT+INTO+target_1+FORMAT+Native" --data-binary @- -v 2>&1 | grep 'X-ClickHouse-Summary' -$CLICKHOUSE_LOCAL -q "SELECT number::Float64 AS v FROM numbers(10)" --format RowBinary | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&wait_end_of_query=1&query=INSERT+INTO+target_1+FORMAT+RowBinary" --data-binary @- -v 2>&1 | grep 'X-ClickHouse-Summary' +${CLICKHOUSE_CURL} "${CLICKHOUSE_URL}&wait_end_of_query=1&query=INSERT+INTO+target_1" -d "VALUES(1.0)" -v 2>&1 | grep 'X-ClickHouse-Summary' | sed 's/,\"peak_mem[^}]*//' +$CLICKHOUSE_LOCAL -q "SELECT number::Float64 AS v FROM numbers(10)" --format Native | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&wait_end_of_query=1&query=INSERT+INTO+target_1+FORMAT+Native" --data-binary @- -v 2>&1 | grep 'X-ClickHouse-Summary' | sed 's/,\"peak_mem[^}]*//' +$CLICKHOUSE_LOCAL -q "SELECT number::Float64 AS v FROM numbers(10)" --format RowBinary | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&wait_end_of_query=1&query=INSERT+INTO+target_1+FORMAT+RowBinary" --data-binary @- -v 2>&1 | grep 'X-ClickHouse-Summary' | sed 's/,\"peak_mem[^}]*//' echo "With materialized views" -${CLICKHOUSE_CURL} "${CLICKHOUSE_URL}&wait_end_of_query=1&query=INSERT+INTO+floats" -d "VALUES(1.0)" -v 2>&1 | grep 'X-ClickHouse-Summary' -$CLICKHOUSE_LOCAL -q "SELECT number::Float64 AS v FROM numbers(10)" --format Native | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&wait_end_of_query=1&query=INSERT+INTO+floats+FORMAT+Native" 
--data-binary @- -v 2>&1 | grep 'X-ClickHouse-Summary' -$CLICKHOUSE_LOCAL -q "SELECT number::Float64 AS v FROM numbers(10)" --format RowBinary | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&wait_end_of_query=1&query=INSERT+INTO+floats+FORMAT+RowBinary" --data-binary @- -v 2>&1 | grep 'X-ClickHouse-Summary' +${CLICKHOUSE_CURL} "${CLICKHOUSE_URL}&wait_end_of_query=1&query=INSERT+INTO+floats" -d "VALUES(1.0)" -v 2>&1 | grep 'X-ClickHouse-Summary' | sed 's/,\"peak_mem[^}]*//' +$CLICKHOUSE_LOCAL -q "SELECT number::Float64 AS v FROM numbers(10)" --format Native | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&wait_end_of_query=1&query=INSERT+INTO+floats+FORMAT+Native" --data-binary @- -v 2>&1 | grep 'X-ClickHouse-Summary' | sed 's/,\"peak_mem[^}]*//' +$CLICKHOUSE_LOCAL -q "SELECT number::Float64 AS v FROM numbers(10)" --format RowBinary | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&wait_end_of_query=1&query=INSERT+INTO+floats+FORMAT+RowBinary" --data-binary @- -v 2>&1 | grep 'X-ClickHouse-Summary' | sed 's/,\"peak_mem[^}]*//' diff --git a/tests/queries/0_stateless/02457_insert_select_progress_http.sh b/tests/queries/0_stateless/02457_insert_select_progress_http.sh index 656ab3dc403..7f7fc67ae00 100755 --- a/tests/queries/0_stateless/02457_insert_select_progress_http.sh +++ b/tests/queries/0_stateless/02457_insert_select_progress_http.sh @@ -5,5 +5,5 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0" -d @- <<< "insert into function null('_ Int') select * from numbers(5) settings max_block_size=1" -v |& { - grep -F -e X-ClickHouse-Progress: -e X-ClickHouse-Summary: + grep -F -e X-ClickHouse-Progress: -e X-ClickHouse-Summary: | sed 's/,\"peak_mem[^}]*//' } From b56d1602e841663f073c6f53a06e848b89c9214e Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 29 Jun 2023 14:25:13 +0200 Subject: [PATCH 0999/1997] Invalidate opened file cache when removing file --- src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp | 2 +- src/IO/OpenedFileCache.h | 9 +++++++-- src/Interpreters/Cache/FileSegment.h | 2 ++ src/Interpreters/Cache/Metadata.cpp | 5 +++++ 4 files changed, 15 insertions(+), 3 deletions(-) diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp index 76d54f9d27c..e4dadb4eb21 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp @@ -160,7 +160,7 @@ CachedOnDiskReadBufferFromFile::getCacheReadBuffer(const FileSegment & file_segm if (use_external_buffer) local_read_settings.local_fs_buffer_size = 0; - auto buf = createReadBufferFromFileBase(path, local_read_settings); + auto buf = createReadBufferFromFileBase(path, local_read_settings, std::nullopt, std::nullopt, file_segment.getFlagsForLocalRead()); if (getFileSizeFromReadBuffer(*buf) == 0) throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to read from an empty cache file: {}", path); diff --git a/src/IO/OpenedFileCache.h b/src/IO/OpenedFileCache.h index 844e5b31d11..61e502a494b 100644 --- a/src/IO/OpenedFileCache.h +++ b/src/IO/OpenedFileCache.h @@ -72,6 +72,13 @@ public: return res; } + void remove(const std::string & path, int flags) + { + Key key(path, flags); + std::lock_guard lock(mutex); + files.erase(key); + } + static OpenedFileCache & instance() { static OpenedFileCache res; @@ -82,5 +89,3 @@ public: using OpenedFileCachePtr = std::shared_ptr; } - - diff --git 
a/src/Interpreters/Cache/FileSegment.h b/src/Interpreters/Cache/FileSegment.h index 681c0d719e4..b8fa9486472 100644 --- a/src/Interpreters/Cache/FileSegment.h +++ b/src/Interpreters/Cache/FileSegment.h @@ -161,6 +161,8 @@ public: String getPathInLocalCache() const; + int getFlagsForLocalRead() const { return O_RDONLY | O_CLOEXEC; } + /** * ========== Methods for _any_ file segment's owner ======================== */ diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index f799bae1e10..f201455384b 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -406,6 +406,11 @@ KeyMetadata::iterator LockedKey::removeFileSegment(size_t offset, const FileSegm if (exists) { fs::remove(path); + + int flags = file_segment->getFlagsForLocalRead(); + OpenedFileCache::instance().remove(path, flags); + OpenedFileCache::instance().remove(path, flags | O_DIRECT); + LOG_TEST(key_metadata->log, "Removed file segment at path: {}", path); } else if (file_segment->downloaded_size) From 795e34fcf645d9ac200933ae3be78406972f28e5 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 29 Jun 2023 12:36:50 +0000 Subject: [PATCH 1000/1997] Fix test --- tests/integration/test_storage_s3/test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 6c251d2f84e..619157c8369 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -1361,16 +1361,16 @@ def test_select_columns(started_cluster): instance.query("SYSTEM FLUSH LOGS") result1 = instance.query( - f"SELECT read_bytes FROM system.query_log WHERE type='QueryFinish' and query LIKE 'SELECT value2 FROM {name}'" + f"SELECT ProfileEvents['ReadBufferFromS3Bytes'] FROM system.query_log WHERE type='QueryFinish' and query LIKE 'SELECT value2 FROM {name}'" ) instance.query(f"SELECT * FROM {name}") instance.query("SYSTEM FLUSH LOGS") result2 = instance.query( - f"SELECT read_bytes FROM system.query_log WHERE type='QueryFinish' and query LIKE 'SELECT * FROM {name}'" + f"SELECT ProfileEvents['ReadBufferFromS3Bytes'] FROM system.query_log WHERE type='QueryFinish' and query LIKE 'SELECT * FROM {name}'" ) - assert int(result1) * 3 <= int(result2) + assert round(int(result2) / int(result1)) == 3 def test_insert_select_schema_inference(started_cluster): From f1d695463717703d9c9f076b0e18972425b6bf46 Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 29 Jun 2023 13:09:55 +0000 Subject: [PATCH 1001/1997] Fix logical error in ANTI join with NULL --- src/Interpreters/HashJoin.cpp | 3 +++ .../02771_semi_join_use_nulls.reference | 16 ++++++++++++++++ .../0_stateless/02771_semi_join_use_nulls.sql.j2 | 6 ++++++ 3 files changed, 25 insertions(+) diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 6fe2b8464f5..7fee2ab7a6f 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -1383,6 +1383,9 @@ NO_INLINE IColumn::Filter joinRightColumns( { if (!right_row_found && null_element_found) { + if constexpr (join_features.is_anti_join && join_features.left) + setUsed(filter, i); + addNotFoundRow(added_columns, current_offset); if constexpr (join_features.need_replication) diff --git a/tests/queries/0_stateless/02771_semi_join_use_nulls.reference b/tests/queries/0_stateless/02771_semi_join_use_nulls.reference index 8d4b1a3a75e..91c0d964968 100644 --- a/tests/queries/0_stateless/02771_semi_join_use_nulls.reference +++ 
b/tests/queries/0_stateless/02771_semi_join_use_nulls.reference @@ -11,7 +11,9 @@ 0 0 0 0 0 1 +\N 0 0 1 +\N 0 0 0 0 0 0 \N @@ -25,7 +27,9 @@ 0 0 0 0 0 2 +\N 1 0 2 +\N 1 0 \N 0 0 0 \N @@ -39,7 +43,9 @@ 0 \N 0 \N 0 1 +\N \N 0 1 +\N \N 0 0 0 0 0 0 @@ -53,7 +59,9 @@ 0 0 0 0 \N 2 +\N 1 \N 2 +\N 1 0 0 0 0 0 0 @@ -67,7 +75,9 @@ 0 0 0 0 0 1 +\N 0 0 1 +\N 0 0 0 0 0 0 0 @@ -81,7 +91,9 @@ 0 0 0 0 1 2 +\N 1 1 2 +\N 1 0 \N 0 0 0 0 @@ -95,7 +107,9 @@ 0 \N 0 \N 0 1 +\N \N 0 1 +\N \N 0 0 0 0 0 0 @@ -109,4 +123,6 @@ 0 0 0 0 1 2 +\N 1 1 2 +\N 1 diff --git a/tests/queries/0_stateless/02771_semi_join_use_nulls.sql.j2 b/tests/queries/0_stateless/02771_semi_join_use_nulls.sql.j2 index 37b2e63761b..248461a98bb 100644 --- a/tests/queries/0_stateless/02771_semi_join_use_nulls.sql.j2 +++ b/tests/queries/0_stateless/02771_semi_join_use_nulls.sql.j2 @@ -44,6 +44,12 @@ SELECT id > 1, d.idd FROM (SELECT {{ maybe_materialize }}(toLowCardinality(0)) A ON a.id = d.idd ; +SELECT * +FROM (SELECT {{ maybe_materialize }}(NULL :: Nullable(UInt64)) AS id) AS a +{{ strictness }} {{ kind }} JOIN (SELECT {{ maybe_materialize }}(1 :: UInt32) AS id) AS d +ON a.id = d.id +; + {% endfor -%} {% endfor -%} {% endfor -%} From bac126fe3edbaec15834ba21ae57b3aec0f3d299 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 29 Jun 2023 15:11:02 +0200 Subject: [PATCH 1002/1997] fix --- src/Storages/StorageReplicatedMergeTree.cpp | 11 ++++------- tests/integration/test_merge_tree_hdfs/test.py | 1 - tests/integration/test_merge_tree_s3/test.py | 1 - 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 0361d46612b..a967bf3c4c4 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -7301,7 +7301,6 @@ void StorageReplicatedMergeTree::replacePartitionFrom( UInt64 index = lock->getNumber(); MergeTreePartInfo dst_part_info(partition_id, index, index, src_part->info.level); - HardlinkedFiles hardlinked_files; bool zero_copy_enabled = storage_settings_ptr->allow_remote_fs_zero_copy_replication || dynamic_cast(source_table.get())->getSettings()->allow_remote_fs_zero_copy_replication; @@ -7377,8 +7376,8 @@ void StorageReplicatedMergeTree::replacePartitionFrom( renameTempPartAndReplaceUnlocked(part, transaction, data_parts_lock); } - for (size_t i = 0; i < dst_parts.size(); ++i) - lockSharedData(*dst_parts[i], false, /*hardlinked_files*/ {}); + for (const auto & dst_part : dst_parts) + lockSharedData(*dst_part, false, /*hardlinked_files*/ {}); Coordination::Error code = zookeeper->tryMulti(ops, op_results); if (code == Coordination::Error::ZOK) @@ -7541,8 +7540,6 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta UInt64 index = lock->getNumber(); MergeTreePartInfo dst_part_info(partition_id, index, index, src_part->info.level); - HardlinkedFiles hardlinked_files; - bool zero_copy_enabled = storage_settings_ptr->allow_remote_fs_zero_copy_replication || dynamic_cast(dest_table.get())->getSettings()->allow_remote_fs_zero_copy_replication; IDataPartStorage::ClonePartParams clone_params @@ -7625,8 +7622,8 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta for (auto & part : dst_parts) dest_table_storage->renameTempPartAndReplaceUnlocked(part, transaction, dest_data_parts_lock); - for (size_t i = 0; i < dst_parts.size(); ++i) - dest_table_storage->lockSharedData(*dst_parts[i], false, /*hardlinked_files*/ {}); + for (const auto & 
dst_part : dst_parts) + dest_table_storage->lockSharedData(*dst_part, false, /*hardlinked_files*/ {}); Coordination::Error code = zookeeper->tryMulti(ops, op_results); if (code == Coordination::Error::ZBADVERSION) diff --git a/tests/integration/test_merge_tree_hdfs/test.py b/tests/integration/test_merge_tree_hdfs/test.py index d1a145c00c1..ae3e2bde836 100644 --- a/tests/integration/test_merge_tree_hdfs/test.py +++ b/tests/integration/test_merge_tree_hdfs/test.py @@ -239,7 +239,6 @@ def test_attach_detach_partition(cluster): len(hdfs_objects) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 - - FILES_OVERHEAD_METADATA_VERSION ) node.query("ALTER TABLE hdfs_test DROP PARTITION '2020-01-03'") diff --git a/tests/integration/test_merge_tree_s3/test.py b/tests/integration/test_merge_tree_s3/test.py index 303fff66166..d0be8a11a06 100644 --- a/tests/integration/test_merge_tree_s3/test.py +++ b/tests/integration/test_merge_tree_s3/test.py @@ -338,7 +338,6 @@ def test_attach_detach_partition(cluster, node_name): len(list_objects(cluster, "data/")) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 - - FILES_OVERHEAD_METADATA_VERSION ) node.query("ALTER TABLE s3_test DROP PARTITION '2020-01-03'") From c43acc6f909d22dab3c3282fabb46c3c6d877080 Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 29 Jun 2023 13:19:04 +0000 Subject: [PATCH 1003/1997] better fix --- src/Interpreters/HashJoin.cpp | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 7fee2ab7a6f..3e4f2902359 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -1284,7 +1284,6 @@ NO_INLINE IColumn::Filter joinRightColumns( for (size_t i = 0; i < rows; ++i) { bool right_row_found = false; - bool null_element_found = false; KnownRowsHolder known_rows; for (size_t onexpr_idx = 0; onexpr_idx < added_columns.join_on_keys.size(); ++onexpr_idx) @@ -1293,10 +1292,7 @@ NO_INLINE IColumn::Filter joinRightColumns( if constexpr (has_null_map) { if (join_keys.null_map && (*join_keys.null_map)[i]) - { - null_element_found = true; continue; - } } bool row_acceptable = !join_keys.isRowFiltered(i); @@ -1379,23 +1375,6 @@ NO_INLINE IColumn::Filter joinRightColumns( } } - if constexpr (has_null_map) - { - if (!right_row_found && null_element_found) - { - if constexpr (join_features.is_anti_join && join_features.left) - setUsed(filter, i); - - addNotFoundRow(added_columns, current_offset); - - if constexpr (join_features.need_replication) - { - (*added_columns.offsets_to_replicate)[i] = current_offset; - } - continue; - } - } - if (!right_row_found) { if constexpr (join_features.is_anti_join && join_features.left) From 54838e92d48d9f0f177a48c85f633a9a83a13097 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Thu, 29 Jun 2023 13:20:38 +0000 Subject: [PATCH 1004/1997] Remove whitespace --- src/IO/Progress.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IO/Progress.cpp b/src/IO/Progress.cpp index 6a78a453d8a..620d2f0f762 100644 --- a/src/IO/Progress.cpp +++ b/src/IO/Progress.cpp @@ -69,7 +69,7 @@ void ProgressValues::write(WriteBuffer & out, UInt64 client_revision) const } } -void ProgressValues::writeJSON(WriteBuffer & out, bool add_braces) const +void ProgressValues::writeJSON(WriteBuffer & out, bool add_braces) const { /// Numbers are written in double quotes (as strings) to avoid loss of precision /// of 64-bit integers after interpretation by JavaScript. 
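Taken together, the writeJSON changes above exist so that the HTTP layer can splice peak_memory_usage into the same JSON object as the progress counters. A minimal sketch of that composition, assuming the ClickHouse IO headers and approximating the caller in WriteBufferFromHTTPServerResponse (buildSummaryHeader and the Int64 type for peak memory are illustrative, not code from these patches):

#include <IO/Progress.h>
#include <IO/WriteBufferFromString.h>
#include <IO/WriteHelpers.h>

namespace DB
{

/// Builds the X-ClickHouse-Summary value: progress fields plus peak_memory_usage
/// in one JSON object. add_braces=false makes writeJSON emit only the fields,
/// so the caller can open and close the braces itself.
String buildSummaryHeader(const Progress & accumulated_progress, Int64 peak_memory_usage)
{
    WriteBufferFromOwnString writer;
    writeCString("{", writer);
    accumulated_progress.writeJSON(writer, /* add_braces */ false);
    writeCString(",\"peak_memory_usage\":\"", writer);
    writeText(peak_memory_usage, writer);
    writeCString("\"}", writer);
    return writer.str();
}

}

This also explains the recurring sed 's/,\"peak_mem[^}]*//' in the test updates above: peak memory varies between runs, so the tests strip that trailing field before comparing the header against reference output.
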
From 4581526af76848ee7370d685e96f9cc3c464df6c Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 29 Jun 2023 13:24:55 +0000 Subject: [PATCH 1005/1997] Remove has_null_map template parameter from hash join --- src/Interpreters/HashJoin.cpp | 50 +++++++++-------------------------- 1 file changed, 12 insertions(+), 38 deletions(-) diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 3e4f2902359..967e58f6d40 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -581,7 +581,7 @@ namespace }; - template + template size_t NO_INLINE insertFromBlockImplTypeCase( HashJoin & join, Map & map, size_t rows, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, UInt8ColumnDataPtr join_mask, Arena & pool, bool & is_inserted) @@ -600,7 +600,7 @@ namespace for (size_t i = 0; i < rows; ++i) { - if (has_null_map && (*null_map)[i]) + if (null_map && (*null_map)[i]) { /// nulls are not inserted into hash table, /// keep them for RIGHT and FULL joins @@ -622,21 +622,6 @@ namespace return map.getBufferSizeInCells(); } - - template - size_t insertFromBlockImplType( - HashJoin & join, Map & map, size_t rows, const ColumnRawPtrs & key_columns, - const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, UInt8ColumnDataPtr join_mask, Arena & pool, bool & is_inserted) - { - if (null_map) - return insertFromBlockImplTypeCase( - join, map, rows, key_columns, key_sizes, stored_block, null_map, join_mask, pool, is_inserted); - else - return insertFromBlockImplTypeCase( - join, map, rows, key_columns, key_sizes, stored_block, null_map, join_mask, pool, is_inserted); - } - - template size_t insertFromBlockImpl( HashJoin & join, HashJoin::Type type, Maps & maps, size_t rows, const ColumnRawPtrs & key_columns, @@ -653,7 +638,7 @@ namespace #define M(TYPE) \ case HashJoin::Type::TYPE: \ - return insertFromBlockImplType>::Type>(\ + return insertFromBlockImplTypeCase>::Type>(\ join, *maps.TYPE, rows, key_columns, key_sizes, stored_block, null_map, join_mask, pool, is_inserted); \ break; @@ -1260,7 +1245,7 @@ void setUsed(IColumn::Filter & filter [[maybe_unused]], size_t pos [[maybe_unuse /// Joins right table columns which indexes are present in right_indexes using specified map. /// Makes filter (1 if row presented in right table) and returns offsets to replicate (for ALL JOINS). -template +template NO_INLINE IColumn::Filter joinRightColumns( std::vector && key_getter_vector, const std::vector & mapv, @@ -1289,11 +1274,8 @@ NO_INLINE IColumn::Filter joinRightColumns( for (size_t onexpr_idx = 0; onexpr_idx < added_columns.join_on_keys.size(); ++onexpr_idx) { const auto & join_keys = added_columns.join_on_keys[onexpr_idx]; - if constexpr (has_null_map) - { - if (join_keys.null_map && (*join_keys.null_map)[i]) - continue; - } + if (join_keys.null_map && (*join_keys.null_map)[i]) + continue; bool row_acceptable = !join_keys.isRowFiltered(i); using FindResult = typename KeyGetter::FindResult; @@ -1392,7 +1374,7 @@ NO_INLINE IColumn::Filter joinRightColumns( return filter; } -template +template IColumn::Filter joinRightColumnsSwitchMultipleDisjuncts( std::vector && key_getter_vector, const std::vector & mapv, @@ -1400,8 +1382,8 @@ IColumn::Filter joinRightColumnsSwitchMultipleDisjuncts( JoinStuff::JoinUsedFlags & used_flags [[maybe_unused]]) { return mapv.size() > 1 - ? 
joinRightColumns(std::forward>(key_getter_vector), mapv, added_columns, used_flags) - : joinRightColumns(std::forward>(key_getter_vector), mapv, added_columns, used_flags); + ? joinRightColumns(std::forward>(key_getter_vector), mapv, added_columns, used_flags) + : joinRightColumns(std::forward>(key_getter_vector), mapv, added_columns, used_flags); } template @@ -1411,21 +1393,13 @@ IColumn::Filter joinRightColumnsSwitchNullability( AddedColumns & added_columns, JoinStuff::JoinUsedFlags & used_flags) { - bool has_null_map = std::any_of(added_columns.join_on_keys.begin(), added_columns.join_on_keys.end(), - [](const auto & k) { return k.null_map; }); if (added_columns.need_filter) { - if (has_null_map) - return joinRightColumnsSwitchMultipleDisjuncts(std::forward>(key_getter_vector), mapv, added_columns, used_flags); - else - return joinRightColumnsSwitchMultipleDisjuncts(std::forward>(key_getter_vector), mapv, added_columns, used_flags); + return joinRightColumnsSwitchMultipleDisjuncts(std::forward>(key_getter_vector), mapv, added_columns, used_flags); } else { - if (has_null_map) - return joinRightColumnsSwitchMultipleDisjuncts(std::forward>(key_getter_vector), mapv, added_columns, used_flags); - else - return joinRightColumnsSwitchMultipleDisjuncts(std::forward>(key_getter_vector), mapv, added_columns, used_flags); + return joinRightColumnsSwitchMultipleDisjuncts(std::forward>(key_getter_vector), mapv, added_columns, used_flags); } } @@ -1850,7 +1824,7 @@ struct AdderNonJoined /// Based on: /// - map offsetInternal saved in used_flags for single disjuncts /// - flags in BlockWithFlags for multiple disjuncts -template +template class NotJoinedHash final : public NotJoinedBlocks::RightColumnsFiller { public: From bbd4159beeca878616546b77a3dd36b3a9a391d6 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 29 Jun 2023 13:26:21 +0000 Subject: [PATCH 1006/1997] Automatic style fix --- tests/integration/test_merge_tree_hdfs/test.py | 6 +----- tests/integration/test_merge_tree_s3/test.py | 3 +-- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/tests/integration/test_merge_tree_hdfs/test.py b/tests/integration/test_merge_tree_hdfs/test.py index ae3e2bde836..95b63a5c8a3 100644 --- a/tests/integration/test_merge_tree_hdfs/test.py +++ b/tests/integration/test_merge_tree_hdfs/test.py @@ -235,11 +235,7 @@ def test_attach_detach_partition(cluster): assert node.query("SELECT count(*) FROM hdfs_test FORMAT Values") == "(8192)" hdfs_objects = fs.listdir("/clickhouse") - assert ( - len(hdfs_objects) - == FILES_OVERHEAD - + FILES_OVERHEAD_PER_PART_WIDE * 2 - ) + assert len(hdfs_objects) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 node.query("ALTER TABLE hdfs_test DROP PARTITION '2020-01-03'") assert node.query("SELECT count(*) FROM hdfs_test FORMAT Values") == "(4096)" diff --git a/tests/integration/test_merge_tree_s3/test.py b/tests/integration/test_merge_tree_s3/test.py index d0be8a11a06..f754bc905bf 100644 --- a/tests/integration/test_merge_tree_s3/test.py +++ b/tests/integration/test_merge_tree_s3/test.py @@ -336,8 +336,7 @@ def test_attach_detach_partition(cluster, node_name): assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(8192)" assert ( len(list_objects(cluster, "data/")) - == FILES_OVERHEAD - + FILES_OVERHEAD_PER_PART_WIDE * 2 + == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 ) node.query("ALTER TABLE s3_test DROP PARTITION '2020-01-03'") From 2a4cfeb01bcff1112d6615ddff529286ff024186 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova 
<54203879+kssenii@users.noreply.github.com> Date: Thu, 29 Jun 2023 15:27:40 +0200 Subject: [PATCH 1007/1997] Add comment --- src/Interpreters/Cache/Metadata.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index f201455384b..d4389e0d0b8 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -407,7 +407,10 @@ KeyMetadata::iterator LockedKey::removeFileSegment(size_t offset, const FileSegm { fs::remove(path); + /// Clear OpenedFileCache to avoid reading from incorrect file descriptor. int flags = file_segment->getFlagsForLocalRead(); + /// Files are created with flags from file_segment->getFlagsForLocalRead() + /// plus optionally O_DIRECT is added, depends on query setting, so remove both. OpenedFileCache::instance().remove(path, flags); OpenedFileCache::instance().remove(path, flags | O_DIRECT); From 0cccba62cfdb6ee9c04a6ec0199d46f91df89160 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sun, 25 Jun 2023 16:28:18 +0200 Subject: [PATCH 1008/1997] Support getHexUIntLowercase() with CityHash_v1_0_2::uint128 parameter. --- base/base/hex.h | 424 ++++++++++-------- src/Common/getHashOfLoadedBinary.cpp | 2 +- src/Compression/CompressedReadBufferBase.cpp | 4 +- .../DistributedAsyncInsertHeader.cpp | 5 +- .../MergeTree/MergeTreeDataPartChecksum.cpp | 14 +- .../PartMetadataManagerWithCache.cpp | 8 +- src/Storages/StorageReplicatedMergeTree.cpp | 3 +- src/Storages/System/StorageSystemParts.cpp | 6 +- .../System/StorageSystemProjectionParts.cpp | 6 +- utils/checksum-for-compressed-block/main.cpp | 2 +- 10 files changed, 267 insertions(+), 207 deletions(-) diff --git a/base/base/hex.h b/base/base/hex.h index b8cf95db893..0780e6159a1 100644 --- a/base/base/hex.h +++ b/base/base/hex.h @@ -4,212 +4,286 @@ #include #include "types.h" -/// Maps 0..15 to 0..9A..F or 0..9a..f correspondingly. +namespace CityHash_v1_0_2 { struct uint128; } -constexpr inline std::string_view hex_digit_to_char_uppercase_table = "0123456789ABCDEF"; -constexpr inline std::string_view hex_digit_to_char_lowercase_table = "0123456789abcdef"; +namespace wide +{ + template + class integer; +} + +namespace impl +{ + /// Maps 0..15 to 0..9A..F or 0..9a..f correspondingly. + constexpr inline std::string_view hex_digit_to_char_uppercase_table = "0123456789ABCDEF"; + constexpr inline std::string_view hex_digit_to_char_lowercase_table = "0123456789abcdef"; + + /// Maps 0..255 to 00..FF or 00..ff correspondingly. 
+ constexpr inline std::string_view hex_byte_to_char_uppercase_table = // + "000102030405060708090A0B0C0D0E0F" + "101112131415161718191A1B1C1D1E1F" + "202122232425262728292A2B2C2D2E2F" + "303132333435363738393A3B3C3D3E3F" + "404142434445464748494A4B4C4D4E4F" + "505152535455565758595A5B5C5D5E5F" + "606162636465666768696A6B6C6D6E6F" + "707172737475767778797A7B7C7D7E7F" + "808182838485868788898A8B8C8D8E8F" + "909192939495969798999A9B9C9D9E9F" + "A0A1A2A3A4A5A6A7A8A9AAABACADAEAF" + "B0B1B2B3B4B5B6B7B8B9BABBBCBDBEBF" + "C0C1C2C3C4C5C6C7C8C9CACBCCCDCECF" + "D0D1D2D3D4D5D6D7D8D9DADBDCDDDEDF" + "E0E1E2E3E4E5E6E7E8E9EAEBECEDEEEF" + "F0F1F2F3F4F5F6F7F8F9FAFBFCFDFEFF"; + + constexpr inline std::string_view hex_byte_to_char_lowercase_table = // + "000102030405060708090a0b0c0d0e0f" + "101112131415161718191a1b1c1d1e1f" + "202122232425262728292a2b2c2d2e2f" + "303132333435363738393a3b3c3d3e3f" + "404142434445464748494a4b4c4d4e4f" + "505152535455565758595a5b5c5d5e5f" + "606162636465666768696a6b6c6d6e6f" + "707172737475767778797a7b7c7d7e7f" + "808182838485868788898a8b8c8d8e8f" + "909192939495969798999a9b9c9d9e9f" + "a0a1a2a3a4a5a6a7a8a9aaabacadaeaf" + "b0b1b2b3b4b5b6b7b8b9babbbcbdbebf" + "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf" + "d0d1d2d3d4d5d6d7d8d9dadbdcdddedf" + "e0e1e2e3e4e5e6e7e8e9eaebecedeeef" + "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff"; + + /// Maps 0..255 to 00000000..11111111 correspondingly. + constexpr inline std::string_view bin_byte_to_char_table = // + "0000000000000001000000100000001100000100000001010000011000000111" + "0000100000001001000010100000101100001100000011010000111000001111" + "0001000000010001000100100001001100010100000101010001011000010111" + "0001100000011001000110100001101100011100000111010001111000011111" + "0010000000100001001000100010001100100100001001010010011000100111" + "0010100000101001001010100010101100101100001011010010111000101111" + "0011000000110001001100100011001100110100001101010011011000110111" + "0011100000111001001110100011101100111100001111010011111000111111" + "0100000001000001010000100100001101000100010001010100011001000111" + "0100100001001001010010100100101101001100010011010100111001001111" + "0101000001010001010100100101001101010100010101010101011001010111" + "0101100001011001010110100101101101011100010111010101111001011111" + "0110000001100001011000100110001101100100011001010110011001100111" + "0110100001101001011010100110101101101100011011010110111001101111" + "0111000001110001011100100111001101110100011101010111011001110111" + "0111100001111001011110100111101101111100011111010111111001111111" + "1000000010000001100000101000001110000100100001011000011010000111" + "1000100010001001100010101000101110001100100011011000111010001111" + "1001000010010001100100101001001110010100100101011001011010010111" + "1001100010011001100110101001101110011100100111011001111010011111" + "1010000010100001101000101010001110100100101001011010011010100111" + "1010100010101001101010101010101110101100101011011010111010101111" + "1011000010110001101100101011001110110100101101011011011010110111" + "1011100010111001101110101011101110111100101111011011111010111111" + "1100000011000001110000101100001111000100110001011100011011000111" + "1100100011001001110010101100101111001100110011011100111011001111" + "1101000011010001110100101101001111010100110101011101011011010111" + "1101100011011001110110101101101111011100110111011101111011011111" + "1110000011100001111000101110001111100100111001011110011011100111" + "1110100011101001111010101110101111101100111011011110111011101111" + 
"1111000011110001111100101111001111110100111101011111011011110111" + "1111100011111001111110101111101111111100111111011111111011111111"; + + /// Maps 0..9, A..F, a..f to 0..15. Other chars are mapped to implementation specific value. + constexpr inline std::string_view hex_char_to_digit_table + = {"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\xff\xff\xff\xff\xff\xff" //0-9 + "\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" //A-Z + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" //a-z + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff", + 256}; + + /// Converts a hex digit '0'..'f' or '0'..'F' to its value 0..15. + constexpr UInt8 unhexDigit(char c) + { + return hex_char_to_digit_table[static_cast(c)]; + } + + /// Converts an unsigned integer in the native endian to hexadecimal representation and back. Used as a base class for HexConversion. + template + struct HexConversionUInt + { + static const constexpr size_t num_hex_digits = sizeof(TUInt) * 2; + + static void hex(TUInt uint_, char * out, std::string_view table) + { + union + { + TUInt value; + UInt8 uint8[sizeof(TUInt)]; + }; + + value = uint_; + + for (size_t i = 0; i < sizeof(TUInt); ++i) + { + if constexpr (std::endian::native == std::endian::little) + memcpy(out + i * 2, &table[static_cast(uint8[sizeof(TUInt) - 1 - i]) * 2], 2); + else + memcpy(out + i * 2, &table[static_cast(uint8[i]) * 2], 2); + } + } + + static TUInt unhex(const char * data) + { + TUInt res; + if constexpr (sizeof(TUInt) == 1) + { + res = static_cast(unhexDigit(data[0])) * 0x10 + static_cast(unhexDigit(data[1])); + } + else if constexpr (sizeof(TUInt) == 2) + { + res = static_cast(unhexDigit(data[0])) * 0x1000 + static_cast(unhexDigit(data[1])) * 0x100 + + static_cast(unhexDigit(data[2])) * 0x10 + static_cast(unhexDigit(data[3])); + } + else if constexpr ((sizeof(TUInt) <= 8) || ((sizeof(TUInt) % 8) != 0)) + { + res = 0; + for (size_t i = 0; i < sizeof(TUInt) * 2; ++i, ++data) + { + res <<= 4; + res += unhexDigit(*data); + } + } + else + { + res = 0; + for (size_t i = 0; i < sizeof(TUInt) / 8; ++i, data += 16) + { + res <<= 64; + res += HexConversionUInt::unhex(data); + } + } + return res; + } + }; + + /// Helper template class to convert a value of any supported type to hexadecimal representation and back. 
+ template + struct HexConversion; + + template + struct HexConversion>> : public HexConversionUInt {}; + + template + struct HexConversion> : public HexConversionUInt> {}; + + template + struct HexConversion>> + { + static const constexpr size_t num_hex_digits = 32; + + static void hex(const CityHashUInt128 & uint_, char * out, std::string_view table) + { + HexConversion::hex(uint_.high64, out, table); + HexConversion::hex(uint_.low64, out + 16, table); + } + + static CityHashUInt128 unhex(const char * data) + { + CityHashUInt128 res; + res.high64 = HexConversion::unhex(data); + res.low64 = HexConversion::unhex(data + 16); + return res; + } + }; +} + +/// Produces a hexadecimal representation of an integer value with leading zeros (for checksums). +/// The function supports native integer types, wide::integer, CityHash_v1_0_2::uint128. +template +void writeHexUIntUppercase(const T & value, char * out) +{ + impl::HexConversion::hex(value, out, impl::hex_byte_to_char_uppercase_table); +} + +template +void writeHexUIntLowercase(const T & value, char * out) +{ + impl::HexConversion::hex(value, out, impl::hex_byte_to_char_lowercase_table); +} + +template +std::string getHexUIntUppercase(const T & value) +{ + std::string res(impl::HexConversion::num_hex_digits, '\0'); + writeHexUIntUppercase(value, res.data()); + return res; +} + +template +std::string getHexUIntLowercase(const T & value) +{ + std::string res(impl::HexConversion::num_hex_digits, '\0'); + writeHexUIntLowercase(value, res.data()); + return res; +} constexpr char hexDigitUppercase(unsigned char c) { - return hex_digit_to_char_uppercase_table[c]; + return impl::hex_digit_to_char_uppercase_table[c]; } + constexpr char hexDigitLowercase(unsigned char c) { - return hex_digit_to_char_lowercase_table[c]; + return impl::hex_digit_to_char_lowercase_table[c]; } -/// Maps 0..255 to 00..FF or 00..ff correspondingly - -constexpr inline std::string_view hex_byte_to_char_uppercase_table = // - "000102030405060708090A0B0C0D0E0F" - "101112131415161718191A1B1C1D1E1F" - "202122232425262728292A2B2C2D2E2F" - "303132333435363738393A3B3C3D3E3F" - "404142434445464748494A4B4C4D4E4F" - "505152535455565758595A5B5C5D5E5F" - "606162636465666768696A6B6C6D6E6F" - "707172737475767778797A7B7C7D7E7F" - "808182838485868788898A8B8C8D8E8F" - "909192939495969798999A9B9C9D9E9F" - "A0A1A2A3A4A5A6A7A8A9AAABACADAEAF" - "B0B1B2B3B4B5B6B7B8B9BABBBCBDBEBF" - "C0C1C2C3C4C5C6C7C8C9CACBCCCDCECF" - "D0D1D2D3D4D5D6D7D8D9DADBDCDDDEDF" - "E0E1E2E3E4E5E6E7E8E9EAEBECEDEEEF" - "F0F1F2F3F4F5F6F7F8F9FAFBFCFDFEFF"; - -constexpr inline std::string_view hex_byte_to_char_lowercase_table = // - "000102030405060708090a0b0c0d0e0f" - "101112131415161718191a1b1c1d1e1f" - "202122232425262728292a2b2c2d2e2f" - "303132333435363738393a3b3c3d3e3f" - "404142434445464748494a4b4c4d4e4f" - "505152535455565758595a5b5c5d5e5f" - "606162636465666768696a6b6c6d6e6f" - "707172737475767778797a7b7c7d7e7f" - "808182838485868788898a8b8c8d8e8f" - "909192939495969798999a9b9c9d9e9f" - "a0a1a2a3a4a5a6a7a8a9aaabacadaeaf" - "b0b1b2b3b4b5b6b7b8b9babbbcbdbebf" - "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf" - "d0d1d2d3d4d5d6d7d8d9dadbdcdddedf" - "e0e1e2e3e4e5e6e7e8e9eaebecedeeef" - "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff"; - inline void writeHexByteUppercase(UInt8 byte, void * out) { - memcpy(out, &hex_byte_to_char_uppercase_table[static_cast(byte) * 2], 2); + memcpy(out, &impl::hex_byte_to_char_uppercase_table[static_cast(byte) * 2], 2); } inline void writeHexByteLowercase(UInt8 byte, void * out) { - memcpy(out, 
&hex_byte_to_char_lowercase_table[static_cast(byte) * 2], 2); + memcpy(out, &impl::hex_byte_to_char_lowercase_table[static_cast(byte) * 2], 2); } -constexpr inline std::string_view bin_byte_to_char_table = // - "0000000000000001000000100000001100000100000001010000011000000111" - "0000100000001001000010100000101100001100000011010000111000001111" - "0001000000010001000100100001001100010100000101010001011000010111" - "0001100000011001000110100001101100011100000111010001111000011111" - "0010000000100001001000100010001100100100001001010010011000100111" - "0010100000101001001010100010101100101100001011010010111000101111" - "0011000000110001001100100011001100110100001101010011011000110111" - "0011100000111001001110100011101100111100001111010011111000111111" - "0100000001000001010000100100001101000100010001010100011001000111" - "0100100001001001010010100100101101001100010011010100111001001111" - "0101000001010001010100100101001101010100010101010101011001010111" - "0101100001011001010110100101101101011100010111010101111001011111" - "0110000001100001011000100110001101100100011001010110011001100111" - "0110100001101001011010100110101101101100011011010110111001101111" - "0111000001110001011100100111001101110100011101010111011001110111" - "0111100001111001011110100111101101111100011111010111111001111111" - "1000000010000001100000101000001110000100100001011000011010000111" - "1000100010001001100010101000101110001100100011011000111010001111" - "1001000010010001100100101001001110010100100101011001011010010111" - "1001100010011001100110101001101110011100100111011001111010011111" - "1010000010100001101000101010001110100100101001011010011010100111" - "1010100010101001101010101010101110101100101011011010111010101111" - "1011000010110001101100101011001110110100101101011011011010110111" - "1011100010111001101110101011101110111100101111011011111010111111" - "1100000011000001110000101100001111000100110001011100011011000111" - "1100100011001001110010101100101111001100110011011100111011001111" - "1101000011010001110100101101001111010100110101011101011011010111" - "1101100011011001110110101101101111011100110111011101111011011111" - "1110000011100001111000101110001111100100111001011110011011100111" - "1110100011101001111010101110101111101100111011011110111011101111" - "1111000011110001111100101111001111110100111101011111011011110111" - "1111100011111001111110101111101111111100111111011111111011111111"; - -inline void writeBinByte(UInt8 byte, void * out) +/// Converts a hex representation with leading zeros back to an integer value. +/// The function supports native integer types, wide::integer, CityHash_v1_0_2::uint128. 
+template +constexpr T unhexUInt(const char * data) { - memcpy(out, &bin_byte_to_char_table[static_cast(byte) * 8], 8); + return impl::HexConversion::unhex(data); } -/// Produces hex representation of an unsigned int with leading zeros (for checksums) -template -inline void writeHexUIntImpl(TUInt uint_, char * out, std::string_view table) -{ - union - { - TUInt value; - UInt8 uint8[sizeof(TUInt)]; - }; - - value = uint_; - - for (size_t i = 0; i < sizeof(TUInt); ++i) - { - if constexpr (std::endian::native == std::endian::little) - memcpy(out + i * 2, &table[static_cast(uint8[sizeof(TUInt) - 1 - i]) * 2], 2); - else - memcpy(out + i * 2, &table[static_cast(uint8[i]) * 2], 2); - } -} - -template -inline void writeHexUIntUppercase(TUInt uint_, char * out) -{ - writeHexUIntImpl(uint_, out, hex_byte_to_char_uppercase_table); -} - -template -inline void writeHexUIntLowercase(TUInt uint_, char * out) -{ - writeHexUIntImpl(uint_, out, hex_byte_to_char_lowercase_table); -} - -template -std::string getHexUIntUppercase(TUInt uint_) -{ - std::string res(sizeof(TUInt) * 2, '\0'); - writeHexUIntUppercase(uint_, res.data()); - return res; -} - -template -std::string getHexUIntLowercase(TUInt uint_) -{ - std::string res(sizeof(TUInt) * 2, '\0'); - writeHexUIntLowercase(uint_, res.data()); - return res; -} - -/// Maps 0..9, A..F, a..f to 0..15. Other chars are mapped to implementation specific value. - -constexpr inline std::string_view hex_char_to_digit_table - = {"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\xff\xff\xff\xff\xff\xff" //0-9 - "\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" //A-Z - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" //a-z - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff", - 256}; - +/// Converts a hexadecimal digit '0'..'f' or '0'..'F' to UInt8. constexpr UInt8 unhex(char c) { - return hex_char_to_digit_table[static_cast(c)]; + return impl::unhexDigit(c); } +/// Converts two hexadecimal digits to UInt8. constexpr UInt8 unhex2(const char * data) { - return static_cast(unhex(data[0])) * 0x10 + static_cast(unhex(data[1])); + return unhexUInt(data); } +/// Converts four hexadecimal digits to UInt16. constexpr UInt16 unhex4(const char * data) { - return static_cast(unhex(data[0])) * 0x1000 + static_cast(unhex(data[1])) * 0x100 - + static_cast(unhex(data[2])) * 0x10 + static_cast(unhex(data[3])); + return unhexUInt(data); } -template -constexpr TUInt unhexUInt(const char * data) +/// Produces a binary representation of a single byte. 
+inline void writeBinByte(UInt8 byte, void * out) { - TUInt res = 0; - if constexpr ((sizeof(TUInt) <= 8) || ((sizeof(TUInt) % 8) != 0)) - { - for (size_t i = 0; i < sizeof(TUInt) * 2; ++i, ++data) - { - res <<= 4; - res += unhex(*data); - } - } - else - { - for (size_t i = 0; i < sizeof(TUInt) / 8; ++i, data += 16) - { - res <<= 64; - res += unhexUInt(data); - } - } - return res; + memcpy(out, &impl::bin_byte_to_char_table[static_cast(byte) * 8], 8); } diff --git a/src/Common/getHashOfLoadedBinary.cpp b/src/Common/getHashOfLoadedBinary.cpp index cc0ad0d2143..b81300b8536 100644 --- a/src/Common/getHashOfLoadedBinary.cpp +++ b/src/Common/getHashOfLoadedBinary.cpp @@ -37,7 +37,7 @@ SipHash getHashOfLoadedBinary() std::string getHashOfLoadedBinaryHex() { SipHash hash = getHashOfLoadedBinary(); - std::array checksum; + UInt128 checksum; hash.get128(checksum); return getHexUIntUppercase(checksum); } diff --git a/src/Compression/CompressedReadBufferBase.cpp b/src/Compression/CompressedReadBufferBase.cpp index bae52c8bece..dd19955d010 100644 --- a/src/Compression/CompressedReadBufferBase.cpp +++ b/src/Compression/CompressedReadBufferBase.cpp @@ -49,8 +49,8 @@ static void validateChecksum(char * data, size_t size, const Checksum expected_c /// TODO mess up of endianness in error message. message << "Checksum doesn't match: corrupted data." - " Reference: " + getHexUIntLowercase(expected_checksum.high64) + getHexUIntLowercase(expected_checksum.low64) - + ". Actual: " + getHexUIntLowercase(calculated_checksum.high64) + getHexUIntLowercase(calculated_checksum.low64) + " Reference: " + getHexUIntLowercase(expected_checksum) + + ". Actual: " + getHexUIntLowercase(calculated_checksum) + ". Size of compressed block: " + toString(size); const char * message_hardware_failure = "This is most likely due to hardware failure. " diff --git a/src/Storages/Distributed/DistributedAsyncInsertHeader.cpp b/src/Storages/Distributed/DistributedAsyncInsertHeader.cpp index e1b54304f23..a8ed89e66f1 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertHeader.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertHeader.cpp @@ -39,9 +39,8 @@ DistributedAsyncInsertHeader DistributedAsyncInsertHeader::read(ReadBufferFromFi if (expected_checksum != calculated_checksum) { throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, - "Checksum of extra info doesn't match: corrupted data. Reference: {}{}. Actual: {}{}.", - getHexUIntLowercase(expected_checksum.high64), getHexUIntLowercase(expected_checksum.low64), - getHexUIntLowercase(calculated_checksum.high64), getHexUIntLowercase(calculated_checksum.low64)); + "Checksum of extra info doesn't match: corrupted data. Reference: {}. Actual: {}.", + getHexUIntLowercase(expected_checksum), getHexUIntLowercase(calculated_checksum)); } /// Read the parts of the header. 
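Before the remaining call-site diffs, here is what the reworked hex.h enables: a CityHash_v1_0_2::uint128 checksum can now be formatted in one call (high64 first, then low64, 32 hex digits) and parsed back with unhexUInt, instead of concatenating getHexUIntLowercase(checksum.high64) + getHexUIntLowercase(checksum.low64) by hand. A small sketch under assumed include paths from the ClickHouse tree (the sample values are arbitrary):

#include <base/hex.h>
#include <city.h>
#include <cassert>
#include <string>

int main()
{
    CityHash_v1_0_2::uint128 checksum;
    checksum.high64 = 0x0123456789ABCDEFULL;
    checksum.low64 = 0xFEDCBA9876543210ULL;

    /// 32 hex digits: the high half is written first, per the HexConversion
    /// specialization for CityHash_v1_0_2::uint128 added above.
    std::string hex = getHexUIntLowercase(checksum);
    assert(hex == "0123456789abcdeffedcba9876543210");

    /// Round trip through the matching parser.
    auto parsed = unhexUInt<CityHash_v1_0_2::uint128>(hex.data());
    assert(parsed.high64 == checksum.high64 && parsed.low64 == checksum.low64);
    return 0;
}
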
diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp index 58ba7acb9ba..6628cd68eaf 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp @@ -307,19 +307,7 @@ static void updateHash(SipHash & hash, const std::string & data) /// Hash is the same as MinimalisticDataPartChecksums::hash_of_all_files String MergeTreeDataPartChecksums::getTotalChecksumHex() const { - SipHash hash_of_all_files; - - for (const auto & [name, checksum] : files) - { - updateHash(hash_of_all_files, name); - hash_of_all_files.update(checksum.file_hash); - } - - UInt64 lo; - UInt64 hi; - hash_of_all_files.get128(lo, hi); - - return getHexUIntUppercase(hi) + getHexUIntUppercase(lo); + return getHexUIntUppercase(getTotalChecksumUInt128()); } MergeTreeDataPartChecksums::Checksum::uint128 MergeTreeDataPartChecksums::getTotalChecksumUInt128() const diff --git a/src/Storages/MergeTree/PartMetadataManagerWithCache.cpp b/src/Storages/MergeTree/PartMetadataManagerWithCache.cpp index 324bd4bbaee..bb6462b3058 100644 --- a/src/Storages/MergeTree/PartMetadataManagerWithCache.cpp +++ b/src/Storages/MergeTree/PartMetadataManagerWithCache.cpp @@ -250,8 +250,8 @@ std::unordered_map PartMetadataManagerWit ErrorCodes::CORRUPTED_DATA, "Checksums doesn't match in part {} for {}. Expected: {}. Found {}.", part->name, file_path, - getHexUIntUppercase(disk_checksum.high64) + getHexUIntUppercase(disk_checksum.low64), - getHexUIntUppercase(cache_checksums[i].high64) + getHexUIntUppercase(cache_checksums[i].low64)); + getHexUIntUppercase(disk_checksum), + getHexUIntUppercase(cache_checksums[i])); disk_checksums.push_back(disk_checksum); continue; @@ -287,8 +287,8 @@ std::unordered_map PartMetadataManagerWit ErrorCodes::CORRUPTED_DATA, "Checksums doesn't match in projection part {} {}. Expected: {}. 
Found {}.", part->name, proj_name, - getHexUIntUppercase(disk_checksum.high64) + getHexUIntUppercase(disk_checksum.low64), - getHexUIntUppercase(cache_checksums[i].high64) + getHexUIntUppercase(cache_checksums[i].low64)); + getHexUIntUppercase(disk_checksum), + getHexUIntUppercase(cache_checksums[i])); disk_checksums.push_back(disk_checksum); } return results; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index b1ba06c77f9..a2138efdf56 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -2891,8 +2891,7 @@ void StorageReplicatedMergeTree::cloneReplica(const String & source_replica, Coo desired_checksums = MinimalisticDataPartChecksums::deserializeFrom(desired_checksums_str); } - const auto [lo, hi] = desired_checksums.hash_of_all_files; - log_entry.part_checksum = getHexUIntUppercase(hi) + getHexUIntUppercase(lo); + log_entry.part_checksum = getHexUIntUppercase(desired_checksums.hash_of_all_files); } else { diff --git a/src/Storages/System/StorageSystemParts.cpp b/src/Storages/System/StorageSystemParts.cpp index b642f4b5088..7399bd789a7 100644 --- a/src/Storages/System/StorageSystemParts.cpp +++ b/src/Storages/System/StorageSystemParts.cpp @@ -252,17 +252,17 @@ void StorageSystemParts::processNextStorage( if (columns_mask[src_index++]) { auto checksum = helper.hash_of_all_files; - columns[res_index++]->insert(getHexUIntLowercase(checksum.high64) + getHexUIntLowercase(checksum.low64)); + columns[res_index++]->insert(getHexUIntLowercase(checksum)); } if (columns_mask[src_index++]) { auto checksum = helper.hash_of_uncompressed_files; - columns[res_index++]->insert(getHexUIntLowercase(checksum.high64) + getHexUIntLowercase(checksum.low64)); + columns[res_index++]->insert(getHexUIntLowercase(checksum)); } if (columns_mask[src_index++]) { auto checksum = helper.uncompressed_hash_of_compressed_files; - columns[res_index++]->insert(getHexUIntLowercase(checksum.high64) + getHexUIntLowercase(checksum.low64)); + columns[res_index++]->insert(getHexUIntLowercase(checksum)); } } diff --git a/src/Storages/System/StorageSystemProjectionParts.cpp b/src/Storages/System/StorageSystemProjectionParts.cpp index 05c83747c4d..213865a8d61 100644 --- a/src/Storages/System/StorageSystemProjectionParts.cpp +++ b/src/Storages/System/StorageSystemProjectionParts.cpp @@ -221,17 +221,17 @@ void StorageSystemProjectionParts::processNextStorage( if (columns_mask[src_index++]) { auto checksum = helper.hash_of_all_files; - columns[res_index++]->insert(getHexUIntLowercase(checksum.high64) + getHexUIntLowercase(checksum.low64)); + columns[res_index++]->insert(getHexUIntLowercase(checksum)); } if (columns_mask[src_index++]) { auto checksum = helper.hash_of_uncompressed_files; - columns[res_index++]->insert(getHexUIntLowercase(checksum.high64) + getHexUIntLowercase(checksum.low64)); + columns[res_index++]->insert(getHexUIntLowercase(checksum)); } if (columns_mask[src_index++]) { auto checksum = helper.uncompressed_hash_of_compressed_files; - columns[res_index++]->insert(getHexUIntLowercase(checksum.high64) + getHexUIntLowercase(checksum.low64)); + columns[res_index++]->insert(getHexUIntLowercase(checksum)); } } diff --git a/utils/checksum-for-compressed-block/main.cpp b/utils/checksum-for-compressed-block/main.cpp index d30a3798820..4ae06a78ab4 100644 --- a/utils/checksum-for-compressed-block/main.cpp +++ b/utils/checksum-for-compressed-block/main.cpp @@ -45,7 +45,7 @@ int main(int, char **) { auto flipped = 
flipBit(str, pos);
 auto checksum = CityHash_v1_0_2::CityHash128(flipped.data(), flipped.size());
- std::cout << getHexUIntLowercase(checksum.high64) << getHexUIntLowercase(checksum.low64) << "\t" << pos / 8 << ", " << pos % 8 << "\n";
+ std::cout << getHexUIntLowercase(checksum) << "\t" << pos / 8 << ", " << pos % 8 << "\n";
 }
 return 0;

From ebeef65920fe671d64d8632ec6ca4d535bc2247e Mon Sep 17 00:00:00 2001
From: avogar
Date: Thu, 29 Jun 2023 13:35:27 +0000
Subject: [PATCH 1009/1997] Add placeholder for full filename in rename_files_after_processing setting

---
 docs/en/operations/settings/settings.md | 1 +
 docs/ru/operations/settings/settings.md | 1 +
 src/Common/FileRenamer.cpp | 6 ++++--
 src/Common/FileRenamer.h | 1 +
 src/Core/Settings.h | 2 +-
 .../02732_rename_after_processing.reference | 3 +++
 .../0_stateless/02732_rename_after_processing.sh | 10 ++++++++++
 7 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index cff65e049f3..25baad11282 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -4488,6 +4488,7 @@ This setting allows to specify renaming pattern for files processed by `file` ta

 ### Placeholders

+- `%a` — Full original filename (e.g., "sample.csv").
 - `%f` — Original filename without extension (e.g., "sample").
 - `%e` — Original file extension with dot (e.g., ".csv").
 - `%t` — Timestamp (in microseconds).
diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index f83d05ff710..70f2793f6bb 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -4195,6 +4195,7 @@ SELECT *, timezone() FROM test_tz WHERE d = '2000-01-01 00:00:00' SETTINGS sessi
 ### Шаблон

 Шаблон поддерживает следующие виды плейсхолдеров:
+- `%a` — Полное исходное имя файла (например "sample.csv").
 - `%f` — Исходное имя файла без расширения (например "sample").
 - `%e` — Оригинальное расширение файла с точкой (например ".csv").
 - `%t` — Текущее время (в микросекундах).
diff --git a/src/Common/FileRenamer.cpp b/src/Common/FileRenamer.cpp
index 3473d543c00..33b55233a2e 100644
--- a/src/Common/FileRenamer.cpp
+++ b/src/Common/FileRenamer.cpp
@@ -47,6 +47,7 @@ String FileRenamer::generateNewFilename(const String & filename) const
 // Define placeholders and their corresponding values
 std::map placeholders =
 {
+ {"%a", filename},
 {"%f", file_base},
 {"%e", file_ext},
 {"%t", timestamp},
@@ -69,16 +70,17 @@ bool FileRenamer::isEmpty() const
 bool FileRenamer::validateRenamingRule(const String & rule, bool throw_on_error)
 {
 // Check if the rule contains invalid placeholders
- re2::RE2 invalid_placeholder_pattern("^([^%]|%[fet%])*$");
+ re2::RE2 invalid_placeholder_pattern("^([^%]|%[afet%])*$");
 if (!re2::RE2::FullMatch(rule, invalid_placeholder_pattern))
 {
 if (throw_on_error)
- throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid renaming rule: Allowed placeholders only %f, %e, %t, and %%");
+ throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid renaming rule: Allowed placeholders only %a, %f, %e, %t, and %%");
 return false;
 }

 // Replace valid placeholders with empty strings and count remaining percentage signs.
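 // Illustrative note, not part of the patch: generateNewFilename() substitutes each
 // placeholder with its value, so with the new "%a" entry a rule such as
 // "%a.processed" applied to "sample.csv" yields "sample.csv.processed" (this is
 // exactly what the tmp6.csv test below checks), while "%f%e" rebuilds the
 // original name from its base and extension.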
String replaced_rule = rule; + boost::replace_all(replaced_rule, "%a", ""); boost::replace_all(replaced_rule, "%f", ""); boost::replace_all(replaced_rule, "%e", ""); boost::replace_all(replaced_rule, "%t", ""); diff --git a/src/Common/FileRenamer.h b/src/Common/FileRenamer.h index c062978d6f6..91f74f09032 100644 --- a/src/Common/FileRenamer.h +++ b/src/Common/FileRenamer.h @@ -9,6 +9,7 @@ namespace DB /** * The FileRenamer class provides functionality for renaming files based on given pattern with placeholders * The supported placeholders are: + * %a - Full original file name ("sample.csv") * %f - Original filename without extension ("sample") * %e - Original file extension with dot (".csv") * %t - Timestamp (in microseconds) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 658f3c8025b..099ad1ea649 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -733,7 +733,7 @@ class IColumn; M(String, workload, "default", "Name of workload to be used to access resources", 0) \ M(Milliseconds, storage_system_stack_trace_pipe_read_timeout_ms, 100, "Maximum time to read from a pipe for receiving information from the threads when querying the `system.stack_trace` table. This setting is used for testing purposes and not meant to be changed by users.", 0) \ \ - M(String, rename_files_after_processing, "", "Rename successfully processed files according to the specified pattern; Pattern can include the following placeholders: `%f` (original filename without extension), `%e` (file extension with dot), `%t` (current timestamp in µs), and `%%` (% sign)", 0) \ + M(String, rename_files_after_processing, "", "Rename successfully processed files according to the specified pattern; Pattern can include the following placeholders: `%a` (full original file name), `%f` (original filename without extension), `%e` (file extension with dot), `%t` (current timestamp in µs), and `%%` (% sign)", 0) \ \ M(Bool, parallelize_output_from_storages, true, "Parallelize output for reading step from storage. It allows parallelizing query processing right after reading from storage if possible", 0) \ M(String, insert_deduplication_token, "", "If not empty, used for duplicate detection instead of data digest", 0) \ diff --git a/tests/queries/0_stateless/02732_rename_after_processing.reference b/tests/queries/0_stateless/02732_rename_after_processing.reference index 39cdb677e09..86f682d682c 100644 --- a/tests/queries/0_stateless/02732_rename_after_processing.reference +++ b/tests/queries/0_stateless/02732_rename_after_processing.reference @@ -19,3 +19,6 @@ OK tmp5.csv OK tmp5.csv +4 +tmp6.csv.processed +!tmp6.csv diff --git a/tests/queries/0_stateless/02732_rename_after_processing.sh b/tests/queries/0_stateless/02732_rename_after_processing.sh index c4f80d3462b..cdbc9892bc7 100755 --- a/tests/queries/0_stateless/02732_rename_after_processing.sh +++ b/tests/queries/0_stateless/02732_rename_after_processing.sh @@ -29,6 +29,7 @@ cp ${tmp_dir}/tmp.csv ${tmp_dir}/tmp3_1.csv cp ${tmp_dir}/tmp.csv ${tmp_dir}/tmp3_2.csv cp ${tmp_dir}/tmp.csv ${tmp_dir}/tmp4.csv cp ${tmp_dir}/tmp.csv ${tmp_dir}/tmp5.csv +cp ${tmp_dir}/tmp.csv ${tmp_dir}/tmp6.csv ### Checking that renaming works @@ -115,5 +116,14 @@ if [ -e "${tmp_dir}/tmp5.csv" ]; then echo "tmp5.csv" fi +# check full file name placeholder +${CLICKHOUSE_CLIENT} --rename-files-after-processing="%a.processed" -q "SELECT COUNT(*) FROM file('${unique_name}/tmp6.csv')" +if [ -e "${tmp_dir}/tmp6.csv.processed" ]; then + echo "tmp6.csv.processed" +fi +if [ ! 
-e "${tmp_dir}/tmp6.csv" ]; then + echo "!tmp6.csv" +fi + # Clean rm -rd $tmp_dir From 5656d18690bd00b0db48c1e12e48e0446e263465 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 29 Jun 2023 13:36:55 +0000 Subject: [PATCH 1010/1997] Update version_date.tsv and changelogs after v23.5.4.25-stable --- docker/keeper/Dockerfile | 2 +- docker/server/Dockerfile.alpine | 2 +- docker/server/Dockerfile.ubuntu | 2 +- docs/changelogs/v23.5.4.25-stable.md | 31 ++++++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 1 + 5 files changed, 35 insertions(+), 3 deletions(-) create mode 100644 docs/changelogs/v23.5.4.25-stable.md diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index 367f6043b90..6d53a6f4c51 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ esac ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release" -ARG VERSION="23.5.3.24" +ARG VERSION="23.5.4.25" ARG PACKAGES="clickhouse-keeper" # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index e7e879fa95f..91b22346f13 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="23.5.3.24" +ARG VERSION="23.5.4.25" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 42ae81655d2..0ed0e4e1168 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -23,7 +23,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="23.5.3.24" +ARG VERSION="23.5.4.25" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docs/changelogs/v23.5.4.25-stable.md b/docs/changelogs/v23.5.4.25-stable.md new file mode 100644 index 00000000000..53d3a7c9c0a --- /dev/null +++ b/docs/changelogs/v23.5.4.25-stable.md @@ -0,0 +1,31 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.5.4.25-stable (190f962abcf) FIXME as compared to v23.5.3.24-stable (76f54616d3b) + +#### Improvement +* Backported in [#51235](https://github.com/ClickHouse/ClickHouse/issues/51235): Improve the progress bar for file/s3/hdfs/url table functions by using chunk size from source data and using incremental total size counting in each thread. Fix the progress bar for *Cluster functions. This closes [#47250](https://github.com/ClickHouse/ClickHouse/issues/47250). [#51088](https://github.com/ClickHouse/ClickHouse/pull/51088) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#51255](https://github.com/ClickHouse/ClickHouse/issues/51255): Disable cache setting `do_not_evict_index_and_mark_files` (Was enabled in `23.5`). [#51222](https://github.com/ClickHouse/ClickHouse/pull/51222) ([Kseniia Sumarokova](https://github.com/kssenii)). 
+ +#### Build/Testing/Packaging Improvement +* Backported in [#51531](https://github.com/ClickHouse/ClickHouse/issues/51531): Split huge `RUN` in Dockerfile into smaller conditional. Install the necessary tools on demand in the same `RUN` layer, and remove them after that. Upgrade the OS only once at the beginning. Use a modern way to check the signed repository. Downgrade the base repo to ubuntu:20.04 to address the issues on older docker versions. Upgrade golang version to address golang vulnerabilities. [#51504](https://github.com/ClickHouse/ClickHouse/pull/51504) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#51572](https://github.com/ClickHouse/ClickHouse/issues/51572): This a follow-up for [#51504](https://github.com/ClickHouse/ClickHouse/issues/51504), the cleanup was lost during refactoring. [#51564](https://github.com/ClickHouse/ClickHouse/pull/51564) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Query Cache: Try to fix bad cast from ColumnConst to ColumnVector [#50704](https://github.com/ClickHouse/ClickHouse/pull/50704) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix type of LDAP server params hash in cache entry [#50865](https://github.com/ClickHouse/ClickHouse/pull/50865) ([Julian Maicher](https://github.com/jmaicher)). +* Fallback to parsing big integer from String instead of exception in Parquet format [#50873](https://github.com/ClickHouse/ClickHouse/pull/50873) ([Kruglov Pavel](https://github.com/Avogar)). +* Do not apply projection if read-in-order was enabled. [#50923](https://github.com/ClickHouse/ClickHouse/pull/50923) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix race azure blob storage iterator [#50936](https://github.com/ClickHouse/ClickHouse/pull/50936) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix ineffective query cache for SELECTs with subqueries [#51132](https://github.com/ClickHouse/ClickHouse/pull/51132) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix fuzzer failure in ActionsDAG [#51301](https://github.com/ClickHouse/ClickHouse/pull/51301) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Fix ParallelReadBuffer seek [#50820](https://github.com/ClickHouse/ClickHouse/pull/50820) ([Michael Kolupaev](https://github.com/al13n321)). 
+ diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 307ed97068f..5c8dd0d2481 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,3 +1,4 @@ +v23.5.4.25-stable 2023-06-29 v23.5.3.24-stable 2023-06-17 v23.5.2.7-stable 2023-06-10 v23.5.1.3174-stable 2023-06-09 From 44246363e236bcdcfcd928b855b00a1fa1ba932c Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 29 Jun 2023 14:53:12 +0200 Subject: [PATCH 1011/1997] Fix test --- src/Databases/DatabaseFilesystem.cpp | 8 +++----- .../0_stateless/02722_database_filesystem.sh | 16 ++++++++-------- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/src/Databases/DatabaseFilesystem.cpp b/src/Databases/DatabaseFilesystem.cpp index 001aa1f9ef6..7eaf474eea0 100644 --- a/src/Databases/DatabaseFilesystem.cpp +++ b/src/Databases/DatabaseFilesystem.cpp @@ -23,7 +23,7 @@ namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int UNKNOWN_TABLE; - extern const int DATABASE_ACCESS_DENIED; + extern const int PATH_ACCESS_DENIED; extern const int BAD_ARGUMENTS; extern const int FILE_DOESNT_EXIST; } @@ -76,7 +76,7 @@ bool DatabaseFilesystem::checkTableFilePath(const std::string & table_path, Cont if (check_path && !fileOrSymlinkPathStartsWith(table_path, user_files_path)) { if (throw_on_error) - throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "File is not inside {}", user_files_path); + throw Exception(ErrorCodes::PATH_ACCESS_DENIED, "File is not inside {}", user_files_path); else return false; } @@ -175,9 +175,7 @@ StoragePtr DatabaseFilesystem::tryGetTable(const String & name, ContextPtr conte { /// Ignore exceptions thrown by TableFunctionFile, which indicate that there is no table /// see tests/02722_database_filesystem.sh for more details. 
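 /// Illustration, not part of the patch: after this change only FILE_DOESNT_EXIST
 /// is translated into "no such table"; BAD_ARGUMENTS and PATH_ACCESS_DENIED now
 /// propagate to the client, which is why the test below starts expecting
 /// "Code: 481" and "Code: 60" where it previously expected "Code: 291" and "Code: 107".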
- if (e.code() == ErrorCodes::BAD_ARGUMENTS - || e.code() == ErrorCodes::DATABASE_ACCESS_DENIED - || e.code() == ErrorCodes::FILE_DOESNT_EXIST) + if (e.code() == ErrorCodes::FILE_DOESNT_EXIST) { return nullptr; } diff --git a/tests/queries/0_stateless/02722_database_filesystem.sh b/tests/queries/0_stateless/02722_database_filesystem.sh index 4ff659ee746..3b7a41bb39e 100755 --- a/tests/queries/0_stateless/02722_database_filesystem.sh +++ b/tests/queries/0_stateless/02722_database_filesystem.sh @@ -40,31 +40,31 @@ ${CLICKHOUSE_LOCAL} -q "SELECT COUNT(*) FROM \"${tmp_dir}/tmp.csv\"" ################# echo "Test 2: check DatabaseFilesystem access rights and errors handling on server" # DATABASE_ACCESS_DENIED: Allows list files only inside user_files -${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`../tmp.csv\`;" 2>&1| grep -F "Code: 291" > /dev/null && echo "OK" -${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`/tmp/tmp.csv\`;" 2>&1| grep -F "Code: 291" > /dev/null && echo "OK" +${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`../tmp.csv\`;" 2>&1| grep -F "Code: 481" > /dev/null && echo "OK" || echo 'FAIL' ||: +${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`/tmp/tmp.csv\`;" 2>&1| grep -F "Code: 481" > /dev/null && echo "OK" || echo 'FAIL' ||: ${CLICKHOUSE_CLIENT} --multiline --multiquery --query """ USE test1; SELECT COUNT(*) FROM \"../${tmp_dir}/tmp.csv\"; -""" 2>&1| grep -F "Code: 291" > /dev/null && echo "OK" -${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`../../../../../../tmp.csv\`;" 2>&1| grep -F "Code: 291" > /dev/null && echo "OK" +""" 2>&1| grep -F "Code: 481" > /dev/null && echo "OK" || echo 'FAIL' ||: +${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`../../../../../../tmp.csv\`;" 2>&1| grep -F "Code: 481" > /dev/null && echo "OK" || echo 'FAIL' ||: # BAD_ARGUMENTS: path should be inside user_files ${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ DROP DATABASE IF EXISTS test2; CREATE DATABASE test2 ENGINE = Filesystem('/tmp'); -""" 2>&1| grep -F "Code: 36" > /dev/null && echo "OK" +""" 2>&1| grep -F "Code: 36" > /dev/null && echo "OK" || echo 'FAIL' ||: # BAD_ARGUMENTS: .../user_files/relative_unknown_dir does not exists ${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ DROP DATABASE IF EXISTS test2; CREATE DATABASE test2 ENGINE = Filesystem('relative_unknown_dir'); -""" 2>&1| grep -F "Code: 36" > /dev/null && echo "OK" +""" 2>&1| grep -F "Code: 36" > /dev/null && echo "OK" || echo 'FAIL' ||: # FILE_DOESNT_EXIST: unknown file -${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`tmp2.csv\`;" 2>&1| grep -F "Code: 107" > /dev/null && echo "OK" +${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`tmp2.csv\`;" 2>&1| grep -F "Code: 60" > /dev/null && echo "OK" || echo 'FAIL' ||: # BAD_ARGUMENTS: Cannot determine the file format by it's extension -${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`${unique_name}/tmp.myext\`;" 2>&1| grep -F "Code: 36" > /dev/null && echo "OK" +${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`${unique_name}/tmp.myext\`;" 2>&1| grep -F "Code: 36" > /dev/null && echo "OK" || echo 'FAIL' ||: # Clean ${CLICKHOUSE_CLIENT} --query "DROP DATABASE test1;" From 6ec85f9faa548e50ec1a6b4a2d7868c9f7e0079a Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Thu, 29 Jun 2023 11:06:28 -0300 Subject: [PATCH 1012/1997] Update settings.md --- docs/en/operations/settings/settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index cff65e049f3..cff13302cdc 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1322,7 +1322,7 @@ Connection pool size for PostgreSQL table engine and database engine. Default value: 16 -## postgresql_connection_pool_size {#postgresql-connection-pool-size} +## postgresql_connection_pool_wait_timeout {#postgresql-connection-pool-wait-timeout} Connection pool push/pop timeout on empty pool for PostgreSQL table engine and database engine. By default it will block on empty pool. From 5f650cc4464b2905531efd10f5b7407238f557ec Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Thu, 29 Jun 2023 16:33:58 +0200 Subject: [PATCH 1013/1997] Update 02808_filesystem_cache_drop_query.sh --- tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh b/tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh index 6388bf5ee0c..5fd426ca1c2 100755 --- a/tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh +++ b/tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh @@ -1,3 +1,4 @@ +#!/usr/bin/env bash # Tags: no-fasttest, no-parallel, no-s3-storage, no-random-settings # set -x From d1bb8f24c0f99ad4af37747306e06691b34c8955 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 29 Jun 2023 08:55:06 +0000 Subject: [PATCH 1014/1997] Retry chroot creation in ZK --- docker/test/stateless/run.sh | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 21cb3168083..914e51a9f66 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -90,6 +90,30 @@ sleep 5 attach_gdb_to_clickhouse || true # FIXME: to not break old builds, clean on 2023-09-01 +function run_with_retry() +{ + set +e + + local total_retries="$1" + shift + + local retry=0 + + until [ "$retry" -ge "$total_retries" ] + do + if "$@"; then + set -e + return + else + retry=$((retry + 1)) + sleep 3 + fi + done + + echo "Command '$*' failed after $total_retries retries, exiting" + exit 1 +} + function run_tests() { set -x @@ -138,7 +162,8 @@ function run_tests() ADDITIONAL_OPTIONS+=('--report-logs-stats') clickhouse-test "00001_select_1" > /dev/null ||: - clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')" ||: + + run_with_retry 5 clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')" set +e clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ From 65ecf79a0ffedf142902f5ece9db6f9d9954496c Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Thu, 29 Jun 2023 17:52:59 +0200 Subject: [PATCH 1015/1997] Update getOrCreateDiskFromAST.cpp --- src/Disks/getOrCreateDiskFromAST.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Disks/getOrCreateDiskFromAST.cpp b/src/Disks/getOrCreateDiskFromAST.cpp index 691a51d8b48..81d5b7372f3 100644 --- a/src/Disks/getOrCreateDiskFromAST.cpp +++ b/src/Disks/getOrCreateDiskFromAST.cpp @@ -40,8 +40,8 @@ namespace { static constexpr std::string_view custom_disk_prefix = "disk_"; - if (disk_name.size() <= custom_disk_prefix.size() 
|| !disk_name.starts_with(custom_disk_prefix)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid disk name: {}", disk_name); + if (function.name.size() <= custom_disk_prefix.size() || !function.name.starts_with(custom_disk_prefix)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid disk name: {}", function.name); disk_name = function.name.substr(custom_disk_prefix.size()); } From 4ee094cab1a4aed4091679e54e6ac146b2423693 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Thu, 29 Jun 2023 12:13:26 -0400 Subject: [PATCH 1016/1997] review comments --- .../table-engines/mergetree-family/mergetree.md | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 1b7f3263ab9..a1c2fbdbe50 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -758,12 +758,14 @@ If you perform the `SELECT` query between merges, you may get expired data. To a ## Disk types -In addition to local block devices, ClickHouse supports other device types through table engines. These are the types: -- [S3](#table_engine-mergetree-s3) -- GCS (also supported using the [S3 table engine](#table_engine-mergetree-s3)) -- [Azure Blob Storage](#table_engine-mergetree-azure-blob-storage) -- [HDFS](#hdfs-storage) -- [Web (read-only)](#web-storage) +In addition to local block devices, ClickHouse supports these storage types: +- [`s3` for S3 and MinIO](#table_engine-mergetree-s3) +- [`gcs` for GCS](/docs/en/integrations/data-ingestion/gcs/index.md/#creating-a-disk) +- [`blob_storage_disk` for Azure Blob Storage](#table_engine-mergetree-azure-blob-storage) +- [`hdfs` for HDFS](#hdfs-storage) +- [`web` for read-only from web](#web-storage) +- [`cache` for local caching](/docs/en/operations/storing-data.md/#using-local-cache) +- [`s3_plain` for backups to S3](/docs/en/operations/backup#backuprestore-using-an-s3-disk) ## Using Multiple Block Devices for Data Storage {#table_engine-mergetree-multiple-volumes} From 843e910309c1ed6d488fce35230a0f658dee33f1 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Thu, 29 Jun 2023 18:24:22 +0200 Subject: [PATCH 1017/1997] impl --- tests/queries/0_stateless/00474_readonly_settings.sh | 2 -- tests/queries/0_stateless/01455_opentelemetry_distributed.sh | 2 -- tests/queries/0_stateless/01526_initial_query_id.sh | 2 -- .../0_stateless/01732_race_condition_storage_join_long.sh | 2 -- tests/queries/0_stateless/01872_initial_query_start_time.sh | 2 -- tests/queries/0_stateless/02030_rocksdb_race_long.sh | 2 -- tests/queries/0_stateless/02151_hash_table_sizes_stats.sh | 2 -- .../0_stateless/02151_hash_table_sizes_stats_distributed.sh | 2 -- .../02377_extend_protocol_with_query_parameters.sh | 2 -- ...empty_blocks_from_ConvertingAggregatedToChunksTransform.sh | 2 -- tests/queries/0_stateless/02473_functions_in_readonly_mode.sh | 4 +--- .../0_stateless/02499_monotonicity_toUnixTimestamp64.sh | 2 -- .../queries/0_stateless/02681_final_excessive_reading_bug.sh | 2 -- tests/queries/1_stateful/00177_memory_bound_merging.sh | 2 -- 14 files changed, 1 insertion(+), 29 deletions(-) diff --git a/tests/queries/0_stateless/00474_readonly_settings.sh b/tests/queries/0_stateless/00474_readonly_settings.sh index 07b78c64a7e..9432579f9e6 100755 --- a/tests/queries/0_stateless/00474_readonly_settings.sh +++ b/tests/queries/0_stateless/00474_readonly_settings.sh @@ -1,7 +1,5 @@ #!/usr/bin/env 
bash -unset CLICKHOUSE_LOG_COMMENT - CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/01455_opentelemetry_distributed.sh b/tests/queries/0_stateless/01455_opentelemetry_distributed.sh index 0dfec6097db..806da902a3c 100755 --- a/tests/queries/0_stateless/01455_opentelemetry_distributed.sh +++ b/tests/queries/0_stateless/01455_opentelemetry_distributed.sh @@ -3,8 +3,6 @@ set -ue -unset CLICKHOUSE_LOG_COMMENT - CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/01526_initial_query_id.sh b/tests/queries/0_stateless/01526_initial_query_id.sh index f9d739b57cd..e77764ee34e 100755 --- a/tests/queries/0_stateless/01526_initial_query_id.sh +++ b/tests/queries/0_stateless/01526_initial_query_id.sh @@ -1,8 +1,6 @@ #!/usr/bin/env bash set -ue -unset CLICKHOUSE_LOG_COMMENT - CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/01732_race_condition_storage_join_long.sh b/tests/queries/0_stateless/01732_race_condition_storage_join_long.sh index 5bb10220f7f..48e726aca9d 100755 --- a/tests/queries/0_stateless/01732_race_condition_storage_join_long.sh +++ b/tests/queries/0_stateless/01732_race_condition_storage_join_long.sh @@ -1,8 +1,6 @@ #!/usr/bin/env bash # Tags: race -unset CLICKHOUSE_LOG_COMMENT - CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/01872_initial_query_start_time.sh b/tests/queries/0_stateless/01872_initial_query_start_time.sh index dbfb89a05a1..6a935602ea4 100755 --- a/tests/queries/0_stateless/01872_initial_query_start_time.sh +++ b/tests/queries/0_stateless/01872_initial_query_start_time.sh @@ -3,8 +3,6 @@ set -ue # this test doesn't need 'current_database = currentDatabase()', -unset CLICKHOUSE_LOG_COMMENT - CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/02030_rocksdb_race_long.sh b/tests/queries/0_stateless/02030_rocksdb_race_long.sh index 88c30852c86..da31861991c 100755 --- a/tests/queries/0_stateless/02030_rocksdb_race_long.sh +++ b/tests/queries/0_stateless/02030_rocksdb_race_long.sh @@ -1,8 +1,6 @@ #!/usr/bin/env bash # Tags: race -unset CLICKHOUSE_LOG_COMMENT - CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/02151_hash_table_sizes_stats.sh b/tests/queries/0_stateless/02151_hash_table_sizes_stats.sh index 4a1eea0a238..fd6e44577d9 100755 --- a/tests/queries/0_stateless/02151_hash_table_sizes_stats.sh +++ b/tests/queries/0_stateless/02151_hash_table_sizes_stats.sh @@ -3,8 +3,6 @@ # shellcheck disable=SC2154 -unset CLICKHOUSE_LOG_COMMENT - CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/02151_hash_table_sizes_stats_distributed.sh b/tests/queries/0_stateless/02151_hash_table_sizes_stats_distributed.sh index 237bbe9edd9..703b2c4357c 100755 --- a/tests/queries/0_stateless/02151_hash_table_sizes_stats_distributed.sh +++ b/tests/queries/0_stateless/02151_hash_table_sizes_stats_distributed.sh @@ -5,8 +5,6 @@ # shellcheck disable=SC2154 -unset CLICKHOUSE_LOG_COMMENT - CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/02377_extend_protocol_with_query_parameters.sh b/tests/queries/0_stateless/02377_extend_protocol_with_query_parameters.sh index e61dc337d2a..71e3b6961f8 100755 --- a/tests/queries/0_stateless/02377_extend_protocol_with_query_parameters.sh +++ b/tests/queries/0_stateless/02377_extend_protocol_with_query_parameters.sh @@ -2,8 +2,6 @@ # shellcheck disable=SC2154 -unset CLICKHOUSE_LOG_COMMENT - CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/02418_do_not_return_empty_blocks_from_ConvertingAggregatedToChunksTransform.sh b/tests/queries/0_stateless/02418_do_not_return_empty_blocks_from_ConvertingAggregatedToChunksTransform.sh index 08c7e18e12c..32693adff24 100755 --- a/tests/queries/0_stateless/02418_do_not_return_empty_blocks_from_ConvertingAggregatedToChunksTransform.sh +++ b/tests/queries/0_stateless/02418_do_not_return_empty_blocks_from_ConvertingAggregatedToChunksTransform.sh @@ -1,8 +1,6 @@ #!/usr/bin/env bash set -ue -unset CLICKHOUSE_LOG_COMMENT - CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/02473_functions_in_readonly_mode.sh b/tests/queries/0_stateless/02473_functions_in_readonly_mode.sh index 5e11704e6ce..da3429a1d3e 100755 --- a/tests/queries/0_stateless/02473_functions_in_readonly_mode.sh +++ b/tests/queries/0_stateless/02473_functions_in_readonly_mode.sh @@ -1,7 +1,5 @@ #!/usr/bin/env bash -unset CLICKHOUSE_LOG_COMMENT - CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh @@ -11,4 +9,4 @@ $CLICKHOUSE_CLIENT --query="SELECT * from format('TSV', '123')" $CLICKHOUSE_CLIENT --readonly=1 --query="SELECT * from numbers(1)" $CLICKHOUSE_CLIENT --readonly=1 --query="SELECT * from format('TSV', '123')" 2>&1 | grep -Fq "Cannot execute query in readonly mode. (READONLY)" && echo 'ERROR' || echo 'OK' -$CLICKHOUSE_CLIENT --readonly=1 --query="INSERT INTO FUNCTION null('x String') (x) FORMAT TSV '123'" 2>&1 | grep -Fq "Cannot execute query in readonly mode. (READONLY)" && echo 'ERROR' || echo 'OK' \ No newline at end of file +$CLICKHOUSE_CLIENT --readonly=1 --query="INSERT INTO FUNCTION null('x String') (x) FORMAT TSV '123'" 2>&1 | grep -Fq "Cannot execute query in readonly mode. 
(READONLY)" && echo 'ERROR' || echo 'OK' diff --git a/tests/queries/0_stateless/02499_monotonicity_toUnixTimestamp64.sh b/tests/queries/0_stateless/02499_monotonicity_toUnixTimestamp64.sh index 5d787aa0d8e..59b6e2abb06 100755 --- a/tests/queries/0_stateless/02499_monotonicity_toUnixTimestamp64.sh +++ b/tests/queries/0_stateless/02499_monotonicity_toUnixTimestamp64.sh @@ -3,8 +3,6 @@ # shellcheck disable=SC2154 -unset CLICKHOUSE_LOG_COMMENT - CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/02681_final_excessive_reading_bug.sh b/tests/queries/0_stateless/02681_final_excessive_reading_bug.sh index a795b9ec5a0..120666d6156 100755 --- a/tests/queries/0_stateless/02681_final_excessive_reading_bug.sh +++ b/tests/queries/0_stateless/02681_final_excessive_reading_bug.sh @@ -3,8 +3,6 @@ # shellcheck disable=SC2154 -unset CLICKHOUSE_LOG_COMMENT - CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh diff --git a/tests/queries/1_stateful/00177_memory_bound_merging.sh b/tests/queries/1_stateful/00177_memory_bound_merging.sh index 774f005b8eb..2c531b064db 100755 --- a/tests/queries/1_stateful/00177_memory_bound_merging.sh +++ b/tests/queries/1_stateful/00177_memory_bound_merging.sh @@ -2,8 +2,6 @@ # shellcheck disable=SC2154 -unset CLICKHOUSE_LOG_COMMENT - CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh From a28728b7e537f26faf0c4785fc9fb96218178594 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 29 Jun 2023 17:40:10 +0300 Subject: [PATCH 1018/1997] Update DatabaseReplicatedWorker.cpp (cherry picked from commit 43fc1af1b3855652536037cc2b5111f5ae1ac983) --- src/Databases/DatabaseReplicatedWorker.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Databases/DatabaseReplicatedWorker.cpp b/src/Databases/DatabaseReplicatedWorker.cpp index 593d0655777..4976f54e417 100644 --- a/src/Databases/DatabaseReplicatedWorker.cpp +++ b/src/Databases/DatabaseReplicatedWorker.cpp @@ -161,7 +161,7 @@ bool DatabaseReplicatedDDLWorker::waitForReplicaToProcessAllEntries(UInt64 timeo LOG_TRACE(log, "Waiting for worker thread to process all entries before {}, current task is {}", max_log, current_task); bool processed = wait_current_task_change.wait_for(lock, std::chrono::milliseconds(timeout_ms), [&]() { - return zookeeper->expired() || current_task == max_log || stop_flag; + return zookeeper->expired() || current_task >= max_log || stop_flag; }); if (!processed) From 5b21a58df47ff89811d73e6412522e2d659c1c62 Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 29 Jun 2023 17:12:55 +0000 Subject: [PATCH 1019/1997] Do not apply PredicateExpressionsOptimizer for ASOF/ANTI join --- src/Interpreters/PredicateExpressionsOptimizer.cpp | 5 ++++- tests/queries/0_stateless/00976_asof_join_on.reference | 1 + tests/queries/0_stateless/00976_asof_join_on.sql | 5 +++++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/PredicateExpressionsOptimizer.cpp b/src/Interpreters/PredicateExpressionsOptimizer.cpp index 6606e64f689..e64ff34b11f 100644 --- a/src/Interpreters/PredicateExpressionsOptimizer.cpp +++ b/src/Interpreters/PredicateExpressionsOptimizer.cpp @@ -118,7 +118,10 @@ bool PredicateExpressionsOptimizer::tryRewritePredicatesToTables(ASTs & tables_e if (table_element->table_join && 
isLeft(table_element->table_join->as()->kind)) continue; /// Skip right table optimization - if (table_element->table_join && isFull(table_element->table_join->as()->kind)) + if (table_element->table_join && ( + isFull(table_element->table_join->as()->kind) + || table_element->table_join->as()->strictness == JoinStrictness::Asof + || table_element->table_join->as()->strictness == JoinStrictness::Anti)) break; /// Skip left and right table optimization is_rewrite_tables |= tryRewritePredicatesToTable(tables_element[table_pos], tables_predicates[table_pos], diff --git a/tests/queries/0_stateless/00976_asof_join_on.reference b/tests/queries/0_stateless/00976_asof_join_on.reference index 4d1b1273363..433d896426c 100644 --- a/tests/queries/0_stateless/00976_asof_join_on.reference +++ b/tests/queries/0_stateless/00976_asof_join_on.reference @@ -33,3 +33,4 @@ 1 3 1 4 2 1 2 3 2 2 2 3 +1 2 1 2 diff --git a/tests/queries/0_stateless/00976_asof_join_on.sql b/tests/queries/0_stateless/00976_asof_join_on.sql index 8060fb86831..afa125a9271 100644 --- a/tests/queries/0_stateless/00976_asof_join_on.sql +++ b/tests/queries/0_stateless/00976_asof_join_on.sql @@ -23,5 +23,10 @@ SELECT count() FROM A ASOF JOIN B ON A.a == B.b AND A.t != B.t; -- { serverError SELECT A.a, A.t, B.b, B.t FROM A ASOF JOIN B ON A.a == B.b AND A.t < B.t OR A.a == B.b + 1 ORDER BY (A.a, A.t); -- { serverError 48 } +SELECT A.a, A.t, B.b, B.t FROM A +ASOF INNER JOIN (SELECT * FROM B UNION ALL SELECT 1, 3) AS B ON B.t <= A.t AND A.a == B.b +WHERE B.t != 3 ORDER BY (A.a, A.t) +; + DROP TABLE A; DROP TABLE B; From f12c257dc95de8188e2be8a70344b0bd0b1f6204 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 29 Jun 2023 19:25:46 +0200 Subject: [PATCH 1020/1997] Fix --- src/Interpreters/Cache/FileCache.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index dc1f012f5c6..463d0d6605c 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -1017,7 +1017,8 @@ void FileCache::deactivateBackgroundOperations() metadata.cancelDownload(); for (auto & thread : download_threads) - thread.join(); + if (thread.joinable()) + thread.join(); } void FileCache::cleanup() From 7d416cbc60c89ce34e03e7e9de5045e0120af119 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Thu, 29 Jun 2023 13:39:39 -0400 Subject: [PATCH 1021/1997] add exception docs for hasAll --- .../sql-reference/functions/array-functions.md | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index bdd1445c990..7f2b8f3c605 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -230,13 +230,15 @@ hasAll(set, subset) **Arguments** - `set` – Array of any type with a set of elements. -- `subset` – Array of any type with elements that should be tested to be a subset of `set`. +- `subset` – Array of any type that shares a common supertype with `set` containing elements that should be tested to be a subset of `set`. **Return values** - `1`, if `set` contains all of the elements from `subset`. - `0`, otherwise. +Raises an exception `NO_COMMON_TYPE` if the set and subset elements do not share a common supertype. + **Peculiar properties** - An empty array is a subset of any array. @@ -253,7 +255,7 @@ hasAll(set, subset) `SELECT hasAll(['a', 'b'], ['a'])` returns 1. 
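(Illustrative aside: `[1]` is an `Array(UInt8)` and `['a']` an `Array(String)`; `UInt8` and `String` have no common supertype, which is why the example below now raises an exception instead of returning 0. By contrast, `SELECT hasAll([1, 2.5], [1])` still returns 1, because `UInt8` and `Float64` share the supertype `Float64`.)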
-`SELECT hasAll([1], ['a'])` returns 0.
+`SELECT hasAll([1], ['a'])` raises a `NO_COMMON_TYPE` exception.

 `SELECT hasAll([[1, 2], [3, 4]], [[1, 2], [3, 5]])` returns 0.

@@ -268,13 +270,15 @@ hasAny(array1, array2)

 **Arguments**

 - `array1` – Array of any type with a set of elements.
-- `array2` – Array of any type with a set of elements.
+- `array2` – Array of any type that shares a common supertype with `array1`.

 **Return values**

 - `1`, if `array1` and `array2` have one similar element at least.
 - `0`, otherwise.

+Raises an exception `NO_COMMON_TYPE` if the array1 and array2 elements do not share a common supertype.
+
 **Peculiar properties**

 - `Null` processed as a value.
@@ -288,7 +292,7 @@ hasAny(array1, array2)

 `SELECT hasAny([-128, 1., 512], [1])` returns `1`.

-`SELECT hasAny([[1, 2], [3, 4]], ['a', 'c'])` returns `0`.
+`SELECT hasAny([[1, 2], [3, 4]], ['a', 'c'])` raises a `NO_COMMON_TYPE` exception.

 `SELECT hasAll([[1, 2], [3, 4]], [[1, 2], [1, 2]])` returns `1`.

@@ -318,6 +322,8 @@ For Example:

 - `1`, if `array1` contains `array2`.
 - `0`, otherwise.

+Raises an exception `NO_COMMON_TYPE` if the array1 and array2 elements do not share a common supertype.
+
 **Peculiar properties**

 - The function will return `1` if `array2` is empty.
@@ -339,6 +345,9 @@ For Example:

 `SELECT hasSubstr(['a', 'b' , 'c'], ['a', 'c'])` returns 0.

 `SELECT hasSubstr([[1, 2], [3, 4], [5, 6]], [[1, 2], [3, 4]])` returns 1.
+
+`SELECT hasSubstr([1, 2, NULL, 3, 4], ['a'])` raises a `NO_COMMON_TYPE` exception.
+

 ## indexOf(arr, x)

From a8172ca5d2557673d370de4f4551f756ff0722a7 Mon Sep 17 00:00:00 2001
From: DanRoscigno
Date: Thu, 29 Jun 2023 14:47:23 -0400
Subject: [PATCH 1022/1997] update spelling list

---
 utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt
index 526e674a154..f25d082e5a6 100644
--- a/utils/check-style/aspell-ignore/en/aspell-dict.txt
+++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt
@@ -2264,6 +2264,7 @@ summap
 summingmergetree
 sumwithoverflow
 superaggregates
+supertype
 supremum
 symlink
 symlinks

From 90129b92c1dd47c0aa86cd5ed7c8107758a74b51 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Thu, 29 Jun 2023 21:19:41 +0200
Subject: [PATCH 1023/1997] Update 23.6 changelog

---
 CHANGELOG.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a2e7b021081..c1e0dba4465 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -21,7 +21,7 @@
 * Added `Overlay` database engine to combine multiple databases into one. Added `Filesystem` database engine to represent a directory in the filesystem as a set of implicitly available tables with auto-detected formats and structures. A new `S3` database engine allows to read-only interact with s3 storage by representing a prefix as a set of tables. A new `HDFS` database engine allows to interact with HDFS storage in the same way. [#48821](https://github.com/ClickHouse/ClickHouse/pull/48821) ([alekseygolub](https://github.com/alekseygolub)).
 * The function `transform` as well as `CASE` with value matching started to support all data types. This closes [#29730](https://github.com/ClickHouse/ClickHouse/issues/29730). This closes [#32387](https://github.com/ClickHouse/ClickHouse/issues/32387). This closes [#50827](https://github.com/ClickHouse/ClickHouse/issues/50827). This closes [#31336](https://github.com/ClickHouse/ClickHouse/issues/31336).
This closes [#40493](https://github.com/ClickHouse/ClickHouse/issues/40493). [#51351](https://github.com/ClickHouse/ClickHouse/pull/51351) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Added option `--rename_files_after_processing `. This closes [#34207](https://github.com/ClickHouse/ClickHouse/issues/34207). [#49626](https://github.com/ClickHouse/ClickHouse/pull/49626) ([alekseygolub](https://github.com/alekseygolub)). -* Add support for `APPEND` modifier in `INTO OUTFILE` clause. Suggest using `APPEND` or `TRUNCATE` for `INTO OUTFILE` when file exists. [#50950](https://github.com/ClickHouse/ClickHouse/pull/50950) ([alekar](https://github.com/alekar)). +* Add support for `TRUNCATE` modifier in `INTO OUTFILE` clause. Suggest using `APPEND` or `TRUNCATE` for `INTO OUTFILE` when file exists. [#50950](https://github.com/ClickHouse/ClickHouse/pull/50950) ([alekar](https://github.com/alekar)). * Add table engine `Redis` and table function `redis`. It allows querying external Redis servers. [#50150](https://github.com/ClickHouse/ClickHouse/pull/50150) ([JackyWoo](https://github.com/JackyWoo)). * Allow to skip empty files in file/s3/url/hdfs table functions using settings `s3_skip_empty_files`, `hdfs_skip_empty_files`, `engine_file_skip_empty_files`, `engine_url_skip_empty_files`. [#50364](https://github.com/ClickHouse/ClickHouse/pull/50364) ([Kruglov Pavel](https://github.com/Avogar)). * Add a new setting named `use_mysql_types_in_show_columns` to alter the `SHOW COLUMNS` SQL statement to display MySQL equivalent types when a client is connected via the MySQL compatibility port. [#49577](https://github.com/ClickHouse/ClickHouse/pull/49577) ([Thomas Panetti](https://github.com/tpanetti)). @@ -40,12 +40,12 @@ * Make multiple list requests to ZooKeeper in parallel to speed up reading from system.zookeeper table. [#51042](https://github.com/ClickHouse/ClickHouse/pull/51042) ([Alexander Gololobov](https://github.com/davenger)). * Speedup initialization of DateTime lookup tables for time zones. This should reduce startup/connect time of clickhouse-client especially in debug build as it is rather heavy. [#51347](https://github.com/ClickHouse/ClickHouse/pull/51347) ([Alexander Gololobov](https://github.com/davenger)). * Fix data lakes slowness because of synchronous head requests. (Related to Iceberg/Deltalake/Hudi being slow with a lot of files). [#50976](https://github.com/ClickHouse/ClickHouse/pull/50976) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Do not replicate `ALTER PARTITION` queries and mutations through `Replicated` database if it has only one shard and the underlying table is `ReplicatedMergeTree`. [#51049](https://github.com/ClickHouse/ClickHouse/pull/51049) ([Alexander Tokmakov](https://github.com/tavplubix)). * Do not read all the columns from right GLOBAL JOIN table. [#50721](https://github.com/ClickHouse/ClickHouse/pull/50721) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). #### Experimental Feature * Support parallel replicas with the analyzer. [#50441](https://github.com/ClickHouse/ClickHouse/pull/50441) ([Raúl Marín](https://github.com/Algunenano)). * Add random sleep before large merges/mutations execution to split load more evenly between replicas in case of zero-copy replication. [#51282](https://github.com/ClickHouse/ClickHouse/pull/51282) ([alesapin](https://github.com/alesapin)). 
+* Do not replicate `ALTER PARTITION` queries and mutations through `Replicated` database if it has only one shard and the underlying table is `ReplicatedMergeTree`. [#51049](https://github.com/ClickHouse/ClickHouse/pull/51049) ([Alexander Tokmakov](https://github.com/tavplubix)). #### Improvement * Relax the thresholds for "too many parts" to be more modern. Return the backpressure during long-running insert queries. [#50856](https://github.com/ClickHouse/ClickHouse/pull/50856) ([Alexey Milovidov](https://github.com/alexey-milovidov)). From 49ab480d40f268df1e597dfe14426eb5416a5fd6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 29 Jun 2023 23:09:58 +0300 Subject: [PATCH 1024/1997] Update 00416_pocopatch_progress_in_http_headers.sh --- .../0_stateless/00416_pocopatch_progress_in_http_headers.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh b/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh index ad7e89a7357..2b0cae3c1d4 100755 --- a/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh +++ b/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh @@ -9,7 +9,7 @@ RETRIES=5 result="" lines_expected=4 counter=0 -while [ $counter -lt $RETRIES ] && [ $(echo "$result" | wc -l) != "$lines_expected" ]; do +while [ $counter -lt $RETRIES ] && [ "$(echo "$result" | wc -l)" != "$lines_expected" ]; do result=$(${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=5&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0" -d 'SELECT max(number) FROM numbers(10)' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Progress|^[0-9]') let counter=counter+1 done @@ -18,7 +18,7 @@ echo "$result" result="" lines_expected=12 counter=0 -while [ $counter -lt $RETRIES ] && [ $(echo "$result" | wc -l) != "$lines_expected" ]; do +while [ $counter -lt $RETRIES ] && [ "$(echo "$result" | wc -l)" != "$lines_expected" ]; do result=$(${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0&output_format_parallel_formatting=0" -d 'SELECT number FROM numbers(10)' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Progress|^[0-9]') let counter=counter+1 done From f48de18640467e6302c1bd48799e386d4aa39437 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 29 Jun 2023 20:59:01 +0000 Subject: [PATCH 1025/1997] Update version_date.tsv and changelogs after v23.4.5.22-stable --- docs/changelogs/v23.4.5.22-stable.md | 27 +++++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 2 ++ 2 files changed, 29 insertions(+) create mode 100644 docs/changelogs/v23.4.5.22-stable.md diff --git a/docs/changelogs/v23.4.5.22-stable.md b/docs/changelogs/v23.4.5.22-stable.md new file mode 100644 index 00000000000..2d61f5b11cf --- /dev/null +++ b/docs/changelogs/v23.4.5.22-stable.md @@ -0,0 +1,27 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.4.5.22-stable (0ced5d6a8da) FIXME as compared to v23.4.4.16-stable (747ba4fc6a0) + +#### Build/Testing/Packaging Improvement +* Backported in [#51530](https://github.com/ClickHouse/ClickHouse/issues/51530): Split huge `RUN` in Dockerfile into smaller conditional. Install the necessary tools on demand in the same `RUN` layer, and remove them after that. Upgrade the OS only once at the beginning. Use a modern way to check the signed repository. 
Downgrade the base repo to ubuntu:20.04 to address the issues on older docker versions. Upgrade golang version to address golang vulnerabilities. [#51504](https://github.com/ClickHouse/ClickHouse/pull/51504) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#51570](https://github.com/ClickHouse/ClickHouse/issues/51570): This a follow-up for [#51504](https://github.com/ClickHouse/ClickHouse/issues/51504), the cleanup was lost during refactoring. [#51564](https://github.com/ClickHouse/ClickHouse/pull/51564) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix broken index analysis when binary operator contains a null constant argument [#50177](https://github.com/ClickHouse/ClickHouse/pull/50177) ([Amos Bird](https://github.com/amosbird)). +* Fix reconnecting of HTTPS session when target host IP was changed [#50240](https://github.com/ClickHouse/ClickHouse/pull/50240) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Fix incorrect constant folding [#50536](https://github.com/ClickHouse/ClickHouse/pull/50536) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix type of LDAP server params hash in cache entry [#50865](https://github.com/ClickHouse/ClickHouse/pull/50865) ([Julian Maicher](https://github.com/jmaicher)). +* Fallback to parsing big integer from String instead of exception in Parquet format [#50873](https://github.com/ClickHouse/ClickHouse/pull/50873) ([Kruglov Pavel](https://github.com/Avogar)). +* Do not apply projection if read-in-order was enabled. [#50923](https://github.com/ClickHouse/ClickHouse/pull/50923) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix fuzzer failure in ActionsDAG [#51301](https://github.com/ClickHouse/ClickHouse/pull/51301) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Increase max array size in group bitmap [#50620](https://github.com/ClickHouse/ClickHouse/pull/50620) ([Kruglov Pavel](https://github.com/Avogar)). 
+ diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 5c8dd0d2481..4ca5d1d7497 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -2,10 +2,12 @@ v23.5.4.25-stable 2023-06-29 v23.5.3.24-stable 2023-06-17 v23.5.2.7-stable 2023-06-10 v23.5.1.3174-stable 2023-06-09 +v23.4.5.22-stable 2023-06-29 v23.4.4.16-stable 2023-06-17 v23.4.3.48-stable 2023-06-12 v23.4.2.11-stable 2023-05-02 v23.4.1.1943-stable 2023-04-27 +v23.3.7.5-lts 2023-06-29 v23.3.6.7-lts 2023-06-28 v23.3.5.9-lts 2023-06-22 v23.3.4.17-lts 2023-06-17 From a0bf1708837667c1f3d00dded295ec5ade7a4ac9 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 29 Jun 2023 21:01:19 +0000 Subject: [PATCH 1026/1997] Update version_date.tsv and changelogs after v23.3.7.5-lts --- docs/changelogs/v23.3.7.5-lts.md | 16 ++++++++++++++++ utils/list-versions/version_date.tsv | 2 ++ 2 files changed, 18 insertions(+) create mode 100644 docs/changelogs/v23.3.7.5-lts.md diff --git a/docs/changelogs/v23.3.7.5-lts.md b/docs/changelogs/v23.3.7.5-lts.md new file mode 100644 index 00000000000..7a5fd5a19b6 --- /dev/null +++ b/docs/changelogs/v23.3.7.5-lts.md @@ -0,0 +1,16 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.3.7.5-lts (bc683c11c92) FIXME as compared to v23.3.6.7-lts (7e3f0a271b7) + +#### Build/Testing/Packaging Improvement +* Backported in [#51568](https://github.com/ClickHouse/ClickHouse/issues/51568): This a follow-up for [#51504](https://github.com/ClickHouse/ClickHouse/issues/51504), the cleanup was lost during refactoring. [#51564](https://github.com/ClickHouse/ClickHouse/pull/51564) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix fuzzer failure in ActionsDAG [#51301](https://github.com/ClickHouse/ClickHouse/pull/51301) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 5c8dd0d2481..4ca5d1d7497 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -2,10 +2,12 @@ v23.5.4.25-stable 2023-06-29 v23.5.3.24-stable 2023-06-17 v23.5.2.7-stable 2023-06-10 v23.5.1.3174-stable 2023-06-09 +v23.4.5.22-stable 2023-06-29 v23.4.4.16-stable 2023-06-17 v23.4.3.48-stable 2023-06-12 v23.4.2.11-stable 2023-05-02 v23.4.1.1943-stable 2023-04-27 +v23.3.7.5-lts 2023-06-29 v23.3.6.7-lts 2023-06-28 v23.3.5.9-lts 2023-06-22 v23.3.4.17-lts 2023-06-17 From 3c4491b706e0cbd89086db845eb582e1227f3a74 Mon Sep 17 00:00:00 2001 From: Manas Alekar Date: Thu, 29 Jun 2023 14:31:40 -0700 Subject: [PATCH 1027/1997] Ignore APPEND and TRUNCATE modifiers if file does not exist. 
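A sketch of the intended flag selection (an illustrative helper, not code from this patch; the real change follows in the diff below): the `APPEND`/`TRUNCATE` modifiers only matter when the target file already exists, otherwise the file is created, and an existing file opened without either modifier still fails thanks to `O_EXCL`.

```cpp
#include <fcntl.h>

// Illustrative helper: how the out-file open flags are chosen after this change.
static int chooseOutFileFlags(bool file_exists, bool is_append, bool is_truncate)
{
    int flags = O_WRONLY | O_EXCL;
    if (file_exists && is_append)
        flags |= O_APPEND; // keep existing contents, write at the end
    else if (file_exists && is_truncate)
        flags |= O_TRUNC;  // discard existing contents
    else
        flags |= O_CREAT;  // no file yet (or no modifier): create it; with
                           // O_EXCL an already existing file still errors out
    return flags;
}
```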
--- src/Client/ClientBase.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 34b3b1e228a..a8bdc5d0b08 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -575,9 +575,11 @@ try } auto flags = O_WRONLY | O_EXCL; - if (query_with_output->is_outfile_append) + + auto file_exists = fs::exists(out_file); + if (file_exists && query_with_output->is_outfile_append) flags |= O_APPEND; - else if (query_with_output->is_outfile_truncate) + else if (file_exists && query_with_output->is_outfile_truncate) flags |= O_TRUNC; else flags |= O_CREAT; From 42febefa966e89089065ecb6c7691731de4dde5c Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Thu, 29 Jun 2023 21:34:40 +0000 Subject: [PATCH 1028/1997] Try to fix flaky 02210_processors_profile_log --- tests/queries/0_stateless/02210_processors_profile_log.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02210_processors_profile_log.sql b/tests/queries/0_stateless/02210_processors_profile_log.sql index 44e563ef57b..92f6ab94293 100644 --- a/tests/queries/0_stateless/02210_processors_profile_log.sql +++ b/tests/queries/0_stateless/02210_processors_profile_log.sql @@ -15,7 +15,7 @@ SELECT multiIf( -- ExpressionTransform executes sleep(), -- so IProcessor::work() will spend 1 sec. - name = 'ExpressionTransform', elapsed_us>1e6, + name = 'ExpressionTransform', elapsed_us>=1e6, -- SourceFromSingleChunk, that feed data to ExpressionTransform, -- will feed first block and then wait in PortFull. name = 'SourceFromSingleChunk', output_wait_elapsed_us>1e6, From a705b08bd81658e878d7b7d214b057c661bbed69 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Thu, 29 Jun 2023 22:30:15 +0000 Subject: [PATCH 1029/1997] Update reference --- .../0_stateless/02210_processors_profile_log.reference | 6 +++--- tests/queries/0_stateless/02210_processors_profile_log.sql | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/02210_processors_profile_log.reference b/tests/queries/0_stateless/02210_processors_profile_log.reference index 181022d2421..41543d0706a 100644 --- a/tests/queries/0_stateless/02210_processors_profile_log.reference +++ b/tests/queries/0_stateless/02210_processors_profile_log.reference @@ -18,13 +18,13 @@ SELECT multiIf( -- ExpressionTransform executes sleep(), -- so IProcessor::work() will spend 1 sec. - name = 'ExpressionTransform', elapsed_us>1e6, + name = 'ExpressionTransform', elapsed_us>=1e6, -- SourceFromSingleChunk, that feed data to ExpressionTransform, -- will feed first block and then wait in PortFull. - name = 'SourceFromSingleChunk', output_wait_elapsed_us>1e6, + name = 'SourceFromSingleChunk', output_wait_elapsed_us>=1e6, -- NullSource/LazyOutputFormatLazyOutputFormat are the outputs -- so they cannot starts to execute before sleep(1) will be executed. - input_wait_elapsed_us>1e6) + input_wait_elapsed_us>=1e6) elapsed, input_rows, input_bytes, diff --git a/tests/queries/0_stateless/02210_processors_profile_log.sql b/tests/queries/0_stateless/02210_processors_profile_log.sql index 92f6ab94293..a15ed26fd67 100644 --- a/tests/queries/0_stateless/02210_processors_profile_log.sql +++ b/tests/queries/0_stateless/02210_processors_profile_log.sql @@ -18,10 +18,10 @@ SELECT name = 'ExpressionTransform', elapsed_us>=1e6, -- SourceFromSingleChunk, that feed data to ExpressionTransform, -- will feed first block and then wait in PortFull. 
- name = 'SourceFromSingleChunk', output_wait_elapsed_us>1e6, + name = 'SourceFromSingleChunk', output_wait_elapsed_us>=1e6, -- NullSource/LazyOutputFormatLazyOutputFormat are the outputs -- so they cannot starts to execute before sleep(1) will be executed. - input_wait_elapsed_us>1e6) + input_wait_elapsed_us>=1e6) elapsed, input_rows, input_bytes, From fd5f6ee2f99989f9ab5c80ecbce1a0b6ab7ae109 Mon Sep 17 00:00:00 2001 From: Ramazan Polat Date: Fri, 30 Jun 2023 04:32:41 +0300 Subject: [PATCH 1030/1997] Update parts.md Fix misalignment of `active` --- docs/en/operations/system-tables/parts.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/system-tables/parts.md b/docs/en/operations/system-tables/parts.md index 9159d1e9284..5829e5ad313 100644 --- a/docs/en/operations/system-tables/parts.md +++ b/docs/en/operations/system-tables/parts.md @@ -27,7 +27,7 @@ Columns: Data storing format is controlled by the `min_bytes_for_wide_part` and `min_rows_for_wide_part` settings of the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table. - - `active` ([UInt8](../../sql-reference/data-types/int-uint.md)) – Flag that indicates whether the data part is active. If a data part is active, it’s used in a table. Otherwise, it’s deleted. Inactive data parts remain after merging. +- `active` ([UInt8](../../sql-reference/data-types/int-uint.md)) – Flag that indicates whether the data part is active. If a data part is active, it’s used in a table. Otherwise, it’s deleted. Inactive data parts remain after merging. - `marks` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The number of marks. To get the approximate number of rows in a data part, multiply `marks` by the index granularity (usually 8192) (this hint does not work for adaptive granularity). From 901089bfab56e3d3cd905abf671ca1d774634a4d Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Fri, 30 Jun 2023 05:15:41 +0000 Subject: [PATCH 1031/1997] Remove comment --- src/Common/ProgressIndication.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Common/ProgressIndication.cpp b/src/Common/ProgressIndication.cpp index 29766fd1a14..c9068720f8f 100644 --- a/src/Common/ProgressIndication.cpp +++ b/src/Common/ProgressIndication.cpp @@ -166,8 +166,6 @@ void ProgressIndication::writeProgress(WriteBufferFromFileDescriptor & message) if (memory_usage > 0) profiling_msg_builder << ", " << formatReadableSizeWithDecimalSuffix(memory_usage) << " RAM"; - // profiling_msg_builder << ", " << formatReadableSizeWithDecimalSuffix(memory_usage) << ", " - // << formatReadableSizeWithDecimalSuffix(peak_usage) << " Peak RAM"; if (max_host_usage < memory_usage) profiling_msg_builder << ", " << formatReadableSizeWithDecimalSuffix(max_host_usage) << " max/host"; From f497ba88c2ea5a0088d955ceec78e271a8bf2e94 Mon Sep 17 00:00:00 2001 From: Alex Cheng Date: Fri, 30 Jun 2023 14:52:47 +0800 Subject: [PATCH 1032/1997] correct an exception message. 
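The first argument of the `nested` function carries the element names, so the first data array is argument 2; the offsets check compares that array against every later one. A sketch of a query that trips the check, assuming `nested` is callable directly and using hypothetical values:

    -- The two data arrays have different lengths, so their offsets differ and the
    -- SIZES_OF_ARRAYS_DONT_MATCH exception should now report argument 2, not argument 1.
    SELECT nested(['k', 'v'], [1, 2], [10, 20, 30]);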
--- src/Functions/nested.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/nested.cpp b/src/Functions/nested.cpp index 0a094176a55..679bb4f73d8 100644 --- a/src/Functions/nested.cpp +++ b/src/Functions/nested.cpp @@ -119,7 +119,7 @@ public: if (!lhs_array->hasEqualOffsets(*rhs_array)) throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, - "The argument 1 and argument {} of function {} have different array offsets", + "The argument 2 and argument {} of function {} have different array offsets", i + 1, getName()); From 710911f7dc18ecaa5a400d4efe6587a9b656d937 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 30 Jun 2023 11:39:08 +0300 Subject: [PATCH 1033/1997] Update CHANGELOG.md --- CHANGELOG.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c1e0dba4465..bf6b309ef2c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,7 +18,6 @@ * CGroups metrics related to CPU are replaced with one metric, `CGroupMaxCPU` for better usability. The `Normalized` CPU usage metrics will be normalized to CGroups limits instead of the total number of CPUs when they are set. This closes [#50836](https://github.com/ClickHouse/ClickHouse/issues/50836). [#50835](https://github.com/ClickHouse/ClickHouse/pull/50835) ([Alexey Milovidov](https://github.com/alexey-milovidov)). #### New Feature -* Added `Overlay` database engine to combine multiple databases into one. Added `Filesystem` database engine to represent a directory in the filesystem as a set of implicitly available tables with auto-detected formats and structures. A new `S3` database engine allows to read-only interact with s3 storage by representing a prefix as a set of tables. A new `HDFS` database engine allows to interact with HDFS storage in the same way. [#48821](https://github.com/ClickHouse/ClickHouse/pull/48821) ([alekseygolub](https://github.com/alekseygolub)). * The function `transform` as well as `CASE` with value matching started to support all data types. This closes [#29730](https://github.com/ClickHouse/ClickHouse/issues/29730). This closes [#32387](https://github.com/ClickHouse/ClickHouse/issues/32387). This closes [#50827](https://github.com/ClickHouse/ClickHouse/issues/50827). This closes [#31336](https://github.com/ClickHouse/ClickHouse/issues/31336). This closes [#40493](https://github.com/ClickHouse/ClickHouse/issues/40493). [#51351](https://github.com/ClickHouse/ClickHouse/pull/51351) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Added option `--rename_files_after_processing `. This closes [#34207](https://github.com/ClickHouse/ClickHouse/issues/34207). [#49626](https://github.com/ClickHouse/ClickHouse/pull/49626) ([alekseygolub](https://github.com/alekseygolub)). * Add support for `TRUNCATE` modifier in `INTO OUTFILE` clause. Suggest using `APPEND` or `TRUNCATE` for `INTO OUTFILE` when file exists. [#50950](https://github.com/ClickHouse/ClickHouse/pull/50950) ([alekar](https://github.com/alekar)). @@ -26,7 +25,7 @@ * Allow to skip empty files in file/s3/url/hdfs table functions using settings `s3_skip_empty_files`, `hdfs_skip_empty_files`, `engine_file_skip_empty_files`, `engine_url_skip_empty_files`. [#50364](https://github.com/ClickHouse/ClickHouse/pull/50364) ([Kruglov Pavel](https://github.com/Avogar)). * Add a new setting named `use_mysql_types_in_show_columns` to alter the `SHOW COLUMNS` SQL statement to display MySQL equivalent types when a client is connected via the MySQL compatibility port. 
[#49577](https://github.com/ClickHouse/ClickHouse/pull/49577) ([Thomas Panetti](https://github.com/tpanetti)). * Clickhouse-client can now be called with a connection string instead of "--host", "--port", "--user" etc. [#50689](https://github.com/ClickHouse/ClickHouse/pull/50689) ([Alexey Gerasimchuck](https://github.com/Demilivor)). -* Add setting `session_timezone`, it is used as default timezone for session when not explicitly specified. [#44149](https://github.com/ClickHouse/ClickHouse/pull/44149) ([Andrey Zvonov](https://github.com/zvonand)). +* Add setting `session_timezone`; it is used as the default timezone for a session when not explicitly specified. [#44149](https://github.com/ClickHouse/ClickHouse/pull/44149) ([Andrey Zvonov](https://github.com/zvonand)). * Codec DEFLATE_QPL is now controlled via server setting "enable_deflate_qpl_codec" (default: false) instead of setting "allow_experimental_codecs". This marks DEFLATE_QPL non-experimental. [#50775](https://github.com/ClickHouse/ClickHouse/pull/50775) ([Robert Schulze](https://github.com/rschu1ze)). #### Performance Improvement From 8f240ffcce6230636de57fe8a8638df3a29ac5e3 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 30 Jun 2023 10:50:44 +0200 Subject: [PATCH 1034/1997] tests: fix 02050_client_profile_events flakiness Signed-off-by: Azat Khuzhin --- tests/queries/0_stateless/02050_client_profile_events.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02050_client_profile_events.sh b/tests/queries/0_stateless/02050_client_profile_events.sh index dce0c80525a..05e48de771d 100755 --- a/tests/queries/0_stateless/02050_client_profile_events.sh +++ b/tests/queries/0_stateless/02050_client_profile_events.sh @@ -25,7 +25,7 @@ profile_events="$($CLICKHOUSE_CLIENT --max_block_size 1 --print-profile-events - test "$profile_events" -gt 1 && echo OK || echo "FAIL ($profile_events)" echo 'print each 100 ms' -profile_events="$($CLICKHOUSE_CLIENT --max_block_size 1 --print-profile-events --profile-events-delay-ms=100 -q 'select sleep(1) from numbers(2) format Null' |& grep -c 'SelectedRows')" +profile_events="$($CLICKHOUSE_CLIENT --max_block_size 1 --print-profile-events --profile-events-delay-ms=100 -q 'select sleep(0.2) from numbers(10) format Null' |& grep -c 'SelectedRows')" test "$profile_events" -gt 1 && echo OK || echo "FAIL ($profile_events)" echo 'check that ProfileEvents is new for each query' From fe93e687de7f2be7796e1034a553fa9f01040e5d Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Fri, 30 Jun 2023 10:02:24 +0200 Subject: [PATCH 1035/1997] Add comments. --- base/base/hex.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/base/base/hex.h b/base/base/hex.h index 0780e6159a1..937218fec5a 100644 --- a/base/base/hex.h +++ b/base/base/hex.h @@ -176,7 +176,7 @@ namespace impl }; /// Helper template class to convert a value of any supported type to hexadecimal representation and back. - template + template struct HexConversion; template @@ -185,7 +185,7 @@ namespace impl template struct HexConversion> : public HexConversionUInt> {}; - template + template /// Partial specialization here allows not to include in this header. struct HexConversion>> { static const constexpr size_t num_hex_digits = 32; @@ -208,6 +208,8 @@ namespace impl /// Produces a hexadecimal representation of an integer value with leading zeros (for checksums). /// The function supports native integer types, wide::integer, CityHash_v1_0_2::uint128. 
+/// It can be used with signed types as well, however they are written as corresponding unsigned numbers +/// using two's complement (i.e. for example "-1" is written as "0xFF", not as "-0x01"). template void writeHexUIntUppercase(const T & value, char * out) { From 8af96f7a177c02edc8425bc7bcd1aa9dad6f086c Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Fri, 30 Jun 2023 09:49:29 +0000 Subject: [PATCH 1036/1997] Add uuid to uint128 converting --- src/Functions/FunctionsConversion.h | 15 +++++++++++++++ .../02810_convert_uuid_to_uint128.reference | 6 ++++++ .../0_stateless/02810_convert_uuid_to_uint128.sql | 8 ++++++++ 3 files changed, 29 insertions(+) create mode 100644 tests/queries/0_stateless/02810_convert_uuid_to_uint128.reference create mode 100644 tests/queries/0_stateless/02810_convert_uuid_to_uint128.sql diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index 00c4cfe7284..a79fd6c2e1b 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -203,6 +203,21 @@ struct ConvertImpl } } + if constexpr (std::is_same_v && std::is_same_v) + { + static_assert(std::is_same_v, "Can be used only to serialize to ColumnString or ColumnFixedString"); + if constexpr (std::endian::native == std::endian::little) + { + vec_to[i].items[1] = vec_from[i].toUnderType().items[0]; + vec_to[i].items[0] = vec_from[i].toUnderType().items[1]; + } + else + { + vec_to[i] = vec_from[i].toUnderType(); + } + continue; + } + if constexpr (std::is_same_v != std::is_same_v) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, diff --git a/tests/queries/0_stateless/02810_convert_uuid_to_uint128.reference b/tests/queries/0_stateless/02810_convert_uuid_to_uint128.reference new file mode 100644 index 00000000000..3b44d4ba086 --- /dev/null +++ b/tests/queries/0_stateless/02810_convert_uuid_to_uint128.reference @@ -0,0 +1,6 @@ +0 +329871470813054077831677335124932328170 +340282366920938463463374607431768211455 +329871470813054077831677335124932328170 +329871470813054077831677335124932328170 +329871470813054077831677335124932328170 diff --git a/tests/queries/0_stateless/02810_convert_uuid_to_uint128.sql b/tests/queries/0_stateless/02810_convert_uuid_to_uint128.sql new file mode 100644 index 00000000000..5350ef99ed3 --- /dev/null +++ b/tests/queries/0_stateless/02810_convert_uuid_to_uint128.sql @@ -0,0 +1,8 @@ +SELECT toUInt128(toUUID('00000000-0000-0000-0000-000000000000')); +SELECT toUInt128(toUUID('f82aef31-279e-431f-8b00-2899ad387aea')); +SELECT toUInt128(toUUID('ffffffff-ffff-ffff-ffff-ffffffffffff')); +SELECT toUInt64(toUUID('00000000-0000-0000-0000-000000000000')); -- { serverError NOT_IMPLEMENTED } +SELECT toInt128(toUUID('00000000-0000-0000-0000-000000000000')); -- { serverError NOT_IMPLEMENTED } +SELECT cast(toUUID('f82aef31-279e-431f-8b00-2899ad387aea'), 'UInt128'); +select accurateCast(toUUID('f82aef31-279e-431f-8b00-2899ad387aea'), 'UInt128'); +select toUUID('f82aef31-279e-431f-8b00-2899ad387aea')::UInt128; \ No newline at end of file From 2b55734ccf533a4fd9180b57672bc35823ed66d0 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Fri, 30 Jun 2023 02:52:03 -0700 Subject: [PATCH 1037/1997] Fix MergeTreeMarksLoader segfaulting if marks file is longer than expected (#51636) Co-authored-by: Nikita Mikhaylov --- .../MergeTree/MergeTreeMarksLoader.cpp | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeMarksLoader.cpp b/src/Storages/MergeTree/MergeTreeMarksLoader.cpp index 
9a5576f0ad2..5c722eec380 100644 --- a/src/Storages/MergeTree/MergeTreeMarksLoader.cpp +++ b/src/Storages/MergeTree/MergeTreeMarksLoader.cpp @@ -135,6 +135,7 @@ MarkCache::MappedPtr MergeTreeMarksLoader::loadMarksImpl() if (!index_granularity_info.mark_type.adaptive) { /// Read directly to marks. + chassert(expected_uncompressed_size == plain_marks.size() * sizeof(MarkInCompressedFile)); reader->readStrict(reinterpret_cast(plain_marks.data()), expected_uncompressed_size); if (!reader->eof()) @@ -148,23 +149,25 @@ MarkCache::MappedPtr MergeTreeMarksLoader::loadMarksImpl() } else { - size_t i = 0; - size_t granularity; - while (!reader->eof()) + for (size_t i = 0; i < marks_count; ++i) { + if (reader->eof()) + throw Exception( + ErrorCodes::CANNOT_READ_ALL_DATA, + "Cannot read all marks from file {}, marks expected {} (bytes size {}), marks read {} (bytes size {})", + mrk_path, marks_count, expected_uncompressed_size, i, reader->count()); + + size_t granularity; reader->readStrict( reinterpret_cast(plain_marks.data() + i * columns_in_mark), columns_in_mark * sizeof(MarkInCompressedFile)); readIntBinary(granularity, *reader); - ++i; } - if (i * mark_size != expected_uncompressed_size) - { + if (!reader->eof()) throw Exception( ErrorCodes::CANNOT_READ_ALL_DATA, - "Cannot read all marks from file {}, marks expected {} (bytes size {}), marks read {} (bytes size {})", - mrk_path, marks_count, expected_uncompressed_size, i, reader->count()); - } + "Too many marks in file {}, marks expected {} (bytes size {})", + mrk_path, marks_count, expected_uncompressed_size); } auto res = std::make_shared(plain_marks); From 407a7e3cc50633f6ac73cbda6412db0ec28286f5 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Fri, 30 Jun 2023 09:55:55 +0000 Subject: [PATCH 1038/1997] Edit assert message --- src/Functions/FunctionsConversion.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index a79fd6c2e1b..b272e88d17d 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -205,7 +205,7 @@ struct ConvertImpl if constexpr (std::is_same_v && std::is_same_v) { - static_assert(std::is_same_v, "Can be used only to serialize to ColumnString or ColumnFixedString"); + static_assert(std::is_same_v, "UInt128 and UUID types must be same"); if constexpr (std::endian::native == std::endian::little) { vec_to[i].items[1] = vec_from[i].toUnderType().items[0]; From b0d4c9c83b210f9d266cc448df4641a07fdfb08c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 26 Jun 2023 03:04:48 +0200 Subject: [PATCH 1039/1997] Disable hedged requests under TSan --- src/Core/SettingsQuirks.cpp | 9 ++- .../integration/test_hedged_requests/test.py | 75 +++++++++++++++++++ .../test_hedged_requests_parallel/test.py | 25 +++++++ tests/integration/test_secure_socket/test.py | 5 ++ ...851_hedged_connections_external_tables.sql | 1 + 5 files changed, 114 insertions(+), 1 deletion(-) diff --git a/src/Core/SettingsQuirks.cpp b/src/Core/SettingsQuirks.cpp index 3326f42adf5..5b981c71403 100644 --- a/src/Core/SettingsQuirks.cpp +++ b/src/Core/SettingsQuirks.cpp @@ -1,10 +1,11 @@ +#include #include #include #include #include #include #include -#include + namespace { @@ -71,6 +72,12 @@ void applySettingsQuirks(Settings & settings, Poco::Logger * log) } } +#if defined(THREAD_SANITIZER) + settings.use_hedged_requests = false; + if (log) + LOG_WARNING(log, "use_hedged_requests has been disabled for the build with Thread Sanitizer, 
because they are using fibers, leading to a failed assertion inside TSan"); +#endif + if (!queryProfilerWorks()) { if (settings.query_profiler_real_time_period_ns) diff --git a/tests/integration/test_hedged_requests/test.py b/tests/integration/test_hedged_requests/test.py index be6cea80f87..5de92f437c5 100644 --- a/tests/integration/test_hedged_requests/test.py +++ b/tests/integration/test_hedged_requests/test.py @@ -203,6 +203,11 @@ def update_configs( def test_stuck_replica(started_cluster): + if NODES["node"].is_built_with_thread_sanitizer(): + pytest.skip( + "Hedged requests don't work under Thread Sanitizer" + ) + update_configs() cluster.pause_container("node_1") @@ -233,6 +238,11 @@ def test_stuck_replica(started_cluster): def test_long_query(started_cluster): + if NODES["node"].is_built_with_thread_sanitizer(): + pytest.skip( + "Hedged requests don't work under Thread Sanitizer" + ) + update_configs() # Restart to reset pool states. @@ -249,12 +259,22 @@ def test_long_query(started_cluster): def test_send_table_status_sleep(started_cluster): + if NODES["node"].is_built_with_thread_sanitizer(): + pytest.skip( + "Hedged requests don't work under Thread Sanitizer" + ) + update_configs(node_1_sleep_in_send_tables_status=sleep_time) check_query(expected_replica="node_2") check_changing_replica_events(1) def test_send_table_status_sleep2(started_cluster): + if NODES["node"].is_built_with_thread_sanitizer(): + pytest.skip( + "Hedged requests don't work under Thread Sanitizer" + ) + update_configs( node_1_sleep_in_send_tables_status=sleep_time, node_2_sleep_in_send_tables_status=sleep_time, @@ -264,12 +284,22 @@ def test_send_table_status_sleep2(started_cluster): def test_send_data(started_cluster): + if NODES["node"].is_built_with_thread_sanitizer(): + pytest.skip( + "Hedged requests don't work under Thread Sanitizer" + ) + update_configs(node_1_sleep_in_send_data=sleep_time) check_query(expected_replica="node_2") check_changing_replica_events(1) def test_send_data2(started_cluster): + if NODES["node"].is_built_with_thread_sanitizer(): + pytest.skip( + "Hedged requests don't work under Thread Sanitizer" + ) + update_configs( node_1_sleep_in_send_data=sleep_time, node_2_sleep_in_send_data=sleep_time ) @@ -278,6 +308,11 @@ def test_send_data2(started_cluster): def test_combination1(started_cluster): + if NODES["node"].is_built_with_thread_sanitizer(): + pytest.skip( + "Hedged requests don't work under Thread Sanitizer" + ) + update_configs( node_1_sleep_in_send_tables_status=sleep_time, node_2_sleep_in_send_data=sleep_time, @@ -287,6 +322,11 @@ def test_combination1(started_cluster): def test_combination2(started_cluster): + if NODES["node"].is_built_with_thread_sanitizer(): + pytest.skip( + "Hedged requests don't work under Thread Sanitizer" + ) + update_configs( node_1_sleep_in_send_data=sleep_time, node_2_sleep_in_send_tables_status=sleep_time, @@ -296,6 +336,11 @@ def test_combination2(started_cluster): def test_combination3(started_cluster): + if NODES["node"].is_built_with_thread_sanitizer(): + pytest.skip( + "Hedged requests don't work under Thread Sanitizer" + ) + update_configs( node_1_sleep_in_send_data=sleep_time, node_2_sleep_in_send_tables_status=1000, @@ -306,6 +351,11 @@ def test_combination3(started_cluster): def test_combination4(started_cluster): + if NODES["node"].is_built_with_thread_sanitizer(): + pytest.skip( + "Hedged requests don't work under Thread Sanitizer" + ) + update_configs( node_1_sleep_in_send_tables_status=1000, node_1_sleep_in_send_data=sleep_time, @@ 
-317,6 +367,11 @@ def test_combination4(started_cluster): def test_receive_timeout1(started_cluster): + if NODES["node"].is_built_with_thread_sanitizer(): + pytest.skip( + "Hedged requests don't work under Thread Sanitizer" + ) + # Check the situation when first two replicas get receive timeout # in establishing connection, but the third replica is ok. update_configs( @@ -329,6 +384,11 @@ def test_receive_timeout1(started_cluster): def test_receive_timeout2(started_cluster): + if NODES["node"].is_built_with_thread_sanitizer(): + pytest.skip( + "Hedged requests don't work under Thread Sanitizer" + ) + # Check the situation when first replica get receive timeout # in packet receiving but there are replicas in process of # connection establishing. @@ -342,6 +402,11 @@ def test_receive_timeout2(started_cluster): def test_initial_receive_timeout(started_cluster): + if NODES["node"].is_built_with_thread_sanitizer(): + pytest.skip( + "Hedged requests don't work under Thread Sanitizer" + ) + # Check the situation when replicas don't respond after # receiving query (so, no packets were send to initiator) update_configs( @@ -360,6 +425,11 @@ def test_initial_receive_timeout(started_cluster): def test_async_connect(started_cluster): + if NODES["node"].is_built_with_thread_sanitizer(): + pytest.skip( + "Hedged requests don't work under Thread Sanitizer" + ) + update_configs() NODES["node"].restart_clickhouse() @@ -390,6 +460,11 @@ def test_async_connect(started_cluster): def test_async_query_sending(started_cluster): + if NODES["node"].is_built_with_thread_sanitizer(): + pytest.skip( + "Hedged requests don't work under Thread Sanitizer" + ) + update_configs( node_1_sleep_after_receiving_query=5000, node_2_sleep_after_receiving_query=5000, diff --git a/tests/integration/test_hedged_requests_parallel/test.py b/tests/integration/test_hedged_requests_parallel/test.py index 492b869614f..34cad7bb754 100644 --- a/tests/integration/test_hedged_requests_parallel/test.py +++ b/tests/integration/test_hedged_requests_parallel/test.py @@ -172,6 +172,11 @@ def update_configs( def test_send_table_status_sleep(started_cluster): + if NODES["node"].is_built_with_thread_sanitizer(): + pytest.skip( + "Hedged requests don't work under Thread Sanitizer" + ) + update_configs( node_1_sleep_in_send_tables_status=sleep_time, node_2_sleep_in_send_tables_status=sleep_time, @@ -181,6 +186,11 @@ def test_send_table_status_sleep(started_cluster): def test_send_data(started_cluster): + if NODES["node"].is_built_with_thread_sanitizer(): + pytest.skip( + "Hedged requests don't work under Thread Sanitizer" + ) + update_configs( node_1_sleep_in_send_data=sleep_time, node_2_sleep_in_send_data=sleep_time ) @@ -189,6 +199,11 @@ def test_send_data(started_cluster): def test_combination1(started_cluster): + if NODES["node"].is_built_with_thread_sanitizer(): + pytest.skip( + "Hedged requests don't work under Thread Sanitizer" + ) + update_configs( node_1_sleep_in_send_tables_status=1000, node_2_sleep_in_send_tables_status=1000, @@ -199,6 +214,11 @@ def test_combination1(started_cluster): def test_combination2(started_cluster): + if NODES["node"].is_built_with_thread_sanitizer(): + pytest.skip( + "Hedged requests don't work under Thread Sanitizer" + ) + update_configs( node_1_sleep_in_send_data=sleep_time, node_2_sleep_in_send_tables_status=1000, @@ -210,6 +230,11 @@ def test_combination2(started_cluster): def test_query_with_no_data_to_sample(started_cluster): + if NODES["node"].is_built_with_thread_sanitizer(): + pytest.skip( + "Hedged 
requests don't work under Thread Sanitizer" + ) + update_configs( node_1_sleep_in_send_data=sleep_time, node_2_sleep_in_send_data=sleep_time ) diff --git a/tests/integration/test_secure_socket/test.py b/tests/integration/test_secure_socket/test.py index 2dffbed03d6..827e4b81cfc 100644 --- a/tests/integration/test_secure_socket/test.py +++ b/tests/integration/test_secure_socket/test.py @@ -58,6 +58,11 @@ def test(started_cluster): config.format(sleep_in_send_data_ms=1000000), ) + if NODES["node1"].is_built_with_thread_sanitizer(): + pytest.skip( + "Hedged requests don't work under Thread Sanitizer" + ) + attempts = 0 while attempts < 1000: setting = NODES["node2"].http_query( diff --git a/tests/queries/0_stateless/01851_hedged_connections_external_tables.sql b/tests/queries/0_stateless/01851_hedged_connections_external_tables.sql index c4625720e59..22888d5e68c 100644 --- a/tests/queries/0_stateless/01851_hedged_connections_external_tables.sql +++ b/tests/queries/0_stateless/01851_hedged_connections_external_tables.sql @@ -1 +1,2 @@ +-- Tags: no-tsan select number from remote('127.0.0.{3|2}', numbers(2)) where number global in (select number from numbers(1)) settings async_socket_for_remote=1, use_hedged_requests = 1, sleep_in_send_data_ms=10, receive_data_timeout_ms=1; From 6b7c17fb4c863b5145dca71daaf8c14cf4fa8a42 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 26 Jun 2023 01:24:09 +0000 Subject: [PATCH 1040/1997] Automatic style fix --- .../integration/test_hedged_requests/test.py | 62 +++++-------------- .../test_hedged_requests_parallel/test.py | 20 ++---- tests/integration/test_secure_socket/test.py | 4 +- 3 files changed, 22 insertions(+), 64 deletions(-) diff --git a/tests/integration/test_hedged_requests/test.py b/tests/integration/test_hedged_requests/test.py index 5de92f437c5..18ea3e50619 100644 --- a/tests/integration/test_hedged_requests/test.py +++ b/tests/integration/test_hedged_requests/test.py @@ -204,9 +204,7 @@ def update_configs( def test_stuck_replica(started_cluster): if NODES["node"].is_built_with_thread_sanitizer(): - pytest.skip( - "Hedged requests don't work under Thread Sanitizer" - ) + pytest.skip("Hedged requests don't work under Thread Sanitizer") update_configs() @@ -239,9 +237,7 @@ def test_stuck_replica(started_cluster): def test_long_query(started_cluster): if NODES["node"].is_built_with_thread_sanitizer(): - pytest.skip( - "Hedged requests don't work under Thread Sanitizer" - ) + pytest.skip("Hedged requests don't work under Thread Sanitizer") update_configs() @@ -260,9 +256,7 @@ def test_long_query(started_cluster): def test_send_table_status_sleep(started_cluster): if NODES["node"].is_built_with_thread_sanitizer(): - pytest.skip( - "Hedged requests don't work under Thread Sanitizer" - ) + pytest.skip("Hedged requests don't work under Thread Sanitizer") update_configs(node_1_sleep_in_send_tables_status=sleep_time) check_query(expected_replica="node_2") @@ -271,9 +265,7 @@ def test_send_table_status_sleep(started_cluster): def test_send_table_status_sleep2(started_cluster): if NODES["node"].is_built_with_thread_sanitizer(): - pytest.skip( - "Hedged requests don't work under Thread Sanitizer" - ) + pytest.skip("Hedged requests don't work under Thread Sanitizer") update_configs( node_1_sleep_in_send_tables_status=sleep_time, @@ -285,9 +277,7 @@ def test_send_table_status_sleep2(started_cluster): def test_send_data(started_cluster): if NODES["node"].is_built_with_thread_sanitizer(): - pytest.skip( - "Hedged requests don't work under Thread Sanitizer" 
- ) + pytest.skip("Hedged requests don't work under Thread Sanitizer") update_configs(node_1_sleep_in_send_data=sleep_time) check_query(expected_replica="node_2") @@ -296,9 +286,7 @@ def test_send_data(started_cluster): def test_send_data2(started_cluster): if NODES["node"].is_built_with_thread_sanitizer(): - pytest.skip( - "Hedged requests don't work under Thread Sanitizer" - ) + pytest.skip("Hedged requests don't work under Thread Sanitizer") update_configs( node_1_sleep_in_send_data=sleep_time, node_2_sleep_in_send_data=sleep_time @@ -309,9 +297,7 @@ def test_send_data2(started_cluster): def test_combination1(started_cluster): if NODES["node"].is_built_with_thread_sanitizer(): - pytest.skip( - "Hedged requests don't work under Thread Sanitizer" - ) + pytest.skip("Hedged requests don't work under Thread Sanitizer") update_configs( node_1_sleep_in_send_tables_status=sleep_time, @@ -323,9 +309,7 @@ def test_combination1(started_cluster): def test_combination2(started_cluster): if NODES["node"].is_built_with_thread_sanitizer(): - pytest.skip( - "Hedged requests don't work under Thread Sanitizer" - ) + pytest.skip("Hedged requests don't work under Thread Sanitizer") update_configs( node_1_sleep_in_send_data=sleep_time, @@ -337,9 +321,7 @@ def test_combination2(started_cluster): def test_combination3(started_cluster): if NODES["node"].is_built_with_thread_sanitizer(): - pytest.skip( - "Hedged requests don't work under Thread Sanitizer" - ) + pytest.skip("Hedged requests don't work under Thread Sanitizer") update_configs( node_1_sleep_in_send_data=sleep_time, @@ -352,9 +334,7 @@ def test_combination3(started_cluster): def test_combination4(started_cluster): if NODES["node"].is_built_with_thread_sanitizer(): - pytest.skip( - "Hedged requests don't work under Thread Sanitizer" - ) + pytest.skip("Hedged requests don't work under Thread Sanitizer") update_configs( node_1_sleep_in_send_tables_status=1000, @@ -368,9 +348,7 @@ def test_combination4(started_cluster): def test_receive_timeout1(started_cluster): if NODES["node"].is_built_with_thread_sanitizer(): - pytest.skip( - "Hedged requests don't work under Thread Sanitizer" - ) + pytest.skip("Hedged requests don't work under Thread Sanitizer") # Check the situation when first two replicas get receive timeout # in establishing connection, but the third replica is ok. 
@@ -385,9 +363,7 @@ def test_receive_timeout1(started_cluster): def test_receive_timeout2(started_cluster): if NODES["node"].is_built_with_thread_sanitizer(): - pytest.skip( - "Hedged requests don't work under Thread Sanitizer" - ) + pytest.skip("Hedged requests don't work under Thread Sanitizer") # Check the situation when first replica get receive timeout # in packet receiving but there are replicas in process of @@ -403,9 +379,7 @@ def test_receive_timeout2(started_cluster): def test_initial_receive_timeout(started_cluster): if NODES["node"].is_built_with_thread_sanitizer(): - pytest.skip( - "Hedged requests don't work under Thread Sanitizer" - ) + pytest.skip("Hedged requests don't work under Thread Sanitizer") # Check the situation when replicas don't respond after # receiving query (so, no packets were send to initiator) @@ -426,9 +400,7 @@ def test_initial_receive_timeout(started_cluster): def test_async_connect(started_cluster): if NODES["node"].is_built_with_thread_sanitizer(): - pytest.skip( - "Hedged requests don't work under Thread Sanitizer" - ) + pytest.skip("Hedged requests don't work under Thread Sanitizer") update_configs() @@ -461,10 +433,8 @@ def test_async_connect(started_cluster): def test_async_query_sending(started_cluster): if NODES["node"].is_built_with_thread_sanitizer(): - pytest.skip( - "Hedged requests don't work under Thread Sanitizer" - ) - + pytest.skip("Hedged requests don't work under Thread Sanitizer") + update_configs( node_1_sleep_after_receiving_query=5000, node_2_sleep_after_receiving_query=5000, diff --git a/tests/integration/test_hedged_requests_parallel/test.py b/tests/integration/test_hedged_requests_parallel/test.py index 34cad7bb754..728697c690d 100644 --- a/tests/integration/test_hedged_requests_parallel/test.py +++ b/tests/integration/test_hedged_requests_parallel/test.py @@ -173,9 +173,7 @@ def update_configs( def test_send_table_status_sleep(started_cluster): if NODES["node"].is_built_with_thread_sanitizer(): - pytest.skip( - "Hedged requests don't work under Thread Sanitizer" - ) + pytest.skip("Hedged requests don't work under Thread Sanitizer") update_configs( node_1_sleep_in_send_tables_status=sleep_time, @@ -187,9 +185,7 @@ def test_send_table_status_sleep(started_cluster): def test_send_data(started_cluster): if NODES["node"].is_built_with_thread_sanitizer(): - pytest.skip( - "Hedged requests don't work under Thread Sanitizer" - ) + pytest.skip("Hedged requests don't work under Thread Sanitizer") update_configs( node_1_sleep_in_send_data=sleep_time, node_2_sleep_in_send_data=sleep_time @@ -200,9 +196,7 @@ def test_send_data(started_cluster): def test_combination1(started_cluster): if NODES["node"].is_built_with_thread_sanitizer(): - pytest.skip( - "Hedged requests don't work under Thread Sanitizer" - ) + pytest.skip("Hedged requests don't work under Thread Sanitizer") update_configs( node_1_sleep_in_send_tables_status=1000, @@ -215,9 +209,7 @@ def test_combination1(started_cluster): def test_combination2(started_cluster): if NODES["node"].is_built_with_thread_sanitizer(): - pytest.skip( - "Hedged requests don't work under Thread Sanitizer" - ) + pytest.skip("Hedged requests don't work under Thread Sanitizer") update_configs( node_1_sleep_in_send_data=sleep_time, @@ -231,9 +223,7 @@ def test_combination2(started_cluster): def test_query_with_no_data_to_sample(started_cluster): if NODES["node"].is_built_with_thread_sanitizer(): - pytest.skip( - "Hedged requests don't work under Thread Sanitizer" - ) + pytest.skip("Hedged requests don't 
work under Thread Sanitizer") update_configs( node_1_sleep_in_send_data=sleep_time, node_2_sleep_in_send_data=sleep_time diff --git a/tests/integration/test_secure_socket/test.py b/tests/integration/test_secure_socket/test.py index 827e4b81cfc..123715e5f05 100644 --- a/tests/integration/test_secure_socket/test.py +++ b/tests/integration/test_secure_socket/test.py @@ -59,9 +59,7 @@ def test(started_cluster): ) if NODES["node1"].is_built_with_thread_sanitizer(): - pytest.skip( - "Hedged requests don't work under Thread Sanitizer" - ) + pytest.skip("Hedged requests don't work under Thread Sanitizer") attempts = 0 while attempts < 1000: From a628bbb1f55bea23662ff65512b2e892310c13af Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 28 Jun 2023 13:58:32 +0200 Subject: [PATCH 1041/1997] Fix tests --- src/Core/SettingsQuirks.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/SettingsQuirks.cpp b/src/Core/SettingsQuirks.cpp index 5b981c71403..37a0f2db3e2 100644 --- a/src/Core/SettingsQuirks.cpp +++ b/src/Core/SettingsQuirks.cpp @@ -73,7 +73,7 @@ void applySettingsQuirks(Settings & settings, Poco::Logger * log) } #if defined(THREAD_SANITIZER) - settings.use_hedged_requests = false; + settings.use_hedged_requests.value = false; if (log) LOG_WARNING(log, "use_hedged_requests has been disabled for the build with Thread Sanitizer, because they are using fibers, leading to a failed assertion inside TSan"); #endif From c33cd92a694bea64fe0b4e3a4023c97475557963 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 30 Jun 2023 12:58:17 +0200 Subject: [PATCH 1042/1997] Fix --- tests/queries/0_stateless/02724_database_s3.sh | 4 ++-- tests/queries/0_stateless/02725_database_hdfs.sh | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/queries/0_stateless/02724_database_s3.sh b/tests/queries/0_stateless/02724_database_s3.sh index 79199b43571..bb8f1f5f7ee 100755 --- a/tests/queries/0_stateless/02724_database_s3.sh +++ b/tests/queries/0_stateless/02724_database_s3.sh @@ -46,12 +46,12 @@ DROP DATABASE IF EXISTS test3; CREATE DATABASE test3 ENGINE = S3; USE test3; SELECT * FROM \"http://localhost:11111/test/a.myext\" -""" 2>&1| grep -F "BAD_ARGUMENTS" > /dev/null && echo "OK" +""" 2>&1| grep -F "UNKNOWN_TABLE" > /dev/null && echo "OK" ${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ USE test3; SELECT * FROM \"abacaba\" -""" 2>&1| grep -F "BAD_ARGUMENTS" > /dev/null && echo "OK" +""" 2>&1| grep -F "UNKNOWN_TABLE" > /dev/null && echo "OK" # Cleanup ${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ diff --git a/tests/queries/0_stateless/02725_database_hdfs.sh b/tests/queries/0_stateless/02725_database_hdfs.sh index a78f3e6bbdc..89ff7421a6f 100755 --- a/tests/queries/0_stateless/02725_database_hdfs.sh +++ b/tests/queries/0_stateless/02725_database_hdfs.sh @@ -43,12 +43,12 @@ DROP DATABASE IF EXISTS test4; CREATE DATABASE test4 ENGINE = HDFS; USE test4; SELECT * FROM \"abacaba/file.tsv\" -""" 2>&1| grep -F "BAD_ARGUMENTS" > /dev/null && echo "OK1" +""" 2>&1| grep -F "UNKNOWN_TABLE" > /dev/null && echo "OK1" -${CLICKHOUSE_CLIENT} -q "SELECT * FROM test4.\`http://localhost:11111/test/a.tsv\`" 2>&1| grep -F "BAD_ARGUMENTS" > /dev/null && echo "OK2" -${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222/file.myext\`" 2>&1| grep -F "BAD_ARGUMENTS" > /dev/null && echo "OK3" -${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222/test_02725_3.tsv\`" 2>&1| grep -F 
"CANNOT_EXTRACT_TABLE_STRUCTURE" > /dev/null && echo "OK4" -${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222\`" 2>&1| grep -F "BAD_ARGUMENTS" > /dev/null && echo "OK5" +${CLICKHOUSE_CLIENT} -q "SELECT * FROM test4.\`http://localhost:11111/test/a.tsv\`" 2>&1| grep -F "UNKNOWN_TABLE" > /dev/null && echo "OK2" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222/file.myext\`" 2>&1| grep -F "UNKNOWN_TABLE" > /dev/null && echo "OK3" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222/test_02725_3.tsv\`" 2>&1| grep -F "UNKNOWN_TABLE" > /dev/null && echo "OK4" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test4.\`hdfs://localhost:12222\`" 2>&1| grep -F "UNKNOWN_TABLE" > /dev/null && echo "OK5" # Cleanup @@ -57,4 +57,4 @@ DROP DATABASE IF EXISTS test1; DROP DATABASE IF EXISTS test2; DROP DATABASE IF EXISTS test3; DROP DATABASE IF EXISTS test4; -""" \ No newline at end of file +""" From 24ae56b5dd383ab4577747a873d9bc4aae9f7f16 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 30 Jun 2023 11:02:16 +0000 Subject: [PATCH 1043/1997] Correctly apply async deduplication for nonordinary ReplicatedMergeTree tables --- src/Storages/MergeTree/MergeTreeDataWriter.h | 5 ++ .../MergeTree/ReplicatedMergeTreeSink.cpp | 85 ++++++++++++------- 2 files changed, 61 insertions(+), 29 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.h b/src/Storages/MergeTree/MergeTreeDataWriter.h index ed7151886f5..795453b2afa 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.h +++ b/src/Storages/MergeTree/MergeTreeDataWriter.h @@ -80,6 +80,11 @@ public: */ TemporaryPart writeTempPart(BlockWithPartition & block, const StorageMetadataPtr & metadata_snapshot, ContextPtr context); + MergeTreeData::MergingParams::Mode getMergingMode() const + { + return data.merging_params.mode; + } + TemporaryPart writeTempPartWithoutPrefix(BlockWithPartition & block, const StorageMetadataPtr & metadata_snapshot, int64_t block_number, ContextPtr context); /// For insertion. 
diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index c93077fb4fb..718e3fc95ce 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -7,6 +7,8 @@ #include #include #include +#include "Storages/MergeTree/MergeAlgorithm.h" +#include "Storages/MergeTree/MergeTreeDataWriter.h" #include #include #include @@ -54,6 +56,9 @@ struct ReplicatedMergeTreeSinkImpl::DelayedChunk UInt64 elapsed_ns; BlockIDsType block_id; BlockWithPartition block_with_partition; + /// Some merging algorithms can modify the block, which loses the information about the async insert offsets; + /// when preprocessing or filtering data for async insert deduplication we want to use the initial, unmerged block + std::optional unmerged_block_with_partition; std::unordered_map> block_id_to_offset_idx; ProfileEvents::Counters part_counters; @@ -63,12 +68,14 @@ struct ReplicatedMergeTreeSinkImpl::DelayedChunk UInt64 elapsed_ns_, BlockIDsType && block_id_, BlockWithPartition && block_, + std::optional && unmerged_block_with_partition_, ProfileEvents::Counters && part_counters_) : log(log_), temp_part(std::move(temp_part_)), elapsed_ns(elapsed_ns_), block_id(std::move(block_id_)), block_with_partition(std::move(block_)), + unmerged_block_with_partition(std::move(unmerged_block_with_partition_)), part_counters(std::move(part_counters_)) { initBlockIDMap(); @@ -113,6 +120,7 @@ struct ReplicatedMergeTreeSinkImpl::DelayedChunk { if constexpr (async_insert) { + auto * current_block_with_partition = unmerged_block_with_partition.has_value() ? &unmerged_block_with_partition.value() : &block_with_partition; std::vector offset_idx; for (const auto & raw_path : block_paths) { @@ -127,14 +135,14 @@ struct ReplicatedMergeTreeSinkImpl::DelayedChunk } std::sort(offset_idx.begin(), offset_idx.end()); - auto & offsets = block_with_partition.offsets; + auto & offsets = current_block_with_partition->offsets; size_t idx = 0, remove_count = 0; auto it = offset_idx.begin(); std::vector new_offsets; std::vector new_block_ids; /// construct filter - size_t rows = block_with_partition.block.rows(); + size_t rows = current_block_with_partition->block.rows(); auto filter_col = ColumnUInt8::create(rows, 1u); ColumnUInt8::Container & vec = filter_col->getData(); UInt8 * pos = vec.data(); @@ -162,18 +170,21 @@ struct ReplicatedMergeTreeSinkImpl::DelayedChunk LOG_TRACE(log, "New block IDs: {}, new offsets: {}, size: {}", toString(new_block_ids), toString(new_offsets), new_offsets.size()); - block_with_partition.offsets = std::move(new_offsets); + current_block_with_partition->offsets = std::move(new_offsets); block_id = std::move(new_block_ids); - auto cols = block_with_partition.block.getColumns(); + auto cols = current_block_with_partition->block.getColumns(); for (auto & col : cols) { col = col->filter(vec, rows - remove_count); } - block_with_partition.block.setColumns(cols); + current_block_with_partition->block.setColumns(cols); - LOG_TRACE(log, "New block rows {}", block_with_partition.block.rows()); + LOG_TRACE(log, "New block rows {}", current_block_with_partition->block.rows()); initBlockIDMap(); + + if (unmerged_block_with_partition.has_value()) + block_with_partition.block = unmerged_block_with_partition->block; } else { @@ -202,7 +213,7 @@ std::vector testSelfDeduplicate(std::vector data, std::vector::DelayedChunk::Partition part( - &Poco::Logger::get("testSelfDeduplicate"), MergeTreeDataWriter::TemporaryPart(), 0,
std::move(hashes), std::move(block1), std::move(profile_counters)); + &Poco::Logger::get("testSelfDeduplicate"), MergeTreeDataWriter::TemporaryPart(), 0, std::move(hashes), std::move(block1), std::nullopt, std::move(profile_counters)); part.filterSelfDuplicate(); @@ -235,8 +246,10 @@ namespace { SipHash hash; for (size_t i = start; i < offset; ++i) + { for (const auto & col : cols) col->updateHashWithValue(i, hash); + } union { char bytes[16]; @@ -432,8 +445,17 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) ProfileEvents::Counters part_counters; auto profile_events_scope = std::make_unique(&part_counters); - /// Write part to the filesystem under temporary name. Calculate a checksum. + /// Some merging algorithms can modify the block, which loses the information about the async insert offsets; + /// when preprocessing or filtering data for async insert deduplication we want to use the initial, unmerged block + std::optional unmerged_block; + if constexpr (async_insert) + { + if (storage.writer.getMergingMode() != MergeTreeData::MergingParams::Mode::Ordinary) + unmerged_block.emplace(current_block); + } + + /// Write part to the filesystem under temporary name. Calculate a checksum. auto temp_part = storage.writer.writeTempPart(current_block, metadata_snapshot, context); /// If optimize_on_insert setting is true, current_block could become empty after merge @@ -446,31 +468,35 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) if constexpr (async_insert) { /// TODO consider insert_deduplication_token - block_id = getHashesForBlocks(current_block, temp_part.part->info.partition_id); + block_id = getHashesForBlocks(unmerged_block.has_value() ? *unmerged_block : current_block, temp_part.part->info.partition_id); LOG_TRACE(log, "async insert part, part id {}, block id {}, offsets {}, size {}", temp_part.part->info.partition_id, toString(block_id), toString(current_block.offsets), current_block.offsets.size()); } - else if (deduplicate) - { - String block_dedup_token; - - /// We add the hash from the data and partition identifier to deduplication ID. - /// That is, do not insert the same data to the same partition twice. - - const String & dedup_token = settings.insert_deduplication_token; - if (!dedup_token.empty()) - { - /// multiple blocks can be inserted within the same insert query - /// an ordinal number is added to dedup token to generate a distinctive block id for each block - block_dedup_token = fmt::format("{}_{}", dedup_token, chunk_dedup_seqnum); - ++chunk_dedup_seqnum; - } - - block_id = temp_part.part->getZeroLevelPartBlockID(block_dedup_token); - LOG_DEBUG(log, "Wrote block with ID '{}', {} rows{}", block_id, current_block.block.rows(), quorumLogMessage(replicas_num)); - } else { - LOG_DEBUG(log, "Wrote block with {} rows{}", current_block.block.rows(), quorumLogMessage(replicas_num)); + + if (deduplicate) + { + String block_dedup_token; + + /// We add the hash from the data and partition identifier to deduplication ID. + /// That is, do not insert the same data to the same partition twice.
+ + const String & dedup_token = settings.insert_deduplication_token; + if (!dedup_token.empty()) + { + /// multiple blocks can be inserted within the same insert query + /// an ordinal number is added to dedup token to generate a distinctive block id for each block + block_dedup_token = fmt::format("{}_{}", dedup_token, chunk_dedup_seqnum); + ++chunk_dedup_seqnum; + } + + block_id = temp_part.part->getZeroLevelPartBlockID(block_dedup_token); + LOG_DEBUG(log, "Wrote block with ID '{}', {} rows{}", block_id, current_block.block.rows(), quorumLogMessage(replicas_num)); + } + else + { + LOG_DEBUG(log, "Wrote block with {} rows{}", current_block.block.rows(), quorumLogMessage(replicas_num)); + } } profile_events_scope.reset(); @@ -501,6 +527,7 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) elapsed_ns, std::move(block_id), std::move(current_block), + std::move(unmerged_block), std::move(part_counters) /// profile_events_scope must be reset here. )); } From 3b73e112165833e8baece650021adbb19e8f635f Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 30 Jun 2023 11:04:40 +0000 Subject: [PATCH 1044/1997] Fix tests --- tests/queries/0_stateless/01361_fover_remote_num_tries.sh | 2 +- ...6_skip_unavailable_shards_excessive_attempts.reference | 8 ++++---- .../01956_skip_unavailable_shards_excessive_attempts.sh | 2 +- .../02226_parallel_reading_from_replicas_benchmark.sh | 2 ++ 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/01361_fover_remote_num_tries.sh b/tests/queries/0_stateless/01361_fover_remote_num_tries.sh index 2ee2ec1bc76..f07ffc02e4f 100755 --- a/tests/queries/0_stateless/01361_fover_remote_num_tries.sh +++ b/tests/queries/0_stateless/01361_fover_remote_num_tries.sh @@ -5,4 +5,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT --connections_with_failover_max_tries 10 --query "SELECT hostName() FROM remote('128.1.2.3', default.tmp)" 2>&1 | grep -o -P 'Timeout exceeded while connecting to socket|Network is unreachable' | wc -l +$CLICKHOUSE_CLIENT --connections_with_failover_max_tries 10 --query "SELECT hostName() FROM remote('128.1.2.3', default.tmp)" 2>&1 | grep -o -P 'Timeout exceeded while connecting to socket|Network is unreachable|Timeout: connect timed out' | wc -l diff --git a/tests/queries/0_stateless/01956_skip_unavailable_shards_excessive_attempts.reference b/tests/queries/0_stateless/01956_skip_unavailable_shards_excessive_attempts.reference index e39f4b962e6..ad409aac5b8 100644 --- a/tests/queries/0_stateless/01956_skip_unavailable_shards_excessive_attempts.reference +++ b/tests/queries/0_stateless/01956_skip_unavailable_shards_excessive_attempts.reference @@ -1,10 +1,10 @@ 255.255.255.255 -HedgedConnectionsFactory: Connection failed at try №1 +ConnectionPoolWithFailover: Connection failed at try №1 executeQuery: Code: 519.: All attempts to get table structure failed. 
127.2,255.255.255.255 0 -HedgedConnectionsFactory: Connection failed at try №1 +ConnectionPoolWithFailover: Connection failed at try №1 255.255.255.255,127.2 0 -HedgedConnectionsFactory: Connection failed at try №1 -HedgedConnectionsFactory: Connection failed at try №1 +ConnectionPoolWithFailover: Connection failed at try №1 +ConnectionPoolWithFailover: Connection failed at try №1 diff --git a/tests/queries/0_stateless/01956_skip_unavailable_shards_excessive_attempts.sh b/tests/queries/0_stateless/01956_skip_unavailable_shards_excessive_attempts.sh index 488e2fe106a..9f9de96ca6e 100755 --- a/tests/queries/0_stateless/01956_skip_unavailable_shards_excessive_attempts.sh +++ b/tests/queries/0_stateless/01956_skip_unavailable_shards_excessive_attempts.sh @@ -25,7 +25,7 @@ function execute_query() # clickhouse-client 2> >(wc -l) # # May dump output of "wc -l" after some other programs. - $CLICKHOUSE_CLIENT "${opts[@]}" --query "select * from remote('$hosts', system.one)" 2>"$stderr" + $CLICKHOUSE_CLIENT "${opts[@]}" --query "select * from remote('$hosts', system.one) settings use_hedged_requests=0" 2>"$stderr" process_log_safe "$stderr" } execute_query 255.255.255.255 diff --git a/tests/queries/0_stateless/02226_parallel_reading_from_replicas_benchmark.sh b/tests/queries/0_stateless/02226_parallel_reading_from_replicas_benchmark.sh index 4b8f8da5480..941f024825a 100755 --- a/tests/queries/0_stateless/02226_parallel_reading_from_replicas_benchmark.sh +++ b/tests/queries/0_stateless/02226_parallel_reading_from_replicas_benchmark.sh @@ -17,6 +17,8 @@ opts=( --allow_experimental_parallel_reading_from_replicas 1 --parallel_replicas_for_non_replicated_merge_tree 1 --max_parallel_replicas 3 + --use_hedged_requests 0 + --cluster_for_parallel_replicas parallel_replicas --iterations 1 ) From 0fea8c6d1c9080e5855b2f4532c255aa6d40c5e5 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Fri, 30 Jun 2023 13:08:11 +0200 Subject: [PATCH 1045/1997] Update 02808_custom_disk_with_user_defined_name.sh --- .../0_stateless/02808_custom_disk_with_user_defined_name.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02808_custom_disk_with_user_defined_name.sh b/tests/queries/0_stateless/02808_custom_disk_with_user_defined_name.sh index 50dee04f6a6..537e117adb9 100755 --- a/tests/queries/0_stateless/02808_custom_disk_with_user_defined_name.sh +++ b/tests/queries/0_stateless/02808_custom_disk_with_user_defined_name.sh @@ -12,7 +12,7 @@ $CLICKHOUSE_CLIENT -nm --query """ DROP TABLE IF EXISTS test; CREATE TABLE test (a Int32, b String) ENGINE = MergeTree() ORDER BY tuple() -SETTINGS disk = disk_s3disk(type = cache, max_size = '100Ki', path = ${CLICKHOUSE_TEST_UNIQUE_NAME}, disk = s3disk); +SETTINGS disk = disk_s3disk(type = cache, max_size = '100Ki', path = ${CLICKHOUSE_TEST_UNIQUE_NAME}, disk = s3_disk); """ 2>&1 | grep -q "Disk with name \`s3disk\` already exist" && echo 'OK' || echo 'FAIL' disk_name="${CLICKHOUSE_TEST_UNIQUE_NAME}" From cfa15d6393a975c7ee3edb3a38b0b8219f0af7a3 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Fri, 30 Jun 2023 13:09:06 +0200 Subject: [PATCH 1046/1997] Update 02808_filesystem_cache_drop_query.sh --- tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh 
b/tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh index 5fd426ca1c2..9d987d0ebf2 100755 --- a/tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh +++ b/tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh @@ -13,7 +13,7 @@ $CLICKHOUSE_CLIENT -nm --query """ DROP TABLE IF EXISTS test; CREATE TABLE test (a Int32, b String) ENGINE = MergeTree() ORDER BY tuple() -SETTINGS disk = disk_$disk_name(type = cache, max_size = '100Ki', path = ${CLICKHOUSE_TEST_UNIQUE_NAME}, disk = s3disk); +SETTINGS disk = disk_$disk_name(type = cache, max_size = '100Ki', path = ${CLICKHOUSE_TEST_UNIQUE_NAME}, disk = s3_disk); INSERT INTO test SELECT 1, 'test'; """ From f134153f4e39befdbed703eda439d96e7d539a49 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Fri, 30 Jun 2023 13:12:04 +0200 Subject: [PATCH 1047/1997] Fix race with "user_was_dropped" in ContextAccess. --- src/Access/ContextAccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Access/ContextAccess.h b/src/Access/ContextAccess.h index 63604a03b4e..1259d8d72eb 100644 --- a/src/Access/ContextAccess.h +++ b/src/Access/ContextAccess.h @@ -221,7 +221,7 @@ private: mutable Poco::Logger * trace_log = nullptr; mutable UserPtr user; mutable String user_name; - mutable bool user_was_dropped = false; + mutable std::atomic user_was_dropped = false; mutable scope_guard subscription_for_user_change; mutable std::shared_ptr enabled_roles; mutable scope_guard subscription_for_roles_changes; From 58f291997176b230395bc906fe642c5dfad4657c Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 30 Jun 2023 12:46:45 +0200 Subject: [PATCH 1048/1997] Fix --- src/Interpreters/Cache/FileSegment.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index e97d708ba74..5c7fb4c8109 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -648,8 +648,6 @@ void FileSegment::complete() if (segment_kind == FileSegmentKind::Temporary && is_last_holder) { LOG_TEST(log, "Removing temporary file segment: {}", getInfoForLogUnlocked(segment_lock)); - detach(segment_lock, *locked_key); - setDownloadState(State::DETACHED, segment_lock); locked_key->removeFileSegment(offset(), segment_lock); return; } @@ -798,7 +796,6 @@ bool FileSegment::assertCorrectnessUnlocked(const FileSegmentGuard::Lock &) cons } chassert(reserved_size >= downloaded_size); - chassert((reserved_size == 0) || queue_iterator); check_iterator(queue_iterator); } From 96cc02ae0c83581b051ea49fd11a3c1aa97f6c54 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 30 Jun 2023 11:45:10 +0000 Subject: [PATCH 1049/1997] Add test --- ...sert_dedup_replicated_collapsing.reference | 8 ++++ ...sync_insert_dedup_replicated_collapsing.sh | 39 +++++++++++++++++++ 2 files changed, 47 insertions(+) create mode 100644 tests/queries/0_stateless/02810_async_insert_dedup_replicated_collapsing.reference create mode 100755 tests/queries/0_stateless/02810_async_insert_dedup_replicated_collapsing.sh diff --git a/tests/queries/0_stateless/02810_async_insert_dedup_replicated_collapsing.reference b/tests/queries/0_stateless/02810_async_insert_dedup_replicated_collapsing.reference new file mode 100644 index 00000000000..74624d246de --- /dev/null +++ b/tests/queries/0_stateless/02810_async_insert_dedup_replicated_collapsing.reference @@ -0,0 +1,8 @@ +string1 +------------ +string1 +------------ +string1 +string1 +string2 +------------ diff --git 
a/tests/queries/0_stateless/02810_async_insert_dedup_replicated_collapsing.sh b/tests/queries/0_stateless/02810_async_insert_dedup_replicated_collapsing.sh
new file mode 100755
index 00000000000..69e0e3ecc32
--- /dev/null
+++ b/tests/queries/0_stateless/02810_async_insert_dedup_replicated_collapsing.sh
@@ -0,0 +1,39 @@
+#!/usr/bin/env bash
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS 02810_async_insert_dedup_collapsing"
+${CLICKHOUSE_CLIENT} -q "CREATE TABLE 02810_async_insert_dedup_collapsing (stringvalue String, sign Int8) ENGINE = ReplicatedCollapsingMergeTree('/clickhouse/02810_async_insert_dedup/' || currentDatabase(), 'r1', sign) ORDER BY stringvalue"
+
+url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=1&async_insert_busy_timeout_ms=3000&async_insert_deduplicate=1"
+
+# insert value with same key and sign so it's collapsed on insert
+${CLICKHOUSE_CURL} -sS "$url" -d "INSERT INTO 02810_async_insert_dedup_collapsing VALUES ('string1', 1)" &
+${CLICKHOUSE_CURL} -sS "$url" -d "INSERT INTO 02810_async_insert_dedup_collapsing VALUES ('string1', 1)" &
+
+wait
+
+${CLICKHOUSE_CLIENT} -q "SELECT stringvalue FROM 02810_async_insert_dedup_collapsing ORDER BY stringvalue"
+${CLICKHOUSE_CLIENT} -q "SELECT '------------'"
+
+# trigger the same collapsing algorithm but also deduplication
+${CLICKHOUSE_CURL} -sS "$url" -d "INSERT INTO 02810_async_insert_dedup_collapsing VALUES ('string1', 1)" &
+${CLICKHOUSE_CURL} -sS "$url" -d "INSERT INTO 02810_async_insert_dedup_collapsing VALUES ('string1', 1)" &
+
+wait
+
+${CLICKHOUSE_CLIENT} -q "SELECT stringvalue FROM 02810_async_insert_dedup_collapsing ORDER BY stringvalue"
+${CLICKHOUSE_CLIENT} -q "SELECT '------------'"
+
+${CLICKHOUSE_CURL} -sS "$url" -d "INSERT INTO 02810_async_insert_dedup_collapsing VALUES ('string2', 1)" &
+${CLICKHOUSE_CURL} -sS "$url" -d "INSERT INTO 02810_async_insert_dedup_collapsing VALUES ('string2', 1), ('string1', 1)" &
+${CLICKHOUSE_CURL} -sS "$url" -d "INSERT INTO 02810_async_insert_dedup_collapsing VALUES ('string2', 1)" &
+
+wait
+
+${CLICKHOUSE_CLIENT} -q "SELECT stringvalue FROM 02810_async_insert_dedup_collapsing ORDER BY stringvalue"
+${CLICKHOUSE_CLIENT} -q "SELECT '------------'"
+
+${CLICKHOUSE_CLIENT} -q "DROP TABLE 02810_async_insert_dedup_collapsing"
\ No newline at end of file

From 9a79fd6f70b30cd56e37c8a41c8c14bf65ed11ff Mon Sep 17 00:00:00 2001
From: Antonio Andelic
Date: Fri, 30 Jun 2023 11:51:20 +0000
Subject: [PATCH 1050/1997] Better

---
 src/Processors/Transforms/ExpressionTransform.cpp | 1 -
 src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp | 7 ++++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/Processors/Transforms/ExpressionTransform.cpp b/src/Processors/Transforms/ExpressionTransform.cpp
index 49988932947..0d3341b000c 100644
--- a/src/Processors/Transforms/ExpressionTransform.cpp
+++ b/src/Processors/Transforms/ExpressionTransform.cpp
@@ -39,7 +39,6 @@ void ConvertingTransform::onConsume(Chunk chunk)
     expression->execute(block, num_rows);
 
     chunk.setColumns(block.getColumns(), num_rows);
-    chunk.setChunkInfo(chunk.getChunkInfo());
     cur_chunk = std::move(chunk);
 }
 
diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp
index 718e3fc95ce..a13a96ac65e 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp
+++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp
@@ -7,8 +7,8 @@
 #include
 #include
 #include
-#include "Storages/MergeTree/MergeAlgorithm.h"
-#include "Storages/MergeTree/MergeTreeDataWriter.h"
+#include <Storages/MergeTree/MergeAlgorithm.h>
+#include <Storages/MergeTree/MergeTreeDataWriter.h>
 #include
 #include
 #include
@@ -451,8 +451,9 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk)
 
         if constexpr (async_insert)
         {
+            /// we copy everything but offsets which we move because they are only used by async insert
             if (storage.writer.getMergingMode() != MergeTreeData::MergingParams::Mode::Ordinary)
-                unmerged_block.emplace(current_block);
+                unmerged_block.emplace(Block(current_block.block), Row(current_block.partition), std::move(current_block.offsets));
         }
 
         /// Write part to the filesystem under temporary name. Calculate a checksum.

From 042885c35e2e4c82e8594543619a83a0fcd4c0df Mon Sep 17 00:00:00 2001
From: Antonio Andelic
Date: Fri, 30 Jun 2023 14:25:24 +0200
Subject: [PATCH 1051/1997] fix test style

---
 .../02810_async_insert_dedup_replicated_collapsing.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/02810_async_insert_dedup_replicated_collapsing.sh b/tests/queries/0_stateless/02810_async_insert_dedup_replicated_collapsing.sh
index 69e0e3ecc32..804cd894ebc 100755
--- a/tests/queries/0_stateless/02810_async_insert_dedup_replicated_collapsing.sh
+++ b/tests/queries/0_stateless/02810_async_insert_dedup_replicated_collapsing.sh
@@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 . "$CURDIR"/../shell_config.sh
 
 ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS 02810_async_insert_dedup_collapsing"
-${CLICKHOUSE_CLIENT} -q "CREATE TABLE 02810_async_insert_dedup_collapsing (stringvalue String, sign Int8) ENGINE = ReplicatedCollapsingMergeTree('/clickhouse/02810_async_insert_dedup/' || currentDatabase(), 'r1', sign) ORDER BY stringvalue"
+${CLICKHOUSE_CLIENT} -q "CREATE TABLE 02810_async_insert_dedup_collapsing (stringvalue String, sign Int8) ENGINE = ReplicatedCollapsingMergeTree('/clickhouse/{database}/02810_async_insert_dedup', 'r1', sign) ORDER BY stringvalue"
 
 url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=1&async_insert_busy_timeout_ms=3000&async_insert_deduplicate=1"
 

From 84872517708ad03c764e616fd28d5d1d12593697 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Elena=20Torr=C3=B3?=
Date: Fri, 30 Jun 2023 14:46:50 +0200
Subject: [PATCH 1052/1997] Update File Cache disk configuration settings default values

---
 docs/en/operations/storing-data.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md
index 5804ad8545b..3f4456d7e12 100644
--- a/docs/en/operations/storing-data.md
+++ b/docs/en/operations/storing-data.md
@@ -188,9 +188,9 @@ These settings should be defined in the disk configuration section.
 
 - `do_not_evict_index_and_mark_files` - do not evict small frequently used files according to cache policy. Default: `false`. This setting was added in version 22.8. If you used filesystem cache before this version, then it will not work on versions starting from 22.8 if this setting is set to `true`. If you want to use this setting, clear old cache created before version 22.8 before upgrading.
 
-- `max_file_segment_size` - a maximum size of a single cache file in bytes or in readable format (`ki, Mi, Gi, etc`, example `10Gi`). Default: `104857600` (`100Mi`).
+- `max_file_segment_size` - a maximum size of a single cache file in bytes or in readable format (`ki, Mi, Gi, etc`, example `10Gi`). Default: `8388608` (`8Mi`).
 
-- `max_elements` - a limit for a number of cache files.
Default: `1048576`. +- `max_elements` - a limit for a number of cache files. Default: `10000000`. File Cache **query/profile settings**: From bf06e18c348d6d8bd23d4184d4bf3eee89ed4835 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Fri, 30 Jun 2023 09:03:25 -0400 Subject: [PATCH 1053/1997] docs clickhouse-static-files-uploader and demo repo --- .../en/engines/table-engines/mergetree-family/mergetree.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index a1c2fbdbe50..67043ef1062 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -949,7 +949,14 @@ The example uses `type=web`, but any disk type can be configured as dynamic, eve #### Example dynamic web storage +:::tip +A [demo dataset](https://github.com/ClickHouse/web-tables-demo) is hosted in GitHub. To prepare your own tables for web storage see the tool [clickhouse-static-files-uploader](/docs/en/operations/storing-data.md/#storing-data-on-webserver) +::: + +In this `ATTACH TABLE` query the `UUID` provided matches the directory name of the data, and the endpoint is the URL for the raw GitHub content. + ```sql +# highlight-next-line ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7' ( price UInt32, From 1cf021c0b11ef1f24312a98bb8e443067a4ad497 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Fri, 30 Jun 2023 13:11:06 +0000 Subject: [PATCH 1054/1997] Add initcap prototype / tests --- .../functions/string-functions.md | 4 + .../functions/string-functions.md | 4 + src/Functions/initcap.cpp | 78 +++++++++++++++++++ .../0_stateless/02810_initcap.reference | 6 ++ tests/queries/0_stateless/02810_initcap.sql | 6 ++ 5 files changed, 98 insertions(+) create mode 100644 src/Functions/initcap.cpp create mode 100644 tests/queries/0_stateless/02810_initcap.reference create mode 100644 tests/queries/0_stateless/02810_initcap.sql diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 5175bbf0615..d2180c9f3ea 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -1253,3 +1253,7 @@ Result: │ A240 │ └──────────────────┘ ``` + +## initcap + +Convert the first letter of each word to upper case and the rest to lower case. Words are sequences of alphanumeric characters separated by non-alphanumeric characters. \ No newline at end of file diff --git a/docs/ru/sql-reference/functions/string-functions.md b/docs/ru/sql-reference/functions/string-functions.md index 9638e25d488..bd104b27bed 100644 --- a/docs/ru/sql-reference/functions/string-functions.md +++ b/docs/ru/sql-reference/functions/string-functions.md @@ -1113,3 +1113,7 @@ A text with tags . The content within CDATA Do Nothing for 2 Minutes 2:00   ``` + +## initcap {#initcap} + +Переводит первую букву каждого слова в строке в верхний регистр, а остальные — в нижний. Словами считаются последовательности алфавитно-цифровых символов, разделённые любыми другими символами. 
\ No newline at end of file
diff --git a/src/Functions/initcap.cpp b/src/Functions/initcap.cpp
new file mode 100644
index 00000000000..70c332d191d
--- /dev/null
+++ b/src/Functions/initcap.cpp
@@ -0,0 +1,78 @@
+#include <Functions/FunctionFactory.h>
+#include <Functions/FunctionStringToString.h>
+
+namespace DB
+{
+namespace
+{
+
+struct InitcapImpl
+{
+    static void vector(const ColumnString::Chars & data,
+        const ColumnString::Offsets & offsets,
+        ColumnString::Chars & res_data,
+        ColumnString::Offsets & res_offsets)
+    {
+        res_data.resize(data.size());
+        res_offsets.assign(offsets);
+        array(data.data(), data.data() + data.size(), res_data.data());
+    }
+
+    static void vectorFixed(const ColumnString::Chars & data, size_t /*n*/, ColumnString::Chars & res_data)
+    {
+        res_data.resize(data.size());
+        array(data.data(), data.data() + data.size(), res_data.data());
+    }
+
+private:
+    static void array(const UInt8 * src, const UInt8 * src_end, UInt8 * dst)
+    {
+        const auto flip_case_mask = 'A' ^ 'a';
+
+        auto is_lower_alpha = [](UInt8 c) { return c >= 'a' && c <= 'z'; };
+        auto is_upper_alpha = [](UInt8 c) { return c >= 'A' && c <= 'Z'; };
+        //auto is_digit = [](UInt8 c) { return c >= '0' && c <= '9'; };
+
+        bool prev_is_alpha = false;
+
+        for (; src < src_end; ++src, ++dst)
+        {
+            bool lower = is_lower_alpha(*src);
+            bool is_alpha = lower || is_upper_alpha(*src);
+            if (!is_alpha)
+            {
+                *dst = *src;
+            }
+            else if (!prev_is_alpha)
+            {
+                if (lower)
+                    *dst = *src ^ flip_case_mask;
+                else
+                    *dst = *src;
+            }
+            else
+            {
+                if (!lower)
+                    *dst = *src ^ flip_case_mask;
+                else
+                    *dst = *src;
+            }
+            prev_is_alpha = is_alpha;
+        }
+    }
+};
+
+struct NameInitcap
+{
+    static constexpr auto name = "initcap";
+};
+using FunctionInitcap = FunctionStringToString<InitcapImpl, NameInitcap>;
+
+}
+
+REGISTER_FUNCTION(Initcap)
+{
+    factory.registerFunction<FunctionInitcap>({}, FunctionFactory::CaseInsensitive);
+}
+
+}
diff --git a/tests/queries/0_stateless/02810_initcap.reference b/tests/queries/0_stateless/02810_initcap.reference
new file mode 100644
index 00000000000..9fda79e4afb
--- /dev/null
+++ b/tests/queries/0_stateless/02810_initcap.reference
@@ -0,0 +1,6 @@
+
+Hello
+Hello
+Hello World
+Yeah, Well, I`M Gonna Go Build My Own Theme Park
+Crc32ieee Is Best Function
diff --git a/tests/queries/0_stateless/02810_initcap.sql b/tests/queries/0_stateless/02810_initcap.sql
new file mode 100644
index 00000000000..810ea52dd6a
--- /dev/null
+++ b/tests/queries/0_stateless/02810_initcap.sql
@@ -0,0 +1,6 @@
+select initcap('');
+select initcap('Hello');
+select initcap('hello');
+select initcap('hello world');
+select initcap('yeah, well, i`m gonna go build my own theme park');
+select initcap('CRC32IEEE is best function');

From 2311dd9b5ddbeb8587ce7201b889c0ab1442d003 Mon Sep 17 00:00:00 2001
From: Vitaly Baranov
Date: Fri, 30 Jun 2023 15:18:54 +0200
Subject: [PATCH 1055/1997] Use TSA_GUARDED_BY() macro in the definition of ContextAccess.
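The diff that follows annotates every mutex-protected member of `ContextAccess` so that Clang's `-Wthread-safety` analysis can verify at compile time that each one is touched only while `mutex` is held, the same property the earlier `std::atomic<bool> user_was_dropped` fix established for a single flag by hand. A minimal self-contained sketch of the mechanism; the macro spellings below follow the Clang thread-safety documentation rather than ClickHouse's `TSA_*` wrappers, an assumption made to keep the example standalone:

```cpp
// Build with: clang++ -std=c++17 -Wthread-safety tsa_sketch.cpp
// Illustrative only: attribute spellings per the Clang docs, not base/defines.h.

#define CAPABILITY(x)  __attribute__((capability(x)))
#define GUARDED_BY(x)  __attribute__((guarded_by(x)))
#define REQUIRES(...)  __attribute__((requires_capability(__VA_ARGS__)))
#define ACQUIRE(...)   __attribute__((acquire_capability(__VA_ARGS__)))
#define RELEASE(...)   __attribute__((release_capability(__VA_ARGS__)))

/// A mutex type the analysis recognises as a "capability".
class CAPABILITY("mutex") Mutex
{
public:
    void lock() ACQUIRE() {}
    void unlock() RELEASE() {}
};

class Access
{
public:
    void setUser(int id)
    {
        mutex.lock();
        setUserUnlocked(id);   // OK: the capability is held at this point
        mutex.unlock();
    }

    // int broken() { return user_id; }   // would trigger -Wthread-safety:
    //                                     // reading user_id requires holding mutex

private:
    /// Callable only with `mutex` held; the compiler checks every call site.
    void setUserUnlocked(int id) REQUIRES(mutex) { user_id = id; }

    Mutex mutex;
    int user_id GUARDED_BY(mutex) = 0;   // any unlocked access becomes a warning
};

int main() {}
```

With this in place, a member such as `user_name TSA_GUARDED_BY(mutex)` turns unlocked access into a compiler warning, and `TSA_REQUIRES(mutex)` on helpers like `setUser()` both documents and enforces their locking contract.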
--- src/Access/ContextAccess.cpp | 16 +++++++------- src/Access/ContextAccess.h | 43 ++++++++++++++++++++++-------------- 2 files changed, 34 insertions(+), 25 deletions(-) diff --git a/src/Access/ContextAccess.cpp b/src/Access/ContextAccess.cpp index 0d28edc0a10..9c57853679f 100644 --- a/src/Access/ContextAccess.cpp +++ b/src/Access/ContextAccess.cpp @@ -228,6 +228,12 @@ ContextAccess::ContextAccess(const AccessControl & access_control_, const Params } +ContextAccess::ContextAccess(FullAccess) + : is_full_access(true), access(std::make_shared(AccessRights::getFullAccess())), access_with_implicit(access) +{ +} + + ContextAccess::~ContextAccess() { enabled_settings.reset(); @@ -413,14 +419,8 @@ std::optional ContextAccess::getQuotaUsage() const std::shared_ptr ContextAccess::getFullAccess() { - static const std::shared_ptr res = [] - { - auto full_access = std::make_shared(); - full_access->is_full_access = true; - full_access->access = std::make_shared(AccessRights::getFullAccess()); - full_access->access_with_implicit = full_access->access; - return full_access; - }(); + static const std::shared_ptr res = + [] { return std::shared_ptr(new ContextAccess{kFullAccess}); }(); return res; } diff --git a/src/Access/ContextAccess.h b/src/Access/ContextAccess.h index 1259d8d72eb..60bad0118fc 100644 --- a/src/Access/ContextAccess.h +++ b/src/Access/ContextAccess.h @@ -69,7 +69,6 @@ public: using Params = ContextAccessParams; const Params & getParams() const { return params; } - ContextAccess() { } /// NOLINT ContextAccess(const AccessControl & access_control_, const Params & params_); /// Returns the current user. Throws if user is nullptr. @@ -171,10 +170,17 @@ public: private: friend class AccessControl; + struct FullAccess {}; + static const FullAccess kFullAccess; + + /// Makes an instance of ContextAccess which provides full access to everything + /// without any limitations. This is used for the global context. 
+ explicit ContextAccess(FullAccess); + void initialize(); - void setUser(const UserPtr & user_) const; - void setRolesInfo(const std::shared_ptr & roles_info_) const; - void calculateAccessRights() const; + void setUser(const UserPtr & user_) const TSA_REQUIRES(mutex); + void setRolesInfo(const std::shared_ptr & roles_info_) const TSA_REQUIRES(mutex); + void calculateAccessRights() const TSA_REQUIRES(mutex); template bool checkAccessImpl(const AccessFlags & flags) const; @@ -217,20 +223,23 @@ private: const AccessControl * access_control = nullptr; const Params params; - bool is_full_access = false; - mutable Poco::Logger * trace_log = nullptr; - mutable UserPtr user; - mutable String user_name; + const bool is_full_access = false; + mutable std::atomic user_was_dropped = false; - mutable scope_guard subscription_for_user_change; - mutable std::shared_ptr enabled_roles; - mutable scope_guard subscription_for_roles_changes; - mutable std::shared_ptr roles_info; - mutable std::shared_ptr access; - mutable std::shared_ptr access_with_implicit; - mutable std::shared_ptr enabled_row_policies; - mutable std::shared_ptr enabled_quota; - mutable std::shared_ptr enabled_settings; + mutable std::atomic trace_log = nullptr; + + mutable UserPtr user TSA_GUARDED_BY(mutex); + mutable String user_name TSA_GUARDED_BY(mutex); + mutable scope_guard subscription_for_user_change TSA_GUARDED_BY(mutex); + mutable std::shared_ptr enabled_roles TSA_GUARDED_BY(mutex); + mutable scope_guard subscription_for_roles_changes TSA_GUARDED_BY(mutex); + mutable std::shared_ptr roles_info TSA_GUARDED_BY(mutex); + mutable std::shared_ptr access TSA_GUARDED_BY(mutex); + mutable std::shared_ptr access_with_implicit TSA_GUARDED_BY(mutex); + mutable std::shared_ptr enabled_row_policies TSA_GUARDED_BY(mutex); + mutable std::shared_ptr enabled_quota TSA_GUARDED_BY(mutex); + mutable std::shared_ptr enabled_settings TSA_GUARDED_BY(mutex); + mutable std::mutex mutex; }; From 904c533a84e5766b4e5ca5e886d7a826cefdd0d4 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Fri, 30 Jun 2023 09:32:54 -0400 Subject: [PATCH 1056/1997] spelling list --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index f25d082e5a6..2802e52c288 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -2459,6 +2459,7 @@ unrounded untracked untrusted untuple +uploader uploaders upperUTF uptime From 91579453895b7dc88b0aec78bdde6311a6fc5a39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Elena=20Torr=C3=B3?= Date: Fri, 30 Jun 2023 15:43:04 +0200 Subject: [PATCH 1057/1997] Add missing settings --- docs/en/operations/storing-data.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md index 3f4456d7e12..fd82c955c40 100644 --- a/docs/en/operations/storing-data.md +++ b/docs/en/operations/storing-data.md @@ -184,7 +184,9 @@ These settings should be defined in the disk configuration section. - `enable_filesystem_query_cache_limit` - allow to limit the size of cache which is downloaded within each query (depends on user setting `max_query_cache_size`). Default: `false`. -- `enable_cache_hits_threshold` - number which defines how many times some data needs to be read before it will be cached. Default: `0`, e.g. the data is cached at the first attempt to read it. 
+- `enable_cache_hits_threshold` - number which defines how many times some data needs to be read before it will be cached, defined by `cache_hits_threshold`. Default: `0`, e.g. the data is cached at the first attempt to read it. + +- `enable_bypass_cache_with_threshold` - allows to skip cache completely in case the requested read range exceeds the threshold defined by `bypass_cache_threashold`. Default: `268435456` (`256Mi`). - `do_not_evict_index_and_mark_files` - do not evict small frequently used files according to cache policy. Default: `false`. This setting was added in version 22.8. If you used filesystem cache before this version, then it will not work on versions starting from 22.8 if this setting is set to `true`. If you want to use this setting, clear old cache created before version 22.8 before upgrading. From 982ded4e8f09891a968f72037637f7f9bfbd6647 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 30 Jun 2023 15:47:31 +0200 Subject: [PATCH 1058/1997] update timeouts in tests for transactions --- .../0_stateless/01169_alter_partition_isolation_stress.sh | 2 +- .../0_stateless/01171_mv_select_insert_isolation_long.sh | 2 +- tests/queries/0_stateless/01174_select_insert_isolation.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/01169_alter_partition_isolation_stress.sh b/tests/queries/0_stateless/01169_alter_partition_isolation_stress.sh index 508ad05224c..f2348c29146 100755 --- a/tests/queries/0_stateless/01169_alter_partition_isolation_stress.sh +++ b/tests/queries/0_stateless/01169_alter_partition_isolation_stress.sh @@ -240,7 +240,7 @@ kill -TERM $PID_1 kill -TERM $PID_2 wait ||: -wait_for_queries_to_finish +wait_for_queries_to_finish 40 $CLICKHOUSE_CLIENT -q "SELECT type, count(n) = countDistinct(n) FROM merge(currentDatabase(), '') GROUP BY type ORDER BY type" $CLICKHOUSE_CLIENT -q "SELECT DISTINCT arraySort(groupArrayIf(n, type=1)) = arraySort(groupArrayIf(n, type=2)) FROM merge(currentDatabase(), '') GROUP BY _table ORDER BY _table" diff --git a/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh b/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh index 199c2b5389f..619e212c3ae 100755 --- a/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh +++ b/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh @@ -131,7 +131,7 @@ kill -TERM $PID_6 kill -TERM $PID_7 kill -TERM $PID_8 wait -wait_for_queries_to_finish +wait_for_queries_to_finish 40 $CLICKHOUSE_CLIENT --multiquery --query " BEGIN TRANSACTION; diff --git a/tests/queries/0_stateless/01174_select_insert_isolation.sh b/tests/queries/0_stateless/01174_select_insert_isolation.sh index 29ccfbb1ccb..6321f6ff01b 100755 --- a/tests/queries/0_stateless/01174_select_insert_isolation.sh +++ b/tests/queries/0_stateless/01174_select_insert_isolation.sh @@ -56,7 +56,7 @@ thread_select & PID_4=$! 
wait $PID_1 && wait $PID_2 && wait $PID_3
 kill -TERM $PID_4
 wait
-wait_for_queries_to_finish
+wait_for_queries_to_finish 40
 
 $CLICKHOUSE_CLIENT --multiquery --query "
 BEGIN TRANSACTION;

From d6dacd3ccfe340410ead90ffcadd769716a61ec7 Mon Sep 17 00:00:00 2001
From: Dmitry Kardymon
Date: Fri, 30 Jun 2023 13:53:20 +0000
Subject: [PATCH 1059/1997] Fix test with num

---
 src/Functions/initcap.cpp | 38 ++++++-------------
 .../0_stateless/02810_initcap.reference | 1 +
 tests/queries/0_stateless/02810_initcap.sql | 1 +
 3 files changed, 14 insertions(+), 26 deletions(-)

diff --git a/src/Functions/initcap.cpp b/src/Functions/initcap.cpp
index 70c332d191d..7d0749ecb12 100644
--- a/src/Functions/initcap.cpp
+++ b/src/Functions/initcap.cpp
@@ -1,5 +1,6 @@
 #include <Functions/FunctionFactory.h>
 #include <Functions/FunctionStringToString.h>
+#include <Common/StringUtils/StringUtils.h>
 
 namespace DB
 {
 namespace
@@ -27,37 +28,22 @@ struct InitcapImpl
 private:
     static void array(const UInt8 * src, const UInt8 * src_end, UInt8 * dst)
     {
-        const auto flip_case_mask = 'A' ^ 'a';
-
-        auto is_lower_alpha = [](UInt8 c) { return c >= 'a' && c <= 'z'; };
-        auto is_upper_alpha = [](UInt8 c) { return c >= 'A' && c <= 'Z'; };
-        //auto is_digit = [](UInt8 c) { return c >= '0' && c <= '9'; };
-
-        bool prev_is_alpha = false;
+        bool prev_alphanum = false;
 
         for (; src < src_end; ++src, ++dst)
         {
-            bool lower = is_lower_alpha(*src);
-            bool is_alpha = lower || is_upper_alpha(*src);
-            if (!is_alpha)
-            {
-                *dst = *src;
-            }
-            else if (!prev_is_alpha)
-            {
-                if (lower)
-                    *dst = *src ^ flip_case_mask;
+            char c = *src;
+            bool alphanum = isAlphaNumericASCII(c);
+            if (alphanum && !prev_alphanum)
+                if (isAlphaASCII(c))
+                    *dst = toUpperIfAlphaASCII(c);
                 else
-                    *dst = *src;
-            }
+                    *dst = c;
+            else if (isAlphaASCII(c))
+                *dst = toLowerIfAlphaASCII(c);
             else
-            {
-                if (!lower)
-                    *dst = *src ^ flip_case_mask;
-                else
-                    *dst = *src;
-            }
-            prev_is_alpha = is_alpha;
+                *dst = c;
+            prev_alphanum = alphanum;
         }
     }
 };
diff --git a/tests/queries/0_stateless/02810_initcap.reference b/tests/queries/0_stateless/02810_initcap.reference
index 9fda79e4afb..4caa57e5ac0 100644
--- a/tests/queries/0_stateless/02810_initcap.reference
+++ b/tests/queries/0_stateless/02810_initcap.reference
@@ -4,3 +4,4 @@ Hello
 Hello World
 Yeah, Well, I`M Gonna Go Build My Own Theme Park
 Crc32ieee Is Best Function
+42ok
diff --git a/tests/queries/0_stateless/02810_initcap.sql b/tests/queries/0_stateless/02810_initcap.sql
index 810ea52dd6a..f61fcddf4c1 100644
--- a/tests/queries/0_stateless/02810_initcap.sql
+++ b/tests/queries/0_stateless/02810_initcap.sql
@@ -4,3 +4,4 @@ select initcap('hello');
 select initcap('hello world');
 select initcap('yeah, well, i`m gonna go build my own theme park');
 select initcap('CRC32IEEE is best function');
+select initcap('42oK');
\ No newline at end of file

From 48eb1880651d381b39e2fe64f9ee6f8c64e1b303 Mon Sep 17 00:00:00 2001
From: Sergei Trifonov
Date: Fri, 30 Jun 2023 15:53:37 +0200
Subject: [PATCH 1060/1997] Remove unused code

---
 src/Server/TCPHandler.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h
index 629738fa16f..235f634afec 100644
--- a/src/Server/TCPHandler.h
+++ b/src/Server/TCPHandler.h
@@ -83,8 +83,6 @@ struct QueryState
         NOT_CANCELLED
     };
 
-    static std::string cancellationStatusToName(CancellationStatus status);
-
     /// Is request cancelled
     CancellationStatus cancellation_status = CancellationStatus::NOT_CANCELLED;
     bool is_connection_closed = false;

From 9ddf0853bff9b5bbcbf92f7bcc61e7f5b97a4aea Mon Sep 17 00:00:00 2001
From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com>
Date: Fri, 30 Jun
2023 16:12:39 +0200 Subject: [PATCH 1061/1997] Update 02808_custom_disk_with_user_defined_name.sh --- .../0_stateless/02808_custom_disk_with_user_defined_name.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02808_custom_disk_with_user_defined_name.sh b/tests/queries/0_stateless/02808_custom_disk_with_user_defined_name.sh index 537e117adb9..3fbcde66add 100755 --- a/tests/queries/0_stateless/02808_custom_disk_with_user_defined_name.sh +++ b/tests/queries/0_stateless/02808_custom_disk_with_user_defined_name.sh @@ -12,8 +12,8 @@ $CLICKHOUSE_CLIENT -nm --query """ DROP TABLE IF EXISTS test; CREATE TABLE test (a Int32, b String) ENGINE = MergeTree() ORDER BY tuple() -SETTINGS disk = disk_s3disk(type = cache, max_size = '100Ki', path = ${CLICKHOUSE_TEST_UNIQUE_NAME}, disk = s3_disk); -""" 2>&1 | grep -q "Disk with name \`s3disk\` already exist" && echo 'OK' || echo 'FAIL' +SETTINGS disk = disk_s3_disk(type = cache, max_size = '100Ki', path = ${CLICKHOUSE_TEST_UNIQUE_NAME}, disk = s3_disk); +""" 2>&1 | grep -q "Disk with name \`s3_disk\` already exist" && echo 'OK' || echo 'FAIL' disk_name="${CLICKHOUSE_TEST_UNIQUE_NAME}" From 0b98406bd345f54c386435a11d126c07234f1aef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Elena=20Torr=C3=B3?= Date: Fri, 30 Jun 2023 16:13:43 +0200 Subject: [PATCH 1062/1997] Add default values to False --- docs/en/operations/storing-data.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md index fd82c955c40..17d62673a8e 100644 --- a/docs/en/operations/storing-data.md +++ b/docs/en/operations/storing-data.md @@ -184,9 +184,9 @@ These settings should be defined in the disk configuration section. - `enable_filesystem_query_cache_limit` - allow to limit the size of cache which is downloaded within each query (depends on user setting `max_query_cache_size`). Default: `false`. -- `enable_cache_hits_threshold` - number which defines how many times some data needs to be read before it will be cached, defined by `cache_hits_threshold`. Default: `0`, e.g. the data is cached at the first attempt to read it. +- `enable_cache_hits_threshold` - number which defines how many times some data needs to be read before it will be cached. Default :`false`. This threshold can be defined by `cache_hits_threshold`. Default: `0`, e.g. the data is cached at the first attempt to read it. -- `enable_bypass_cache_with_threshold` - allows to skip cache completely in case the requested read range exceeds the threshold defined by `bypass_cache_threashold`. Default: `268435456` (`256Mi`). +- `enable_bypass_cache_with_threshold` - allows to skip cache completely in case the requested read range exceeds the threshold. Default :`false`. This threshold can be defined by `bypass_cache_threashold`. Default: `268435456` (`256Mi`). - `do_not_evict_index_and_mark_files` - do not evict small frequently used files according to cache policy. Default: `false`. This setting was added in version 22.8. If you used filesystem cache before this version, then it will not work on versions starting from 22.8 if this setting is set to `true`. If you want to use this setting, clear old cache created before version 22.8 before upgrading. 
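Taken together, the documentation edits above describe a small admission policy for the filesystem cache: reads that are too large can bypass the cache entirely, and cold data can be required to accumulate a number of hits before it is cached at all. A schematic sketch of that policy as documented; the struct and function names are illustrative, not ClickHouse's actual read path:

```cpp
#include <cstddef>
#include <cstdint>

// Schematic model of the documented cache-admission settings; defaults
// mirror the docs above, the decision function itself is illustrative only.
struct FileCacheSettings
{
    bool enable_bypass_cache_with_threshold = false;
    size_t bypass_cache_threshold = 268435456;   // 256Mi, per the docs

    bool enable_cache_hits_threshold = false;
    uint64_t cache_hits_threshold = 0;           // 0 => cache on the first read
};

/// Decide whether a read of `range_size` bytes, for data already requested
/// `previous_hits` times, should go through the cache at all.
bool shouldReadThroughCache(const FileCacheSettings & s, size_t range_size, uint64_t previous_hits)
{
    if (s.enable_bypass_cache_with_threshold && range_size > s.bypass_cache_threshold)
        return false;   // oversized scan: stream it, keep the cache for hot data
    if (s.enable_cache_hits_threshold && previous_hits < s.cache_hits_threshold)
        return false;   // not read often enough yet to be worth caching
    return true;
}
```

Under the documented defaults both guards are disabled, so every read goes through the cache.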
From 689ff6f996585da91cf80eb2916f1d23706c1f30 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Fri, 30 Jun 2023 16:15:26 +0200 Subject: [PATCH 1063/1997] Update storing-data.md --- docs/en/operations/storing-data.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md index 17d62673a8e..fe6e8e15b0c 100644 --- a/docs/en/operations/storing-data.md +++ b/docs/en/operations/storing-data.md @@ -184,9 +184,9 @@ These settings should be defined in the disk configuration section. - `enable_filesystem_query_cache_limit` - allow to limit the size of cache which is downloaded within each query (depends on user setting `max_query_cache_size`). Default: `false`. -- `enable_cache_hits_threshold` - number which defines how many times some data needs to be read before it will be cached. Default :`false`. This threshold can be defined by `cache_hits_threshold`. Default: `0`, e.g. the data is cached at the first attempt to read it. +- `enable_cache_hits_threshold` - number which defines how many times some data needs to be read before it will be cached. Default: `false`. This threshold can be defined by `cache_hits_threshold`. Default: `0`, e.g. the data is cached at the first attempt to read it. -- `enable_bypass_cache_with_threshold` - allows to skip cache completely in case the requested read range exceeds the threshold. Default :`false`. This threshold can be defined by `bypass_cache_threashold`. Default: `268435456` (`256Mi`). +- `enable_bypass_cache_with_threshold` - allows to skip cache completely in case the requested read range exceeds the threshold. Default: `false`. This threshold can be defined by `bypass_cache_threashold`. Default: `268435456` (`256Mi`). - `do_not_evict_index_and_mark_files` - do not evict small frequently used files according to cache policy. Default: `false`. This setting was added in version 22.8. If you used filesystem cache before this version, then it will not work on versions starting from 22.8 if this setting is set to `true`. If you want to use this setting, clear old cache created before version 22.8 before upgrading. 
From fcffe2b5a22a29e4821072df5dc3f716d3308b95 Mon Sep 17 00:00:00 2001
From: Alexander Sapin
Date: Fri, 30 Jun 2023 16:31:18 +0200
Subject: [PATCH 1064/1997] Increase mmap threshold to unattainable value

---
 src/Common/Allocator.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Common/Allocator.cpp b/src/Common/Allocator.cpp
index 5a66ddb63a2..6779fee58e6 100644
--- a/src/Common/Allocator.cpp
+++ b/src/Common/Allocator.cpp
@@ -8,7 +8,7 @@
  * See also: https://gcc.gnu.org/legacy-ml/gcc-help/2017-12/msg00021.html
  */
 #ifdef NDEBUG
-    __attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 64 * (1ULL << 20);
+    __attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 64 * (1ULL << 32);
 #else
     /**
      * In debug build, use small mmap threshold to reproduce more memory

From 542b03bfd1bf8b7f6af69c76c2ef1d7f86d535b0 Mon Sep 17 00:00:00 2001
From: Julio Jimenez
Date: Fri, 30 Jun 2023 10:41:31 -0400
Subject: [PATCH 1065/1997] SonarCloud: Add C++23 Experimental Flag

Signed-off-by: Julio Jimenez

---
 .github/workflows/nightly.yml | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml
index acf6bbe8f6a..b508758ac7c 100644
--- a/.github/workflows/nightly.yml
+++ b/.github/workflows/nightly.yml
@@ -122,7 +122,7 @@ jobs:
           sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
   SonarCloud:
     # TODO: Remove if: whenever SonarCloud supports c++23
-    if: ${{ false }}
+    # if: ${{ false }}
     runs-on: [self-hosted, builder]
     env:
       SONAR_SCANNER_VERSION: 4.8.0.2856
@@ -178,4 +178,5 @@ jobs:
           --define sonar.cfamily.build-wrapper-output="${{ env.BUILD_WRAPPER_OUT_DIR }}" \
           --define sonar.projectKey="ClickHouse_ClickHouse" \
           --define sonar.organization="clickhouse-java" \
-          --define sonar.exclusions="**/*.java,**/*.ts,**/*.js,**/*.css,**/*.sql" \
+          --define sonar.cfamily.cpp23.enabled=true \
+          --define sonar.exclusions="**/*.java,**/*.ts,**/*.js,**/*.css,**/*.sql"

From 00aeb407e5f88440ea82c423ccb701678a3e8e3f Mon Sep 17 00:00:00 2001
From: Antonio Andelic
Date: Fri, 30 Jun 2023 14:41:27 +0000
Subject: [PATCH 1066/1997] Wait with retries

---
 docker/test/stateless/run.sh | 31 ++++---------------------------
 tests/ci/attach_gdb.lib | 4 +++-
 tests/ci/utils.lib | 25 +++++++++++++++++++++++++
 3 files changed, 32 insertions(+), 28 deletions(-)
 create mode 100644 tests/ci/utils.lib

diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh
index 914e51a9f66..7ccedb8c0b3 100755
--- a/docker/test/stateless/run.sh
+++ b/docker/test/stateless/run.sh
@@ -18,6 +18,9 @@ ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test
 # shellcheck disable=SC1091
 source /usr/share/clickhouse-test/ci/attach_gdb.lib || true # FIXME: to not break old builds, clean on 2023-09-01
 
+# shellcheck disable=SC1091
+source /usr/share/clickhouse-test/ci/utils.lib
+
 # install test configs
 /usr/share/clickhouse-test/config/install.sh
 
@@ -90,30 +93,6 @@ sleep 5
 attach_gdb_to_clickhouse || true # FIXME: to not break old builds, clean on 2023-09-01
 
-function run_with_retry()
-{
-    set +e
-
-    local total_retries="$1"
-    shift
-
-    local retry=0
-
-    until [ "$retry" -ge "$total_retries" ]
-    do
-        if "$@"; then
-            set -e
-            return
-        else
-            retry=$((retry + 1))
-            sleep 3
-        fi
-    done
-
-    echo "Command '$*' failed after $total_retries retries, exiting"
-    exit 1
-}
-
 function run_tests()
 {
     set -x
@@ -161,9 +140,7 @@ function run_tests()
 
     ADDITIONAL_OPTIONS+=('--report-logs-stats')
 
-    clickhouse-test "00001_select_1" > /dev/null ||:
-
-    run_with_retry 5
clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')" + run_with_retry 10 clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')" set +e clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ diff --git a/tests/ci/attach_gdb.lib b/tests/ci/attach_gdb.lib index 2df6243f796..e937cf6dba7 100644 --- a/tests/ci/attach_gdb.lib +++ b/tests/ci/attach_gdb.lib @@ -1,5 +1,7 @@ #!/bin/bash +source /usr/share/clickhouse-test/ci/utils.lib + function attach_gdb_to_clickhouse() { # Set follow-fork-mode to parent, because we attach to clickhouse-server, not to watchdog @@ -38,5 +40,5 @@ quit gdb -batch -command script.gdb -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log & sleep 5 # gdb will send SIGSTOP, spend some time loading debug info and then send SIGCONT, wait for it (up to send_timeout, 300s) - time clickhouse-client --query "SELECT 'Connected to clickhouse-server after attaching gdb'" ||: + run_with_retry 60 clickhouse-client --query "SELECT 'Connected to clickhouse-server after attaching gdb'" } diff --git a/tests/ci/utils.lib b/tests/ci/utils.lib new file mode 100644 index 00000000000..95ad50f635b --- /dev/null +++ b/tests/ci/utils.lib @@ -0,0 +1,25 @@ +#!/bin/bash + +function run_with_retry() +{ + set +e + + local total_retries="$1" + shift + + local retry=0 + + until [ "$retry" -ge "$total_retries" ] + do + if "$@"; then + set -e + return + else + retry=$((retry + 1)) + sleep 5 + fi + done + + echo "Command '$*' failed after $total_retries retries, exiting" + exit 1 +} \ No newline at end of file From 185e106c1faff2201db3f33f7497ab4a43d1c7e6 Mon Sep 17 00:00:00 2001 From: Julio Jimenez Date: Fri, 30 Jun 2023 10:53:04 -0400 Subject: [PATCH 1067/1997] Please install NASM because NASM compiler cannot be found Signed-off-by: Julio Jimenez --- .github/workflows/nightly.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index b508758ac7c..3f6d9b86fd6 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -159,7 +159,7 @@ jobs: - name: Set Up Build Tools run: | sudo apt-get update - sudo apt-get install -yq git cmake ccache ninja-build python3 yasm + sudo apt-get install -yq git cmake ccache ninja-build python3 yasm nasm sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" - name: Run build-wrapper run: | From 5ee7f47b03d3b741db80dae38dfb169f338d9dcc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 30 Jun 2023 17:09:56 +0200 Subject: [PATCH 1068/1997] Update autogenerated version to 23.7.1.1 and contributors --- cmake/autogenerated_versions.txt | 10 +++++----- .../System/StorageSystemContributors.generated.cpp | 9 +++++++++ 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 015037b2de6..821b7b46855 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. 
-SET(VERSION_REVISION 54475) +SET(VERSION_REVISION 54476) SET(VERSION_MAJOR 23) -SET(VERSION_MINOR 6) +SET(VERSION_MINOR 7) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH 2fec796e73efda10a538a03af3205ce8ffa1b2de) -SET(VERSION_DESCRIBE v23.6.1.1-testing) -SET(VERSION_STRING 23.6.1.1) +SET(VERSION_GITHASH d1c7e13d08868cb04d3562dcced704dd577cb1df) +SET(VERSION_DESCRIBE v23.7.1.1-testing) +SET(VERSION_STRING 23.7.1.1) # end of autochange diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index f83ee3197fe..f84c554afc0 100644 --- a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -226,6 +226,7 @@ const char * auto_contributors[] { "Carbyn", "Carlos Rodríguez Hernández", "Caspian", + "Chang Chen", "Chao Ma", "Chao Wang", "CheSema", @@ -291,6 +292,7 @@ const char * auto_contributors[] { "Dmitry Belyavtsev", "Dmitry Bilunov", "Dmitry Galuza", + "Dmitry Kardymon", "Dmitry Krylov", "Dmitry Luhtionov", "Dmitry Moskowski", @@ -408,6 +410,7 @@ const char * auto_contributors[] { "HeenaBansal2009", "Hiroaki Nakamura", "Hongbin", + "Hongbin Ma", "Hosun Lee", "HuFuwang", "Hui Wang", @@ -491,6 +494,7 @@ const char * auto_contributors[] { "Josh Taylor", "João Figueiredo", "Julian Gilyadov", + "Julian Maicher", "Julian Zhou", "Julio Jimenez", "Junfu Wu", @@ -917,6 +921,7 @@ const char * auto_contributors[] { "Thom O'Connor", "Thomas Berdy", "Thomas Casteleyn", + "Thomas Panetti", "Tian Xinhui", "Tiaonmmn", "Tigran Khudaverdyan", @@ -1022,6 +1027,7 @@ const char * auto_contributors[] { "Y Lu", "Yakko Majuri", "Yakov Olkhovskiy", + "YalalovSM", "Yangkuan Liu", "Yatian Xu", "Yatsishin Ilya", @@ -1300,6 +1306,7 @@ const char * auto_contributors[] { "kashwy", "keenwolf", "kevin wan", + "kevinyhzou", "kgurjev", "khamadiev", "kigerzhang", @@ -1545,6 +1552,7 @@ const char * auto_contributors[] { "tiger.yan", "tison", "topvisor", + "tpanetti", "turbo jason", "tyrionhuang", "ubuntu", @@ -1673,6 +1681,7 @@ const char * auto_contributors[] { "董海镔", "谢磊", "贾顺名(Jarvis)", + "郭小龙", "陈小玉", "靳阳", "黄朝晖", From 8ed1ec49d181123227192e7dfac6ee0119a2e8d7 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 30 Jun 2023 15:21:13 +0000 Subject: [PATCH 1069/1997] Update version_date.tsv and changelogs after v23.6.1.1524-stable --- SECURITY.md | 1 + docker/keeper/Dockerfile | 2 +- docker/server/Dockerfile.alpine | 2 +- docker/server/Dockerfile.ubuntu | 2 +- docs/changelogs/v23.6.1.1524-stable.md | 301 +++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 1 + 6 files changed, 306 insertions(+), 3 deletions(-) create mode 100644 docs/changelogs/v23.6.1.1524-stable.md diff --git a/SECURITY.md b/SECURITY.md index 1864eb6e9e5..4ba5f13d09c 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -13,6 +13,7 @@ The following versions of ClickHouse server are currently being supported with s | Version | Supported | |:-|:-| +| 23.6 | ✔️ | | 23.5 | ✔️ | | 23.4 | ✔️ | | 23.3 | ✔️ | diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index 6d53a6f4c51..f13fcdc14d6 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ esac ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release" -ARG VERSION="23.5.4.25" +ARG VERSION="23.6.1.1524" ARG PACKAGES="clickhouse-keeper" # user/group precreated explicitly with fixed uid/gid on purpose. 
diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 91b22346f13..5e5be3f6d73 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="23.5.4.25" +ARG VERSION="23.6.1.1524" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 0ed0e4e1168..8693193455f 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -23,7 +23,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="23.5.4.25" +ARG VERSION="23.6.1.1524" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docs/changelogs/v23.6.1.1524-stable.md b/docs/changelogs/v23.6.1.1524-stable.md new file mode 100644 index 00000000000..6d295d61ef4 --- /dev/null +++ b/docs/changelogs/v23.6.1.1524-stable.md @@ -0,0 +1,301 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.6.1.1524-stable (d1c7e13d088) FIXME as compared to v23.5.1.3174-stable (2fec796e73e) + +#### Backward Incompatible Change +* Delete feature `do_not_evict_index_and_mark_files` in the fs cache. This feature was only making things worse. [#51253](https://github.com/ClickHouse/ClickHouse/pull/51253) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Remove ALTER support for experimental LIVE VIEW. [#51287](https://github.com/ClickHouse/ClickHouse/pull/51287) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### New Feature +* Add setting `session_timezone`, it is used as default timezone for session when not explicitly specified. [#44149](https://github.com/ClickHouse/ClickHouse/pull/44149) ([Andrey Zvonov](https://github.com/zvonand)). +* Added overlay database engine and representation of a directory as a database This commit adds 4 databases: 1. DatabaseOverlay: Implements the IDatabase interface. Allow to combine multiple databases, such as FileSystem and Memory. Internally, it stores a vector with other database pointers and proxies requests to them in turn until it is executed successfully. 2. DatabaseFilesystem: allows to read-only interact with files stored on the file system. Internally, it uses TableFunctionFile to implicitly load file when a user requests the table. Result of TableFunctionFile call cached inside to provide quick access. 3. DatabaseS3: allows to read-only interact with s3 storage. It uses TableFunctionS3 to implicitly load table from s3 4. DatabaseHDFS: allows to interact with hdfs storage. It uses TableFunctionHDFS to implicitly load table from hdfs. [#48821](https://github.com/ClickHouse/ClickHouse/pull/48821) ([alekseygolub](https://github.com/alekseygolub)). +* Add a new setting named `use_mysql_types_in_show_columns` to alter the `SHOW COLUMNS` SQL statement to display MySQL equivalent types when a client is connected via the MySQL compatibility port. 
[#49577](https://github.com/ClickHouse/ClickHouse/pull/49577) ([Thomas Panetti](https://github.com/tpanetti)). +* Added option `--rename_files_after_processing `. This closes [#34207](https://github.com/ClickHouse/ClickHouse/issues/34207). [#49626](https://github.com/ClickHouse/ClickHouse/pull/49626) ([alekseygolub](https://github.com/alekseygolub)). +* 1. Add `TableFunctionRedis` 3. Add table engine Redis 4. Add `RedisCommon` which contains Redis related tools and types 5. Support `equals` and `in` filter push down into Redis. [#50150](https://github.com/ClickHouse/ClickHouse/pull/50150) ([JackyWoo](https://github.com/JackyWoo)). +* Allow to skip empty files in file/s3/url/hdfs table functions using settings `s3_skip_empty_files`, `hdfs_skip_empty_files`, `engine_file_skip_empty_files`, `engine_url_skip_empty_files`. [#50364](https://github.com/ClickHouse/ClickHouse/pull/50364) ([Kruglov Pavel](https://github.com/Avogar)). +* Clickhouse-client can now be called with a connection instead of "--host", "--port", "--user" etc. [#50689](https://github.com/ClickHouse/ClickHouse/pull/50689) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Codec DEFLATE_QPL is now controlled via server setting "enable_deflate_qpl_codec" (default: false) instead of setting "allow_experimental_codecs". This marks QPL_DEFLATE non-experimental. [#50775](https://github.com/ClickHouse/ClickHouse/pull/50775) ([Robert Schulze](https://github.com/rschu1ze)). + +#### Performance Improvement +* Improve performance with enabled QueryProfiler using thread-local timer_id instead of global object. [#48778](https://github.com/ClickHouse/ClickHouse/pull/48778) ([Jiebin Sun](https://github.com/jiebinn)). +* Rewrite CapnProto input/output format to improve its performance. Map column names and CapnProto fields case insensitive, fix reading/writing of nested structure fields. [#49752](https://github.com/ClickHouse/ClickHouse/pull/49752) ([Kruglov Pavel](https://github.com/Avogar)). +* Optimize parquet write performance for parallel threads. [#50102](https://github.com/ClickHouse/ClickHouse/pull/50102) ([Hongbin Ma](https://github.com/binmahone)). +* ### Documentation entry for user-facing changes Disable `parallelize_output_from_storages` for processing MATERIALIZED VIEWs and storages with one block only. [#50214](https://github.com/ClickHouse/ClickHouse/pull/50214) ([Azat Khuzhin](https://github.com/azat)). +* Merge PR https://github.com/ClickHouse/ClickHouse/pull/46558 (Avoid processing already sorted data). Avoid block permutation during sort if the block is already sorted. [#50697](https://github.com/ClickHouse/ClickHouse/pull/50697) ([Maksim Kita](https://github.com/kitaisreal)). +* In the earlier PRs ([#50062](https://github.com/ClickHouse/ClickHouse/issues/50062), [#50307](https://github.com/ClickHouse/ClickHouse/issues/50307)), we used to propose an optimization pattern which transforms the predicates with toYear/toYYYYMM into its equivalent but converter-free form. This transformation could bring significant performance impact to some workloads, such as SSB. However, as issue [#50628](https://github.com/ClickHouse/ClickHouse/issues/50628) indicated, these two PRs would introduce some issues which may results in incomplete query results, and as a result, they were reverted by [#50629](https://github.com/ClickHouse/ClickHouse/issues/50629). [#50951](https://github.com/ClickHouse/ClickHouse/pull/50951) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). 
+* Make multiple list requests to ZooKeeper in parallel to speed up reading from system.zookeeper table. [#51042](https://github.com/ClickHouse/ClickHouse/pull/51042) ([Alexander Gololobov](https://github.com/davenger)). +* Speedup initialization of DateTime lookup tables for time zones. This should reduce startup/connect time of clickhouse client especially in debug build as it is rather heavy. [#51347](https://github.com/ClickHouse/ClickHouse/pull/51347) ([Alexander Gololobov](https://github.com/davenger)). + +#### Improvement +* Allow to cast IPv6 to IPv4 address for CIDR ::ffff:0:0/96 (IPv4-mapped addresses). [#49759](https://github.com/ClickHouse/ClickHouse/pull/49759) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Update MongoDB protocol to support MongoDB 5.1 version and newer. Support for the versions with the old protocol (<3.6) is preserved. Closes [#45621](https://github.com/ClickHouse/ClickHouse/issues/45621), [#49879](https://github.com/ClickHouse/ClickHouse/issues/49879). [#50061](https://github.com/ClickHouse/ClickHouse/pull/50061) ([Nikolay Degterinsky](https://github.com/evillique)). +* Improved scheduling of merge selecting and cleanup tasks in `ReplicatedMergeTree`. The tasks will not be executed too frequently when there's nothing to merge or cleanup. Added settings `max_merge_selecting_sleep_ms`, `merge_selecting_sleep_slowdown_factor`, `max_cleanup_delay_period` and `cleanup_thread_preferred_points_per_iteration`. It should close [#31919](https://github.com/ClickHouse/ClickHouse/issues/31919). [#50107](https://github.com/ClickHouse/ClickHouse/pull/50107) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Support parallel replicas with the analyzer. [#50441](https://github.com/ClickHouse/ClickHouse/pull/50441) ([Raúl Marín](https://github.com/Algunenano)). +* Add setting `input_format_max_bytes_to_read_for_schema_inference` to limit the number of bytes to read in schema inference. Closes [#50577](https://github.com/ClickHouse/ClickHouse/issues/50577). [#50592](https://github.com/ClickHouse/ClickHouse/pull/50592) ([Kruglov Pavel](https://github.com/Avogar)). +* Respect setting input_format_as_default in schema inference. [#50602](https://github.com/ClickHouse/ClickHouse/pull/50602) ([Kruglov Pavel](https://github.com/Avogar)). +* Make filter push down through cross join. [#50605](https://github.com/ClickHouse/ClickHouse/pull/50605) ([Han Fei](https://github.com/hanfei1991)). +* Actual lz4 version is used now. [#50621](https://github.com/ClickHouse/ClickHouse/pull/50621) ([Nikita Taranov](https://github.com/nickitat)). +* Allow to skip trailing empty lines in CSV/TSV/CustomSeparated formats via settings `input_format_csv_skip_trailing_empty_lines`, `input_format_tsv_skip_trailing_empty_lines` and `input_format_custom_skip_trailing_empty_lines` (disabled by default). Closes [#49315](https://github.com/ClickHouse/ClickHouse/issues/49315). [#50635](https://github.com/ClickHouse/ClickHouse/pull/50635) ([Kruglov Pavel](https://github.com/Avogar)). +* Functions "toDateOrDefault|OrNull()" and "accuateCast[OrDefault|OrNull]()" now correctly parse numeric arguments. [#50709](https://github.com/ClickHouse/ClickHouse/pull/50709) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Currently, the csv input format can not parse the csv file with whitespace or \t field delimiter, and these delimiters is supported in spark. [#50712](https://github.com/ClickHouse/ClickHouse/pull/50712) ([KevinyhZou](https://github.com/KevinyhZou)). 
+* Settings `number_of_mutations_to_delay` and `number_of_mutations_to_throw` are enabled by default now with values 500 and 1000 respectively. [#50726](https://github.com/ClickHouse/ClickHouse/pull/50726) ([Anton Popov](https://github.com/CurtizJ)). +* Keeper improvement: add feature flags for Keeper API. Each feature flag can be disabled or enabled by defining it under `keeper_server.feature_flags` config. E.g. to enable `CheckNotExists` request, `keeper_server.feature_flags.check_not_exists` should be set to `1` on Keeper. [#50796](https://github.com/ClickHouse/ClickHouse/pull/50796) ([Antonio Andelic](https://github.com/antonio2368)). +* The dashboard correctly shows missing values. This closes [#50831](https://github.com/ClickHouse/ClickHouse/issues/50831). [#50832](https://github.com/ClickHouse/ClickHouse/pull/50832) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* CGroups metrics related to CPU are replaced with one metric, `CGroupMaxCPU` for better usability. The `Normalized` CPU usage metrics will be normalized to CGroups limits instead of the total number of CPUs when they are set. This closes [#50836](https://github.com/ClickHouse/ClickHouse/issues/50836). [#50835](https://github.com/ClickHouse/ClickHouse/pull/50835) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Relax the thresholds for "too many parts" to be more modern. Return the backpressure during long-running insert queries. [#50856](https://github.com/ClickHouse/ClickHouse/pull/50856) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added the possibility to use date and time arguments in syslog timestamp format in functions parseDateTimeBestEffort*() and parseDateTime64BestEffort*(). [#50925](https://github.com/ClickHouse/ClickHouse/pull/50925) ([Victor Krasnov](https://github.com/sirvickr)). +* Suggest using `APPEND` or `TRUNCATE` for `INTO OUTFILE` when file exists. [#50950](https://github.com/ClickHouse/ClickHouse/pull/50950) ([alekar](https://github.com/alekar)). +* Add embedded keeper-client to standalone keeper binary. [#50964](https://github.com/ClickHouse/ClickHouse/pull/50964) ([pufit](https://github.com/pufit)). +* Command line parameter "--password" in clickhouse-client can now be specified only once. [#50966](https://github.com/ClickHouse/ClickHouse/pull/50966) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Fix data lakes slowness because of synchronous head requests. (Related to Iceberg/Deltalake/Hudi being slow with a lot of files). [#50976](https://github.com/ClickHouse/ClickHouse/pull/50976) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Use `hash_of_all_files` from `system.parts` to check identity of parts during on-cluster backups. [#50997](https://github.com/ClickHouse/ClickHouse/pull/50997) ([Vitaly Baranov](https://github.com/vitlibar)). +* The system table zookeeper_connection connected_time identifies the time when the connection is established (standard format), and session_uptime_elapsed_seconds is added, which labels the duration of the established connection session (in seconds). [#51026](https://github.com/ClickHouse/ClickHouse/pull/51026) ([郭小龙](https://github.com/guoxiaolongzte)). +* Show halves of checksums in `system.parts`, `system.projection_parts` and in error messages in the correct order. [#51040](https://github.com/ClickHouse/ClickHouse/pull/51040) ([Vitaly Baranov](https://github.com/vitlibar)). 
+* Do not replicate `ALTER PARTITION` queries and mutations through `Replicated` database if it has only one shard and the underlying table is `ReplicatedMergeTree`. [#51049](https://github.com/ClickHouse/ClickHouse/pull/51049) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Improve the progress bar for file/s3/hdfs/url table functions by using chunk size from source data and using incremental total size counting in each thread. Fix the progress bar for *Cluster functions. This closes [#47250](https://github.com/ClickHouse/ClickHouse/issues/47250). [#51088](https://github.com/ClickHouse/ClickHouse/pull/51088) ([Kruglov Pavel](https://github.com/Avogar)). +* Add total_bytes_to_read to Progress packet in TCP protocol for better Progress bar. [#51158](https://github.com/ClickHouse/ClickHouse/pull/51158) ([Kruglov Pavel](https://github.com/Avogar)). +* Better checking of data parts on disks with filesystem cache. [#51164](https://github.com/ClickHouse/ClickHouse/pull/51164) ([Anton Popov](https://github.com/CurtizJ)). +* Disable cache setting `do_not_evict_index_and_mark_files` (Was enabled in `23.5`). [#51222](https://github.com/ClickHouse/ClickHouse/pull/51222) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix sometimes not correct current_elements_num in fs cache. [#51242](https://github.com/ClickHouse/ClickHouse/pull/51242) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add random sleep before merges/mutations execution to split load more evenly between replicas in case of zero-copy replication. [#51282](https://github.com/ClickHouse/ClickHouse/pull/51282) ([alesapin](https://github.com/alesapin)). +* The function `transform` as well as `CASE` with value matching started to support all data types. This closes [#29730](https://github.com/ClickHouse/ClickHouse/issues/29730). This closes [#32387](https://github.com/ClickHouse/ClickHouse/issues/32387). This closes [#50827](https://github.com/ClickHouse/ClickHouse/issues/50827). This closes [#31336](https://github.com/ClickHouse/ClickHouse/issues/31336). This closes [#40493](https://github.com/ClickHouse/ClickHouse/issues/40493). [#51351](https://github.com/ClickHouse/ClickHouse/pull/51351) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* We have found a bug in LLVM that makes the usage of `compile_expressions` setting unsafe. It is disabled by default. [#51368](https://github.com/ClickHouse/ClickHouse/pull/51368) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Issue [#50220](https://github.com/ClickHouse/ClickHouse/issues/50220) reports a core in `grace_hash` join. We finally reproduce the exception on local, and found that the issue is related to the failure of creating temporary file. Somehow this is triggered in https://github.com/ClickHouse/ClickHouse/pull/49816 https://github.com/ClickHouse/ClickHouse/pull/49483. [#51382](https://github.com/ClickHouse/ClickHouse/pull/51382) ([lgbo](https://github.com/lgbo-ustc)). + +#### Build/Testing/Packaging Improvement +* Update contrib/re2 to 2023-06-02. [#50949](https://github.com/ClickHouse/ClickHouse/pull/50949) ([Yuriy Chernyshov](https://github.com/georgthegreat)). +* ClickHouse server will print the list of changed settings on fatal errors. This closes [#51137](https://github.com/ClickHouse/ClickHouse/issues/51137). [#51138](https://github.com/ClickHouse/ClickHouse/pull/51138) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* In https://github.com/ClickHouse/ClickHouse/pull/51143 the fast tests failed, but the status wasn't created because of a chown `file not found` error. This addresses it. Decrease the default values for `http-max-field-value-size` and `http_max_field_name_size` to 128K. [#51163](https://github.com/ClickHouse/ClickHouse/pull/51163) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Update Ubuntu version in docker containers. [#51180](https://github.com/ClickHouse/ClickHouse/pull/51180) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Allow building ClickHouse with clang-17. [#51300](https://github.com/ClickHouse/ClickHouse/pull/51300) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* [SQLancer](https://github.com/sqlancer/sqlancer) check is considered stable, as the bugs that were triggered by it are fixed. Now failures of the SQLancer check will be reported as a failed check status. [#51340](https://github.com/ClickHouse/ClickHouse/pull/51340) ([Ilya Yatsishin](https://github.com/qoega)).
+* Making our CI even better. [#51494](https://github.com/ClickHouse/ClickHouse/pull/51494) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Split the huge `RUN` in the Dockerfile into smaller conditional ones. Install the necessary tools on demand in the same `RUN` layer, and remove them after that. Upgrade the OS only once at the beginning. Use a modern way to check the signed repository. Downgrade the base repo to ubuntu:20.04 to address the issues on older docker versions. Upgrade the golang version to address golang vulnerabilities. [#51504](https://github.com/ClickHouse/ClickHouse/pull/51504) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* This is a follow-up for [#51504](https://github.com/ClickHouse/ClickHouse/issues/51504); the cleanup was lost during refactoring. [#51564](https://github.com/ClickHouse/ClickHouse/pull/51564) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+
+* Report loading status for executable dictionaries correctly [#48775](https://github.com/ClickHouse/ClickHouse/pull/48775) ([Anton Kozlov](https://github.com/tonickkozlov)).
+* Proper mutation of skip indices and projections [#50104](https://github.com/ClickHouse/ClickHouse/pull/50104) ([Amos Bird](https://github.com/amosbird)).
+* Cleanup moving parts [#50489](https://github.com/ClickHouse/ClickHouse/pull/50489) ([vdimir](https://github.com/vdimir)).
+* Fix backward compatibility for IP types hashing in aggregate functions [#50551](https://github.com/ClickHouse/ClickHouse/pull/50551) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Fix Log family table returning a wrong rows count after truncate [#50585](https://github.com/ClickHouse/ClickHouse/pull/50585) ([flynn](https://github.com/ucasfl)).
+* Fix bug in `uniqExact` parallel merging [#50590](https://github.com/ClickHouse/ClickHouse/pull/50590) ([Nikita Taranov](https://github.com/nickitat)).
+* Revert recent grace hash join changes [#50699](https://github.com/ClickHouse/ClickHouse/pull/50699) ([vdimir](https://github.com/vdimir)).
+* Query Cache: Try to fix bad cast from ColumnConst to ColumnVector [#50704](https://github.com/ClickHouse/ClickHouse/pull/50704) ([Robert Schulze](https://github.com/rschu1ze)).
+* Do not read all the columns from the right GLOBAL JOIN table. [#50721](https://github.com/ClickHouse/ClickHouse/pull/50721) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Avoid storing logs in Keeper containing unknown operation [#50751](https://github.com/ClickHouse/ClickHouse/pull/50751) ([Antonio Andelic](https://github.com/antonio2368)).
+* SummingMergeTree support for DateTime64 [#50797](https://github.com/ClickHouse/ClickHouse/pull/50797) ([Jordi Villar](https://github.com/jrdi)).
+* Add compat setting for non-const timezones [#50834](https://github.com/ClickHouse/ClickHouse/pull/50834) ([Robert Schulze](https://github.com/rschu1ze)).
+* Fix type of LDAP server params hash in cache entry [#50865](https://github.com/ClickHouse/ClickHouse/pull/50865) ([Julian Maicher](https://github.com/jmaicher)).
+* Fall back to parsing big integer from String instead of throwing an exception in Parquet format [#50873](https://github.com/ClickHouse/ClickHouse/pull/50873) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix checking the lock file too often while writing a backup [#50889](https://github.com/ClickHouse/ClickHouse/pull/50889) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Do not apply projection if read-in-order was enabled. [#50923](https://github.com/ClickHouse/ClickHouse/pull/50923) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix race in azure blob storage iterator [#50936](https://github.com/ClickHouse/ClickHouse/pull/50936) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
+* Fix erroneous `sort_description` propagation in `CreatingSets` [#50955](https://github.com/ClickHouse/ClickHouse/pull/50955) ([Nikita Taranov](https://github.com/nickitat)).
+* Fix iceberg V2 optional metadata parsing [#50974](https://github.com/ClickHouse/ClickHouse/pull/50974) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* MaterializedMySQL: Keep parentheses for empty table overrides [#50977](https://github.com/ClickHouse/ClickHouse/pull/50977) ([Val Doroshchuk](https://github.com/valbok)).
+* Fix crash in BackupCoordinationStageSync::setError() [#51012](https://github.com/ClickHouse/ClickHouse/pull/51012) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fix subtly broken copy-on-write of ColumnLowCardinality dictionary [#51064](https://github.com/ClickHouse/ClickHouse/pull/51064) ([Michael Kolupaev](https://github.com/al13n321)).
+* Generate safe IVs [#51086](https://github.com/ClickHouse/ClickHouse/pull/51086) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
+* Fix ineffective query cache for SELECTs with subqueries [#51132](https://github.com/ClickHouse/ClickHouse/pull/51132) ([Robert Schulze](https://github.com/rschu1ze)).
+* Fix Set index with constant nullable comparison. [#51205](https://github.com/ClickHouse/ClickHouse/pull/51205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix a crash in s3 and s3Cluster functions [#51209](https://github.com/ClickHouse/ClickHouse/pull/51209) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Fix core dump when compiling an expression [#51231](https://github.com/ClickHouse/ClickHouse/pull/51231) ([LiuNeng](https://github.com/liuneng1994)).
+* Fix use-after-free in StorageURL when switching URLs [#51260](https://github.com/ClickHouse/ClickHouse/pull/51260) ([Michael Kolupaev](https://github.com/al13n321)).
+* Updated check for parameterized view [#51272](https://github.com/ClickHouse/ClickHouse/pull/51272) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
+* Fix multiple writing of the same file to backup [#51299](https://github.com/ClickHouse/ClickHouse/pull/51299) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fix fuzzer failure in ActionsDAG [#51301](https://github.com/ClickHouse/ClickHouse/pull/51301) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Remove garbage from function `transform` [#51350](https://github.com/ClickHouse/ClickHouse/pull/51350) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix MSan report in lowerUTF8/upperUTF8 [#51371](https://github.com/ClickHouse/ClickHouse/pull/51371) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* fs cache: fix slightly incorrect use_count after [#44985](https://github.com/ClickHouse/ClickHouse/issues/44985) [#51406](https://github.com/ClickHouse/ClickHouse/pull/51406) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix segfault in MathUnary [#51499](https://github.com/ClickHouse/ClickHouse/pull/51499) ([Ilya Yatsishin](https://github.com/qoega)).
+* Fix logical assert in `tupleElement()` with default values [#51534](https://github.com/ClickHouse/ClickHouse/pull/51534) ([Robert Schulze](https://github.com/rschu1ze)).
+* fs cache: remove file from opened file cache immediately when evicting file [#51596](https://github.com/ClickHouse/ClickHouse/pull/51596) ([Kseniia Sumarokova](https://github.com/kssenii)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Deprecate delete-on-destroy.txt [#49181](https://github.com/ClickHouse/ClickHouse/pull/49181) ([Alexander Gololobov](https://github.com/davenger)).
+* Attempt to increase the general runners' survival rate [#49283](https://github.com/ClickHouse/ClickHouse/pull/49283) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Refactor subqueries for IN [#49570](https://github.com/ClickHouse/ClickHouse/pull/49570) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Test plan optimization analyzer [#50095](https://github.com/ClickHouse/ClickHouse/pull/50095) ([Igor Nikonov](https://github.com/devcrafter)).
+* Implement endianness-independent serialization for quantileTiming [#50324](https://github.com/ClickHouse/ClickHouse/pull/50324) ([ltrk2](https://github.com/ltrk2)).
+* require a `finalize()` call before the d-tor for all write buffers [#50395](https://github.com/ClickHouse/ClickHouse/pull/50395) ([Sema Checherinda](https://github.com/CheSema)).
+* Implement big-endian support for the deterministic reservoir sampler [#50405](https://github.com/ClickHouse/ClickHouse/pull/50405) ([ltrk2](https://github.com/ltrk2)).
+* Fix compilation error on big-endian platforms [#50406](https://github.com/ClickHouse/ClickHouse/pull/50406) ([ltrk2](https://github.com/ltrk2)).
+* Attach gdb in stateless tests [#50487](https://github.com/ClickHouse/ClickHouse/pull/50487) ([Kruglov Pavel](https://github.com/Avogar)).
+* JIT infrastructure refactoring [#50531](https://github.com/ClickHouse/ClickHouse/pull/50531) ([Maksim Kita](https://github.com/kitaisreal)).
+* Analyzer: Do not apply Query Tree optimizations on shards [#50584](https://github.com/ClickHouse/ClickHouse/pull/50584) ([Dmitry Novik](https://github.com/novikd)).
+* Increase max array size in group bitmap [#50620](https://github.com/ClickHouse/ClickHouse/pull/50620) ([Kruglov Pavel](https://github.com/Avogar)).
+* Misc Annoy index improvements [#50661](https://github.com/ClickHouse/ClickHouse/pull/50661) ([Robert Schulze](https://github.com/rschu1ze)).
+* Fix reading negative decimals in avro format [#50668](https://github.com/ClickHouse/ClickHouse/pull/50668) ([Kruglov Pavel](https://github.com/Avogar)).
+* Unify priorities for connection pools [#50675](https://github.com/ClickHouse/ClickHouse/pull/50675) ([Sergei Trifonov](https://github.com/serxa)).
+* Postpone check of outdated parts [#50676](https://github.com/ClickHouse/ClickHouse/pull/50676) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Unify priorities: `IExecutableTask`s [#50677](https://github.com/ClickHouse/ClickHouse/pull/50677) ([Sergei Trifonov](https://github.com/serxa)).
+* Disable grace_hash join in stress tests [#50693](https://github.com/ClickHouse/ClickHouse/pull/50693) ([vdimir](https://github.com/vdimir)).
+* ReverseTransform small improvement [#50698](https://github.com/ClickHouse/ClickHouse/pull/50698) ([Maksim Kita](https://github.com/kitaisreal)).
+* Support OPTIMIZE for temporary tables [#50710](https://github.com/ClickHouse/ClickHouse/pull/50710) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Refactor reading from object storages [#50711](https://github.com/ClickHouse/ClickHouse/pull/50711) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix data race in log message of cached buffer [#50723](https://github.com/ClickHouse/ClickHouse/pull/50723) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Add new keywords into projections documentation [#50743](https://github.com/ClickHouse/ClickHouse/pull/50743) ([YalalovSM](https://github.com/YalalovSM)).
+* Fix build for aarch64 (temporarily disable azure) [#50770](https://github.com/ClickHouse/ClickHouse/pull/50770) ([alesapin](https://github.com/alesapin)).
+* Update version after release [#50772](https://github.com/ClickHouse/ClickHouse/pull/50772) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Update version_date.tsv and changelogs after v23.5.1.3174-stable [#50774](https://github.com/ClickHouse/ClickHouse/pull/50774) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Update CHANGELOG.md [#50788](https://github.com/ClickHouse/ClickHouse/pull/50788) ([Ilya Yatsishin](https://github.com/qoega)).
+* Update version_date.tsv and changelogs after v23.2.7.32-stable [#50809](https://github.com/ClickHouse/ClickHouse/pull/50809) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Desctructing --> Destructing [#50810](https://github.com/ClickHouse/ClickHouse/pull/50810) ([Robert Schulze](https://github.com/rschu1ze)).
+* Don't mark a part as broken on `Poco::TimeoutException` [#50811](https://github.com/ClickHouse/ClickHouse/pull/50811) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Rename azure_blob_storage to azureBlobStorage [#50812](https://github.com/ClickHouse/ClickHouse/pull/50812) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
+* Fix ParallelReadBuffer seek [#50820](https://github.com/ClickHouse/ClickHouse/pull/50820) ([Michael Kolupaev](https://github.com/al13n321)).
+* [RFC] Print git hash when crashing [#50823](https://github.com/ClickHouse/ClickHouse/pull/50823) ([Michael Kolupaev](https://github.com/al13n321)).
+* Add tests for function "transform" [#50833](https://github.com/ClickHouse/ClickHouse/pull/50833) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Update version_date.tsv and changelogs after v23.5.2.7-stable [#50844](https://github.com/ClickHouse/ClickHouse/pull/50844) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Updated changelog with azureBlobStorage table function & engine entry [#50850](https://github.com/ClickHouse/ClickHouse/pull/50850) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
+* Update easy_tasks_sorted_ru.md [#50853](https://github.com/ClickHouse/ClickHouse/pull/50853) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Document x86 / ARM prerequisites for Docker image [#50867](https://github.com/ClickHouse/ClickHouse/pull/50867) ([Robert Schulze](https://github.com/rschu1ze)).
+* MaterializedMySQL: Add test_named_collections [#50874](https://github.com/ClickHouse/ClickHouse/pull/50874) ([Val Doroshchuk](https://github.com/valbok)).
+* Update version_date.tsv and changelogs after v22.8.18.31-lts [#50881](https://github.com/ClickHouse/ClickHouse/pull/50881) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Update version_date.tsv and changelogs after v23.3.3.52-lts [#50882](https://github.com/ClickHouse/ClickHouse/pull/50882) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Update version_date.tsv and changelogs after v23.4.3.48-stable [#50883](https://github.com/ClickHouse/ClickHouse/pull/50883) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* MaterializedMySQL: Add additional test case to insert_with_modify_binlog_checksum [#50884](https://github.com/ClickHouse/ClickHouse/pull/50884) ([Val Doroshchuk](https://github.com/valbok)).
+* Update broken tests list [#50886](https://github.com/ClickHouse/ClickHouse/pull/50886) ([Dmitry Novik](https://github.com/novikd)).
+* Fix LOGICAL_ERROR in snowflakeToDateTime*() [#50893](https://github.com/ClickHouse/ClickHouse/pull/50893) ([Robert Schulze](https://github.com/rschu1ze)).
+* Tests with parallel replicas are no longer "always green" [#50896](https://github.com/ClickHouse/ClickHouse/pull/50896) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Slightly more information in error message about cached disk [#50897](https://github.com/ClickHouse/ClickHouse/pull/50897) ([Michael Kolupaev](https://github.com/al13n321)).
+* do not call finalize after exception [#50907](https://github.com/ClickHouse/ClickHouse/pull/50907) ([Sema Checherinda](https://github.com/CheSema)).
+* Update Annoy docs [#50912](https://github.com/ClickHouse/ClickHouse/pull/50912) ([Robert Schulze](https://github.com/rschu1ze)).
+* A bit safer UserDefinedSQLFunctionVisitor [#50913](https://github.com/ClickHouse/ClickHouse/pull/50913) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Update contrib/orc in .gitmodules [#50920](https://github.com/ClickHouse/ClickHouse/pull/50920) ([San](https://github.com/santrancisco)).
+* MaterializedMySQL: Add missing DROP DATABASE for tests [#50924](https://github.com/ClickHouse/ClickHouse/pull/50924) ([Val Doroshchuk](https://github.com/valbok)).
+* Fix 'Illegal column timezone' in stress tests [#50929](https://github.com/ClickHouse/ClickHouse/pull/50929) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix tests sanity checks and avoid dropping system.query_log table [#50934](https://github.com/ClickHouse/ClickHouse/pull/50934) ([Azat Khuzhin](https://github.com/azat)).
+* Fix tests for throttling by allowing more margin of error for the throttling event [#50935](https://github.com/ClickHouse/ClickHouse/pull/50935) ([Azat Khuzhin](https://github.com/azat)).
+* 01746_convert_type_with_default: Temporarily disable flaky test [#50937](https://github.com/ClickHouse/ClickHouse/pull/50937) ([Robert Schulze](https://github.com/rschu1ze)).
+* Fix the stateless tests image for old commits [#50947](https://github.com/ClickHouse/ClickHouse/pull/50947) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Fix logic in `AsynchronousBoundedReadBuffer::seek` [#50952](https://github.com/ClickHouse/ClickHouse/pull/50952) ([Nikita Taranov](https://github.com/nickitat)).
+* Uncomment flaky test (01746_convert_type_with_default) [#50954](https://github.com/ClickHouse/ClickHouse/pull/50954) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* Fix keeper-client help message [#50965](https://github.com/ClickHouse/ClickHouse/pull/50965) ([pufit](https://github.com/pufit)).
+* fix build issue on clang 15 [#50967](https://github.com/ClickHouse/ClickHouse/pull/50967) ([Chang chen](https://github.com/baibaichen)).
+* Docs: Fix embedded video link [#50972](https://github.com/ClickHouse/ClickHouse/pull/50972) ([Robert Schulze](https://github.com/rschu1ze)).
+* Change submodule capnproto to its fork in ClickHouse [#50987](https://github.com/ClickHouse/ClickHouse/pull/50987) ([Kruglov Pavel](https://github.com/Avogar)).
+* Attempt to make 01281_group_by_limit_memory_tracking not flaky [#50995](https://github.com/ClickHouse/ClickHouse/pull/50995) ([Dmitry Novik](https://github.com/novikd)).
+* Fix flaky 02561_null_as_default_more_formats [#51001](https://github.com/ClickHouse/ClickHouse/pull/51001) ([Igor Nikonov](https://github.com/devcrafter)).
+* Fix flaky test_seekable_formats [#51002](https://github.com/ClickHouse/ClickHouse/pull/51002) ([Kruglov Pavel](https://github.com/Avogar)).
+* Follow-up to [#50448](https://github.com/ClickHouse/ClickHouse/issues/50448) [#51006](https://github.com/ClickHouse/ClickHouse/pull/51006) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix a versions' tweak for tagged commits, improve version_helper [#51035](https://github.com/ClickHouse/ClickHouse/pull/51035) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Sqlancer has changed master to main [#51060](https://github.com/ClickHouse/ClickHouse/pull/51060) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Do not spam sqlancer build log [#51061](https://github.com/ClickHouse/ClickHouse/pull/51061) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Refactor IColumn::forEachSubcolumn to make it slightly harder to implement incorrectly [#51072](https://github.com/ClickHouse/ClickHouse/pull/51072) ([Michael Kolupaev](https://github.com/al13n321)).
+* MaterializedMySQL: Rename materialize_with_ddl.py -> materialized_with_ddl [#51074](https://github.com/ClickHouse/ClickHouse/pull/51074) ([Val Doroshchuk](https://github.com/valbok)).
+* Improve woboq browser report [#51077](https://github.com/ClickHouse/ClickHouse/pull/51077) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Fix for part_names_mutex used after destruction [#51099](https://github.com/ClickHouse/ClickHouse/pull/51099) ([Alexander Gololobov](https://github.com/davenger)).
+* Fix ColumnConst::forEachSubcolumn missing from previous PR [#51102](https://github.com/ClickHouse/ClickHouse/pull/51102) ([Michael Kolupaev](https://github.com/al13n321)).
+* Fix the test 02783_parsedatetimebesteffort_syslog flakiness [#51112](https://github.com/ClickHouse/ClickHouse/pull/51112) ([Victor Krasnov](https://github.com/sirvickr)).
+* Compatibility with clang-17 [#51114](https://github.com/ClickHouse/ClickHouse/pull/51114) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Make more parallel get requests to ZooKeeper in system.zookeeper [#51118](https://github.com/ClickHouse/ClickHouse/pull/51118) ([Alexander Gololobov](https://github.com/davenger)).
+* Fix 02703_max_local_write_bandwidth flakiness [#51120](https://github.com/ClickHouse/ClickHouse/pull/51120) ([Azat Khuzhin](https://github.com/azat)). +* Update version_date.tsv and changelogs after v23.5.3.24-stable [#51121](https://github.com/ClickHouse/ClickHouse/pull/51121) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.4.4.16-stable [#51122](https://github.com/ClickHouse/ClickHouse/pull/51122) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.3.4.17-lts [#51123](https://github.com/ClickHouse/ClickHouse/pull/51123) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v22.8.19.10-lts [#51124](https://github.com/ClickHouse/ClickHouse/pull/51124) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix typo [#51126](https://github.com/ClickHouse/ClickHouse/pull/51126) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Slightly better diagnostics [#51127](https://github.com/ClickHouse/ClickHouse/pull/51127) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Small fix in `MergeTreePrefetchedReadPool` [#51131](https://github.com/ClickHouse/ClickHouse/pull/51131) ([Nikita Taranov](https://github.com/nickitat)). +* Don't report table function accesses to system.errors [#51147](https://github.com/ClickHouse/ClickHouse/pull/51147) ([Raúl Marín](https://github.com/Algunenano)). +* Fix SQLancer branch name [#51148](https://github.com/ClickHouse/ClickHouse/pull/51148) ([Ilya Yatsishin](https://github.com/qoega)). +* Revert "Added ability to implicitly use file/hdfs/s3 table functions in clickhouse-local" [#51149](https://github.com/ClickHouse/ClickHouse/pull/51149) ([Alexander Tokmakov](https://github.com/tavplubix)). +* More profile events for fs cache [#51161](https://github.com/ClickHouse/ClickHouse/pull/51161) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Unforget to pass callback to readBigAt() in ParallelReadBuffer [#51165](https://github.com/ClickHouse/ClickHouse/pull/51165) ([Michael Kolupaev](https://github.com/al13n321)). +* Update README.md [#51179](https://github.com/ClickHouse/ClickHouse/pull/51179) ([Tyler Hannan](https://github.com/tylerhannan)). +* Update exception message [#51187](https://github.com/ClickHouse/ClickHouse/pull/51187) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Split long test 02149_schema_inference_formats_with_schema into several tests to avoid timeout in debug [#51197](https://github.com/ClickHouse/ClickHouse/pull/51197) ([Kruglov Pavel](https://github.com/Avogar)). +* Avoid initializing DateLUT from emptyArray function registration [#51199](https://github.com/ClickHouse/ClickHouse/pull/51199) ([Alexander Gololobov](https://github.com/davenger)). +* Suppress check for covered parts in ZooKeeper [#51207](https://github.com/ClickHouse/ClickHouse/pull/51207) ([Alexander Tokmakov](https://github.com/tavplubix)). +* One more profile event for fs cache [#51223](https://github.com/ClickHouse/ClickHouse/pull/51223) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Typo: passowrd_sha256_hex --> password_sha256_hex [#51233](https://github.com/ClickHouse/ClickHouse/pull/51233) ([Robert Schulze](https://github.com/rschu1ze)). +* Introduce settings enum field with auto-generated values list [#51237](https://github.com/ClickHouse/ClickHouse/pull/51237) ([Sergei Trifonov](https://github.com/serxa)). 
+* Drop session if we fail to get Keeper API version [#51238](https://github.com/ClickHouse/ClickHouse/pull/51238) ([Alexander Gololobov](https://github.com/davenger)). +* Revert "Fix a crash in s3 and s3Cluster functions" [#51239](https://github.com/ClickHouse/ClickHouse/pull/51239) ([Alexander Tokmakov](https://github.com/tavplubix)). +* fix flaky `AsyncLoader` destructor [#51245](https://github.com/ClickHouse/ClickHouse/pull/51245) ([Sergei Trifonov](https://github.com/serxa)). +* Docs: little cleanup of configuration-files.md [#51249](https://github.com/ClickHouse/ClickHouse/pull/51249) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix a stupid bug on Replicated database recovery [#51252](https://github.com/ClickHouse/ClickHouse/pull/51252) ([Alexander Tokmakov](https://github.com/tavplubix)). +* FileCache: tryReserve() slight improvement [#51259](https://github.com/ClickHouse/ClickHouse/pull/51259) ([Igor Nikonov](https://github.com/devcrafter)). +* Ugly hotfix for "terminate on uncaught exception" in WriteBufferFromOStream [#51265](https://github.com/ClickHouse/ClickHouse/pull/51265) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Avoid too many calls to Poco::Logger::get [#51266](https://github.com/ClickHouse/ClickHouse/pull/51266) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Update version_date.tsv and changelogs after v23.3.5.9-lts [#51269](https://github.com/ClickHouse/ClickHouse/pull/51269) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Better reporting of broken parts [#51270](https://github.com/ClickHouse/ClickHouse/pull/51270) ([Anton Popov](https://github.com/CurtizJ)). +* Update ext-dict-functions.md [#51283](https://github.com/ClickHouse/ClickHouse/pull/51283) ([Mike Kot](https://github.com/myrrc)). +* Disable table structure check for secondary queries from Replicated db [#51284](https://github.com/ClickHouse/ClickHouse/pull/51284) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Define Thrift version for parquet and use correct arrow version [#51285](https://github.com/ClickHouse/ClickHouse/pull/51285) ([Kruglov Pavel](https://github.com/Avogar)). +* Restore Azure build on ARM [#51288](https://github.com/ClickHouse/ClickHouse/pull/51288) ([Robert Schulze](https://github.com/rschu1ze)). +* Query Cache: Un-comment settings in server cfg [#51294](https://github.com/ClickHouse/ClickHouse/pull/51294) ([Robert Schulze](https://github.com/rschu1ze)). +* Require more checks [#51295](https://github.com/ClickHouse/ClickHouse/pull/51295) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix metadata loading test [#51297](https://github.com/ClickHouse/ClickHouse/pull/51297) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Scratch the strange Python code [#51302](https://github.com/ClickHouse/ClickHouse/pull/51302) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#47865](https://github.com/ClickHouse/ClickHouse/issues/47865) [#51306](https://github.com/ClickHouse/ClickHouse/pull/51306) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#48894](https://github.com/ClickHouse/ClickHouse/issues/48894) [#51307](https://github.com/ClickHouse/ClickHouse/pull/51307) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#48676](https://github.com/ClickHouse/ClickHouse/issues/48676) [#51308](https://github.com/ClickHouse/ClickHouse/pull/51308) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Fix long test `functions_bad_arguments` [#51310](https://github.com/ClickHouse/ClickHouse/pull/51310) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Unify merge predicate [#51344](https://github.com/ClickHouse/ClickHouse/pull/51344) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix using locks in ProcessList [#51348](https://github.com/ClickHouse/ClickHouse/pull/51348) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Add a test for [#42631](https://github.com/ClickHouse/ClickHouse/issues/42631) [#51353](https://github.com/ClickHouse/ClickHouse/pull/51353) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix performance tests due to warnings from jemalloc about Per-CPU arena disabled [#51362](https://github.com/ClickHouse/ClickHouse/pull/51362) ([Azat Khuzhin](https://github.com/azat)).
+* Fix "merge_truncate_long" test [#51369](https://github.com/ClickHouse/ClickHouse/pull/51369) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Increase timeout of Fast Test [#51372](https://github.com/ClickHouse/ClickHouse/pull/51372) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix bad tests for DNS [#51374](https://github.com/ClickHouse/ClickHouse/pull/51374) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Attempt to fix the `relax_too_many_parts` test [#51375](https://github.com/ClickHouse/ClickHouse/pull/51375) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix MySQL test in Debug mode [#51376](https://github.com/ClickHouse/ClickHouse/pull/51376) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix bad test `01018_Distributed__shard_num` [#51377](https://github.com/ClickHouse/ClickHouse/pull/51377) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix "logical error" in addressToLineWithInlines [#51379](https://github.com/ClickHouse/ClickHouse/pull/51379) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix test 01280_ttl_where_group_by [#51380](https://github.com/ClickHouse/ClickHouse/pull/51380) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Attempt to fix `test_ssl_cert_authentication` [#51384](https://github.com/ClickHouse/ClickHouse/pull/51384) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Revert "Merge pull request [#50951](https://github.com/ClickHouse/ClickHouse/issues/50951) from ZhiguoZh/20230607-toyear-fix" [#51390](https://github.com/ClickHouse/ClickHouse/pull/51390) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Two tests take twice as long on average with Analyzer and sometimes fail [#51391](https://github.com/ClickHouse/ClickHouse/pull/51391) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix 00899_long_attach_memory_limit [#51395](https://github.com/ClickHouse/ClickHouse/pull/51395) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix test 01293_optimize_final_force [#51396](https://github.com/ClickHouse/ClickHouse/pull/51396) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix test 02481_parquet_list_monotonically_increasing_offsets [#51397](https://github.com/ClickHouse/ClickHouse/pull/51397) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix test 02497_trace_events_stress_long [#51398](https://github.com/ClickHouse/ClickHouse/pull/51398) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix broken labeling for `manual approve` [#51405](https://github.com/ClickHouse/ClickHouse/pull/51405) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Fix parts lifetime in `MergeTreeTransaction` [#51407](https://github.com/ClickHouse/ClickHouse/pull/51407) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix flaky test test_skip_empty_files [#51409](https://github.com/ClickHouse/ClickHouse/pull/51409) ([Kruglov Pavel](https://github.com/Avogar)).
+* fix flaky test test_profile_events_s3 [#51412](https://github.com/ClickHouse/ClickHouse/pull/51412) ([Sema Checherinda](https://github.com/CheSema)).
+* Update README.md [#51413](https://github.com/ClickHouse/ClickHouse/pull/51413) ([Tyler Hannan](https://github.com/tylerhannan)).
+* Replace try/catch logic in hasTokenOrNull() by something more lightweight [#51425](https://github.com/ClickHouse/ClickHouse/pull/51425) ([Robert Schulze](https://github.com/rschu1ze)).
+* Add retries to `tlsv1_3` tests [#51434](https://github.com/ClickHouse/ClickHouse/pull/51434) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
+* Update exception message [#51440](https://github.com/ClickHouse/ClickHouse/pull/51440) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* fs cache: add check for intersecting ranges [#51444](https://github.com/ClickHouse/ClickHouse/pull/51444) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Slightly better code around packets for parallel replicas [#51451](https://github.com/ClickHouse/ClickHouse/pull/51451) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Update system_warnings test [#51453](https://github.com/ClickHouse/ClickHouse/pull/51453) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Many fixes [#51455](https://github.com/ClickHouse/ClickHouse/pull/51455) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix test 01605_adaptive_granularity_block_borders [#51457](https://github.com/ClickHouse/ClickHouse/pull/51457) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Try fix flaky 02497_storage_file_reader_selection [#51468](https://github.com/ClickHouse/ClickHouse/pull/51468) ([Kruglov Pavel](https://github.com/Avogar)).
+* Try making Keeper in `DatabaseReplicated` tests more stable [#51473](https://github.com/ClickHouse/ClickHouse/pull/51473) ([Antonio Andelic](https://github.com/antonio2368)).
+* Convert 02003_memory_limit_in_client from expect to sh test (to fix flakiness) [#51475](https://github.com/ClickHouse/ClickHouse/pull/51475) ([Azat Khuzhin](https://github.com/azat)).
+* Fix test_disk_over_web_server [#51476](https://github.com/ClickHouse/ClickHouse/pull/51476) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Delay shutdown of system and temporary databases [#51479](https://github.com/ClickHouse/ClickHouse/pull/51479) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix memory leakage in CompressionCodecDeflateQpl [#51480](https://github.com/ClickHouse/ClickHouse/pull/51480) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Increase retries in test_multiple_disks/test.py::test_start_stop_moves [#51482](https://github.com/ClickHouse/ClickHouse/pull/51482) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix race in BoundedReadBuffer [#51484](https://github.com/ClickHouse/ClickHouse/pull/51484) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix flaky unit test [#51485](https://github.com/ClickHouse/ClickHouse/pull/51485) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix flaky test `test_host_regexp_multiple_ptr_records` [#51506](https://github.com/ClickHouse/ClickHouse/pull/51506) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Add a comment [#51517](https://github.com/ClickHouse/ClickHouse/pull/51517) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Make `test_ssl_cert_authentication` similar to `test_tlsv1_3` [#51520](https://github.com/ClickHouse/ClickHouse/pull/51520) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
+* Fix duplicate storage set logical error. [#51521](https://github.com/ClickHouse/ClickHouse/pull/51521) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Update test_storage_postgresql/test.py::test_concurrent_queries [#51523](https://github.com/ClickHouse/ClickHouse/pull/51523) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix FATAL: query context is not detached from thread group [#51540](https://github.com/ClickHouse/ClickHouse/pull/51540) ([Igor Nikonov](https://github.com/devcrafter)).
+* Update version_date.tsv and changelogs after v23.3.6.7-lts [#51548](https://github.com/ClickHouse/ClickHouse/pull/51548) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Decoupled commits from [#51180](https://github.com/ClickHouse/ClickHouse/issues/51180) for backports [#51561](https://github.com/ClickHouse/ClickHouse/pull/51561) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Try to fix deadlock in ZooKeeper client [#51563](https://github.com/ClickHouse/ClickHouse/pull/51563) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Retry chroot creation in ZK before stateless tests [#51585](https://github.com/ClickHouse/ClickHouse/pull/51585) ([Antonio Andelic](https://github.com/antonio2368)).
+* use timeout instead of trap in 01443_merge_truncate_long.sh [#51593](https://github.com/ClickHouse/ClickHouse/pull/51593) ([Sema Checherinda](https://github.com/CheSema)).
+* Update version_date.tsv and changelogs after v23.5.4.25-stable [#51604](https://github.com/ClickHouse/ClickHouse/pull/51604) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Fix MergeTreeMarksLoader segfaulting if marks file is longer than expected [#51636](https://github.com/ClickHouse/ClickHouse/pull/51636) ([Michael Kolupaev](https://github.com/al13n321)).
+* Update version_date.tsv and changelogs after v23.4.5.22-stable [#51638](https://github.com/ClickHouse/ClickHouse/pull/51638) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Update version_date.tsv and changelogs after v23.3.7.5-lts [#51639](https://github.com/ClickHouse/ClickHouse/pull/51639) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Update parts.md [#51643](https://github.com/ClickHouse/ClickHouse/pull/51643) ([Ramazan Polat](https://github.com/ramazanpolat)).
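To make the `INTO OUTFILE` entry above concrete, here is a minimal sketch of the new behavior (the file path is illustrative, and the exact wording of the error hint may differ):

```sql
SELECT 1, 2, 3 INTO OUTFILE '/tmp/result.out';           -- creates the file
SELECT 4, 5, 6 INTO OUTFILE '/tmp/result.out';           -- fails because the file exists; the error suggests APPEND or TRUNCATE
SELECT 4, 5, 6 INTO OUTFILE '/tmp/result.out' APPEND;    -- appends the new rows to the existing file
SELECT 4, 5, 6 INTO OUTFILE '/tmp/result.out' TRUNCATE;  -- overwrites the existing file
```

The `APPEND` and `TRUNCATE` clauses are exercised by the `00415_into_outfile` test that is extended later in this patch series.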
+
diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv
index 4ca5d1d7497..2a098d8c1da 100644
--- a/utils/list-versions/version_date.tsv
+++ b/utils/list-versions/version_date.tsv
@@ -1,3 +1,4 @@
+v23.6.1.1524-stable 2023-06-30
 v23.5.4.25-stable 2023-06-29
 v23.5.3.24-stable 2023-06-17
 v23.5.2.7-stable 2023-06-10

From 34bf0284ad0b684e6ee2061bbb4d852d7a0ab79a Mon Sep 17 00:00:00 2001
From: avogar
Date: Fri, 30 Jun 2023 16:18:30 +0000
Subject: [PATCH 1070/1997] Add RowBinaryWithDefaults format

---
 docs/en/interfaces/formats.md                 | 18 ++++++
 .../Formats/Impl/BinaryRowInputFormat.cpp     | 56 ++++++++++++++-----
 .../Formats/Impl/BinaryRowInputFormat.h       |  4 +-
 src/TableFunctions/TableFunctionFormat.cpp    | 13 ++++-
 .../02810_row_binary_with_defaults.reference  |  6 ++
 .../02810_row_binary_with_defaults.sql        |  7 +++
 6 files changed, 89 insertions(+), 15 deletions(-)
 create mode 100644 tests/queries/0_stateless/02810_row_binary_with_defaults.reference
 create mode 100644 tests/queries/0_stateless/02810_row_binary_with_defaults.sql

diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md
index 378a1c46d93..3b7eab9a9d3 100644
--- a/docs/en/interfaces/formats.md
+++ b/docs/en/interfaces/formats.md
@@ -76,6 +76,7 @@ The supported formats are:
 | [RowBinary](#rowbinary) | ✔ | ✔ |
 | [RowBinaryWithNames](#rowbinarywithnamesandtypes) | ✔ | ✔ |
 | [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ |
+| [RowBinaryWithDefaults](#rowbinarywithdefaults) | ✔ | ✔ |
 | [Native](#native) | ✔ | ✔ |
 | [Null](#null) | ✗ | ✔ |
 | [XML](#xml) | ✗ | ✔ |
@@ -1514,6 +1515,23 @@ If setting [input_format_with_types_use_header](/docs/en/operations/settings/set
 the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped.
 :::

+## RowBinaryWithDefaults {#rowbinarywithdefaults}
+
+Similar to [RowBinary](#rowbinary), but with an extra byte before each column that indicates whether the default value should be used.
+
+Example:
+
+```sql
+:) select * from format('RowBinaryWithDefaults', 'x UInt32 default 42, y UInt32', x'010001000000')
+
+┌──x─┬─y─┐
+│ 42 │ 1 │
+└────┴───┘
+```
+
+For column `x` there is only one byte, `01`, which indicates that the default value should be used; no other data for this column follows.
+For column `y` the data starts with the byte `00`, which indicates that the column has an actual value, and that value should be read from the subsequent data `01000000`.
+
 ## RowBinary format settings {#row-binary-format-settings}

 - [format_binary_max_string_size](/docs/en/operations/settings/settings-formats.md/#format_binary_max_string_size) - The maximum allowed size for String in RowBinary format. Default value - `1GiB`.
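As a reading aid for the format description above (a sketch, not part of the diff), the example payload decodes byte by byte as follows; the value bytes use the same little-endian fixed-width encoding as plain RowBinary:

```sql
-- x'010001000000' from the documentation example above:
--   01        column x: 1 means "use the DEFAULT expression", so x becomes 42 and no value bytes follow
--   00        column y: 0 means "an actual value follows"
--   01000000  column y: the UInt32 value 1 in little-endian byte order
SELECT * FROM format('RowBinaryWithDefaults', 'x UInt32 DEFAULT 42, y UInt32', x'010001000000');
```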
diff --git a/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp b/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp
index a4f779076eb..ac5da172210 100644
--- a/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp
@@ -13,7 +13,8 @@ namespace ErrorCodes
     extern const int CANNOT_SKIP_UNKNOWN_FIELD;
 }

-BinaryRowInputFormat::BinaryRowInputFormat(ReadBuffer & in_, const Block & header, Params params_, bool with_names_, bool with_types_, const FormatSettings & format_settings_)
+template <bool with_defaults>
+BinaryRowInputFormat<with_defaults>::BinaryRowInputFormat(ReadBuffer & in_, const Block & header, Params params_, bool with_names_, bool with_types_, const FormatSettings & format_settings_)
     : RowInputFormatWithNamesAndTypes(
         header,
         in_,
@@ -22,16 +23,17 @@ BinaryRowInputFormat::BinaryRowInputFormat(ReadBuffer & in_, const Block & heade
         with_names_,
         with_types_,
         format_settings_,
-        std::make_unique<BinaryFormatReader>(in_, format_settings_))
+        std::make_unique<BinaryFormatReader<with_defaults>>(in_, format_settings_))
 {
 }

-
-BinaryFormatReader::BinaryFormatReader(ReadBuffer & in_, const FormatSettings & format_settings_) : FormatWithNamesAndTypesReader(in_, format_settings_)
+template <bool with_defaults>
+BinaryFormatReader<with_defaults>::BinaryFormatReader(ReadBuffer & in_, const FormatSettings & format_settings_) : FormatWithNamesAndTypesReader(in_, format_settings_)
 {
 }

-std::vector<String> BinaryFormatReader::readHeaderRow()
+template <bool with_defaults>
+std::vector<String> BinaryFormatReader<with_defaults>::readHeaderRow()
 {
     std::vector<String> fields;
     String field;
@@ -43,13 +45,15 @@ std::vector<String> BinaryFormatReader::readHeaderRow()
     return fields;
 }

-std::vector<String> BinaryFormatReader::readNames()
+template <bool with_defaults>
+std::vector<String> BinaryFormatReader<with_defaults>::readNames()
 {
     readVarUInt(read_columns, *in);
     return readHeaderRow();
 }

-std::vector<String> BinaryFormatReader::readTypes()
+template <bool with_defaults>
+std::vector<String> BinaryFormatReader<with_defaults>::readTypes()
 {
     auto types = readHeaderRow();
     for (const auto & type_name : types)
@@ -57,26 +61,40 @@ std::vector<String> BinaryFormatReader::readTypes()
     return types;
 }

-bool BinaryFormatReader::readField(IColumn & column, const DataTypePtr & /*type*/, const SerializationPtr & serialization, bool /*is_last_file_column*/, const String & /*column_name*/)
+template <bool with_defaults>
+bool BinaryFormatReader<with_defaults>::readField(IColumn & column, const DataTypePtr & /*type*/, const SerializationPtr & serialization, bool /*is_last_file_column*/, const String & /*column_name*/)
 {
+    if constexpr (with_defaults)
+    {
+        UInt8 is_default;
+        readBinary(is_default, *in);
+        if (is_default)
+        {
+            column.insertDefault();
+            return false;
+        }
+    }
     serialization->deserializeBinary(column, *in, format_settings);
     return true;
 }

-void BinaryFormatReader::skipHeaderRow()
+template <bool with_defaults>
+void BinaryFormatReader<with_defaults>::skipHeaderRow()
 {
     String tmp;
     for (size_t i = 0; i < read_columns; ++i)
         readStringBinary(tmp, *in);
 }

-void BinaryFormatReader::skipNames()
+template <bool with_defaults>
+void BinaryFormatReader<with_defaults>::skipNames()
 {
     readVarUInt(read_columns, *in);
     skipHeaderRow();
 }

-void BinaryFormatReader::skipTypes()
+template <bool with_defaults>
+void BinaryFormatReader<with_defaults>::skipTypes()
 {
     if (read_columns == 0)
     {
@@ -87,7 +105,8 @@ void BinaryFormatReader::skipTypes()
     skipHeaderRow();
 }

-void BinaryFormatReader::skipField(size_t file_column)
+template <bool with_defaults>
+void BinaryFormatReader<with_defaults>::skipField(size_t file_column)
 {
     if (file_column >= read_data_types.size())
         throw Exception(ErrorCodes::CANNOT_SKIP_UNKNOWN_FIELD,
@@ -111,12 +130,21 @@ void registerInputFormatRowBinary(FormatFactory & factory)
         const IRowInputFormat::Params & params,
         const FormatSettings & settings)
     {
-        return std::make_shared<BinaryRowInputFormat>(buf, sample, params, with_names, with_types, settings);
+        return std::make_shared<BinaryRowInputFormat<false>>(buf, sample, params, with_names, with_types, settings);
     });
 };

 registerWithNamesAndTypes("RowBinary", register_func);
 factory.registerFileExtension("bin", "RowBinary");
+
+    factory.registerInputFormat("RowBinaryWithDefaults", [](
+        ReadBuffer & buf,
+        const Block & sample,
+        const IRowInputFormat::Params & params,
+        const FormatSettings & settings)
+    {
+        return std::make_shared<BinaryRowInputFormat<true>>(buf, sample, params, false, false, settings);
+    });
 }

 void registerRowBinaryWithNamesAndTypesSchemaReader(FormatFactory & factory)
@@ -125,6 +153,8 @@ void registerRowBinaryWithNamesAndTypesSchemaReader(FormatFactory & factory)
     {
         return std::make_shared<BinaryWithNamesAndTypesSchemaReader>(buf, settings);
     });
+
+
 }

diff --git a/src/Processors/Formats/Impl/BinaryRowInputFormat.h b/src/Processors/Formats/Impl/BinaryRowInputFormat.h
index 3d3d80f1043..6f2042d1315 100644
--- a/src/Processors/Formats/Impl/BinaryRowInputFormat.h
+++ b/src/Processors/Formats/Impl/BinaryRowInputFormat.h
@@ -12,6 +12,7 @@ class ReadBuffer;

 /** A stream for inputting data in a binary line-by-line format.
   */
+template <bool with_defaults>
 class BinaryRowInputFormat final : public RowInputFormatWithNamesAndTypes
 {
 public:
@@ -25,6 +26,7 @@ public:
     std::string getDiagnosticInfo() override { return {}; }
 };

+template <bool with_defaults>
 class BinaryFormatReader final : public FormatWithNamesAndTypesReader
 {
 public:
@@ -54,7 +56,7 @@ public:
     BinaryWithNamesAndTypesSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_);

 private:
-    BinaryFormatReader reader;
+    BinaryFormatReader<false> reader;
 };

 }

diff --git a/src/TableFunctions/TableFunctionFormat.cpp b/src/TableFunctions/TableFunctionFormat.cpp
index f5aff4bd098..2a46f839bbe 100644
--- a/src/TableFunctions/TableFunctionFormat.cpp
+++ b/src/TableFunctions/TableFunctionFormat.cpp
@@ -10,6 +10,7 @@
 #include
 #include
+#include
 #include
 #include

@@ -72,7 +73,17 @@ Block TableFunctionFormat::parseData(ColumnsDescription columns, ContextPtr cont
     auto read_buf = std::make_unique<ReadBufferFromString>(data);
     auto input_format = context->getInputFormat(format, *read_buf, block, context->getSettingsRef().max_block_size);
-    auto pipeline = std::make_unique<QueryPipeline>(input_format);
+    QueryPipelineBuilder builder;
+    builder.init(Pipe(input_format));
+    if (columns.hasDefaults())
+    {
+        builder.addSimpleTransform([&](const Block & header)
+        {
+            return std::make_shared<AddingDefaultsTransform>(header, columns, *input_format, context);
+        });
+    }
+
+    auto pipeline = std::make_unique<QueryPipeline>(QueryPipelineBuilder::getPipeline(std::move(builder)));
     auto reader = std::make_unique<PullingPipelineExecutor>(*pipeline);

     std::vector<Block> blocks;

diff --git a/tests/queries/0_stateless/02810_row_binary_with_defaults.reference b/tests/queries/0_stateless/02810_row_binary_with_defaults.reference
new file mode 100644
index 00000000000..5a556d1a6a9
--- /dev/null
+++ b/tests/queries/0_stateless/02810_row_binary_with_defaults.reference
@@ -0,0 +1,6 @@
+42
+1
+42
+1
+\N
+[(42,42)]

diff --git a/tests/queries/0_stateless/02810_row_binary_with_defaults.sql b/tests/queries/0_stateless/02810_row_binary_with_defaults.sql
new file mode 100644
index 00000000000..73662352c0a
--- /dev/null
+++ b/tests/queries/0_stateless/02810_row_binary_with_defaults.sql
@@ -0,0 +1,7 @@
+select * from format('RowBinaryWithDefaults', 'x UInt32 default 42', x'01');
+select * from format('RowBinaryWithDefaults', 'x UInt32 default 42', x'0001000000');
+select * from format('RowBinaryWithDefaults', 'x Nullable(UInt32) default 42', x'01');
+select * from format('RowBinaryWithDefaults', 'x Nullable(UInt32) default 42', x'000001000000');
+select * from format('RowBinaryWithDefaults', 'x 
Nullable(UInt32) default 42', x'0001'); +select * from format('RowBinaryWithDefaults', 'x Array(Tuple(UInt32, UInt32)) default [(42, 42)]', x'01'); + From 3f29bd1bd42f7ebff737d2196c285aa7be4c6989 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 30 Jun 2023 18:20:59 +0200 Subject: [PATCH 1071/1997] Try fix test --- tests/queries/0_stateless/02789_filesystem_cache_alignment.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02789_filesystem_cache_alignment.sh b/tests/queries/0_stateless/02789_filesystem_cache_alignment.sh index b9f9548a7b3..509d1a635b1 100755 --- a/tests/queries/0_stateless/02789_filesystem_cache_alignment.sh +++ b/tests/queries/0_stateless/02789_filesystem_cache_alignment.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-random-settings +# Tags: no-fasttest, no-parallel, no-random-settings, no-random-merge-tree-settings CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From 32aa66922a5bd26b4ced763130a8a401516c365f Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Fri, 30 Jun 2023 18:31:46 +0200 Subject: [PATCH 1072/1997] Update 02808_custom_disk_with_user_defined_name.sh --- .../0_stateless/02808_custom_disk_with_user_defined_name.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02808_custom_disk_with_user_defined_name.sh b/tests/queries/0_stateless/02808_custom_disk_with_user_defined_name.sh index 3fbcde66add..a43cd6deb9e 100755 --- a/tests/queries/0_stateless/02808_custom_disk_with_user_defined_name.sh +++ b/tests/queries/0_stateless/02808_custom_disk_with_user_defined_name.sh @@ -25,7 +25,7 @@ $CLICKHOUSE_CLIENT -nm --query """ DROP TABLE IF EXISTS test; CREATE TABLE test (a Int32, b String) ENGINE = MergeTree() ORDER BY tuple() -SETTINGS disk = disk_$disk_name(type = cache, max_size = '100Ki', path = ${CLICKHOUSE_TEST_UNIQUE_NAME}, disk = s3disk); +SETTINGS disk = disk_$disk_name(type = cache, max_size = '100Ki', path = ${CLICKHOUSE_TEST_UNIQUE_NAME}, disk = s3_disk); """ $CLICKHOUSE_CLIENT -nm --query """ From 8632eab3142675b36378deb0d09bd6035436593e Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 30 Jun 2023 18:55:32 +0200 Subject: [PATCH 1073/1997] Add .reference --- .../0_stateless/02808_filesystem_cache_drop_query.reference | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 tests/queries/0_stateless/02808_filesystem_cache_drop_query.reference diff --git a/tests/queries/0_stateless/02808_filesystem_cache_drop_query.reference b/tests/queries/0_stateless/02808_filesystem_cache_drop_query.reference new file mode 100644 index 00000000000..d80fc78e03d --- /dev/null +++ b/tests/queries/0_stateless/02808_filesystem_cache_drop_query.reference @@ -0,0 +1,4 @@ +1 +0 +1 +0 From 50449cc68d03f213c6b128ed51416d1de21ad1cd Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Fri, 30 Jun 2023 19:07:32 +0200 Subject: [PATCH 1074/1997] fix write to finalized buffer --- src/Core/Settings.h | 2 +- src/Server/HTTPHandler.cpp | 7 +++---- .../0_stateless/00429_long_http_bufferization.sh | 13 ++++++++++++- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 658f3c8025b..288413857d4 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -274,7 +274,7 @@ class IColumn; \ M(UInt64, http_headers_progress_interval_ms, 100, "Do not send HTTP headers X-ClickHouse-Progress more frequently than at each specified 
interval.", 0) \ M(Bool, http_wait_end_of_query, false, "Enable HTTP response buffering on the server-side.", 0) \ - M(UInt64, http_response_buffer_size, false, "The number of bytes to buffer in the server memory before sending a HTTP response to the client or flushing to disk (when http_wait_end_of_query is enabled).", 0) \ + M(UInt64, http_response_buffer_size, 0, "The number of bytes to buffer in the server memory before sending a HTTP response to the client or flushing to disk (when http_wait_end_of_query is enabled).", 0) \ \ M(Bool, fsync_metadata, true, "Do fsync after changing metadata for tables and databases (.sql files). Could be disabled in case of poor latency on server with high load of DDL queries and high load of disk subsystem.", 0) \ \ diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index fe98ae5f69e..a391e3bb2e4 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -902,10 +902,9 @@ try /// Destroy CascadeBuffer to actualize buffers' positions and reset extra references if (used_output.hasDelayed()) { - if (used_output.out_maybe_delayed_and_compressed) - { - used_output.out_maybe_delayed_and_compressed->finalize(); - } + /// do not call finalize here for CascadeWriteBuffer used_output.out_maybe_delayed_and_compressed, + /// exception is written into used_output.out_maybe_compressed later + /// HTTPHandler::trySendExceptionToClient is called with exception context, it is Ok to destroy buffers used_output.out_maybe_delayed_and_compressed.reset(); } diff --git a/tests/queries/0_stateless/00429_long_http_bufferization.sh b/tests/queries/0_stateless/00429_long_http_bufferization.sh index 34d07cef7e3..55192422389 100755 --- a/tests/queries/0_stateless/00429_long_http_bufferization.sh +++ b/tests/queries/0_stateless/00429_long_http_bufferization.sh @@ -7,9 +7,11 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh +format="RowBinary" + function query { # bash isn't able to store \0 bytes, so use [1; 255] random range - echo "SELECT greatest(toUInt8(1), toUInt8(intHash64(number))) FROM system.numbers LIMIT $1 FORMAT RowBinary" + echo "SELECT greatest(toUInt8(1), toUInt8(intHash64(number))) FROM system.numbers LIMIT $1 FORMAT $format" } function ch_url() { @@ -42,6 +44,14 @@ function check_last_line_exception() { } function check_exception_handling() { + # it is impossible to override max_block_size, details here https://github.com/ClickHouse/ClickHouse/issues/51694 + # rebuild CLICKHOUSE_URL for one call in order to avoid using random parameters from CLICKHOUSE_URL_PARAMS + CLICKHOUSE_URL="${CLICKHOUSE_PORT_HTTP_PROTO}://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTP}/?wait_end_of_query=0" \ + max_block_size=30000 \ + format=TSV \ + check_last_line_exception \ + "max_result_rows=400000&buffer_size=1048577&wait_end_of_query=0" 111222333444 + check_only_exception "max_result_bytes=1000" 1001 check_only_exception "max_result_bytes=1000&wait_end_of_query=1" 1001 @@ -60,6 +70,7 @@ check_exception_handling # Tune setting to speed up combinatorial test +# max_block_size has no effect here, that value has been set inside CLICKHOUSE_URL max_block_size=500000 corner_sizes="1048576 $(seq 500000 1000000 3500000)" From fe17a9b589a5bae6e2608fe101558fdcbd761107 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Fri, 30 Jun 2023 19:36:32 +0200 Subject: [PATCH 1075/1997] do not log aborted for pending mutate/merge when shutdown --- src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp index b569fa73a73..d4f8d1140a2 100644 --- a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp @@ -182,7 +182,6 @@ void MergeTreeBackgroundExecutor::removeTasksCorrespondingToStorage(Stora } catch (...) { - printExceptionWithRespectToAbort(log); pending.remove(id); } From 4511213a1f61ae7d67c8c44b6bab0ff3fe6ebaaa Mon Sep 17 00:00:00 2001 From: Julio Jimenez Date: Fri, 30 Jun 2023 13:52:22 -0400 Subject: [PATCH 1076/1997] job successful Signed-off-by: Julio Jimenez --- .github/workflows/nightly.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 3f6d9b86fd6..cf61012f2bc 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -121,8 +121,6 @@ jobs: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" SonarCloud: - # TODO: Remove if: whenever SonarCloud supports c++23 - # if: ${{ false }} runs-on: [self-hosted, builder] env: SONAR_SCANNER_VERSION: 4.8.0.2856 From 9a35921d005be1e7b34493d34429fb9dbf306ef7 Mon Sep 17 00:00:00 2001 From: Manas Alekar Date: Fri, 30 Jun 2023 13:16:02 -0700 Subject: [PATCH 1077/1997] Add tests. 
--- tests/queries/0_stateless/00415_into_outfile.reference | 4 ++++ tests/queries/0_stateless/00415_into_outfile.sh | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/tests/queries/0_stateless/00415_into_outfile.reference b/tests/queries/0_stateless/00415_into_outfile.reference index a609e77a50a..4576a2d9d60 100644 --- a/tests/queries/0_stateless/00415_into_outfile.reference +++ b/tests/queries/0_stateless/00415_into_outfile.reference @@ -1,5 +1,9 @@ performing test: select 1 2 3 +performing test: select_with_append +1 2 3 +performing test: select_with_truncate +1 2 3 performing test: union_all 1 2 3 4 diff --git a/tests/queries/0_stateless/00415_into_outfile.sh b/tests/queries/0_stateless/00415_into_outfile.sh index 77dc96a48e6..d360a29fa5a 100755 --- a/tests/queries/0_stateless/00415_into_outfile.sh +++ b/tests/queries/0_stateless/00415_into_outfile.sh @@ -21,6 +21,10 @@ function perform() perform "select" "SELECT 1, 2, 3 INTO OUTFILE '${CLICKHOUSE_TMP}/test_into_outfile_select.out'" +perform "select_with_append" "SELECT 1, 2, 3 INTO OUTFILE '${CLICKHOUSE_TMP}/test_into_outfile_select_with_append.out' APPEND" + +perform "select_with_truncate" "SELECT 1, 2, 3 INTO OUTFILE '${CLICKHOUSE_TMP}/test_into_outfile_select_with_truncate.out' TRUNCATE" + perform "union_all" "SELECT 1, 2 UNION ALL SELECT 3, 4 INTO OUTFILE '${CLICKHOUSE_TMP}/test_into_outfile_union_all.out' FORMAT TSV" | sort --numeric-sort perform "bad_union_all" "SELECT 1, 2 INTO OUTFILE '${CLICKHOUSE_TMP}/test_into_outfile_bad_union_all.out' UNION ALL SELECT 3, 4" From 5d652c4fd5befcaabfdf0fd6c7f1beec160708b3 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Fri, 30 Jun 2023 20:30:50 +0000 Subject: [PATCH 1078/1997] Finalize ColumnObject with only default values --- src/Columns/ColumnObject.cpp | 14 ++------------ src/Processors/QueryPlan/AggregatingStep.cpp | 2 ++ ...02789_object_type_invalid_num_of_rows.reference | 1 + 3 files changed, 5 insertions(+), 12 deletions(-) diff --git a/src/Columns/ColumnObject.cpp b/src/Columns/ColumnObject.cpp index 4fa0c3ee41a..07872774559 100644 --- a/src/Columns/ColumnObject.cpp +++ b/src/Columns/ColumnObject.cpp @@ -626,9 +626,6 @@ ColumnObject::ColumnObject(Subcolumns && subcolumns_, bool is_nullable_) void ColumnObject::checkConsistency() const { - if (num_rows && subcolumns.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "ColumnObject is inconsistent: it has no subcolumns, but has {} rows", num_rows); - if (subcolumns.empty()) return; @@ -645,9 +642,9 @@ void ColumnObject::checkConsistency() const size_t ColumnObject::size() const { -// #ifndef NDEBUG +#ifndef NDEBUG checkConsistency(); -// #endif +#endif return num_rows; } @@ -714,8 +711,6 @@ void ColumnObject::insert(const Field & field) } ++num_rows; - - checkConsistency(); } void ColumnObject::insertDefault() @@ -724,8 +719,6 @@ void ColumnObject::insertDefault() entry->data.insertDefault(); ++num_rows; - - checkConsistency(); } Field ColumnObject::operator[](size_t n) const @@ -783,8 +776,6 @@ void ColumnObject::insertRangeFrom(const IColumn & src, size_t start, size_t len num_rows += length; finalize(); - - checkConsistency(); } void ColumnObject::popBack(size_t length) @@ -793,7 +784,6 @@ void ColumnObject::popBack(size_t length) entry->data.popBack(length); num_rows -= length; - checkConsistency(); } template diff --git a/src/Processors/QueryPlan/AggregatingStep.cpp b/src/Processors/QueryPlan/AggregatingStep.cpp index 4ac972e2a79..eebbfc04304 100644 --- a/src/Processors/QueryPlan/AggregatingStep.cpp +++ 
b/src/Processors/QueryPlan/AggregatingStep.cpp @@ -319,6 +319,8 @@ void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const B { auto column_with_default = col.column->cloneEmpty(); col.type->insertDefaultInto(*column_with_default); + column_with_default->finalize(); + auto column = ColumnConst::create(std::move(column_with_default), 0); const auto * node = &dag->addColumn({ColumnPtr(std::move(column)), col.type, col.name}); node = &dag->materializeNode(*node); diff --git a/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference b/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference index e69de29bb2d..7dec35f7acb 100644 --- a/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference +++ b/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference @@ -0,0 +1 @@ +0.02 From 5e1cfb05318b8d3d511208146db020ae560e2ff7 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Sat, 1 Jul 2023 08:53:23 +0000 Subject: [PATCH 1079/1997] Avoid copying block if optimize_on_insert is false --- src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index a13a96ac65e..adb162b9fe7 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -452,7 +452,7 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) if constexpr (async_insert) { /// we copy everything but offsets which we move because they are only used by async insert - if (storage.writer.getMergingMode() != MergeTreeData::MergingParams::Mode::Ordinary) + if (settings.optimize_on_insert && storage.writer.getMergingMode() != MergeTreeData::MergingParams::Mode::Ordinary) unmerged_block.emplace(Block(current_block.block), Row(current_block.partition), std::move(current_block.offsets)); } From 4f10a65a128160f4c561de0913a2ed8a7b8be284 Mon Sep 17 00:00:00 2001 From: kssenii Date: Sat, 1 Jul 2023 15:09:23 +0200 Subject: [PATCH 1080/1997] Fix --- src/Interpreters/Cache/FileCache.cpp | 7 ++++--- src/Interpreters/Cache/Metadata.cpp | 3 ++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index 58690ac4cb5..7d73a0824b8 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -48,6 +48,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; } FileCache::FileCache(const FileCacheSettings & settings) @@ -811,9 +812,9 @@ void FileCache::removeKey(const Key & key) { assertInitialized(); - auto locked_key = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::THROW); + auto locked_key = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::RETURN_NULL); if (!locked_key) - return; + throw Exception(ErrorCodes::BAD_ARGUMENTS, "No such key `{}`", key); locked_key->removeAllReleasable(); } @@ -839,7 +840,7 @@ void FileCache::removeFileSegment(const Key & key, size_t offset) auto locked_key = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::RETURN_NULL); if (!locked_key) - return; + throw Exception(ErrorCodes::BAD_ARGUMENTS, "No such key `{}`", key); locked_key->removeFileSegment(offset); } diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index 30b2b1aafb9..e453ff06884 100644 --- 
a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -19,6 +19,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; } FileSegmentMetadata::FileSegmentMetadata(FileSegmentPtr && file_segment_) @@ -385,7 +386,7 @@ KeyMetadata::iterator LockedKey::removeFileSegment(size_t offset) { auto it = key_metadata->find(offset); if (it == key_metadata->end()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no offset {}", offset); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no offset {}", offset); auto file_segment = it->second->file_segment; return removeFileSegmentImpl(it, file_segment->lock()); From a830ba399bd6979ab93646cd4fba6c7daf060b12 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Thu, 29 Jun 2023 16:20:54 +0200 Subject: [PATCH 1081/1997] Repro test --- .../02809_prewhere_and_in.reference | 8 +++ .../0_stateless/02809_prewhere_and_in.sql | 56 +++++++++++++++++++ 2 files changed, 64 insertions(+) create mode 100644 tests/queries/0_stateless/02809_prewhere_and_in.reference create mode 100644 tests/queries/0_stateless/02809_prewhere_and_in.sql diff --git a/tests/queries/0_stateless/02809_prewhere_and_in.reference b/tests/queries/0_stateless/02809_prewhere_and_in.reference new file mode 100644 index 00000000000..fb713be9991 --- /dev/null +++ b/tests/queries/0_stateless/02809_prewhere_and_in.reference @@ -0,0 +1,8 @@ +PREWHERE a IN (( +PREWHERE a IN (1, 2, 3) +PREWHERE a IN (t_02809_set) +PREWHERE a IN (t_02809_aux) +PREWHERE b NOT IN (( +PREWHERE b NOT IN (1, 2, 3) +PREWHERE b NOT IN (t_02809_set) +PREWHERE b NOT IN (t_02809_aux) diff --git a/tests/queries/0_stateless/02809_prewhere_and_in.sql b/tests/queries/0_stateless/02809_prewhere_and_in.sql new file mode 100644 index 00000000000..d2816cd5b52 --- /dev/null +++ b/tests/queries/0_stateless/02809_prewhere_and_in.sql @@ -0,0 +1,56 @@ +DROP TABLE IF EXISTS t_02809; + +CREATE TABLE t_02809(a Int64, b Int64, s String) +ENGINE=MergeTree order by tuple() +AS SELECT number, number%10, toString(arrayMap(i-> cityHash64(i*number), range(50))) FROM numbers(10000); + +CREATE TABLE t_02809_set(c Int64) +ENGINE=Set() +AS SELECT * FROM numbers(10); + +CREATE TABLE t_02809_aux(c Int64) +ENGINE=Memory() +AS SELECT * FROM numbers(10); + + +SET optimize_move_to_prewhere=1; + +-- Queries with 'IN' +SELECT * FROM (EXPLAIN SYNTAX + SELECT * FROM t_02809 WHERE a IN (SELECT * FROM system.one) +) WHERE explain LIKE '%WHERE%'; + +SELECT * FROM (EXPLAIN SYNTAX + SELECT * FROM t_02809 WHERE a IN (1,2,3) +) WHERE explain LIKE '%WHERE%'; + +SELECT * FROM (EXPLAIN SYNTAX + SELECT * FROM t_02809 WHERE a IN t_02809_set +) WHERE explain LIKE '%WHERE%'; + +SELECT * FROM (EXPLAIN SYNTAX + SELECT * FROM t_02809 WHERE a IN t_02809_aux +) WHERE explain LIKE '%WHERE%'; + + +-- Queries with 'NOT IN' +SELECT * FROM (EXPLAIN SYNTAX + SELECT * FROM t_02809 WHERE b NOT IN (SELECT * FROM system.one) +) WHERE explain LIKE '%WHERE%'; + +SELECT * FROM (EXPLAIN SYNTAX + SELECT * FROM t_02809 WHERE b NOT IN (1,2,3) +) WHERE explain LIKE '%WHERE%'; + +SELECT * FROM (EXPLAIN SYNTAX + SELECT * FROM t_02809 WHERE b NOT IN t_02809_set +) WHERE explain LIKE '%WHERE%'; + +SELECT * FROM (EXPLAIN SYNTAX + SELECT * FROM t_02809 WHERE b NOT IN t_02809_aux +) WHERE explain LIKE '%WHERE%'; + + +DROP TABLE t_02809; +DROP TABLE t_02809_set; +DROP TABLE t_02809_aux; From 05d399c0b34cd7a2e13cabbc1fd396f2b251fedb Mon Sep 17 00:00:00 2001 From: Alexander Gololobov 
<440544+davenger@users.noreply.github.com> Date: Thu, 29 Jun 2023 16:25:52 +0200 Subject: [PATCH 1082/1997] Handle subqueries and sets when collecting table columns --- .../MergeTree/MergeTreeWhereOptimizer.cpp | 3 +++ src/Storages/MergeTree/RPNBuilder.cpp | 15 +++++++++++++++ src/Storages/MergeTree/RPNBuilder.h | 2 ++ 3 files changed, 20 insertions(+) diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index 25a4579c73e..1620ba98d58 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -110,6 +110,9 @@ static void collectColumns(const RPNBuilderTreeNode & node, const NameSet & colu if (node.isConstant()) return; + if (node.isSubqueryOrSet()) + return; + if (!node.isFunction()) { auto column_name = node.getColumnName(); diff --git a/src/Storages/MergeTree/RPNBuilder.cpp b/src/Storages/MergeTree/RPNBuilder.cpp index 0b42111be45..a0c96c13d59 100644 --- a/src/Storages/MergeTree/RPNBuilder.cpp +++ b/src/Storages/MergeTree/RPNBuilder.cpp @@ -181,6 +181,21 @@ bool RPNBuilderTreeNode::isConstant() const } } +bool RPNBuilderTreeNode::isSubqueryOrSet() const +{ + if (ast_node) + { + return + typeid_cast(ast_node) || + typeid_cast(ast_node); + } + else + { + const auto * node_without_alias = getNodeWithoutAlias(dag_node); + return node_without_alias->result_type->getTypeId() == TypeIndex::Set; + } +} + ColumnWithTypeAndName RPNBuilderTreeNode::getConstantColumn() const { if (!isConstant()) diff --git a/src/Storages/MergeTree/RPNBuilder.h b/src/Storages/MergeTree/RPNBuilder.h index 6f624d93cd6..9eeb6deefd5 100644 --- a/src/Storages/MergeTree/RPNBuilder.h +++ b/src/Storages/MergeTree/RPNBuilder.h @@ -98,6 +98,8 @@ public: /// Is node constant bool isConstant() const; + bool isSubqueryOrSet() const; + /** Get constant as constant column. * Node must be constant before calling these method, otherwise logical exception is thrown. */ From f36f981f15f0a7aed86762c32ee706ccaa415df9 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Fri, 30 Jun 2023 18:22:47 +0200 Subject: [PATCH 1083/1997] Move adding CreateSets to the end of optimizations, after applying key conditions and indexes --- .../QueryPlan/Optimizations/Optimizations.h | 3 ++ .../optimizePrimaryKeyCondition.cpp | 2 -- .../QueryPlan/Optimizations/optimizeTree.cpp | 31 ++++++++++++++++++- src/Processors/QueryPlan/QueryPlan.cpp | 1 + .../QueryPlan/ReadFromMergeTree.cpp | 9 ++---- src/Processors/QueryPlan/ReadFromMergeTree.h | 2 +- .../QueryPlan/SourceStepWithFilter.h | 3 +- .../System/StorageSystemZooKeeper.cpp | 4 +-- 8 files changed, 42 insertions(+), 13 deletions(-) diff --git a/src/Processors/QueryPlan/Optimizations/Optimizations.h b/src/Processors/QueryPlan/Optimizations/Optimizations.h index 2b934ec440b..bc47413cbb5 100644 --- a/src/Processors/QueryPlan/Optimizations/Optimizations.h +++ b/src/Processors/QueryPlan/Optimizations/Optimizations.h @@ -14,6 +14,9 @@ namespace QueryPlanOptimizations void optimizeTreeFirstPass(const QueryPlanOptimizationSettings & settings, QueryPlan::Node & root, QueryPlan::Nodes & nodes); /// Second pass is used to apply read-in-order and attach a predicate to PK. void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_settings, QueryPlan::Node & root, QueryPlan::Nodes & nodes); +/// Third pass is used to apply filters such as key conditions and skip indexes to the storages that support them. 
+/// After that it adds CreateSetsStep for the subqueries that have not been used in the filters.
+void optimizeTreeThirdPass(QueryPlan::Node & root, QueryPlan::Nodes & nodes);
 
 /// Optimization (first pass) is a function applied to QueryPlan::Node.
 /// It can read and update subtree of specified node.
diff --git a/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyCondition.cpp b/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyCondition.cpp
index 5ef786ff975..e98386a6ee9 100644
--- a/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyCondition.cpp
+++ b/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyCondition.cpp
@@ -28,8 +28,6 @@ void optimizePrimaryKeyCondition(const Stack & stack)
         else
             break;
     }
-
-    source_step_with_filter->onAddFilterFinish();
 }
 
 }
diff --git a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp
index d620170e29c..e788918703e 100644
--- a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp
+++ b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp
@@ -167,7 +167,6 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s
         optimizePrewhere(stack, nodes);
         optimizePrimaryKeyCondition(stack);
         enableMemoryBoundMerging(*stack.back().node, nodes);
-        addPlansForSets(*stack.back().node, nodes);
 
         stack.pop_back();
     }
@@ -178,5 +177,35 @@
             "No projection is used when optimize_use_projections = 1 and force_optimize_projection = 1");
 }
 
+void optimizeTreeThirdPass(QueryPlan::Node & root, QueryPlan::Nodes & nodes)
+{
+    Stack stack;
+    stack.push_back({.node = &root});
+
+    while (!stack.empty())
+    {
+        /// NOTE: frame cannot be safely used after stack was modified.
+        auto & frame = stack.back();
+
+        /// Traverse all children first.
+ if (frame.next_child < frame.node->children.size()) + { + auto next_frame = Frame{.node = frame.node->children[frame.next_child]}; + ++frame.next_child; + stack.push_back(next_frame); + continue; + } + + if (auto * source_step_with_filter = dynamic_cast(frame.node->step.get())) + { + source_step_with_filter->applyFilters(); + } + + addPlansForSets(*frame.node, nodes); + + stack.pop_back(); + } +} + } } diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp index cb732e58855..687260441ff 100644 --- a/src/Processors/QueryPlan/QueryPlan.cpp +++ b/src/Processors/QueryPlan/QueryPlan.cpp @@ -482,6 +482,7 @@ void QueryPlan::optimize(const QueryPlanOptimizationSettings & optimization_sett QueryPlanOptimizations::optimizeTreeFirstPass(optimization_settings, *root, nodes); QueryPlanOptimizations::optimizeTreeSecondPass(optimization_settings, *root, nodes); + QueryPlanOptimizations::optimizeTreeThirdPass(*root, nodes); updateDataStreams(*root); } diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 1e21d13e2b1..e7bf1ce2edf 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1299,13 +1299,10 @@ static void buildIndexes( indexes->skip_indexes = std::move(skip_indexes); } -void ReadFromMergeTree::onAddFilterFinish() +void ReadFromMergeTree::applyFilters() { - if (!filter_nodes.nodes.empty()) - { - auto filter_actions_dag = buildFilterDAG(context, prewhere_info, filter_nodes, query_info); - buildIndexes(indexes, filter_actions_dag, data, context, query_info, metadata_for_reading); - } + auto filter_actions_dag = buildFilterDAG(context, prewhere_info, filter_nodes, query_info); + buildIndexes(indexes, filter_actions_dag, data, context, query_info, metadata_for_reading); } MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToRead( diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index 06021cb1c5b..3e3edd4dc5c 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -226,7 +226,7 @@ public: size_t getNumStreams() const { return requested_num_streams; } bool isParallelReadingEnabled() const { return read_task_callback != std::nullopt; } - void onAddFilterFinish() override; + void applyFilters() override; private: static MergeTreeDataSelectAnalysisResultPtr selectRangesToReadImpl( diff --git a/src/Processors/QueryPlan/SourceStepWithFilter.h b/src/Processors/QueryPlan/SourceStepWithFilter.h index 34b6e3c6a7b..dbaff06fde6 100644 --- a/src/Processors/QueryPlan/SourceStepWithFilter.h +++ b/src/Processors/QueryPlan/SourceStepWithFilter.h @@ -37,7 +37,8 @@ public: filter_dags.push_back(std::move(filter_dag)); } - virtual void onAddFilterFinish() {} + /// Apply filters that can optimize reading from storage. 
+ virtual void applyFilters() {} protected: std::vector filter_dags; diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index d361a4173c1..4d7f59b8ccd 100644 --- a/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -184,7 +184,7 @@ public: void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) override; - void onAddFilterFinish() override; + void applyFilters() override; private: void fillData(MutableColumns & res_columns); @@ -421,7 +421,7 @@ static Paths extractPath(const ActionsDAG::NodeRawConstPtrs & filter_nodes, Cont } -void ReadFromSystemZooKeeper::onAddFilterFinish() +void ReadFromSystemZooKeeper::applyFilters() { paths = extractPath(getFilterNodes().nodes, context, context->getSettingsRef().allow_unrestricted_reads_from_keeper); } From 9f53c48636f511d00187e1c80547b0ff236bcd39 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Sat, 1 Jul 2023 09:19:54 +0200 Subject: [PATCH 1084/1997] Fix test with analyzer --- .../0_stateless/02809_prewhere_and_in.reference | 16 ++++++++-------- .../0_stateless/02809_prewhere_and_in.sql | 16 ++++++++-------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/tests/queries/0_stateless/02809_prewhere_and_in.reference b/tests/queries/0_stateless/02809_prewhere_and_in.reference index fb713be9991..3080ae862bb 100644 --- a/tests/queries/0_stateless/02809_prewhere_and_in.reference +++ b/tests/queries/0_stateless/02809_prewhere_and_in.reference @@ -1,8 +1,8 @@ -PREWHERE a IN (( -PREWHERE a IN (1, 2, 3) -PREWHERE a IN (t_02809_set) -PREWHERE a IN (t_02809_aux) -PREWHERE b NOT IN (( -PREWHERE b NOT IN (1, 2, 3) -PREWHERE b NOT IN (t_02809_set) -PREWHERE b NOT IN (t_02809_aux) +PREWHERE a IN +PREWHERE a IN +PREWHERE a IN +PREWHERE a IN +PREWHERE b NOT IN +PREWHERE b NOT IN +PREWHERE b NOT IN +PREWHERE b NOT IN diff --git a/tests/queries/0_stateless/02809_prewhere_and_in.sql b/tests/queries/0_stateless/02809_prewhere_and_in.sql index d2816cd5b52..345577d6c7c 100644 --- a/tests/queries/0_stateless/02809_prewhere_and_in.sql +++ b/tests/queries/0_stateless/02809_prewhere_and_in.sql @@ -16,37 +16,37 @@ AS SELECT * FROM numbers(10); SET optimize_move_to_prewhere=1; -- Queries with 'IN' -SELECT * FROM (EXPLAIN SYNTAX +SELECT substring(explain, 1, 13) FROM (EXPLAIN SYNTAX SELECT * FROM t_02809 WHERE a IN (SELECT * FROM system.one) ) WHERE explain LIKE '%WHERE%'; -SELECT * FROM (EXPLAIN SYNTAX +SELECT substring(explain, 1, 13) FROM (EXPLAIN SYNTAX SELECT * FROM t_02809 WHERE a IN (1,2,3) ) WHERE explain LIKE '%WHERE%'; -SELECT * FROM (EXPLAIN SYNTAX +SELECT substring(explain, 1, 13) FROM (EXPLAIN SYNTAX SELECT * FROM t_02809 WHERE a IN t_02809_set ) WHERE explain LIKE '%WHERE%'; -SELECT * FROM (EXPLAIN SYNTAX +SELECT substring(explain, 1, 13) FROM (EXPLAIN SYNTAX SELECT * FROM t_02809 WHERE a IN t_02809_aux ) WHERE explain LIKE '%WHERE%'; -- Queries with 'NOT IN' -SELECT * FROM (EXPLAIN SYNTAX +SELECT substring(explain, 1, 17) FROM (EXPLAIN SYNTAX SELECT * FROM t_02809 WHERE b NOT IN (SELECT * FROM system.one) ) WHERE explain LIKE '%WHERE%'; -SELECT * FROM (EXPLAIN SYNTAX +SELECT substring(explain, 1, 17) FROM (EXPLAIN SYNTAX SELECT * FROM t_02809 WHERE b NOT IN (1,2,3) ) WHERE explain LIKE '%WHERE%'; -SELECT * FROM (EXPLAIN SYNTAX +SELECT substring(explain, 1, 17) FROM (EXPLAIN SYNTAX SELECT * FROM t_02809 WHERE b NOT IN t_02809_set ) WHERE explain 
LIKE '%WHERE%'; -SELECT * FROM (EXPLAIN SYNTAX +SELECT substring(explain, 1, 17) FROM (EXPLAIN SYNTAX SELECT * FROM t_02809 WHERE b NOT IN t_02809_aux ) WHERE explain LIKE '%WHERE%'; From 8c0463fdd4bb99f707bcbb7b61b86ab8984ec6b7 Mon Sep 17 00:00:00 2001 From: FFFFFFFHHHHHHH <916677625@qq.com> Date: Sun, 2 Jul 2023 16:14:08 +0800 Subject: [PATCH 1085/1997] fix --- src/Functions/array/arrayJaccardIndex.cpp | 80 ++++++++++------------- 1 file changed, 35 insertions(+), 45 deletions(-) diff --git a/src/Functions/array/arrayJaccardIndex.cpp b/src/Functions/array/arrayJaccardIndex.cpp index c1ec8b53d25..211680092b3 100644 --- a/src/Functions/array/arrayJaccardIndex.cpp +++ b/src/Functions/array/arrayJaccardIndex.cpp @@ -34,85 +34,75 @@ public: bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo &) const override { return true; } bool useDefaultImplementationForConstants() const override { return true; } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - DataTypes types; - for (size_t i = 0; i < 2; ++i) - { - const auto * array_type = checkAndGetDataType(arguments[i].get()); - if (!array_type) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument {} of function {} must be array, but it has type{}.", i + 1, getName(), arguments[i]->getName()); - } + FunctionArgumentDescriptors args{ + {"array_1", [](const IDataType & type) { return isArray(type.getPtr()); }, nullptr, "Array"}, + {"array_2", [](const IDataType & type) { return isArray(type.getPtr()); }, nullptr, "Array"}, + }; + validateFunctionArgumentTypes(*this, arguments, args); return std::make_shared>(); } template - static void vector(const ColumnArray::Offsets & intersect_offsets, const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, PaddedPODArray & res) + static inline void getArraySize(const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, size_t & left_size, size_t & right_size, const size_t & i) + { + if constexpr (is_const_left) + left_size = left_offsets[0]; + else + left_size = left_offsets[i] - left_offsets[i - 1]; + if constexpr (is_const_right) + right_size = right_offsets[0]; + else + right_size = right_offsets[i] - right_offsets[i - 1]; + } + + template + static inline void vector(const ColumnArray::Offsets & intersect_offsets, const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, PaddedPODArray & res) { size_t left_size; size_t right_size; for (size_t i = 0; i < res.size(); ++i) { - if constexpr (is_const_left) - left_size = left_offsets[0]; - else - left_size = left_offsets[i] - left_offsets[i - 1]; - if constexpr (is_const_right) - right_size = right_offsets[0]; - else - right_size = right_offsets[i] - right_offsets[i - 1]; - + getArraySize(left_offsets, right_offsets, left_size, right_size, i); size_t intersect_size = intersect_offsets[i] - intersect_offsets[i - 1]; res[i] = static_cast(intersect_size) / (left_size + right_size - intersect_size); - if (unlikely(isnan(res[i]))) - res[i] = 1; } } - template - static void vectorWithEmptyIntersect(const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, PaddedPODArray & res) + template + static inline void vectorWithEmptyIntersect(const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, PaddedPODArray & res) { size_t left_size; size_t right_size; for (size_t i = 0; i < 
res.size(); ++i) { - if constexpr (is_const_left) - left_size = left_offsets[0]; - else - left_size = left_offsets[i] - left_offsets[i - 1]; - if constexpr (is_const_right) - right_size = right_offsets[0]; - else - right_size = right_offsets[i] - right_offsets[i - 1]; - - res[i] = static_cast(left_size + right_size == 0); + getArraySize(left_offsets, right_offsets, left_size, right_size, i); + if (unlikely(!left_size && !right_size)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "array aggregate functions cannot be performed on two empty arrays"); + res[i] = 0; } } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - bool is_const_left; - bool is_const_right; - const ColumnArray * left_array; - const ColumnArray * right_array; - - auto cast_array = [&](const ColumnWithTypeAndName & col) + auto cast_array = [&](const ColumnWithTypeAndName & col) -> std::pair { const ColumnArray * res; bool is_const = false; - if (typeid_cast(col.column.get())) + if (const ColumnConst * col_const = typeid_cast(col.column.get())) { - res = checkAndGetColumn(checkAndGetColumnConst(col.column.get())->getDataColumnPtr().get()); + res = checkAndGetColumn(col_const->getDataColumnPtr().get()); is_const = true; } else if (!(res = checkAndGetColumn(col.column.get()))) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Argument for function {} must be array but it has type {}.", col.column->getName(), getName()); - return std::make_pair(res, is_const); + return {res, is_const}; }; - std::tie(left_array, is_const_left) = cast_array(arguments[0]); - std::tie(right_array, is_const_right) = cast_array(arguments[1]); + const auto & [left_array, is_const_left] = cast_array(arguments[0]); + const auto & [right_array, is_const_right] = cast_array(arguments[1]); auto intersect_array = FunctionFactory::instance().get("arrayIntersect", context)->build(arguments); ColumnWithTypeAndName intersect_column; @@ -131,8 +121,8 @@ public: vectorWithEmptyIntersect(left_array->getOffsets(), right_array->getOffsets(), vec_res); \ else \ { \ - const ColumnArray * col_array = checkAndGetColumn(intersect_column.column.get()); \ - vector(col_array->getOffsets(), left_array->getOffsets(), right_array->getOffsets(), vec_res); \ + const ColumnArray * intersect_column_array = checkAndGetColumn(intersect_column.column.get()); \ + vector(intersect_column_array->getOffsets(), left_array->getOffsets(), right_array->getOffsets(), vec_res); \ } if (!is_const_left && !is_const_right) From 309fbc45a23a5132be56e6b71c87955b05d0db7c Mon Sep 17 00:00:00 2001 From: FFFFFFFHHHHHHH <916677625@qq.com> Date: Sun, 2 Jul 2023 16:14:53 +0800 Subject: [PATCH 1086/1997] fix test --- .../02737_arrayJaccardIndex.reference | 52 +++++++------------ .../0_stateless/02737_arrayJaccardIndex.sql | 33 +++++++----- ...2737_arrayJaccardIndex_exception.reference | 2 + .../02737_arrayJaccardIndex_exception.sh | 14 +++++ 4 files changed, 55 insertions(+), 46 deletions(-) create mode 100644 tests/queries/0_stateless/02737_arrayJaccardIndex_exception.reference create mode 100644 tests/queries/0_stateless/02737_arrayJaccardIndex_exception.sh diff --git a/tests/queries/0_stateless/02737_arrayJaccardIndex.reference b/tests/queries/0_stateless/02737_arrayJaccardIndex.reference index e6934bfe092..0b7969889c0 100644 --- a/tests/queries/0_stateless/02737_arrayJaccardIndex.reference +++ b/tests/queries/0_stateless/02737_arrayJaccardIndex.reference @@ -1,32 +1,20 @@ -0 -0.5 -1 -0.67 -1 -0 -0 -0 -1 -0 -0 -0 -0 -0.5 
-1 -0.67 -0.5 -0.5 -0.5 -0.5 -1 -1 -1 -1 -1 -1 -1 -1 -0.33 -0.2 -1 -1 +[1] [1,2] 0.5 +[1,2] [1,2] 1 +[1,2,3] [1,2] 0.67 +[1] [] 0 +[1,2] [] 0 +[1,2,3] [] 0 +[] [1] 0 +[] [1,2] 0 +[] [1,2,3] 0 +[1,2] [1] 0.5 +[1,2] [1,2] 1 +[1,2] [1,2,3] 0.67 +[1] [1] 1 +[1,2] [1,2] 1 +[1,2,3] [1,2,3] 1 +['a'] ['a','aa','aaa'] 0.33 +[1,1.1,2.2] [2.2,3.3,444] 0.2 +[1] [1] 1 +[1,2] [1,2,3,4] 0.5 +[[1,2],[3,4]] [[1,2],[3,5]] 0.33 diff --git a/tests/queries/0_stateless/02737_arrayJaccardIndex.sql b/tests/queries/0_stateless/02737_arrayJaccardIndex.sql index c3f04ba0b10..000106e93b7 100644 --- a/tests/queries/0_stateless/02737_arrayJaccardIndex.sql +++ b/tests/queries/0_stateless/02737_arrayJaccardIndex.sql @@ -1,26 +1,31 @@ drop table if exists array_jaccard_index; -create table array_jaccard_index (arr Array(UInt8)) engine=MergeTree partition by arr order by arr; +create table array_jaccard_index (arr Array(UInt8)) engine = MergeTree order by arr; insert into array_jaccard_index values ([1,2,3]); + insert into array_jaccard_index values ([1,2]); + insert into array_jaccard_index values ([1]); -insert into array_jaccard_index values ([]); -select round(arrayJaccardIndex(arr, [1,2]), 2) from array_jaccard_index order by arr; -select round(arrayJaccardIndex(arr, []), 2) from array_jaccard_index order by arr; -select round(arrayJaccardIndex([], arr), 2) from array_jaccard_index order by arr; -select round(arrayJaccardIndex([1,2], arr), 2) from array_jaccard_index order by arr; -select round(arrayJaccardIndex([1,2], [1,2,3,4]), 2) from array_jaccard_index order by arr; -select round(arrayJaccardIndex([], []), 2) from array_jaccard_index order by arr; -select round(arrayJaccardIndex(arr, arr), 2) from array_jaccard_index order by arr; +select arr as arr_1, [1,2] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) from array_jaccard_index order by arr; -drop table if exists array_jaccard_index; +select arr as arr_1, [] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) from array_jaccard_index order by arr; -select round(arrayJaccardIndex(['a'], ['a', 'aa', 'aaa']), 2); +select [] as arr_1, arr as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) from array_jaccard_index order by arr; -select round(arrayJaccardIndex([1, 1.1, 2.2], [2.2, 3.3, 444]), 2); +select [1,2] as arr_1, arr as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) from array_jaccard_index order by arr; -select round(arrayJaccardIndex([], []), 2); +select arr as arr_1, arr as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) from array_jaccard_index order by arr; -select round(arrayJaccardIndex([toUInt16(1)], [toUInt32(1)]), 2); +drop table array_jaccard_index; + +select ['a'] as arr_1, ['a', 'aa', 'aaa'] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); + +select [1, 1.1, 2.2] as arr_1, [2.2, 3.3, 444] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); + +select [toUInt16(1)] as arr_1, [toUInt32(1)] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); + +select [1,2] as arr_1, [1,2,3,4] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); + +select [[1,2], [3,4]] as arr_1, [[1,2], [3,5]] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); diff --git a/tests/queries/0_stateless/02737_arrayJaccardIndex_exception.reference b/tests/queries/0_stateless/02737_arrayJaccardIndex_exception.reference new file mode 100644 index 00000000000..307d9a195b0 --- /dev/null +++ b/tests/queries/0_stateless/02737_arrayJaccardIndex_exception.reference @@ -0,0 +1,2 @@ +Code: 43 +Code: 386 diff --git a/tests/queries/0_stateless/02737_arrayJaccardIndex_exception.sh 
b/tests/queries/0_stateless/02737_arrayJaccardIndex_exception.sh
new file mode 100644
index 00000000000..c36700c6e0f
--- /dev/null
+++ b/tests/queries/0_stateless/02737_arrayJaccardIndex_exception.sh
@@ -0,0 +1,14 @@
+#!/usr/bin/env bash
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+echo ${CLICKHOUSE_CLIENT}
+
+# Code: 43. DB::Exception: Received from localhost:9000. DB::Exception: array aggregate functions cannot be performed on two empty arrays: While processing arrayJaccardIndex([], []). (ILLEGAL_TYPE_OF_ARGUMENT)
+$CLICKHOUSE_CLIENT -q "SELECT arrayJaccardIndex([], [])" |& grep -o "Code: 43"
+
+
+# Code: 386. DB::Exception: Received from localhost:9000. DB::Exception: There is no subtype for types UInt8, String because some of them are String/FixedString and some of them are not: While processing [1, 2] AS arr_1, ['1', '2'] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2). (NO_COMMON_TYPE)
+$CLICKHOUSE_CLIENT -q "select [1,2] as arr_1, ['1','2'] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2)" |& grep -o "Code: 386"
From 85053ef008295ec48e66a42a4d1dafa41ff22e6c Mon Sep 17 00:00:00 2001
From: FFFFFFFHHHHHHH <916677625@qq.com>
Date: Sun, 2 Jul 2023 16:39:55 +0800
Subject: [PATCH 1087/1997] fix permission

---
 tests/queries/0_stateless/02737_arrayJaccardIndex_exception.sh | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 mode change 100644 => 100755 tests/queries/0_stateless/02737_arrayJaccardIndex_exception.sh

diff --git a/tests/queries/0_stateless/02737_arrayJaccardIndex_exception.sh b/tests/queries/0_stateless/02737_arrayJaccardIndex_exception.sh
old mode 100644
new mode 100755
From 2c09ea04048d664fad9c70de49c0ceff10c9ec22 Mon Sep 17 00:00:00 2001
From: FFFFFFFHHHHHHH <916677625@qq.com>
Date: Sun, 2 Jul 2023 17:09:52 +0800
Subject: [PATCH 1088/1997] add doc

---
 .../sql-reference/functions/array-functions.md     | 18 ++++++++++++++++++
 .../02737_arrayJaccardIndex_exception.sh           |  3 ---
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md
index 7f2b8f3c605..921e9765080 100644
--- a/docs/en/sql-reference/functions/array-functions.md
+++ b/docs/en/sql-reference/functions/array-functions.md
@@ -996,6 +996,24 @@ SELECT
 └──────────────┴───────────┘
 ```
 
+## arrayJaccardIndex
+
+Returns the Jaccard similarity between two arrays.
+
+**Example**
+
+Query:
+``` sql
+SELECT arrayJaccardIndex([1, 2], [2, 3]) AS res
+```
+
+Result:
+``` text
+┌─res────────────────┐
+│ 0.3333333333333333 │
+└────────────────────┘
+```
+
 ## arrayReduce
 
 Applies an aggregate function to array elements and returns its result. The name of the aggregation function is passed as a string in single quotes `'max'`, `'sum'`. When using parametric aggregate functions, the parameter is indicated after the function name in parentheses `'uniqUpTo(6)'`.
diff --git a/tests/queries/0_stateless/02737_arrayJaccardIndex_exception.sh b/tests/queries/0_stateless/02737_arrayJaccardIndex_exception.sh
index c36700c6e0f..49e80e06cba 100755
--- a/tests/queries/0_stateless/02737_arrayJaccardIndex_exception.sh
+++ b/tests/queries/0_stateless/02737_arrayJaccardIndex_exception.sh
@@ -4,11 +4,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
 
-echo ${CLICKHOUSE_CLIENT}
-
-# Code: 43. DB::Exception: Received from localhost:9000.
DB::Exception: array aggregate functions cannot be performed on two empty arrays: While processing arrayJaccardIndex([], []). (ILLEGAL_TYPE_OF_ARGUMENT) $CLICKHOUSE_CLIENT -q "SELECT arrayJaccardIndex([], [])" |& grep -o "Code: 43" - # Code: 386. DB::Exception: Received from localhost:9000. DB::Exception: There is no subtype for types UInt8, String because some of them are String/FixedString and some of them are not: While processing [1, 2] AS arr_1, ['1', '2'] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2). (NO_COMMON_TYPE) $CLICKHOUSE_CLIENT -q "select [1,2] as arr_1, ['1','2'] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2)" |& grep -o "Code: 386" From c06afc53b4a37c526eb8b628b4102785baedee11 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Sun, 2 Jul 2023 09:52:50 +0000 Subject: [PATCH 1089/1997] Make scripts backwards compatible --- docker/test/stateless/run.sh | 20 ++++++++++++++++++-- tests/ci/utils.lib | 4 ++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 7ccedb8c0b3..fe53925ecc8 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -19,7 +19,7 @@ ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test source /usr/share/clickhouse-test/ci/attach_gdb.lib || true # FIXME: to not break old builds, clean on 2023-09-01 # shellcheck disable=SC1091 -source /usr/share/clickhouse-test/ci/utils.lib +source /usr/share/clickhouse-test/ci/utils.lib || true # FIXME: to not break old builds, clean on 2023-09-01 # install test configs /usr/share/clickhouse-test/config/install.sh @@ -93,6 +93,22 @@ sleep 5 attach_gdb_to_clickhouse || true # FIXME: to not break old builds, clean on 2023-09-01 +function fn_exists() { + declare -F "$1" > /dev/null; +} + +# FIXME: to not break old builds, clean on 2023-09-01 +function try_run_with_retry() { + local total_retries="$1" + shift + + if fn_exists run_with_retry; then + run_with_retry "$total_retries" "$@" + else + "$@" + fi +} + function run_tests() { set -x @@ -140,7 +156,7 @@ function run_tests() ADDITIONAL_OPTIONS+=('--report-logs-stats') - run_with_retry 10 clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')" + try_run_with_retry 10 clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')" set +e clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ diff --git a/tests/ci/utils.lib b/tests/ci/utils.lib index 95ad50f635b..b5ce4ae0d78 100644 --- a/tests/ci/utils.lib +++ b/tests/ci/utils.lib @@ -22,4 +22,8 @@ function run_with_retry() echo "Command '$*' failed after $total_retries retries, exiting" exit 1 +} + +function fn_exists() { + declare -F "$1" > /dev/null; } \ No newline at end of file From 91d091a80607a8c9c9e9edbc02392172825f5299 Mon Sep 17 00:00:00 2001 From: FFFFFFFHHHHHHH <916677625@qq.com> Date: Sun, 2 Jul 2023 18:58:58 +0800 Subject: [PATCH 1090/1997] fix style --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 2802e52c288..6231e8a07f3 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1034,6 +1034,7 @@ arrayFirst arrayFirstIndex arrayFlatten arrayIntersect +arrayJaccardIndex arrayJoin 
arrayLast arrayLastIndex @@ -1607,6 +1608,7 @@ isNull isValidJSON isValidUTF iteratively +jaccard javaHash javaHashUTF jbod From 4a8fa4b35a84d0dc27d3645e59a4ab415fbc86f0 Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Sun, 2 Jul 2023 11:42:52 -0300 Subject: [PATCH 1091/1997] test for full_join_and_nullable_pk --- ..._join_and_nullable_key_and_index.reference | 6 ++++ ...0_full_join_and_nullable_key_and_index.sql | 33 +++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100644 tests/queries/0_stateless/01410_full_join_and_nullable_key_and_index.reference create mode 100644 tests/queries/0_stateless/01410_full_join_and_nullable_key_and_index.sql diff --git a/tests/queries/0_stateless/01410_full_join_and_nullable_key_and_index.reference b/tests/queries/0_stateless/01410_full_join_and_nullable_key_and_index.reference new file mode 100644 index 00000000000..83ff1886775 --- /dev/null +++ b/tests/queries/0_stateless/01410_full_join_and_nullable_key_and_index.reference @@ -0,0 +1,6 @@ +select 1 +\N 1232 Johny +select 2 +\N 1232 Johny +select 3 +\N 1232 Johny diff --git a/tests/queries/0_stateless/01410_full_join_and_nullable_key_and_index.sql b/tests/queries/0_stateless/01410_full_join_and_nullable_key_and_index.sql new file mode 100644 index 00000000000..6e071aedb23 --- /dev/null +++ b/tests/queries/0_stateless/01410_full_join_and_nullable_key_and_index.sql @@ -0,0 +1,33 @@ +drop table if EXISTS l; +drop table if EXISTS r; + +CREATE TABLE l (luid Nullable(Int16), name String) ENGINE=MergeTree order by luid settings allow_nullable_key=1; +CREATE TABLE r (ruid Nullable(Int16), name String) ENGINE=MergeTree order by ruid settings allow_nullable_key=1; + +INSERT INTO l VALUES (1231, 'John'); +INSERT INTO l VALUES (6666, 'Ksenia'); +INSERT INTO l VALUES (Null, '---'); + +INSERT INTO r VALUES (1231, 'John'); +INSERT INTO r VALUES (1232, 'Johny'); + +select 'select 1'; +SELECT * FROM l full outer join r on l.luid = r.ruid +where luid is null + and ruid is not null; + +select 'select 2'; +select * from ( +SELECT * FROM l full outer join r on l.luid = r.ruid) + where luid is null + and ruid is not null; + +select 'select 3'; +select * from ( +SELECT * FROM l full outer join r on l.luid = r.ruid +limit 100000000) + where luid is null + and ruid is not null; + +drop table l; +drop table r; From d85f5cc4cf46aed7419feb82dffa085b392f6bff Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 2 Jul 2023 18:02:14 +0200 Subject: [PATCH 1092/1997] Fix 02116_tuple_element with Analyzer (#51669) * Fix 02116_tuple_element with Analyzer * Use alternative errors in test --------- Co-authored-by: Alexander Tokmakov --- tests/queries/0_stateless/02116_tuple_element.sql | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/02116_tuple_element.sql b/tests/queries/0_stateless/02116_tuple_element.sql index bedfedd7c2d..97f6c049705 100644 --- a/tests/queries/0_stateless/02116_tuple_element.sql +++ b/tests/queries/0_stateless/02116_tuple_element.sql @@ -17,8 +17,8 @@ EXPLAIN SYNTAX SELECT tupleElement(t1, 'a') FROM t_tuple_element; SELECT tupleElement(number, 1) FROM numbers(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT tupleElement(t1) FROM t_tuple_element; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } SELECT tupleElement(t1, 'b') FROM t_tuple_element; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK, UNKNOWN_IDENTIFIER } -SELECT tupleElement(t1, 0) FROM t_tuple_element; -- { serverError ILLEGAL_INDEX } -SELECT tupleElement(t1, 3) FROM t_tuple_element; -- 
{ serverError ILLEGAL_INDEX } +SELECT tupleElement(t1, 0) FROM t_tuple_element; -- { serverError ILLEGAL_INDEX, NOT_FOUND_COLUMN_IN_BLOCK } +SELECT tupleElement(t1, 3) FROM t_tuple_element; -- { serverError ILLEGAL_INDEX, NOT_FOUND_COLUMN_IN_BLOCK } SELECT tupleElement(t1, materialize('a')) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT t2.1 FROM t_tuple_element; @@ -29,8 +29,8 @@ EXPLAIN SYNTAX SELECT tupleElement(t2, 1) FROM t_tuple_element; SELECT tupleElement(t2) FROM t_tuple_element; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } SELECT tupleElement(t2, 'a') FROM t_tuple_element; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK, UNKNOWN_IDENTIFIER } -SELECT tupleElement(t2, 0) FROM t_tuple_element; -- { serverError ILLEGAL_INDEX } -SELECT tupleElement(t2, 3) FROM t_tuple_element; -- { serverError ILLEGAL_INDEX } +SELECT tupleElement(t2, 0) FROM t_tuple_element; -- { serverError ILLEGAL_INDEX, NOT_FOUND_COLUMN_IN_BLOCK } +SELECT tupleElement(t2, 3) FROM t_tuple_element; -- { serverError ILLEGAL_INDEX, NOT_FOUND_COLUMN_IN_BLOCK } SELECT tupleElement(t2, materialize(1)) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } DROP TABLE t_tuple_element; From 392b70d6f852a3e3912f4d2638da6f9a3fb7ddeb Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Sun, 2 Jul 2023 19:30:18 +0300 Subject: [PATCH 1093/1997] fix merge_selecting_task scheduling (#51591) Co-authored-by: Alexey Milovidov --- src/Storages/MergeTree/MergeFromLogEntryTask.cpp | 2 +- src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp | 5 +++-- src/Storages/MergeTree/MutateFromLogEntryTask.cpp | 2 +- .../MergeTree/ReplicatedMergeMutateTaskBase.h | 11 ++++++++--- 4 files changed, 13 insertions(+), 7 deletions(-) diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp index f29d37312f9..17582e7df98 100644 --- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp @@ -392,7 +392,7 @@ bool MergeFromLogEntryTask::finalize(ReplicatedMergeMutateTaskBase::PartLogWrite /** With `ZSESSIONEXPIRED` or `ZOPERATIONTIMEOUT`, we can inadvertently roll back local changes to the parts. * This is not a problem, because in this case the merge will remain in the queue, and we will try again. 
 */
-    storage.merge_selecting_task->schedule();
+    finish_callback = [storage_ptr = &storage]() { storage_ptr->merge_selecting_task->schedule(); };
     ProfileEvents::increment(ProfileEvents::ReplicatedPartMerges);
 
     write_part_log({});
diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
index 29a1574b66e..c0acc401506 100644
--- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
@@ -239,8 +239,9 @@ MergeTreeDataMergerMutator::PartitionIdsHint MergeTreeDataMergerMutator::getPart
     if (!best_partition_id_to_optimize.empty())
         res.emplace(std::move(best_partition_id_to_optimize));
 
-    LOG_TRACE(log, "Checked {} partitions, found {} partitions with parts that may be merged: {}",
-        all_partition_ids.size(), res.size(), fmt::join(res, ", "));
+    LOG_TRACE(log, "Checked {} partitions, found {} partitions with parts that may be merged: [{}] "
+        "(max_total_size_to_merge={}, merge_with_ttl_allowed={})",
+        all_partition_ids.size(), res.size(), fmt::join(res, ", "), max_total_size_to_merge, merge_with_ttl_allowed);
 
     return res;
 }
diff --git a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp
index 4c2def34e8a..ba55fb400ca 100644
--- a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp
+++ b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp
@@ -267,7 +267,7 @@ bool MutateFromLogEntryTask::finalize(ReplicatedMergeMutateTaskBase::PartLogWrit
     /** With `ZSESSIONEXPIRED` or `ZOPERATIONTIMEOUT`, we can inadvertently roll back local changes to the parts.
       * This is not a problem, because in this case the entry will remain in the queue, and we will try again.
       */
-    storage.merge_selecting_task->schedule();
+    finish_callback = [storage_ptr = &storage]() { storage_ptr->merge_selecting_task->schedule(); };
     ProfileEvents::increment(ProfileEvents::ReplicatedPartMutations);
     write_part_log({});
 
diff --git a/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.h b/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.h
index a7bf1290274..1e7f9834245 100644
--- a/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.h
+++ b/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.h
@@ -21,10 +21,10 @@ public:
         StorageReplicatedMergeTree & storage_,
         ReplicatedMergeTreeQueue::SelectedEntryPtr & selected_entry_,
         IExecutableTask::TaskResultCallback & task_result_callback_)
-        : selected_entry(selected_entry_)
+        : storage(storage_)
+        , selected_entry(selected_entry_)
         , entry(*selected_entry->log_entry)
         , log(log_)
-        , storage(storage_)
         /// This is needed to ask an asssignee to assign a new merge/mutate operation
         /// It takes bool argument and true means that current task is successfully executed.
, task_result_callback(task_result_callback_) @@ -52,13 +52,18 @@ protected: /// Will execute a part of inner MergeTask or MutateTask virtual bool executeInnerTask() = 0; + StorageReplicatedMergeTree & storage; + + /// A callback to reschedule merge_selecting_task after destroying merge_mutate_entry + /// The order is important, because merge_selecting_task may rely on the number of entries in MergeList + scope_guard finish_callback; + /// This is important not to execute the same mutation in parallel /// selected_entry is a RAII class, so the time of living must be the same as for the whole task ReplicatedMergeTreeQueue::SelectedEntryPtr selected_entry; ReplicatedMergeTreeLogEntry & entry; MergeList::EntryPtr merge_mutate_entry{nullptr}; Poco::Logger * log; - StorageReplicatedMergeTree & storage; /// ProfileEvents for current part will be stored here ProfileEvents::Counters profile_counters; ContextMutablePtr task_context; From 0656fd2313b13638c5af8478aa1130006408fa2f Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Sun, 2 Jul 2023 14:56:35 -0300 Subject: [PATCH 1094/1997] more tests full join and null predicaet --- ...0_full_join_and_null_predicates.reference} | 6 ++ .../01410_full_join_and_null_predicates.sql | 58 +++++++++++++++++++ ...0_full_join_and_nullable_key_and_index.sql | 33 ----------- 3 files changed, 64 insertions(+), 33 deletions(-) rename tests/queries/0_stateless/{01410_full_join_and_nullable_key_and_index.reference => 01410_full_join_and_null_predicates.reference} (50%) create mode 100644 tests/queries/0_stateless/01410_full_join_and_null_predicates.sql delete mode 100644 tests/queries/0_stateless/01410_full_join_and_nullable_key_and_index.sql diff --git a/tests/queries/0_stateless/01410_full_join_and_nullable_key_and_index.reference b/tests/queries/0_stateless/01410_full_join_and_null_predicates.reference similarity index 50% rename from tests/queries/0_stateless/01410_full_join_and_nullable_key_and_index.reference rename to tests/queries/0_stateless/01410_full_join_and_null_predicates.reference index 83ff1886775..785d581c685 100644 --- a/tests/queries/0_stateless/01410_full_join_and_nullable_key_and_index.reference +++ b/tests/queries/0_stateless/01410_full_join_and_null_predicates.reference @@ -4,3 +4,9 @@ select 2 \N 1232 Johny select 3 \N 1232 Johny +select 4 +\N 1232 Johny +select 5 +\N 1232 Johny +select 6 +\N 1232 Johny diff --git a/tests/queries/0_stateless/01410_full_join_and_null_predicates.sql b/tests/queries/0_stateless/01410_full_join_and_null_predicates.sql new file mode 100644 index 00000000000..f7d5fa67b1d --- /dev/null +++ b/tests/queries/0_stateless/01410_full_join_and_null_predicates.sql @@ -0,0 +1,58 @@ +drop table if EXISTS l; +drop table if EXISTS r; + +CREATE TABLE l (luid Nullable(Int16), name String) +ENGINE=MergeTree order by luid settings allow_nullable_key=1 as +select * from VALUES ((1231, 'John'),(6666, 'Ksenia'),(Null, '---')); + +CREATE TABLE r (ruid Nullable(Int16), name String) +ENGINE=MergeTree order by ruid settings allow_nullable_key=1 as +select * from VALUES ((1231, 'John'),(1232, 'Johny')); + +select 'select 1'; +SELECT * FROM l full outer join r on l.luid = r.ruid +where luid is null + and ruid is not null; + +select 'select 2'; +select * from ( +SELECT * FROM l full outer join r on l.luid = r.ruid) + where luid is null + and ruid is not null; + +select 'select 3'; +select * from ( +SELECT * FROM l full outer join r on l.luid = r.ruid +limit 100000000) + where luid is null + and ruid is not null; + +drop table l; +drop table r; + 
+CREATE TABLE l (luid Nullable(Int16), name String) ENGINE=MergeTree order by tuple() as +select * from VALUES ((1231, 'John'),(6666, 'Ksenia'),(Null, '---')); + +CREATE TABLE r (ruid Nullable(Int16), name String) ENGINE=MergeTree order by tuple() as +select * from VALUES ((1231, 'John'),(1232, 'Johny')); + +select 'select 4'; +SELECT * FROM l full outer join r on l.luid = r.ruid +where luid is null + and ruid is not null; + +select 'select 5'; +select * from ( +SELECT * FROM l full outer join r on l.luid = r.ruid) + where luid is null + and ruid is not null; + +select 'select 6'; +select * from ( +SELECT * FROM l full outer join r on l.luid = r.ruid +limit 100000000) + where luid is null + and ruid is not null; + +drop table l; +drop table r; diff --git a/tests/queries/0_stateless/01410_full_join_and_nullable_key_and_index.sql b/tests/queries/0_stateless/01410_full_join_and_nullable_key_and_index.sql deleted file mode 100644 index 6e071aedb23..00000000000 --- a/tests/queries/0_stateless/01410_full_join_and_nullable_key_and_index.sql +++ /dev/null @@ -1,33 +0,0 @@ -drop table if EXISTS l; -drop table if EXISTS r; - -CREATE TABLE l (luid Nullable(Int16), name String) ENGINE=MergeTree order by luid settings allow_nullable_key=1; -CREATE TABLE r (ruid Nullable(Int16), name String) ENGINE=MergeTree order by ruid settings allow_nullable_key=1; - -INSERT INTO l VALUES (1231, 'John'); -INSERT INTO l VALUES (6666, 'Ksenia'); -INSERT INTO l VALUES (Null, '---'); - -INSERT INTO r VALUES (1231, 'John'); -INSERT INTO r VALUES (1232, 'Johny'); - -select 'select 1'; -SELECT * FROM l full outer join r on l.luid = r.ruid -where luid is null - and ruid is not null; - -select 'select 2'; -select * from ( -SELECT * FROM l full outer join r on l.luid = r.ruid) - where luid is null - and ruid is not null; - -select 'select 3'; -select * from ( -SELECT * FROM l full outer join r on l.luid = r.ruid -limit 100000000) - where luid is null - and ruid is not null; - -drop table l; -drop table r; From aeea3dc8247fecc8f4be75c3330cce696bd054ba Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 3 Jul 2023 07:40:05 +0000 Subject: [PATCH 1095/1997] Add odbcinst dep to sqllogic docker --- docker/test/sqllogic/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/test/sqllogic/Dockerfile b/docker/test/sqllogic/Dockerfile index 83dcf7e1f56..5cf71e4d3f8 100644 --- a/docker/test/sqllogic/Dockerfile +++ b/docker/test/sqllogic/Dockerfile @@ -13,6 +13,7 @@ RUN apt-get update --yes \ sqlite3 \ unixodbc \ unixodbc-dev \ + odbcinst \ sudo \ && apt-get clean From 292eec247058c6020c48d0d4c3ebd27784fed466 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 3 Jul 2023 09:40:36 +0200 Subject: [PATCH 1096/1997] Run cargo update to fix build with nightly --- rust/skim/Cargo.lock | 360 ++++++++++++++++++++++++++----------------- 1 file changed, 218 insertions(+), 142 deletions(-) diff --git a/rust/skim/Cargo.lock b/rust/skim/Cargo.lock index badd22dad07..9f948ee1c38 100644 --- a/rust/skim/Cargo.lock +++ b/rust/skim/Cargo.lock @@ -14,13 +14,19 @@ dependencies = [ [[package]] name = "aho-corasick" -version = "0.7.20" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac" +checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41" dependencies = [ "memchr", ] +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + [[package]] name = "android_system_properties" version = "0.1.5" @@ -32,9 +38,9 @@ dependencies = [ [[package]] name = "arrayvec" -version = "0.7.2" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" +checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "atty" @@ -42,7 +48,7 @@ version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" dependencies = [ - "hermit-abi", + "hermit-abi 0.1.19", "libc", "winapi", ] @@ -67,15 +73,15 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bumpalo" -version = "3.11.1" +version = "3.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "572f695136211188308f16ad2ca5c851a712c464060ae6974944458eb83880ba" +checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1" [[package]] name = "cc" -version = "1.0.77" +version = "1.0.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9f73505338f7d905b19d18738976aae232eb46b8efc15554ffc56deb5d9ebe4" +checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" [[package]] name = "cfg-if" @@ -85,13 +91,13 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.23" +version = "0.4.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16b0a3d9ed01224b22057780a37bb8c5dbfe1be8ba48678e7bf57ec4b385411f" +checksum = "ec837a71355b28f6556dbd569b37b3f363091c0bd4b2e735674521b4c5fd9bc5" dependencies = [ + "android-tzdata", "iana-time-zone", "js-sys", - "num-integer", "num-traits", "time 0.1.45", "wasm-bindgen", @@ -100,9 +106,9 @@ dependencies = [ [[package]] name = "clap" -version = "3.2.23" +version = "3.2.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71655c45cb9845d3270c9d6df84ebe72b4dad3c2ba3f7023ad47c144e4e473a5" +checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123" dependencies = [ "atty", "bitflags", @@ -135,9 +141,9 @@ dependencies = [ [[package]] name = "core-foundation-sys" -version = "0.8.3" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" +checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" [[package]] name = "crossbeam" @@ -155,9 +161,9 @@ dependencies = [ [[package]] name = "crossbeam-channel" -version = "0.5.6" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521" +checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200" dependencies = [ "cfg-if", "crossbeam-utils", @@ -165,9 +171,9 @@ dependencies = [ [[package]] name = "crossbeam-deque" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "715e8152b692bba2d374b53d4875445368fdf21a94751410af607a5ac677d1fc" +checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" dependencies = [ "cfg-if", "crossbeam-epoch", @@ -176,14 +182,14 @@ 
dependencies = [ [[package]] name = "crossbeam-epoch" -version = "0.9.13" +version = "0.9.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01a9af1f4c2ef74bb8aa1f7e19706bc72d03598c8a570bb5de72243c7a9d9d5a" +checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7" dependencies = [ "autocfg", "cfg-if", "crossbeam-utils", - "memoffset 0.7.1", + "memoffset 0.9.0", "scopeguard", ] @@ -199,18 +205,18 @@ dependencies = [ [[package]] name = "crossbeam-utils" -version = "0.8.14" +version = "0.8.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fb766fa798726286dbbb842f174001dab8abc7b627a1dd86e0b7222a95d929f" +checksum = "5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294" dependencies = [ "cfg-if", ] [[package]] name = "cxx" -version = "1.0.83" +version = "1.0.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdf07d07d6531bfcdbe9b8b739b104610c6508dcc4d63b410585faf338241daf" +checksum = "e88abab2f5abbe4c56e8f1fb431b784d710b709888f35755a160e62e33fe38e8" dependencies = [ "cc", "cxxbridge-flags", @@ -220,9 +226,9 @@ dependencies = [ [[package]] name = "cxx-build" -version = "1.0.83" +version = "1.0.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2eb5b96ecdc99f72657332953d4d9c50135af1bac34277801cc3937906ebd39" +checksum = "5c0c11acd0e63bae27dcd2afced407063312771212b7a823b4fd72d633be30fb" dependencies = [ "cc", "codespan-reporting", @@ -230,31 +236,31 @@ dependencies = [ "proc-macro2", "quote", "scratch", - "syn", + "syn 2.0.23", ] [[package]] name = "cxxbridge-flags" -version = "1.0.83" +version = "1.0.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac040a39517fd1674e0f32177648334b0f4074625b5588a64519804ba0553b12" +checksum = "8d3816ed957c008ccd4728485511e3d9aaf7db419aa321e3d2c5a2f3411e36c8" [[package]] name = "cxxbridge-macro" -version = "1.0.83" +version = "1.0.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1362b0ddcfc4eb0a1f57b68bd77dd99f0e826958a96abd0ae9bd092e114ffed6" +checksum = "a26acccf6f445af85ea056362561a24ef56cdc15fcc685f03aec50b9c702cb6d" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.23", ] [[package]] name = "darling" -version = "0.14.2" +version = "0.14.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0dd3cd20dc6b5a876612a6e5accfe7f3dd883db6d07acfbf14c128f61550dfa" +checksum = "7b750cb3417fd1b327431a470f388520309479ab0bf5e323505daf0290cd3850" dependencies = [ "darling_core", "darling_macro", @@ -262,27 +268,27 @@ dependencies = [ [[package]] name = "darling_core" -version = "0.14.2" +version = "0.14.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a784d2ccaf7c98501746bf0be29b2022ba41fd62a2e622af997a03e9f972859f" +checksum = "109c1ca6e6b7f82cc233a97004ea8ed7ca123a9af07a8230878fcfda9b158bf0" dependencies = [ "fnv", "ident_case", "proc-macro2", "quote", "strsim", - "syn", + "syn 1.0.109", ] [[package]] name = "darling_macro" -version = "0.14.2" +version = "0.14.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7618812407e9402654622dd402b0a89dff9ba93badd6540781526117b92aab7e" +checksum = "a4aab4dbc9f7611d8b55048a3a16d2d010c2c8334e46304b40ac1cc14bf3b48e" dependencies = [ "darling_core", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -313,7 +319,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -323,7 
+329,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f0314b72bed045f3a68671b3c86328386762c93f82d98c65c3cb5e5f573dd68" dependencies = [ "derive_builder_core", - "syn", + "syn 1.0.109", ] [[package]] @@ -349,9 +355,9 @@ dependencies = [ [[package]] name = "either" -version = "1.8.0" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90e5c1c8368803113bf0c9584fc495a58b86dc8a29edbf8fe877d21d9507e797" +checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" [[package]] name = "env_logger" @@ -383,9 +389,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.8" +version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31" +checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" dependencies = [ "cfg-if", "libc", @@ -407,6 +413,12 @@ dependencies = [ "libc", ] +[[package]] +name = "hermit-abi" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" + [[package]] name = "humantime" version = "2.1.0" @@ -415,26 +427,25 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "iana-time-zone" -version = "0.1.53" +version = "0.1.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64c122667b287044802d6ce17ee2ddf13207ed924c712de9a66a5814d5b64765" +checksum = "2fad5b825842d2b38bd206f3e81d6957625fd7f0a361e345c30e01a0ae2dd613" dependencies = [ "android_system_properties", "core-foundation-sys", "iana-time-zone-haiku", "js-sys", "wasm-bindgen", - "winapi", + "windows", ] [[package]] name = "iana-time-zone-haiku" -version = "0.1.1" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0703ae284fc167426161c2e3f1da3ea71d94b21bedbcc9494e92b28e334e3dca" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" dependencies = [ - "cxx", - "cxx-build", + "cc", ] [[package]] @@ -445,9 +456,9 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" [[package]] name = "indexmap" -version = "1.9.2" +version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1885e79c1fc4b10f0e172c475f458b7f7b93061064d98c3293e98c5ba0c8b399" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" dependencies = [ "autocfg", "hashbrown", @@ -455,9 +466,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.60" +version = "0.3.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49409df3e3bf0856b916e2ceaca09ee28e6871cf7d9ce97a692cacfdb2a25a47" +checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a" dependencies = [ "wasm-bindgen", ] @@ -470,27 +481,24 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.138" +version = "0.2.147" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db6d7e329c562c5dfab7a46a2afabc8b987ab9a4834c9d1ca04dc54c1546cef8" +checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" [[package]] name = "link-cplusplus" -version = "1.0.7" +version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"9272ab7b96c9046fbc5bc56c06c117cb639fe2d509df0c421cad82d2915cf369" +checksum = "ecd207c9c713c34f95a097a5b029ac2ce6010530c7b49d7fea24d977dede04f5" dependencies = [ "cc", ] [[package]] name = "log" -version = "0.4.17" +version = "0.4.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" -dependencies = [ - "cfg-if", -] +checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4" [[package]] name = "memchr" @@ -509,9 +517,9 @@ dependencies = [ [[package]] name = "memoffset" -version = "0.7.1" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5de893c32cde5f383baa4c04c5d6dbdd735cfd4a794b0debdb2bb1b421da5ff4" +checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c" dependencies = [ "autocfg", ] @@ -541,16 +549,6 @@ dependencies = [ "pin-utils", ] -[[package]] -name = "num-integer" -version = "0.1.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" -dependencies = [ - "autocfg", - "num-traits", -] - [[package]] name = "num-traits" version = "0.2.15" @@ -562,25 +560,25 @@ dependencies = [ [[package]] name = "num_cpus" -version = "1.14.0" +version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6058e64324c71e02bc2b150e4f3bc8286db6c83092132ffa3f6b1eab0f9def5" +checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" dependencies = [ - "hermit-abi", + "hermit-abi 0.3.1", "libc", ] [[package]] name = "once_cell" -version = "1.16.0" +version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86f0b0d4bf799edbc74508c1e8bf170ff5f41238e5f8225603ca7caaae2b7860" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" [[package]] name = "os_str_bytes" -version = "6.4.1" +version = "6.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b7820b9daea5457c9f21c69448905d723fbd21136ccf521748f23fd49e723ee" +checksum = "4d5d9eb14b174ee9aa2ef96dc2b94637a2d4b6e7cb873c7e171f0c20c6cf3eac" [[package]] name = "pin-utils" @@ -590,27 +588,27 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "proc-macro2" -version = "1.0.47" +version = "1.0.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ea3d908b0e36316caf9e9e2c4625cdde190a7e6f440d794667ed17a1855e725" +checksum = "7b368fba921b0dce7e60f5e04ec15e565b3303972b42bcfde1d0713b881959eb" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.21" +version = "1.0.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179" +checksum = "573015e8ab27661678357f27dc26460738fd2b6c86e46f386fde94cb5d913105" dependencies = [ "proc-macro2", ] [[package]] name = "rayon" -version = "1.6.1" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6db3a213adf02b3bcfd2d3846bb41cb22857d131789e01df434fb7e7bc0759b7" +checksum = "1d2df5196e37bcc87abebc0053e20787d73847bb33134a69841207dd0a47f03b" dependencies = [ "either", "rayon-core", @@ -618,9 +616,9 @@ dependencies = [ [[package]] name = "rayon-core" -version = "1.10.1" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"cac410af5d00ab6884528b4ab69d1e8e146e8d471201800fa1b4524126de6ad3" +checksum = "4b8f95bd6966f5c87776639160a66bd8ab9895d9d4ab01ddba9fc60661aebe8d" dependencies = [ "crossbeam-channel", "crossbeam-deque", @@ -650,9 +648,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.7.0" +version = "1.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e076559ef8e241f2ae3479e36f97bd5741c0330689e217ad51ce2c76808b868a" +checksum = "d0ab3ca65655bb1e41f2a8c8cd662eb4fb035e67c3f78da1d61dffe89d07300f" dependencies = [ "aho-corasick", "memchr", @@ -661,15 +659,15 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.6.28" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848" +checksum = "436b050e76ed2903236f032a59761c1eb99e1b0aead2c257922771dab1fc8c78" [[package]] name = "rustversion" -version = "1.0.9" +version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97477e48b4cf8603ad5f7aaf897467cf42ab4218a38ef76fb14c2d6773a6d6a8" +checksum = "4f3208ce4d8448b3f3e7d168a73f5e0c43a61e32930de3bceeccedb388b6bf06" [[package]] name = "scopeguard" @@ -679,15 +677,15 @@ checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" [[package]] name = "scratch" -version = "1.0.2" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8132065adcfd6e02db789d9285a0deb2f3fcb04002865ab67d5fb103533898" +checksum = "1792db035ce95be60c3f8853017b3999209281c24e2ba5bc8e59bf97a0c590c1" [[package]] name = "serde" -version = "1.0.149" +version = "1.0.164" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "256b9932320c590e707b94576e3cc1f7c9024d0ee6612dfbcf1cb106cbe8e055" +checksum = "9e8c8cf938e98f769bc164923b06dce91cea1751522f46f8466461af04c9027d" [[package]] name = "shlex" @@ -697,9 +695,9 @@ checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3" [[package]] name = "skim" -version = "0.10.2" +version = "0.10.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cebed5f897cd6c0d80fbe30adb36c0abf7400e93043a63ae56458495642b3485" +checksum = "e5d28de0a6cb2cdd83a076f1de9d965b973ae08b244df1aa70b432946dda0f32" dependencies = [ "atty", "beef", @@ -717,7 +715,7 @@ dependencies = [ "rayon", "regex", "shlex", - "time 0.3.17", + "time 0.3.22", "timer", "tuikit", "unicode-width", @@ -732,9 +730,20 @@ checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" [[package]] name = "syn" -version = "1.0.105" +version = "1.0.109" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60b9b43d45702de4c839cb9b51d9f529c5dd26a4aff255b42b1ebc03e88ee908" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59fb7d6d8281a51045d62b8eb3a7d1ce347b76f312af50cd3dc0af39c87c1737" dependencies = [ "proc-macro2", "quote", @@ -754,9 +763,9 @@ dependencies = [ [[package]] name = "termcolor" -version = "1.1.3" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" +checksum = "be55cf8942feac5c765c2c993422806843c9a9a45d4d5c407ad6dd2ea95eb9b6" dependencies = [ "winapi-util", ] @@ 
-769,30 +778,31 @@ checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d" [[package]] name = "thiserror" -version = "1.0.37" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10deb33631e3c9018b9baf9dcbbc4f737320d2b576bac10f6aefa048fa407e3e" +checksum = "978c9a314bd8dc99be594bc3c175faaa9794be04a5a5e153caba6915336cebac" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.37" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "982d17546b47146b28f7c22e3d08465f6b8903d0ea13c1660d9d84a6e7adcdbb" +checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.23", ] [[package]] name = "thread_local" -version = "1.1.4" +version = "1.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5516c27b78311c50bf42c071425c560ac799b11c30b31f87e3081965fe5e0180" +checksum = "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152" dependencies = [ + "cfg-if", "once_cell", ] @@ -809,9 +819,9 @@ dependencies = [ [[package]] name = "time" -version = "0.3.17" +version = "0.3.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a561bf4617eebd33bca6434b988f39ed798e527f51a1e797d0ee4f61c0a38376" +checksum = "ea9e1b3cf1243ae005d9e74085d4d542f3125458f3a81af210d901dcd7411efd" dependencies = [ "serde", "time-core", @@ -819,9 +829,9 @@ dependencies = [ [[package]] name = "time-core" -version = "0.1.0" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e153e1f1acaef8acc537e68b44906d2db6436e2b35ac2c6b42640fff91f00fd" +checksum = "7300fbefb4dadc1af235a9cef3737cea692a9d97e1b9cbcd4ebdae6f8868e6fb" [[package]] name = "timer" @@ -848,9 +858,9 @@ dependencies = [ [[package]] name = "unicode-ident" -version = "1.0.5" +version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ceab39d59e4c9499d4e5a8ee0e2735b891bb7308ac83dfb4e80cad195c9f6f3" +checksum = "b15811caf2415fb889178633e7724bad2509101cde276048e013b9def5e51fa0" [[package]] name = "unicode-width" @@ -860,15 +870,15 @@ checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" [[package]] name = "utf8parse" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "936e4b492acfd135421d8dca4b1aa80a7bfc26e702ef3af710e0752684df5372" +checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" [[package]] name = "vte" -version = "0.11.0" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1aae21c12ad2ec2d168c236f369c38ff332bc1134f7246350dca641437365045" +checksum = "f5022b5fbf9407086c180e9557be968742d839e68346af7792b8592489732197" dependencies = [ "arrayvec", "utf8parse", @@ -899,9 +909,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.83" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eaf9f5aceeec8be17c128b2e93e031fb8a4d469bb9c4ae2d7dc1888b26887268" +checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -909,24 +919,24 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.83" +version = "0.2.87" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c8ffb332579b0557b52d268b91feab8df3615f265d5270fec2a8c95b17c1142" +checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", - "syn", + "syn 2.0.23", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.83" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "052be0f94026e6cbc75cdefc9bae13fd6052cdcaf532fa6c45e7ae33a1e6c810" +checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -934,22 +944,22 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.83" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07bc0c051dc5f23e307b13285f9d75df86bfdf816c5721e573dec1f9b8aa193c" +checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.23", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.83" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c38c045535d93ec4f0b4defec448e4291638ee608530863b1e2ba115d4fff7f" +checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1" [[package]] name = "winapi" @@ -981,3 +991,69 @@ name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.48.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05d4b17490f70499f20b9e791dcf6a299785ce8af4d709018206dc5b4953e95f" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" From fe49e98455ef62957dec8c88c333bf97197a0dce Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 30 Jun 2023 08:55:49 +0000 Subject: [PATCH 1097/1997] Follow-up to re2 update 2023-06-02 (#50949) --- src/Common/OptimizedRegularExpression.cpp | 10 +++++----- src/Common/OptimizedRegularExpression.h | 1 - src/Common/SensitiveDataMasker.cpp | 3 +-- src/Common/parseGlobs.cpp | 7 +++---- src/Dictionaries/RegExpTreeDictionary.cpp | 7 ++----- src/Functions/ReplaceRegexpImpl.h | 4 ++-- src/Functions/checkHyperscanRegexp.cpp | 8 ++++---- src/Functions/extractAllGroups.h | 5 ++--- src/Functions/extractGroups.cpp | 4 ++-- src/Processors/Formats/Impl/RegexpRowInputFormat.h | 5 ++--- src/Server/HTTPHandler.cpp | 6 ++++-- src/Server/HTTPHandlerRequestFilter.h | 6 ++---- 12 files changed, 29 insertions(+), 37 deletions(-) diff --git a/src/Common/OptimizedRegularExpression.cpp b/src/Common/OptimizedRegularExpression.cpp index 5df9ce76098..c542945c78d 100644 --- a/src/Common/OptimizedRegularExpression.cpp +++ b/src/Common/OptimizedRegularExpression.cpp @@ -540,7 +540,7 @@ bool OptimizedRegularExpressionImpl::match(const char * subject, si } } - return re2->Match(StringPieceType(subject, subject_size), 0, subject_size, RegexType::UNANCHORED, nullptr, 0); + return re2->Match({subject, subject_size}, 0, subject_size, RegexType::UNANCHORED, nullptr, 0); } } @@ -585,9 +585,9 @@ bool OptimizedRegularExpressionImpl::match(const char * subject, si return false; } - StringPieceType piece; + std::string_view piece; - if (!RegexType::PartialMatch(StringPieceType(subject, subject_size), *re2, &piece)) + if (!RegexType::PartialMatch({subject, subject_size}, *re2, &piece)) return false; else { @@ -652,10 +652,10 @@ unsigned OptimizedRegularExpressionImpl::match(const char * subject return 0; } - DB::PODArrayWithStackMemory pieces(limit); + DB::PODArrayWithStackMemory pieces(limit); if (!re2->Match( - StringPieceType(subject, subject_size), + {subject, subject_size}, 0, subject_size, RegexType::UNANCHORED, diff --git a/src/Common/OptimizedRegularExpression.h b/src/Common/OptimizedRegularExpression.h index f6b59f0a465..51f1bc200e4 100644 --- a/src/Common/OptimizedRegularExpression.h +++ b/src/Common/OptimizedRegularExpression.h @@ -52,7 +52,6 @@ public: using MatchVec = std::vector; using RegexType = std::conditional_t; - using StringPieceType = std::conditional_t; OptimizedRegularExpressionImpl(const std::string & regexp_, int options = 0); /// NOLINT /// StringSearcher store pointers to required_substring, it must be updated on move. 
diff --git a/src/Common/SensitiveDataMasker.cpp b/src/Common/SensitiveDataMasker.cpp index 34db78d00fb..b59a4758822 100644 --- a/src/Common/SensitiveDataMasker.cpp +++ b/src/Common/SensitiveDataMasker.cpp @@ -5,7 +5,6 @@ #include #include -#include #include @@ -44,7 +43,7 @@ private: const std::string regexp_string; const RE2 regexp; - const re2::StringPiece replacement; + const std::string_view replacement; #ifndef NDEBUG mutable std::atomic matches_count = 0; diff --git a/src/Common/parseGlobs.cpp b/src/Common/parseGlobs.cpp index 07cce38afff..33747f6eece 100644 --- a/src/Common/parseGlobs.cpp +++ b/src/Common/parseGlobs.cpp @@ -3,7 +3,6 @@ #include #include #include -#include #include #include #include @@ -33,14 +32,14 @@ std::string makeRegexpPatternFromGlobs(const std::string & initial_str_with_glob std::string escaped_with_globs = buf_for_escaping.str(); static const re2::RE2 enum_or_range(R"({([\d]+\.\.[\d]+|[^{}*,]+,[^{}*]*[^{}*,])})"); /// regexp for {expr1,expr2,expr3} or {M..N}, where M and N - non-negative integers, expr's should be without "{", "}", "*" and "," - re2::StringPiece input(escaped_with_globs); - re2::StringPiece matched; + std::string_view input(escaped_with_globs); + std::string_view matched; std::ostringstream oss_for_replacing; // STYLE_CHECK_ALLOW_STD_STRING_STREAM oss_for_replacing.exceptions(std::ios::failbit); size_t current_index = 0; while (RE2::FindAndConsume(&input, enum_or_range, &matched)) { - std::string buffer{matched}; + std::string buffer(matched); oss_for_replacing << escaped_with_globs.substr(current_index, matched.data() - escaped_with_globs.data() - current_index - 1) << '('; if (buffer.find(',') == std::string::npos) diff --git a/src/Dictionaries/RegExpTreeDictionary.cpp b/src/Dictionaries/RegExpTreeDictionary.cpp index 074b179c48e..a9846dc06e9 100644 --- a/src/Dictionaries/RegExpTreeDictionary.cpp +++ b/src/Dictionaries/RegExpTreeDictionary.cpp @@ -30,8 +30,6 @@ #include #include -#include - #include "config.h" #if USE_VECTORSCAN @@ -469,10 +467,9 @@ public: std::pair processBackRefs(const String & data, const re2_st::RE2 & searcher, const std::vector & pieces) { - re2_st::StringPiece haystack(data.data(), data.size()); - re2_st::StringPiece matches[10]; + std::string_view matches[10]; String result; - searcher.Match(haystack, 0, data.size(), re2_st::RE2::Anchor::UNANCHORED, matches, 10); + searcher.Match({data.data(), data.size()}, 0, data.size(), re2_st::RE2::Anchor::UNANCHORED, matches, 10); /// if the pattern is a single '$1' but fails to match, we would use the default value. 
if (pieces.size() == 1 && pieces[0].ref_num >= 0 && pieces[0].ref_num < 10 && matches[pieces[0].ref_num].empty()) return std::make_pair(result, true); diff --git a/src/Functions/ReplaceRegexpImpl.h b/src/Functions/ReplaceRegexpImpl.h index 7e3af1e62d9..9395489dac3 100644 --- a/src/Functions/ReplaceRegexpImpl.h +++ b/src/Functions/ReplaceRegexpImpl.h @@ -99,8 +99,8 @@ struct ReplaceRegexpImpl int num_captures, const Instructions & instructions) { - re2_st::StringPiece haystack(haystack_data, haystack_length); - re2_st::StringPiece matches[max_captures]; + std::string_view haystack(haystack_data, haystack_length); + std::string_view matches[max_captures]; size_t copy_pos = 0; size_t match_pos = 0; diff --git a/src/Functions/checkHyperscanRegexp.cpp b/src/Functions/checkHyperscanRegexp.cpp index 441e35cc5db..0dd4c5740c3 100644 --- a/src/Functions/checkHyperscanRegexp.cpp +++ b/src/Functions/checkHyperscanRegexp.cpp @@ -45,8 +45,8 @@ bool isLargerThanFifty(std::string_view str) /// Check for sub-patterns of the form x{n} or x{n,} can be expensive. Ignore spaces before/after n and m. bool SlowWithHyperscanChecker::isSlowOneRepeat(std::string_view regexp) { - re2_st::StringPiece haystack(regexp.data(), regexp.size()); - re2_st::StringPiece matches[2]; + std::string_view haystack(regexp.data(), regexp.size()); + std::string_view matches[2]; size_t start_pos = 0; while (start_pos < haystack.size()) { @@ -67,8 +67,8 @@ bool SlowWithHyperscanChecker::isSlowOneRepeat(std::string_view regexp) /// Check if sub-patterns of the form x{n,m} can be expensive. Ignore spaces before/after n and m. bool SlowWithHyperscanChecker::isSlowTwoRepeats(std::string_view regexp) { - re2_st::StringPiece haystack(regexp.data(), regexp.size()); - re2_st::StringPiece matches[3]; + std::string_view haystack(regexp.data(), regexp.size()); + std::string_view matches[3]; size_t start_pos = 0; while (start_pos < haystack.size()) { diff --git a/src/Functions/extractAllGroups.h b/src/Functions/extractAllGroups.h index faee25aa0ab..3a7987be93e 100644 --- a/src/Functions/extractAllGroups.h +++ b/src/Functions/extractAllGroups.h @@ -94,7 +94,6 @@ public: if (needle.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Length of 'needle' argument must be greater than 0."); - using StringPiece = typename Regexps::Regexp::StringPieceType; const Regexps::Regexp holder = Regexps::createRegexp(needle); const auto & regexp = holder.getRE2(); @@ -111,7 +110,7 @@ public: groups_count, std::to_string(MAX_GROUPS_COUNT - 1)); // Including 0-group, which is the whole regexp. - PODArrayWithStackMemory matched_groups(groups_count + 1); + PODArrayWithStackMemory matched_groups(groups_count + 1); ColumnArray::ColumnOffsets::MutablePtr root_offsets_col = ColumnArray::ColumnOffsets::create(); ColumnArray::ColumnOffsets::MutablePtr nested_offsets_col = ColumnArray::ColumnOffsets::create(); @@ -160,7 +159,7 @@ public: /// Additional limit to fail fast on supposedly incorrect usage. const auto max_matches_per_row = context->getSettingsRef().regexp_max_matches_per_row; - PODArray all_matches; + PODArray all_matches; /// Number of times RE matched on each row of haystack column. 
PODArray number_of_matches_per_row; diff --git a/src/Functions/extractGroups.cpp b/src/Functions/extractGroups.cpp index 6744edda922..21b8a68fc10 100644 --- a/src/Functions/extractGroups.cpp +++ b/src/Functions/extractGroups.cpp @@ -75,7 +75,7 @@ public: throw Exception(ErrorCodes::BAD_ARGUMENTS, "There are no groups in regexp: {}", needle); // Including 0-group, which is the whole regexp. - PODArrayWithStackMemory matched_groups(groups_count + 1); + PODArrayWithStackMemory matched_groups(groups_count + 1); ColumnArray::ColumnOffsets::MutablePtr offsets_col = ColumnArray::ColumnOffsets::create(); ColumnString::MutablePtr data_col = ColumnString::create(); @@ -89,7 +89,7 @@ public: { std::string_view current_row = column_haystack->getDataAt(i).toView(); - if (re2->Match(re2_st::StringPiece(current_row.data(), current_row.size()), + if (re2->Match({current_row.data(), current_row.size()}, 0, current_row.size(), re2_st::RE2::UNANCHORED, matched_groups.data(), static_cast(matched_groups.size()))) { diff --git a/src/Processors/Formats/Impl/RegexpRowInputFormat.h b/src/Processors/Formats/Impl/RegexpRowInputFormat.h index d6696ffe751..2469774aaf9 100644 --- a/src/Processors/Formats/Impl/RegexpRowInputFormat.h +++ b/src/Processors/Formats/Impl/RegexpRowInputFormat.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include #include @@ -28,14 +27,14 @@ public: /// Return true if row was successfully parsed and row fields were extracted. bool parseRow(PeekableReadBuffer & buf); - re2_st::StringPiece getField(size_t index) { return matched_fields[index]; } + std::string_view getField(size_t index) { return matched_fields[index]; } size_t getMatchedFieldsSize() const { return matched_fields.size(); } size_t getNumberOfGroups() const { return regexp.NumberOfCapturingGroups(); } private: const re2_st::RE2 regexp; // The vector of fields extracted from line using regexp. - std::vector matched_fields; + std::vector matched_fields; // These two vectors are needed to use RE2::FullMatchN (function for extracting fields). 
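    // (Illustrative note, not part of the original patch: RE2::FullMatchN(text, re, args, n)
    // takes one RE2::Arg per capture group; the Args below are wired to write into
    // matched_fields, so after a successful parse getField(i) returns the std::string_view
    // span of group i without copying.)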
std::vector re2_arguments; std::vector re2_arguments_ptrs; diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index fe98ae5f69e..42459340c57 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -44,6 +44,8 @@ #include #include +#include + #include #include @@ -1163,8 +1165,8 @@ void PredefinedQueryHandler::customizeContext(HTTPServerRequest & request, Conte { int num_captures = compiled_regex->NumberOfCapturingGroups() + 1; - re2::StringPiece matches[num_captures]; - re2::StringPiece input(begin, end - begin); + std::string_view matches[num_captures]; + std::string_view input(begin, end - begin); if (compiled_regex->Match(input, 0, end - begin, re2::RE2::Anchor::ANCHOR_BOTH, matches, num_captures)) { for (const auto & [capturing_name, capturing_index] : compiled_regex->NamedCapturingGroups()) diff --git a/src/Server/HTTPHandlerRequestFilter.h b/src/Server/HTTPHandlerRequestFilter.h index c6bcdb211e1..25cbb950871 100644 --- a/src/Server/HTTPHandlerRequestFilter.h +++ b/src/Server/HTTPHandlerRequestFilter.h @@ -6,7 +6,6 @@ #include #include -#include #include #include @@ -26,9 +25,8 @@ static inline bool checkRegexExpression(std::string_view match_str, const Compil { int num_captures = compiled_regex->NumberOfCapturingGroups() + 1; - re2::StringPiece matches[num_captures]; - re2::StringPiece match_input(match_str.data(), match_str.size()); - return compiled_regex->Match(match_input, 0, match_str.size(), re2::RE2::Anchor::ANCHOR_BOTH, matches, num_captures); + std::string_view matches[num_captures]; + return compiled_regex->Match({match_str.data(), match_str.size()}, 0, match_str.size(), re2::RE2::Anchor::ANCHOR_BOTH, matches, num_captures); } static inline bool checkExpression(std::string_view match_str, const std::pair & expression) From f1709b3cf4360bbd57e7740a305d2a888849197b Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 3 Jul 2023 12:58:41 +0300 Subject: [PATCH 1098/1997] better diagnostics for 01193_metadata_loading (#51414) --- tests/queries/0_stateless/01193_metadata_loading.sh | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/01193_metadata_loading.sh b/tests/queries/0_stateless/01193_metadata_loading.sh index 63fcf8eb93b..50425eae018 100755 --- a/tests/queries/0_stateless/01193_metadata_loading.sh +++ b/tests/queries/0_stateless/01193_metadata_loading.sh @@ -12,7 +12,7 @@ db="test_01193_$RANDOM" tables=1000 threads=10 count_multiplier=1 -max_time_ms=5000 +max_time_ms=1500 debug_or_sanitizer_build=$($CLICKHOUSE_CLIENT -q "WITH ((SELECT value FROM system.build_options WHERE name='BUILD_TYPE') AS build, (SELECT value FROM system.build_options WHERE name='CXX_FLAGS') as flags) SELECT build='Debug' OR flags LIKE '%fsanitize%' OR hasThreadFuzzer()") @@ -42,13 +42,15 @@ wait $CLICKHOUSE_CLIENT -q "CREATE TABLE $db.table_merge (i UInt64, d Date, s String, n Nested(i UInt8, f Float32)) ENGINE=Merge('$db', '^table_')" $CLICKHOUSE_CLIENT -q "SELECT count() * $count_multiplier, i, d, s, n.i, n.f FROM merge('$db', '^table_9') GROUP BY i, d, s, n.i, n.f ORDER BY i" -for i in {1..10}; do +for i in {1..50}; do $CLICKHOUSE_CLIENT -q "DETACH DATABASE $db" - $CLICKHOUSE_CLIENT -q "ATTACH DATABASE $db" --query_id="$db-$i"; + $CLICKHOUSE_CLIENT --query_profiler_real_time_period_ns=100000000 --query_profiler_cpu_time_period_ns=100000000 -q "ATTACH DATABASE $db" --query_id="$db-$i"; done $CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS" -$CLICKHOUSE_CLIENT -q "SELECT if(min(query_duration_ms) < 
$max_time_ms, 'ok', toString(groupArray(query_duration_ms))) FROM system.query_log WHERE current_database = currentDatabase() AND query_id LIKE '$db-%' AND type=2" +durations=$($CLICKHOUSE_CLIENT -q "SELECT groupArray(query_duration_ms) FROM system.query_log WHERE current_database = currentDatabase() AND query_id LIKE '$db-%' AND type=2") +$CLICKHOUSE_CLIENT -q "SELECT 'durations', '$db', $durations FORMAT Null" +$CLICKHOUSE_CLIENT -q "SELECT if(quantile(0.5)(arrayJoin($durations)) < $max_time_ms, 'ok', toString($durations))" $CLICKHOUSE_CLIENT -q "SELECT count() * $count_multiplier, i, d, s, n.i, n.f FROM $db.table_merge GROUP BY i, d, s, n.i, n.f ORDER BY i" From d0c38474274ce0c64478b33230b49db574e06e11 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Mon, 3 Jul 2023 12:29:12 +0200 Subject: [PATCH 1099/1997] Fix source image for sqllogic --- docker/images.json | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docker/images.json b/docker/images.json index b4f3e755bd1..e8fc329a640 100644 --- a/docker/images.json +++ b/docker/images.json @@ -120,11 +120,12 @@ "docker/test/base": { "name": "clickhouse/test-base", "dependent": [ - "docker/test/stateless", - "docker/test/integration/base", "docker/test/fuzzer", + "docker/test/integration/base", "docker/test/keeper-jepsen", - "docker/test/server-jepsen" + "docker/test/server-jepsen", + "docker/test/sqllogic", + "docker/test/stateless" ] }, "docker/test/integration/kerberized_hadoop": { From c575dc37b3c64f5da3a2390f20680bda5f6a32d2 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 3 Jul 2023 12:38:12 +0200 Subject: [PATCH 1100/1997] Remove redundant change --- src/Disks/getOrCreateDiskFromAST.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/getOrCreateDiskFromAST.cpp b/src/Disks/getOrCreateDiskFromAST.cpp index 6c1b0a966b2..1d5e40d5d07 100644 --- a/src/Disks/getOrCreateDiskFromAST.cpp +++ b/src/Disks/getOrCreateDiskFromAST.cpp @@ -102,7 +102,7 @@ namespace std::string getOrCreateDiskFromDiskAST(const ASTPtr & disk_function, ContextPtr context) { - if (!disk_function->as()) + if (isDiskFunction(disk_function)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected a function"); auto ast = disk_function->clone(); From c4fb532f9306e8680968a6c79d3bd99108c0cbfc Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 3 Jul 2023 12:55:38 +0200 Subject: [PATCH 1101/1997] Better --- src/Disks/getDiskConfigurationFromAST.cpp | 1 - src/Disks/getOrCreateDiskFromAST.cpp | 6 ++--- src/Interpreters/Cache/FileCache.cpp | 32 +++++------------------ src/Interpreters/Cache/Metadata.cpp | 8 ++++-- src/Interpreters/Cache/Metadata.h | 5 ++-- 5 files changed, 19 insertions(+), 33 deletions(-) diff --git a/src/Disks/getDiskConfigurationFromAST.cpp b/src/Disks/getDiskConfigurationFromAST.cpp index 89dda978f6a..4b1323b4db8 100644 --- a/src/Disks/getDiskConfigurationFromAST.cpp +++ b/src/Disks/getDiskConfigurationFromAST.cpp @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Disks/getOrCreateDiskFromAST.cpp b/src/Disks/getOrCreateDiskFromAST.cpp index ce1d14c7ea5..81d5b7372f3 100644 --- a/src/Disks/getOrCreateDiskFromAST.cpp +++ b/src/Disks/getOrCreateDiskFromAST.cpp @@ -2,7 +2,6 @@ #include #include #include -#include #include #include #include @@ -10,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -107,8 +107,8 @@ namespace std::string getOrCreateDiskFromDiskAST(const ASTPtr & disk_function, ContextPtr context) { - if 
(isDiskFunction(disk_function)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected a function"); + if (!isDiskFunction(disk_function)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected a disk function"); auto ast = disk_function->clone(); diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index 7baeafdd724..d2647a68d42 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -813,12 +813,8 @@ bool FileCache::tryReserve(FileSegment & file_segment, const size_t size) void FileCache::removeKey(const Key & key) { assertInitialized(); - - auto locked_key = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::RETURN_NULL); - if (!locked_key) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "No such key `{}`", key); - - locked_key->removeAllReleasable(); + auto locked_key = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::THROW); + locked_key->removeAll(); } void FileCache::removeKeyIfExists(const Key & key) @@ -833,17 +829,13 @@ void FileCache::removeKeyIfExists(const Key & key) /// But if we have multiple replicated zero-copy tables on the same server /// it became possible to start removing something from cache when it is used /// by other "zero-copy" tables. That is why it's not an error. - locked_key->removeAllReleasable(); + locked_key->removeAll(/* if_releasable */true); } void FileCache::removeFileSegment(const Key & key, size_t offset) { assertInitialized(); - - auto locked_key = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::RETURN_NULL); - if (!locked_key) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "No such key `{}`", key); - + auto locked_key = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::THROW); locked_key->removeFileSegment(offset); } @@ -856,22 +848,12 @@ void FileCache::removeAllReleasable() { assertInitialized(); - auto lock = lockCache(); - - main_priority->iterate([&](LockedKey & locked_key, const FileSegmentMetadataPtr & segment_metadata) - { - if (segment_metadata->releasable()) - { - auto file_segment = segment_metadata->file_segment; - locked_key.removeFileSegment(file_segment->offset(), file_segment->lock()); - return PriorityIterationResult::REMOVE_AND_CONTINUE; - } - return PriorityIterationResult::CONTINUE; - }, lock); + metadata.iterate([](LockedKey & locked_key) { locked_key.removeAll(/* if_releasable */true); }); if (stash) { /// Remove all access information. 
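        /// (Illustrative note, not part of the original patch: the cache-wide lock is no
        /// longer held for the whole eviction pass -- removeAll() now runs through
        /// metadata.iterate() with per-key locking -- so the lockCache() added below only
        /// guards the stash queue cleanup.)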
+ auto lock = lockCache(); stash->records.clear(); stash->queue->removeAll(lock); } @@ -1095,7 +1077,7 @@ FileSegmentsHolderPtr FileCache::getSnapshot() FileSegmentsHolderPtr FileCache::getSnapshot(const Key & key) { FileSegments file_segments; - auto locked_key = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::THROW); + auto locked_key = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::THROW_LOGICAL); for (const auto & [_, file_segment_metadata] : *locked_key->getKeyMetadata()) file_segments.push_back(FileSegment::getSnapshot(file_segment_metadata->file_segment)); return std::make_unique(std::move(file_segments)); diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index a2234a4825a..7197696dfcb 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -183,6 +183,8 @@ LockedKeyPtr CacheMetadata::lockKeyMetadata( if (it == end()) { if (key_not_found_policy == KeyNotFoundPolicy::THROW) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "No such key `{}` in cache", key); + else if (key_not_found_policy == KeyNotFoundPolicy::THROW_LOGICAL) throw Exception(ErrorCodes::LOGICAL_ERROR, "No such key `{}` in cache", key); else if (key_not_found_policy == KeyNotFoundPolicy::RETURN_NULL) return nullptr; @@ -207,6 +209,8 @@ LockedKeyPtr CacheMetadata::lockKeyMetadata( return locked_metadata; if (key_not_found_policy == KeyNotFoundPolicy::THROW) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "No such key `{}` in cache", key); + else if (key_not_found_policy == KeyNotFoundPolicy::THROW_LOGICAL) throw Exception(ErrorCodes::LOGICAL_ERROR, "No such key `{}` in cache", key); if (key_not_found_policy == KeyNotFoundPolicy::RETURN_NULL) @@ -356,11 +360,11 @@ bool LockedKey::isLastOwnerOfFileSegment(size_t offset) const return file_segment_metadata->file_segment.use_count() == 2; } -void LockedKey::removeAllReleasable() +void LockedKey::removeAll(bool if_releasable) { for (auto it = key_metadata->begin(); it != key_metadata->end();) { - if (!it->second->releasable()) + if (if_releasable && !it->second->releasable()) { ++it; continue; diff --git a/src/Interpreters/Cache/Metadata.h b/src/Interpreters/Cache/Metadata.h index 9f2c5f278f9..9a45e6f0dd0 100644 --- a/src/Interpreters/Cache/Metadata.h +++ b/src/Interpreters/Cache/Metadata.h @@ -81,7 +81,7 @@ struct CacheMetadata : public std::unordered_map, { public: using Key = FileCacheKey; - using IterateCacheMetadataFunc = std::function; + using IterateCacheMetadataFunc = std::function; explicit CacheMetadata(const std::string & path_); @@ -100,6 +100,7 @@ public: enum class KeyNotFoundPolicy { THROW, + THROW_LOGICAL, CREATE_EMPTY, RETURN_NULL, }; @@ -156,7 +157,7 @@ struct LockedKey : private boost::noncopyable std::shared_ptr getKeyMetadata() const { return key_metadata; } std::shared_ptr getKeyMetadata() { return key_metadata; } - void removeAllReleasable(); + void removeAll(bool if_releasable = true); KeyMetadata::iterator removeFileSegment(size_t offset, const FileSegmentGuard::Lock &); KeyMetadata::iterator removeFileSegment(size_t offset); From 22e44ced8a30763ff147174633255fab91f01d70 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 30 Jun 2023 15:51:41 +0200 Subject: [PATCH 1102/1997] Add column modification time into system.parts_columns This can be useful to obtain at least some time for the part after mutations, since mutations will change the modification time of all parts. 
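As an illustration of that motivation, a hypothetical query against the new column might look as follows (the database and table names are made up, and column_modification_time is Nullable since not every part format can report it):

SELECT
    table,
    name AS part_name,
    column,
    modification_time,         -- mtime of the whole part, reset by any mutation
    column_modification_time   -- mtime of the column's own data file, added by this patch
FROM system.parts_columns
WHERE database = 'default' AND table = 'data' AND active
ORDER BY column_modification_time DESC;

A column rewritten by ALTER TABLE ... MATERIALIZE COLUMN should surface at the top, while columns the mutation did not touch keep the older file mtime (for Wide parts; see the per-format note further down).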
Signed-off-by: Azat Khuzhin --- .../MergeTree/DataPartStorageOnDiskFull.cpp | 5 ++++ .../MergeTree/DataPartStorageOnDiskFull.h | 1 + src/Storages/MergeTree/IDataPartStorage.h | 1 + src/Storages/MergeTree/IMergeTreeDataPart.h | 2 ++ .../MergeTree/MergeTreeDataPartCompact.cpp | 5 ++++ .../MergeTree/MergeTreeDataPartCompact.h | 2 ++ .../MergeTree/MergeTreeDataPartInMemory.h | 1 + .../MergeTree/MergeTreeDataPartWide.cpp | 12 ++++++++ .../MergeTree/MergeTreeDataPartWide.h | 2 ++ .../System/StorageSystemPartsColumns.cpp | 10 +++++++ .../StorageSystemProjectionPartsColumns.cpp | 11 ++++++- .../02117_show_create_table_system.reference | 2 ++ ..._parts_columns_modification_time.reference | 6 ++++ ...tem_parts_columns_modification_time.sql.j2 | 30 +++++++++++++++++++ 14 files changed, 89 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02806_system_parts_columns_modification_time.reference create mode 100644 tests/queries/0_stateless/02806_system_parts_columns_modification_time.sql.j2 diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskFull.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskFull.cpp index e62e5a5d6b2..20b6c5a919e 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskFull.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDiskFull.cpp @@ -80,6 +80,11 @@ DataPartStorageIteratorPtr DataPartStorageOnDiskFull::iterate() const volume->getDisk()->iterateDirectory(fs::path(root_path) / part_dir)); } +Poco::Timestamp DataPartStorageOnDiskFull::getFileLastModified(const String & file_name) const +{ + return volume->getDisk()->getLastModified(fs::path(root_path) / part_dir / file_name); +} + size_t DataPartStorageOnDiskFull::getFileSize(const String & file_name) const { return volume->getDisk()->getFileSize(fs::path(root_path) / part_dir / file_name); diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskFull.h b/src/Storages/MergeTree/DataPartStorageOnDiskFull.h index 2ceb392e80f..5d70404fcfa 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskFull.h +++ b/src/Storages/MergeTree/DataPartStorageOnDiskFull.h @@ -20,6 +20,7 @@ public: bool isDirectory(const std::string & name) const override; DataPartStorageIteratorPtr iterate() const override; + Poco::Timestamp getFileLastModified(const String & file_name) const override; size_t getFileSize(const std::string & file_name) const override; UInt32 getRefCount(const std::string & file_name) const override; std::string getRemotePath(const std::string & file_name) const override; diff --git a/src/Storages/MergeTree/IDataPartStorage.h b/src/Storages/MergeTree/IDataPartStorage.h index 9d6c5d0dcba..8c9fd6d0426 100644 --- a/src/Storages/MergeTree/IDataPartStorage.h +++ b/src/Storages/MergeTree/IDataPartStorage.h @@ -108,6 +108,7 @@ public: virtual DataPartStorageIteratorPtr iterate() const = 0; /// Get metadata for a file inside path dir. + virtual Poco::Timestamp getFileLastModified(const std::string & file_name) const = 0; virtual size_t getFileSize(const std::string & file_name) const = 0; virtual UInt32 getRefCount(const std::string & file_name) const = 0; diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index fd73d802579..797de39b020 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -116,6 +116,8 @@ public: /// Otherwise return information about column size on disk. 
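    /// (Illustrative note, not part of the original patch: the new virtual declared a few
    /// lines below is overridden per part format in this commit -- Wide parts stat the
    /// per-column .bin file, Compact parts stat the single shared data file, and in-memory
    /// parts return an empty optional.)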
ColumnSize getColumnSize(const String & column_name) const; + virtual std::optional getColumnModificationTime(const String & column_name) const = 0; + /// NOTE: Returns zeros if secondary indexes are not found in checksums. /// Otherwise return information about secondary index size on disk. IndexSize getSecondaryIndexSize(const String & secondary_index_name) const; diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp index 07e20f16a9f..9c47608e364 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp @@ -144,6 +144,11 @@ bool MergeTreeDataPartCompact::hasColumnFiles(const NameAndTypePair & column) co return (bin_checksum != checksums.files.end() && mrk_checksum != checksums.files.end()); } +std::optional MergeTreeDataPartCompact::getColumnModificationTime(const String & /* column_name */) const +{ + return getDataPartStorage().getFileLastModified(DATA_FILE_NAME_WITH_EXTENSION).epochTime(); +} + void MergeTreeDataPartCompact::checkConsistency(bool require_part_metadata) const { checkConsistencyBase(); diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.h b/src/Storages/MergeTree/MergeTreeDataPartCompact.h index b115692a7cf..08764eedb43 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.h @@ -55,6 +55,8 @@ public: bool hasColumnFiles(const NameAndTypePair & column) const override; + std::optional getColumnModificationTime(const String & column_name) const override; + String getFileNameForColumn(const NameAndTypePair & /* column */) const override { return DATA_FILE_NAME; } ~MergeTreeDataPartCompact() override; diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h index db7244d8e99..2698b69b38e 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h @@ -43,6 +43,7 @@ public: String getFileNameForColumn(const NameAndTypePair & /* column */) const override { return ""; } void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists) override; DataPartStoragePtr makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot) const override; + std::optional getColumnModificationTime(const String & /* column_name */) const override { return {}; } MutableDataPartStoragePtr flushToDisk(const String & new_relative_path, const StorageMetadataPtr & metadata_snapshot) const; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp index f44cbdd8628..2d886e2058b 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp @@ -260,6 +260,18 @@ bool MergeTreeDataPartWide::hasColumnFiles(const NameAndTypePair & column) const return res; } +std::optional MergeTreeDataPartWide::getColumnModificationTime(const String & column_name) const +{ + try + { + return getDataPartStorage().getFileLastModified(column_name + DATA_FILE_EXTENSION).epochTime(); + } + catch (const fs::filesystem_error &) + { + return {}; + } +} + String MergeTreeDataPartWide::getFileNameForColumn(const NameAndTypePair & column) const { String filename; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.h b/src/Storages/MergeTree/MergeTreeDataPartWide.h index 5ee497b9b21..0b2ffeb4b18 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.h +++ 
b/src/Storages/MergeTree/MergeTreeDataPartWide.h @@ -54,6 +54,8 @@ public: bool hasColumnFiles(const NameAndTypePair & column) const override; + std::optional getColumnModificationTime(const String & column_name) const override; + protected: static void loadIndexGranularityImpl( MergeTreeIndexGranularity & index_granularity_, MergeTreeIndexGranularityInfo & index_granularity_info_, diff --git a/src/Storages/System/StorageSystemPartsColumns.cpp b/src/Storages/System/StorageSystemPartsColumns.cpp index 00b958b015f..67c8d06e432 100644 --- a/src/Storages/System/StorageSystemPartsColumns.cpp +++ b/src/Storages/System/StorageSystemPartsColumns.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -62,6 +63,8 @@ StorageSystemPartsColumns::StorageSystemPartsColumns(const StorageID & table_id_ {"column_data_compressed_bytes", std::make_shared()}, {"column_data_uncompressed_bytes", std::make_shared()}, {"column_marks_bytes", std::make_shared()}, + {"column_modification_time", std::make_shared(std::make_shared())}, + {"serialization_kind", std::make_shared()}, {"subcolumns.names", std::make_shared(std::make_shared())}, {"subcolumns.types", std::make_shared(std::make_shared())}, @@ -235,6 +238,13 @@ void StorageSystemPartsColumns::processNextStorage( columns[res_index++]->insert(column_size.data_uncompressed); if (columns_mask[src_index++]) columns[res_index++]->insert(column_size.marks); + if (columns_mask[src_index++]) + { + if (auto column_modification_time = part->getColumnModificationTime(column.name)) + columns[res_index++]->insert(UInt64(column_modification_time.value())); + else + columns[res_index++]->insertDefault(); + } auto serialization = part->getSerialization(column.name); if (columns_mask[src_index++]) diff --git a/src/Storages/System/StorageSystemProjectionPartsColumns.cpp b/src/Storages/System/StorageSystemProjectionPartsColumns.cpp index a5968597885..06becc6d91c 100644 --- a/src/Storages/System/StorageSystemProjectionPartsColumns.cpp +++ b/src/Storages/System/StorageSystemProjectionPartsColumns.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -66,7 +67,8 @@ StorageSystemProjectionPartsColumns::StorageSystemProjectionPartsColumns(const S {"column_bytes_on_disk", std::make_shared()}, {"column_data_compressed_bytes", std::make_shared()}, {"column_data_uncompressed_bytes", std::make_shared()}, - {"column_marks_bytes", std::make_shared()} + {"column_marks_bytes", std::make_shared()}, + {"column_modification_time", std::make_shared(std::make_shared())}, } ) { @@ -247,6 +249,13 @@ void StorageSystemProjectionPartsColumns::processNextStorage( columns[res_index++]->insert(column_size.data_uncompressed); if (columns_mask[src_index++]) columns[res_index++]->insert(column_size.marks); + if (columns_mask[src_index++]) + { + if (auto column_modification_time = part->getColumnModificationTime(column.name)) + columns[res_index++]->insert(UInt64(column_modification_time.value())); + else + columns[res_index++]->insertDefault(); + } if (has_state_column) columns[res_index++]->insert(part->stateString()); diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 703972279e7..3834b05601f 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -565,6 +565,7 @@ CREATE TABLE system.parts_columns `column_data_compressed_bytes` UInt64, 
`column_data_uncompressed_bytes` UInt64, `column_marks_bytes` UInt64, + `column_modification_time` Nullable(DateTime), `serialization_kind` String, `subcolumns.names` Array(String), `subcolumns.types` Array(String), @@ -750,6 +751,7 @@ CREATE TABLE system.projection_parts_columns `column_data_compressed_bytes` UInt64, `column_data_uncompressed_bytes` UInt64, `column_marks_bytes` UInt64, + `column_modification_time` Nullable(DateTime), `bytes` UInt64, `marks_size` UInt64, `part_name` String diff --git a/tests/queries/0_stateless/02806_system_parts_columns_modification_time.reference b/tests/queries/0_stateless/02806_system_parts_columns_modification_time.reference new file mode 100644 index 00000000000..f1be11980bb --- /dev/null +++ b/tests/queries/0_stateless/02806_system_parts_columns_modification_time.reference @@ -0,0 +1,6 @@ +Wide key 1 1 +Wide key 1 1 +Wide value 1 0 +Compact key 1 1 +Compact key 1 1 +Compact value 1 1 diff --git a/tests/queries/0_stateless/02806_system_parts_columns_modification_time.sql.j2 b/tests/queries/0_stateless/02806_system_parts_columns_modification_time.sql.j2 new file mode 100644 index 00000000000..eee236ff681 --- /dev/null +++ b/tests/queries/0_stateless/02806_system_parts_columns_modification_time.sql.j2 @@ -0,0 +1,30 @@ +-- Tags: no-s3-storage +-- Tag: no-s3-storage because S3 updates metadata for the virtual link file on metadata disk (see CreateHardlinkOperation::execute() for details) + +set mutations_sync=1; + +{# modification time of the part folder and column files not always equal, this is how much seconds of difference is allowed #} +{% set mtime_diff_in_seconds = 5 %} + +{% for id, settings, file_per_column in [ + ("wide", "min_bytes_for_wide_part=0, min_rows_for_wide_part=0", true), + ("compact", "min_bytes_for_wide_part=1000, min_rows_for_wide_part=100", false) +] +%} + +drop table if exists data_{{ id }}; +create table data_{{ id }} (key Int) engine=MergeTree() order by tuple() settings {{ settings }}; +insert into data_{{ id }} values (1); +select sleep(3) format Null; +select part_type, column, now()-modification_time < 10, modification_time - column_modification_time < {{ mtime_diff_in_seconds }} from system.parts_columns where database = currentDatabase() and table = 'data_{{ id }}'; +alter table data_{{ id }} add column value Int default 0; +alter table data_{{ id }} materialize column value; +select part_type, column, now()-modification_time < 10, +{% if file_per_column %} +modification_time - column_modification_time >= 3 +{% else %} +modification_time - column_modification_time < {{ mtime_diff_in_seconds }} +{% endif %} +from system.parts_columns where active and database = currentDatabase() and table = 'data_{{ id }}' order by column; + +{% endfor %} From 1a40e30797fcfb65885beb0630e9605bd46d0b64 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Mon, 3 Jul 2023 11:37:55 +0000 Subject: [PATCH 1103/1997] Add initcapUtf8: impl + tests --- .../functions/string-functions.md | 10 +- .../functions/string-functions.md | 9 +- src/Functions/LowerUpperUTF8Impl.h | 2 - src/Functions/initcapUTF8.cpp | 114 ++++++++++++++++++ .../0_stateless/02810_initcap.reference | 8 +- tests/queries/0_stateless/02810_initcap.sql | 11 +- .../aspell-ignore/en/aspell-dict.txt | 1 + 7 files changed, 148 insertions(+), 7 deletions(-) create mode 100644 src/Functions/initcapUTF8.cpp diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index d2180c9f3ea..cab6764c041 100644 --- 
a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -1256,4 +1256,12 @@ Result: ## initcap -Convert the first letter of each word to upper case and the rest to lower case. Words are sequences of alphanumeric characters separated by non-alphanumeric characters. \ No newline at end of file +Convert the first letter of each word to upper case and the rest to lower case. Words are sequences of alphanumeric characters separated by non-alphanumeric characters. + +## initcapUTF8 + +Like [initcap](#initcap), assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. + +Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I). + +If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point. \ No newline at end of file diff --git a/docs/ru/sql-reference/functions/string-functions.md b/docs/ru/sql-reference/functions/string-functions.md index bd104b27bed..ecb36bf4f65 100644 --- a/docs/ru/sql-reference/functions/string-functions.md +++ b/docs/ru/sql-reference/functions/string-functions.md @@ -1116,4 +1116,11 @@ Do Nothing for 2 Minutes 2:00   ## initcap {#initcap} -Переводит первую букву каждого слова в строке в верхний регистр, а остальные — в нижний. Словами считаются последовательности алфавитно-цифровых символов, разделённые любыми другими символами. \ No newline at end of file +Переводит первую букву каждого слова в строке в верхний регистр, а остальные — в нижний. Словами считаются последовательности алфавитно-цифровых символов, разделённые любыми другими символами. + +## initcapUTF8 {#initcapUTF8} + +Как [initcap](#initcap), предпологая, что строка содержит набор байтов, представляющий текст в кодировке UTF-8. +Не учитывает язык. То есть, для турецкого языка, результат может быть не совсем верным. +Если длина UTF-8 последовательности байтов различна для верхнего и нижнего регистра кодовой точки, то для этой кодовой точки результат работы может быть некорректным. +Если строка содержит набор байтов, не являющийся UTF-8, то поведение не определено. 
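For reference, the documented behavior in two illustrative queries (expected results taken from the test reference added later in this commit):

SELECT initcap('yeah, well, i`m gonna go build my own theme park');
-- Yeah, Well, I`M Gonna Go Build My Own Theme Park

SELECT initcapUTF8('привет, как дела?');
-- Привет, Как Дела?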
\ No newline at end of file
diff --git a/src/Functions/LowerUpperUTF8Impl.h b/src/Functions/LowerUpperUTF8Impl.h
index f6b18439fd1..460f75f9bde 100644
--- a/src/Functions/LowerUpperUTF8Impl.h
+++ b/src/Functions/LowerUpperUTF8Impl.h
@@ -133,8 +133,6 @@ struct LowerUpperUTF8Impl
         }
         else
         {
-            static const Poco::UTF8Encoding utf8;
-
             size_t src_sequence_length = UTF8::seqLength(*src);
             /// In case partial buffer was passed (due to SSE optimization)
             /// we cannot convert it with current src_end, but we may have more
diff --git a/src/Functions/initcapUTF8.cpp b/src/Functions/initcapUTF8.cpp
new file mode 100644
index 00000000000..333ebe266d3
--- /dev/null
+++ b/src/Functions/initcapUTF8.cpp
@@ -0,0 +1,114 @@
+#include <Columns/ColumnString.h>
+#include <Functions/FunctionFactory.h>
+#include <Functions/FunctionStringToString.h>
+#include <Common/UTF8Helpers.h>
+#include <Poco/Unicode.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int BAD_ARGUMENTS;
+}
+
+namespace
+{
+
+struct InitcapUTF8Impl
+{
+    static void vector(
+        const ColumnString::Chars & data,
+        const ColumnString::Offsets & offsets,
+        ColumnString::Chars & res_data,
+        ColumnString::Offsets & res_offsets)
+    {
+        if (data.empty())
+            return;
+        res_data.resize(data.size());
+        res_offsets.assign(offsets);
+        array(data.data(), data.data() + data.size(), offsets, res_data.data());
+    }
+
+    [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
+    {
+        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function initcapUTF8 cannot work with FixedString argument");
+    }
+
+    static void processCodePoint(const UInt8 *& src, const UInt8 * src_end, UInt8 *& dst, bool& prev_alphanum)
+    {
+        size_t src_sequence_length = UTF8::seqLength(*src);
+        auto src_code_point = UTF8::convertUTF8ToCodePoint(src, src_end - src);
+
+        if (src_code_point)
+        {
+            bool alpha = Poco::Unicode::isAlpha(*src_code_point);
+            bool alphanum = alpha || Poco::Unicode::isDigit(*src_code_point);
+
+            int dst_code_point = *src_code_point;
+            if (alphanum && !prev_alphanum)
+            {
+                if (alpha)
+                    dst_code_point = Poco::Unicode::toUpper(*src_code_point);
+            }
+            else if (alpha)
+            {
+                dst_code_point = Poco::Unicode::toLower(*src_code_point);
+            }
+            prev_alphanum = alphanum;
+            if (dst_code_point > 0)
+            {
+                size_t dst_sequence_length = UTF8::convertCodePointToUTF8(dst_code_point, dst, src_end - src);
+                assert(dst_sequence_length <= 4);
+
+                if (dst_sequence_length == src_sequence_length)
+                {
+                    src += dst_sequence_length;
+                    dst += dst_sequence_length;
+                    return;
+                }
+            }
+        }
+
+        *dst = *src;
+        ++dst;
+        ++src;
+        prev_alphanum = false;
+    }
+
+private:
+
+    static void array(const UInt8 * src, const UInt8 * src_end, const ColumnString::Offsets & offsets, UInt8 * dst)
+    {
+        auto offset_it = offsets.begin();
+        const UInt8 * begin = src;
+
+        /// handle remaining symbols, row by row (to avoid influence of bad UTF8 symbols from one row to another)
+        while (src < src_end)
+        {
+            const UInt8 * row_end = begin + *offset_it;
+            chassert(row_end >= src);
+            bool prev_alphanum = false;
+            while (src < row_end)
+                processCodePoint(src, row_end, dst, prev_alphanum);
+            ++offset_it;
+        }
+    }
+};
+
+struct NameInitcapUTF8
+{
+    static constexpr auto name = "initcapUTF8";
+};
+
+using FunctionInitcapUTF8 = FunctionStringToString<InitcapUTF8Impl, NameInitcapUTF8>;
+
+}
+
+REGISTER_FUNCTION(InitcapUTF8)
+{
+    factory.registerFunction<FunctionInitcapUTF8>();
+}
+
+}
diff --git a/tests/queries/0_stateless/02810_initcap.reference b/tests/queries/0_stateless/02810_initcap.reference
index 4caa57e5ac0..0d24e14c445 100644
--- a/tests/queries/0_stateless/02810_initcap.reference
+++ b/tests/queries/0_stateless/02810_initcap.reference
@@ -3,5 +3,11 @@ Hello
 Hello
 Hello World
Yeah, Well, I`M Gonna Go Build My Own Theme Park -Crc32ieee Is Best Function +Crc32ieee Is The Best Function 42ok + +Hello +Yeah, Well, I`M Gonna Go Build My Own Theme Park +Привет, Как Дела? +Ätsch, Bätsch +We Dont Support Cases When Lowercase And Uppercase Characters Occupy Different Number Of Bytes In Utf-8. As An Example, This Happens For ß And ẞ. diff --git a/tests/queries/0_stateless/02810_initcap.sql b/tests/queries/0_stateless/02810_initcap.sql index f61fcddf4c1..1a730003604 100644 --- a/tests/queries/0_stateless/02810_initcap.sql +++ b/tests/queries/0_stateless/02810_initcap.sql @@ -3,5 +3,12 @@ select initcap('Hello'); select initcap('hello'); select initcap('hello world'); select initcap('yeah, well, i`m gonna go build my own theme park'); -select initcap('CRC32IEEE is best function'); -select initcap('42oK'); \ No newline at end of file +select initcap('CRC32IEEE is the best function'); +select initcap('42oK'); + +select initcapUTF8(''); +select initcapUTF8('Hello'); +select initcapUTF8('yeah, well, i`m gonna go build my own theme park'); +select initcapUTF8('привет, как дела?'); +select initcapUTF8('ätsch, bätsch'); +select initcapUTF8('We dont support cases when lowercase and uppercase characters occupy different number of bytes in UTF-8. As an example, this happens for ß and ẞ.'); \ No newline at end of file diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index f25d082e5a6..835de91c0d8 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1580,6 +1580,7 @@ indexOf infi initialQueryID initializeAggregation +initcap injective innogames inodes From 138e28cf6b98697297c35430f877782c437ddbe0 Mon Sep 17 00:00:00 2001 From: Tyler Hannan Date: Mon, 3 Jul 2023 13:45:39 +0200 Subject: [PATCH 1104/1997] Update README.md (#51726) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index eac036c2d9b..80a8d0e331f 100644 --- a/README.md +++ b/README.md @@ -23,11 +23,11 @@ curl https://clickhouse.com/ | sh ## Upcoming Events * [**v23.6 Release Webinar**](https://clickhouse.com/company/events/v23-6-release-call?utm_source=github&utm_medium=social&utm_campaign=release-webinar-2023-06) - Jun 29 - 23.6 is rapidly approaching. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release. -* [**ClickHouse Meetup in Paris**](https://www.meetup.com/clickhouse-france-user-group/events/294283460) - Jul 4 * [**ClickHouse Meetup in Boston**](https://www.meetup.com/clickhouse-boston-user-group/events/293913596) - Jul 18 * [**ClickHouse Meetup in NYC**](https://www.meetup.com/clickhouse-new-york-user-group/events/293913441) - Jul 19 * [**ClickHouse Meetup in Toronto**](https://www.meetup.com/clickhouse-toronto-user-group/events/294183127) - Jul 20 * [**ClickHouse Meetup in Singapore**](https://www.meetup.com/clickhouse-singapore-meetup-group/events/294428050/) - Jul 27 +* [**ClickHouse Meetup in Paris**](https://www.meetup.com/clickhouse-france-user-group/events/294283460) - Sep 12 Also, keep an eye out for upcoming meetups around the world. Somewhere else you want us to be? Please feel free to reach out to tyler clickhouse com. 
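For readers skimming the initcapUTF8 patch above, the word-boundary rule it implements is: the first character of each alphanumeric run is uppercased if it is a letter, and every other letter is lowercased. Below is a minimal standalone sketch of that state machine, deliberately reduced to ASCII and plain C++ so it compiles without the ClickHouse tree; demo_initcap is an illustrative name, not part of the patch.

#include <cctype>
#include <iostream>
#include <string>

// ASCII-only analogue of the initcap state machine from the patch above:
// uppercase the first letter of every alphanumeric run, lowercase the rest.
std::string demo_initcap(std::string s)
{
    bool prev_alphanum = false;
    for (char & c : s)
    {
        const unsigned char u = static_cast<unsigned char>(c);
        const bool alpha = std::isalpha(u) != 0;
        const bool alphanum = alpha || std::isdigit(u) != 0;
        if (alpha)
            c = static_cast<char>(prev_alphanum ? std::tolower(u) : std::toupper(u));
        prev_alphanum = alphanum;
    }
    return s;
}

int main()
{
    std::cout << demo_initcap("CRC32IEEE is the best function") << '\n'; // Crc32ieee Is The Best Function
    std::cout << demo_initcap("42oK") << '\n';                           // 42ok
}

The UTF-8 version in src/Functions/initcapUTF8.cpp runs the same two-state loop, but decodes a whole code point per step and falls back to copying bytes unchanged when the case-converted form would occupy a different number of bytes, which is the ß/ẞ caveat exercised by the test above.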
From 2feb2c0f70fd201fb100ae02831ca90bb43f162e Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 3 Jul 2023 13:47:08 +0200 Subject: [PATCH 1105/1997] Fix style check --- src/Interpreters/Cache/FileCache.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index d2647a68d42..9a7214a1f2f 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -48,7 +48,6 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; - extern const int BAD_ARGUMENTS; } FileCache::FileCache(const FileCacheSettings & settings) From 579a446d1901b356fd1f58d98cd1cf6898de2604 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Mon, 3 Jul 2023 13:49:56 +0200 Subject: [PATCH 1106/1997] Update 02789_filesystem_cache_alignment.sh --- tests/queries/0_stateless/02789_filesystem_cache_alignment.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02789_filesystem_cache_alignment.sh b/tests/queries/0_stateless/02789_filesystem_cache_alignment.sh index 509d1a635b1..912cdd3d1e8 100755 --- a/tests/queries/0_stateless/02789_filesystem_cache_alignment.sh +++ b/tests/queries/0_stateless/02789_filesystem_cache_alignment.sh @@ -61,7 +61,7 @@ WHERE file_segment_size < file_size AND end_offset + 1 != file_size; ") #echo $all -if [ "$all" -gt "10" ]; then +if [ "$all" -gt "1" ]; then echo "OK" else echo "FAIL" @@ -105,7 +105,7 @@ SELECT count() FROM ($query2) WHERE file_segment_size < file_size AND file_segment_range_end + 1 != file_size; ") -if [ "$all" -gt "10" ]; then +if [ "$all" -gt "1" ]; then echo "OK" else echo "FAIL" From 16ab84d8040e92c0b1a258cf554195371469344b Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Mon, 3 Jul 2023 11:50:26 +0000 Subject: [PATCH 1107/1997] Style fix --- docs/en/sql-reference/functions/string-functions.md | 2 +- docs/ru/sql-reference/functions/string-functions.md | 2 +- utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index cab6764c041..cbbd32328bd 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -1264,4 +1264,4 @@ Like [initcap](#initcap), assuming that the string contains valid UTF-8 encoded Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I). -If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point. \ No newline at end of file +If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point. diff --git a/docs/ru/sql-reference/functions/string-functions.md b/docs/ru/sql-reference/functions/string-functions.md index ecb36bf4f65..62697e5e197 100644 --- a/docs/ru/sql-reference/functions/string-functions.md +++ b/docs/ru/sql-reference/functions/string-functions.md @@ -1123,4 +1123,4 @@ Do Nothing for 2 Minutes 2:00   Как [initcap](#initcap), предпологая, что строка содержит набор байтов, представляющий текст в кодировке UTF-8. Не учитывает язык. То есть, для турецкого языка, результат может быть не совсем верным. 
Если длина UTF-8 последовательности байтов различна для верхнего и нижнего регистра кодовой точки, то для этой кодовой точки результат работы может быть некорректным. -Если строка содержит набор байтов, не являющийся UTF-8, то поведение не определено. \ No newline at end of file +Если строка содержит набор байтов, не являющийся UTF-8, то поведение не определено. diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 835de91c0d8..79c34360584 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1581,6 +1581,7 @@ infi initialQueryID initializeAggregation initcap +initcapUTF injective innogames inodes From d1c4a37473749172406e850a3f0c0bcc6f122a49 Mon Sep 17 00:00:00 2001 From: Alexander Sapin Date: Mon, 3 Jul 2023 14:38:38 +0200 Subject: [PATCH 1108/1997] Remove MemoryPool from Poco because it's useless --- base/poco/Foundation/CMakeLists.txt | 1 - .../poco/Foundation/include/Poco/MemoryPool.h | 116 ------------------ base/poco/Foundation/src/MemoryPool.cpp | 105 ---------------- .../Net/include/Poco/Net/HTTPBasicStreamBuf.h | 5 +- .../include/Poco/Net/HTTPBufferAllocator.h | 53 -------- .../Net/include/Poco/Net/HTTPChunkedStream.h | 13 -- .../include/Poco/Net/HTTPFixedLengthStream.h | 12 -- .../Net/include/Poco/Net/HTTPHeaderStream.h | 13 -- base/poco/Net/include/Poco/Net/HTTPStream.h | 13 -- base/poco/Net/src/HTTPBufferAllocator.cpp | 44 ------- base/poco/Net/src/HTTPChunkedStream.cpp | 50 +------- base/poco/Net/src/HTTPFixedLengthStream.cpp | 47 +------ base/poco/Net/src/HTTPHeaderStream.cpp | 50 +------- base/poco/Net/src/HTTPSession.cpp | 8 +- base/poco/Net/src/HTTPStream.cpp | 48 +------- 15 files changed, 10 insertions(+), 568 deletions(-) delete mode 100644 base/poco/Foundation/include/Poco/MemoryPool.h delete mode 100644 base/poco/Foundation/src/MemoryPool.cpp delete mode 100644 base/poco/Net/include/Poco/Net/HTTPBufferAllocator.h delete mode 100644 base/poco/Net/src/HTTPBufferAllocator.cpp diff --git a/base/poco/Foundation/CMakeLists.txt b/base/poco/Foundation/CMakeLists.txt index 358f49ed055..d0dde8a51a5 100644 --- a/base/poco/Foundation/CMakeLists.txt +++ b/base/poco/Foundation/CMakeLists.txt @@ -87,7 +87,6 @@ set (SRCS src/LoggingRegistry.cpp src/LogStream.cpp src/MD5Engine.cpp - src/MemoryPool.cpp src/MemoryStream.cpp src/Message.cpp src/Mutex.cpp diff --git a/base/poco/Foundation/include/Poco/MemoryPool.h b/base/poco/Foundation/include/Poco/MemoryPool.h deleted file mode 100644 index 9ab12081b5f..00000000000 --- a/base/poco/Foundation/include/Poco/MemoryPool.h +++ /dev/null @@ -1,116 +0,0 @@ -// -// MemoryPool.h -// -// Library: Foundation -// Package: Core -// Module: MemoryPool -// -// Definition of the MemoryPool class. -// -// Copyright (c) 2005-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Foundation_MemoryPool_INCLUDED -#define Foundation_MemoryPool_INCLUDED - - -#include -#include -#include "Poco/Foundation.h" -#include "Poco/Mutex.h" - - -namespace Poco -{ - - -class Foundation_API MemoryPool -/// A simple pool for fixed-size memory blocks. -/// -/// The main purpose of this class is to speed-up -/// memory allocations, as well as to reduce memory -/// fragmentation in situations where the same blocks -/// are allocated all over again, such as in server -/// applications. -/// -/// All allocated blocks are retained for future use. 
-/// A limit on the number of blocks can be specified. -/// Blocks can be preallocated. -{ -public: - MemoryPool(std::size_t blockSize, int preAlloc = 0, int maxAlloc = 0); - /// Creates a MemoryPool for blocks with the given blockSize. - /// The number of blocks given in preAlloc are preallocated. - - ~MemoryPool(); - - void * get(); - /// Returns a memory block. If there are no more blocks - /// in the pool, a new block will be allocated. - /// - /// If maxAlloc blocks are already allocated, an - /// OutOfMemoryException is thrown. - - void release(void * ptr); - /// Releases a memory block and returns it to the pool. - - std::size_t blockSize() const; - /// Returns the block size. - - int allocated() const; - /// Returns the number of allocated blocks. - - int available() const; - /// Returns the number of available blocks in the pool. - -private: - MemoryPool(); - MemoryPool(const MemoryPool &); - MemoryPool & operator=(const MemoryPool &); - - void clear(); - - enum - { - BLOCK_RESERVE = 128 - }; - - typedef std::vector BlockVec; - - std::size_t _blockSize; - int _maxAlloc; - int _allocated; - BlockVec _blocks; - FastMutex _mutex; -}; - - -// -// inlines -// -inline std::size_t MemoryPool::blockSize() const -{ - return _blockSize; -} - - -inline int MemoryPool::allocated() const -{ - return _allocated; -} - - -inline int MemoryPool::available() const -{ - return (int)_blocks.size(); -} - - -} // namespace Poco - - -#endif // Foundation_MemoryPool_INCLUDED diff --git a/base/poco/Foundation/src/MemoryPool.cpp b/base/poco/Foundation/src/MemoryPool.cpp deleted file mode 100644 index 01c477be525..00000000000 --- a/base/poco/Foundation/src/MemoryPool.cpp +++ /dev/null @@ -1,105 +0,0 @@ -// -// MemoryPool.cpp -// -// Library: Foundation -// Package: Core -// Module: MemoryPool -// -// Copyright (c) 2005-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/MemoryPool.h" -#include "Poco/Exception.h" - - -namespace Poco { - - -MemoryPool::MemoryPool(std::size_t blockSize, int preAlloc, int maxAlloc): - _blockSize(blockSize), - _maxAlloc(maxAlloc), - _allocated(preAlloc) -{ - poco_assert (maxAlloc == 0 || maxAlloc >= preAlloc); - poco_assert (preAlloc >= 0 && maxAlloc >= 0); - - int r = BLOCK_RESERVE; - if (preAlloc > r) - r = preAlloc; - if (maxAlloc > 0 && maxAlloc < r) - r = maxAlloc; - _blocks.reserve(r); - - try - { - for (int i = 0; i < preAlloc; ++i) - { - _blocks.push_back(new char[_blockSize]); - } - } - catch (...) - { - clear(); - throw; - } -} - - -MemoryPool::~MemoryPool() -{ - clear(); -} - - -void MemoryPool::clear() -{ - for (BlockVec::iterator it = _blocks.begin(); it != _blocks.end(); ++it) - { - delete [] *it; - } - _blocks.clear(); -} - - -void* MemoryPool::get() -{ - FastMutex::ScopedLock lock(_mutex); - - if (_blocks.empty()) - { - if (_maxAlloc == 0 || _allocated < _maxAlloc) - { - ++_allocated; - return new char[_blockSize]; - } - else throw OutOfMemoryException("MemoryPool exhausted"); - } - else - { - char* ptr = _blocks.back(); - _blocks.pop_back(); - return ptr; - } -} - - -void MemoryPool::release(void* ptr) -{ - FastMutex::ScopedLock lock(_mutex); - - try - { - _blocks.push_back(reinterpret_cast(ptr)); - } - catch (...) 
- { - delete [] reinterpret_cast(ptr); - } -} - - -} // namespace Poco diff --git a/base/poco/Net/include/Poco/Net/HTTPBasicStreamBuf.h b/base/poco/Net/include/Poco/Net/HTTPBasicStreamBuf.h index c4872d95353..c87719b63a4 100644 --- a/base/poco/Net/include/Poco/Net/HTTPBasicStreamBuf.h +++ b/base/poco/Net/include/Poco/Net/HTTPBasicStreamBuf.h @@ -19,7 +19,6 @@ #include "Poco/BufferedStreamBuf.h" -#include "Poco/Net/HTTPBufferAllocator.h" #include "Poco/Net/Net.h" @@ -27,9 +26,9 @@ namespace Poco { namespace Net { + constexpr size_t HTTP_DEFAULT_BUFFER_SIZE = 8 * 1024; - - typedef Poco::BasicBufferedStreamBuf, HTTPBufferAllocator> HTTPBasicStreamBuf; + typedef Poco::BasicBufferedStreamBuf> HTTPBasicStreamBuf; } diff --git a/base/poco/Net/include/Poco/Net/HTTPBufferAllocator.h b/base/poco/Net/include/Poco/Net/HTTPBufferAllocator.h deleted file mode 100644 index 5d088e35297..00000000000 --- a/base/poco/Net/include/Poco/Net/HTTPBufferAllocator.h +++ /dev/null @@ -1,53 +0,0 @@ -// -// HTTPBufferAllocator.h -// -// Library: Net -// Package: HTTP -// Module: HTTPBufferAllocator -// -// Definition of the HTTPBufferAllocator class. -// -// Copyright (c) 2005-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Net_HTTPBufferAllocator_INCLUDED -#define Net_HTTPBufferAllocator_INCLUDED - - -#include -#include "Poco/MemoryPool.h" -#include "Poco/Net/Net.h" - - -namespace Poco -{ -namespace Net -{ - - - class Net_API HTTPBufferAllocator - /// A BufferAllocator for HTTP streams. - { - public: - static char * allocate(std::streamsize size); - static void deallocate(char * ptr, std::streamsize size); - - enum - { - BUFFER_SIZE = 128 * 1024 - }; - - private: - static Poco::MemoryPool _pool; - }; - - -} -} // namespace Poco::Net - - -#endif // Net_HTTPBufferAllocator_INCLUDED diff --git a/base/poco/Net/include/Poco/Net/HTTPChunkedStream.h b/base/poco/Net/include/Poco/Net/HTTPChunkedStream.h index 47987b18817..5f4729c9278 100644 --- a/base/poco/Net/include/Poco/Net/HTTPChunkedStream.h +++ b/base/poco/Net/include/Poco/Net/HTTPChunkedStream.h @@ -21,7 +21,6 @@ #include #include #include -#include "Poco/MemoryPool.h" #include "Poco/Net/HTTPBasicStreamBuf.h" #include "Poco/Net/Net.h" @@ -80,12 +79,6 @@ namespace Net public: HTTPChunkedInputStream(HTTPSession & session); ~HTTPChunkedInputStream(); - - void * operator new(std::size_t size); - void operator delete(void * ptr); - - private: - static Poco::MemoryPool _pool; }; @@ -95,12 +88,6 @@ namespace Net public: HTTPChunkedOutputStream(HTTPSession & session); ~HTTPChunkedOutputStream(); - - void * operator new(std::size_t size); - void operator delete(void * ptr); - - private: - static Poco::MemoryPool _pool; }; diff --git a/base/poco/Net/include/Poco/Net/HTTPFixedLengthStream.h b/base/poco/Net/include/Poco/Net/HTTPFixedLengthStream.h index 4de211fdb92..2f4df102605 100644 --- a/base/poco/Net/include/Poco/Net/HTTPFixedLengthStream.h +++ b/base/poco/Net/include/Poco/Net/HTTPFixedLengthStream.h @@ -78,12 +78,6 @@ namespace Net public: HTTPFixedLengthInputStream(HTTPSession & session, HTTPFixedLengthStreamBuf::ContentLength length); ~HTTPFixedLengthInputStream(); - - void * operator new(std::size_t size); - void operator delete(void * ptr); - - private: - static Poco::MemoryPool _pool; }; @@ -93,12 +87,6 @@ namespace Net public: HTTPFixedLengthOutputStream(HTTPSession & session, HTTPFixedLengthStreamBuf::ContentLength length); ~HTTPFixedLengthOutputStream(); - - void * operator 
new(std::size_t size); - void operator delete(void * ptr); - - private: - static Poco::MemoryPool _pool; }; diff --git a/base/poco/Net/include/Poco/Net/HTTPHeaderStream.h b/base/poco/Net/include/Poco/Net/HTTPHeaderStream.h index bcfca984d8b..cf1a6dba2e6 100644 --- a/base/poco/Net/include/Poco/Net/HTTPHeaderStream.h +++ b/base/poco/Net/include/Poco/Net/HTTPHeaderStream.h @@ -21,7 +21,6 @@ #include #include #include -#include "Poco/MemoryPool.h" #include "Poco/Net/HTTPBasicStreamBuf.h" #include "Poco/Net/Net.h" @@ -74,12 +73,6 @@ namespace Net public: HTTPHeaderInputStream(HTTPSession & session); ~HTTPHeaderInputStream(); - - void * operator new(std::size_t size); - void operator delete(void * ptr); - - private: - static Poco::MemoryPool _pool; }; @@ -89,12 +82,6 @@ namespace Net public: HTTPHeaderOutputStream(HTTPSession & session); ~HTTPHeaderOutputStream(); - - void * operator new(std::size_t size); - void operator delete(void * ptr); - - private: - static Poco::MemoryPool _pool; }; diff --git a/base/poco/Net/include/Poco/Net/HTTPStream.h b/base/poco/Net/include/Poco/Net/HTTPStream.h index 0197bc62eb2..48502347b2c 100644 --- a/base/poco/Net/include/Poco/Net/HTTPStream.h +++ b/base/poco/Net/include/Poco/Net/HTTPStream.h @@ -21,7 +21,6 @@ #include #include #include -#include "Poco/MemoryPool.h" #include "Poco/Net/HTTPBasicStreamBuf.h" #include "Poco/Net/Net.h" @@ -75,12 +74,6 @@ namespace Net public: HTTPInputStream(HTTPSession & session); ~HTTPInputStream(); - - void * operator new(std::size_t size); - void operator delete(void * ptr); - - private: - static Poco::MemoryPool _pool; }; @@ -90,12 +83,6 @@ namespace Net public: HTTPOutputStream(HTTPSession & session); ~HTTPOutputStream(); - - void * operator new(std::size_t size); - void operator delete(void * ptr); - - private: - static Poco::MemoryPool _pool; }; diff --git a/base/poco/Net/src/HTTPBufferAllocator.cpp b/base/poco/Net/src/HTTPBufferAllocator.cpp deleted file mode 100644 index 2944e2a6121..00000000000 --- a/base/poco/Net/src/HTTPBufferAllocator.cpp +++ /dev/null @@ -1,44 +0,0 @@ -// -// HTTPBufferAllocator.cpp -// -// Library: Net -// Package: HTTP -// Module: HTTPBufferAllocator -// -// Copyright (c) 2005-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. 
-// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/Net/HTTPBufferAllocator.h" - - -using Poco::MemoryPool; - - -namespace Poco { -namespace Net { - - -MemoryPool HTTPBufferAllocator::_pool(HTTPBufferAllocator::BUFFER_SIZE, 16); - - -char* HTTPBufferAllocator::allocate(std::streamsize size) -{ - poco_assert_dbg (size == BUFFER_SIZE); - - return reinterpret_cast(_pool.get()); -} - - -void HTTPBufferAllocator::deallocate(char* ptr, std::streamsize size) -{ - poco_assert_dbg (size == BUFFER_SIZE); - - _pool.release(ptr); -} - - -} } // namespace Poco::Net diff --git a/base/poco/Net/src/HTTPChunkedStream.cpp b/base/poco/Net/src/HTTPChunkedStream.cpp index f2f79da590b..376e3f55492 100644 --- a/base/poco/Net/src/HTTPChunkedStream.cpp +++ b/base/poco/Net/src/HTTPChunkedStream.cpp @@ -34,7 +34,7 @@ namespace Net { HTTPChunkedStreamBuf::HTTPChunkedStreamBuf(HTTPSession& session, openmode mode): - HTTPBasicStreamBuf(HTTPBufferAllocator::BUFFER_SIZE, mode), + HTTPBasicStreamBuf(HTTP_DEFAULT_BUFFER_SIZE, mode), _session(session), _mode(mode), _chunk(0) @@ -181,10 +181,6 @@ HTTPChunkedStreamBuf* HTTPChunkedIOS::rdbuf() // HTTPChunkedInputStream // - -Poco::MemoryPool HTTPChunkedInputStream::_pool(sizeof(HTTPChunkedInputStream)); - - HTTPChunkedInputStream::HTTPChunkedInputStream(HTTPSession& session): HTTPChunkedIOS(session, std::ios::in), std::istream(&_buf) @@ -196,34 +192,10 @@ HTTPChunkedInputStream::~HTTPChunkedInputStream() { } - -void* HTTPChunkedInputStream::operator new(std::size_t size) -{ - return _pool.get(); -} - - -void HTTPChunkedInputStream::operator delete(void* ptr) -{ - try - { - _pool.release(ptr); - } - catch (...) - { - poco_unexpected(); - } -} - - // // HTTPChunkedOutputStream // - -Poco::MemoryPool HTTPChunkedOutputStream::_pool(sizeof(HTTPChunkedOutputStream)); - - HTTPChunkedOutputStream::HTTPChunkedOutputStream(HTTPSession& session): HTTPChunkedIOS(session, std::ios::out), std::ostream(&_buf) @@ -235,24 +207,4 @@ HTTPChunkedOutputStream::~HTTPChunkedOutputStream() { } - -void* HTTPChunkedOutputStream::operator new(std::size_t size) -{ - return _pool.get(); -} - - -void HTTPChunkedOutputStream::operator delete(void* ptr) -{ - try - { - _pool.release(ptr); - } - catch (...) - { - poco_unexpected(); - } -} - - } } // namespace Poco::Net diff --git a/base/poco/Net/src/HTTPFixedLengthStream.cpp b/base/poco/Net/src/HTTPFixedLengthStream.cpp index d19f6122ee1..fd77ff71cd9 100644 --- a/base/poco/Net/src/HTTPFixedLengthStream.cpp +++ b/base/poco/Net/src/HTTPFixedLengthStream.cpp @@ -30,7 +30,7 @@ namespace Net { HTTPFixedLengthStreamBuf::HTTPFixedLengthStreamBuf(HTTPSession& session, ContentLength length, openmode mode): - HTTPBasicStreamBuf(HTTPBufferAllocator::BUFFER_SIZE, mode), + HTTPBasicStreamBuf(HTTP_DEFAULT_BUFFER_SIZE, mode), _session(session), _length(length), _count(0) @@ -109,9 +109,6 @@ HTTPFixedLengthStreamBuf* HTTPFixedLengthIOS::rdbuf() // -Poco::MemoryPool HTTPFixedLengthInputStream::_pool(sizeof(HTTPFixedLengthInputStream)); - - HTTPFixedLengthInputStream::HTTPFixedLengthInputStream(HTTPSession& session, HTTPFixedLengthStreamBuf::ContentLength length): HTTPFixedLengthIOS(session, length, std::ios::in), std::istream(&_buf) @@ -124,33 +121,10 @@ HTTPFixedLengthInputStream::~HTTPFixedLengthInputStream() } -void* HTTPFixedLengthInputStream::operator new(std::size_t size) -{ - return _pool.get(); -} - - -void HTTPFixedLengthInputStream::operator delete(void* ptr) -{ - try - { - _pool.release(ptr); - } - catch (...) 
- { - poco_unexpected(); - } -} - - // // HTTPFixedLengthOutputStream // - -Poco::MemoryPool HTTPFixedLengthOutputStream::_pool(sizeof(HTTPFixedLengthOutputStream)); - - HTTPFixedLengthOutputStream::HTTPFixedLengthOutputStream(HTTPSession& session, HTTPFixedLengthStreamBuf::ContentLength length): HTTPFixedLengthIOS(session, length, std::ios::out), std::ostream(&_buf) @@ -163,23 +137,4 @@ HTTPFixedLengthOutputStream::~HTTPFixedLengthOutputStream() } -void* HTTPFixedLengthOutputStream::operator new(std::size_t size) -{ - return _pool.get(); -} - - -void HTTPFixedLengthOutputStream::operator delete(void* ptr) -{ - try - { - _pool.release(ptr); - } - catch (...) - { - poco_unexpected(); - } -} - - } } // namespace Poco::Net diff --git a/base/poco/Net/src/HTTPHeaderStream.cpp b/base/poco/Net/src/HTTPHeaderStream.cpp index 8e0091fcbe3..39b9007062d 100644 --- a/base/poco/Net/src/HTTPHeaderStream.cpp +++ b/base/poco/Net/src/HTTPHeaderStream.cpp @@ -26,7 +26,7 @@ namespace Net { HTTPHeaderStreamBuf::HTTPHeaderStreamBuf(HTTPSession& session, openmode mode): - HTTPBasicStreamBuf(HTTPBufferAllocator::BUFFER_SIZE, mode), + HTTPBasicStreamBuf(HTTP_DEFAULT_BUFFER_SIZE, mode), _session(session), _end(false) { @@ -101,10 +101,6 @@ HTTPHeaderStreamBuf* HTTPHeaderIOS::rdbuf() // HTTPHeaderInputStream // - -Poco::MemoryPool HTTPHeaderInputStream::_pool(sizeof(HTTPHeaderInputStream)); - - HTTPHeaderInputStream::HTTPHeaderInputStream(HTTPSession& session): HTTPHeaderIOS(session, std::ios::in), std::istream(&_buf) @@ -116,34 +112,10 @@ HTTPHeaderInputStream::~HTTPHeaderInputStream() { } - -void* HTTPHeaderInputStream::operator new(std::size_t size) -{ - return _pool.get(); -} - - -void HTTPHeaderInputStream::operator delete(void* ptr) -{ - try - { - _pool.release(ptr); - } - catch (...) - { - poco_unexpected(); - } -} - - // // HTTPHeaderOutputStream // - -Poco::MemoryPool HTTPHeaderOutputStream::_pool(sizeof(HTTPHeaderOutputStream)); - - HTTPHeaderOutputStream::HTTPHeaderOutputStream(HTTPSession& session): HTTPHeaderIOS(session, std::ios::out), std::ostream(&_buf) @@ -155,24 +127,4 @@ HTTPHeaderOutputStream::~HTTPHeaderOutputStream() { } - -void* HTTPHeaderOutputStream::operator new(std::size_t size) -{ - return _pool.get(); -} - - -void HTTPHeaderOutputStream::operator delete(void* ptr) -{ - try - { - _pool.release(ptr); - } - catch (...) - { - poco_unexpected(); - } -} - - } } // namespace Poco::Net diff --git a/base/poco/Net/src/HTTPSession.cpp b/base/poco/Net/src/HTTPSession.cpp index cb6fdc25e9a..1e15b24ddae 100644 --- a/base/poco/Net/src/HTTPSession.cpp +++ b/base/poco/Net/src/HTTPSession.cpp @@ -13,8 +13,8 @@ #include "Poco/Net/HTTPSession.h" -#include "Poco/Net/HTTPBufferAllocator.h" #include "Poco/Net/NetException.h" +#include "Poco/Net/HTTPBasicStreamBuf.h" #include @@ -70,7 +70,7 @@ HTTPSession::~HTTPSession() { try { - if (_pBuffer) HTTPBufferAllocator::deallocate(_pBuffer, HTTPBufferAllocator::BUFFER_SIZE); + if (_pBuffer) delete[] _pBuffer; } catch (...) 
{ @@ -177,10 +177,10 @@ void HTTPSession::refill() { if (!_pBuffer) { - _pBuffer = HTTPBufferAllocator::allocate(HTTPBufferAllocator::BUFFER_SIZE); + _pBuffer = new char[HTTP_DEFAULT_BUFFER_SIZE]; } _pCurrent = _pEnd = _pBuffer; - int n = receive(_pBuffer, HTTPBufferAllocator::BUFFER_SIZE); + int n = receive(_pBuffer, HTTP_DEFAULT_BUFFER_SIZE); _pEnd += n; } diff --git a/base/poco/Net/src/HTTPStream.cpp b/base/poco/Net/src/HTTPStream.cpp index 4acb881c4f3..c2f27600569 100644 --- a/base/poco/Net/src/HTTPStream.cpp +++ b/base/poco/Net/src/HTTPStream.cpp @@ -26,7 +26,7 @@ namespace Net { HTTPStreamBuf::HTTPStreamBuf(HTTPSession& session, openmode mode): - HTTPBasicStreamBuf(HTTPBufferAllocator::BUFFER_SIZE, mode), + HTTPBasicStreamBuf(HTTP_DEFAULT_BUFFER_SIZE, mode), _session(session), _mode(mode) { @@ -96,10 +96,6 @@ HTTPStreamBuf* HTTPIOS::rdbuf() // HTTPInputStream // - -Poco::MemoryPool HTTPInputStream::_pool(sizeof(HTTPInputStream)); - - HTTPInputStream::HTTPInputStream(HTTPSession& session): HTTPIOS(session, std::ios::in), std::istream(&_buf) @@ -112,33 +108,11 @@ HTTPInputStream::~HTTPInputStream() } -void* HTTPInputStream::operator new(std::size_t size) -{ - return _pool.get(); -} - - -void HTTPInputStream::operator delete(void* ptr) -{ - try - { - _pool.release(ptr); - } - catch (...) - { - poco_unexpected(); - } -} - - // // HTTPOutputStream // -Poco::MemoryPool HTTPOutputStream::_pool(sizeof(HTTPOutputStream)); - - HTTPOutputStream::HTTPOutputStream(HTTPSession& session): HTTPIOS(session, std::ios::out), std::ostream(&_buf) @@ -150,24 +124,4 @@ HTTPOutputStream::~HTTPOutputStream() { } - -void* HTTPOutputStream::operator new(std::size_t size) -{ - return _pool.get(); -} - - -void HTTPOutputStream::operator delete(void* ptr) -{ - try - { - _pool.release(ptr); - } - catch (...) 
- { - poco_unexpected(); - } -} - - } } // namespace Poco::Net From 20d7cf2bf6e87f65ede724a2b12a406a1d50c20a Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 3 Jul 2023 14:51:19 +0200 Subject: [PATCH 1109/1997] Fix tests --- tests/integration/test_storage_dict/test.py | 5 ++++- tests/integration/test_storage_s3/test.py | 8 ++++---- .../test_storage_s3/test_invalid_env_credentials.py | 1 + 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/tests/integration/test_storage_dict/test.py b/tests/integration/test_storage_dict/test.py index 1ed974f267d..dd4ab5c8d2c 100644 --- a/tests/integration/test_storage_dict/test.py +++ b/tests/integration/test_storage_dict/test.py @@ -10,7 +10,10 @@ def cluster(): try: cluster = ClickHouseCluster(__file__) cluster.add_instance( - "node1", main_configs=["configs/conf.xml"], with_nginx=True + "node1", + main_configs=["configs/conf.xml"], + user_configs=["configs/users.xml"], + with_nginx=True, ) cluster.start() diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index cecc201945c..45437fefa79 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -944,7 +944,7 @@ def test_predefined_connection_configuration(started_cluster): f"CREATE TABLE {name} (id UInt32) ENGINE = S3(s3_conf1, format='CSV')" ) assert ( - "To execute this query it's necessary to have grant USE NAMED COLLECTION ON s3_conf1" + "To execute this query it's necessary to have grant NAMED COLLECTION ON s3_conf1" in error ) error = instance.query_and_get_error( @@ -952,7 +952,7 @@ def test_predefined_connection_configuration(started_cluster): user="user", ) assert ( - "To execute this query it's necessary to have grant USE NAMED COLLECTION ON s3_conf1" + "To execute this query it's necessary to have grant NAMED COLLECTION ON s3_conf1" in error ) @@ -973,12 +973,12 @@ def test_predefined_connection_configuration(started_cluster): error = instance.query_and_get_error("SELECT * FROM s3(no_collection)") assert ( - "To execute this query it's necessary to have grant USE NAMED COLLECTION ON no_collection" + "To execute this query it's necessary to have grant NAMED COLLECTION ON no_collection" in error ) error = instance.query_and_get_error("SELECT * FROM s3(no_collection)", user="user") assert ( - "To execute this query it's necessary to have grant USE NAMED COLLECTION ON no_collection" + "To execute this query it's necessary to have grant NAMED COLLECTION ON no_collection" in error ) instance = started_cluster.instances["dummy"] # has named collection access diff --git a/tests/integration/test_storage_s3/test_invalid_env_credentials.py b/tests/integration/test_storage_s3/test_invalid_env_credentials.py index 0ee679014b1..d91cb7d68f9 100644 --- a/tests/integration/test_storage_s3/test_invalid_env_credentials.py +++ b/tests/integration/test_storage_s3/test_invalid_env_credentials.py @@ -92,6 +92,7 @@ def started_cluster(): "configs/use_environment_credentials.xml", "configs/named_collections.xml", ], + user_configs=["configs/users.xml"], ) logging.info("Starting cluster...") From fd545deba071ffc9c6bde43683ecfbec533e4498 Mon Sep 17 00:00:00 2001 From: velavokr Date: Sun, 2 Jul 2023 17:51:43 +0300 Subject: [PATCH 1110/1997] added a warning on autocalculated parallelizm limits underutilizing CPU cores --- cmake/limit_jobs.cmake | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/cmake/limit_jobs.cmake b/cmake/limit_jobs.cmake index a8f105b8987..100ce921b19 100644 --- 
a/cmake/limit_jobs.cmake +++ b/cmake/limit_jobs.cmake @@ -18,6 +18,9 @@ if (NOT PARALLEL_COMPILE_JOBS AND TOTAL_PHYSICAL_MEMORY AND MAX_COMPILER_MEMORY) if (NOT PARALLEL_COMPILE_JOBS) set (PARALLEL_COMPILE_JOBS 1) endif () + if (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES) + set (PARALLEL_COMPILE_JOBS_LESS TRUE) + endif() endif () if (PARALLEL_COMPILE_JOBS AND (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES)) @@ -33,6 +36,9 @@ if (NOT PARALLEL_LINK_JOBS AND TOTAL_PHYSICAL_MEMORY AND MAX_LINKER_MEMORY) if (NOT PARALLEL_LINK_JOBS) set (PARALLEL_LINK_JOBS 1) endif () + if (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_LINK_JOBS LESS NUMBER_OF_LOGICAL_CORES) + set (PARALLEL_LINK_JOBS_LESS TRUE) + endif() endif () # ThinLTO provides its own parallel linking @@ -56,4 +62,10 @@ if (PARALLEL_COMPILE_JOBS OR PARALLEL_LINK_JOBS) message(STATUS "${CMAKE_CURRENT_SOURCE_DIR}: Have ${TOTAL_PHYSICAL_MEMORY} megabytes of memory. Limiting concurrent linkers jobs to ${PARALLEL_LINK_JOBS} and compiler jobs to ${PARALLEL_COMPILE_JOBS} (system has ${NUMBER_OF_LOGICAL_CORES} logical cores)") + if (PARALLEL_COMPILE_JOBS_LESS) + message(WARNING "The autocalculated compile jobs limit (${PARALLEL_COMPILE_JOBS}) will underutilize CPU cores (${NUMBER_OF_LOGICAL_CORES}). Set PARALLEL_COMPILE_JOBS to override.") + endif() + if (PARALLEL_LINK_JOBS_LESS) + message(WARNING "The autocalculated link jobs limit (${PARALLEL_LINK_JOBS}) will underutilize CPU cores (${NUMBER_OF_LOGICAL_CORES}). Set PARALLEL_LINK_JOBS to override.") + endif() endif () From 711d8db6443c4a87dcb3b7a28df3265079717e54 Mon Sep 17 00:00:00 2001 From: velavokr Date: Sun, 2 Jul 2023 17:59:48 +0300 Subject: [PATCH 1111/1997] better wording --- cmake/limit_jobs.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/limit_jobs.cmake b/cmake/limit_jobs.cmake index 100ce921b19..3a33b3b9989 100644 --- a/cmake/limit_jobs.cmake +++ b/cmake/limit_jobs.cmake @@ -63,9 +63,9 @@ if (PARALLEL_COMPILE_JOBS OR PARALLEL_LINK_JOBS) "${CMAKE_CURRENT_SOURCE_DIR}: Have ${TOTAL_PHYSICAL_MEMORY} megabytes of memory. Limiting concurrent linkers jobs to ${PARALLEL_LINK_JOBS} and compiler jobs to ${PARALLEL_COMPILE_JOBS} (system has ${NUMBER_OF_LOGICAL_CORES} logical cores)") if (PARALLEL_COMPILE_JOBS_LESS) - message(WARNING "The autocalculated compile jobs limit (${PARALLEL_COMPILE_JOBS}) will underutilize CPU cores (${NUMBER_OF_LOGICAL_CORES}). Set PARALLEL_COMPILE_JOBS to override.") + message(WARNING "The autocalculated compile jobs limit (${PARALLEL_COMPILE_JOBS}) underutilizes CPU cores (${NUMBER_OF_LOGICAL_CORES}). Set PARALLEL_COMPILE_JOBS to override.") endif() if (PARALLEL_LINK_JOBS_LESS) - message(WARNING "The autocalculated link jobs limit (${PARALLEL_LINK_JOBS}) will underutilize CPU cores (${NUMBER_OF_LOGICAL_CORES}). Set PARALLEL_LINK_JOBS to override.") + message(WARNING "The autocalculated link jobs limit (${PARALLEL_LINK_JOBS}) underutilizes CPU cores (${NUMBER_OF_LOGICAL_CORES}). 
Set PARALLEL_LINK_JOBS to override.") endif() endif () From d05785a326c5c7e7b0a906e4fda3fd433920d720 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 3 Jul 2023 15:03:26 +0200 Subject: [PATCH 1112/1997] Fix tests --- ...tem_cache_bypass_cache_threshold.reference | 18 ++++++++++- .../02240_filesystem_query_cache.reference | 16 +++++++++- ...40_system_filesystem_cache_table.reference | 32 ------------------- .../02240_system_filesystem_cache_table.sh | 29 ----------------- .../02286_drop_filesystem_cache.reference | 6 ---- .../02286_drop_filesystem_cache.sh | 9 ------ 6 files changed, 32 insertions(+), 78 deletions(-) diff --git a/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.reference b/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.reference index 997105c9da3..4828d9771b3 100644 --- a/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.reference +++ b/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.reference @@ -3,7 +3,23 @@ SYSTEM DROP FILESYSTEM CACHE; SET enable_filesystem_cache_on_write_operations=0; DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_6', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; +CREATE TABLE test (key UInt32, value String) +Engine=MergeTree() +ORDER BY key +SETTINGS min_bytes_for_wide_part = 10485760, + compress_marks=false, + compress_primary_key=false, + disk = disk( + type = cache, + max_size = '128Mi', + path = '/var/lib/clickhouse/${CLICKHOUSE_TEST_UNIQUE_NAME}_cache', + enable_bypass_cache_with_threashold = 1, + bypass_cache_threashold = 100, + cache_on_write_operations = 1, + enable_filesystem_query_cache_limit = 1, + do_not_evict_index_and_mark_files = 0, + delayed_cleanup_interval_ms = 100, + disk = 's3_disk'); INSERT INTO test SELECT number, toString(number) FROM numbers(100); SELECT * FROM test FORMAT Null; SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; diff --git a/tests/queries/0_stateless/02240_filesystem_query_cache.reference b/tests/queries/0_stateless/02240_filesystem_query_cache.reference index 16c4cd1c049..2e59aea73b9 100644 --- a/tests/queries/0_stateless/02240_filesystem_query_cache.reference +++ b/tests/queries/0_stateless/02240_filesystem_query_cache.reference @@ -5,7 +5,21 @@ SET enable_filesystem_cache_on_write_operations=0; SET skip_download_if_exceeds_query_cache=1; SET filesystem_cache_max_download_size=128; DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_4', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; +CREATE TABLE test (key UInt32, value String) +Engine=MergeTree() +ORDER BY key +SETTINGS min_bytes_for_wide_part = 10485760, + compress_marks=false, + compress_primary_key=false, + disk = disk( + type = cache, + max_size = '128Mi', + path = '/var/lib/clickhouse/${CLICKHOUSE_TEST_UNIQUE_NAME}_cache', + cache_on_write_operations= 1, + enable_filesystem_query_cache_limit = 1, + do_not_evict_index_and_mark_files = 0, + delayed_cleanup_interval_ms = 100, + disk = 's3_disk'); SYSTEM DROP FILESYSTEM CACHE; INSERT INTO test SELECT number, toString(number) FROM numbers(100); SELECT * FROM test FORMAT Null; diff --git a/tests/queries/0_stateless/02240_system_filesystem_cache_table.reference 
b/tests/queries/0_stateless/02240_system_filesystem_cache_table.reference index f960b4eb21c..93b6d4de94f 100644 --- a/tests/queries/0_stateless/02240_system_filesystem_cache_table.reference +++ b/tests/queries/0_stateless/02240_system_filesystem_cache_table.reference @@ -16,22 +16,6 @@ DOWNLOADED 0 79 80 DOWNLOADED 0 745 746 2 Expect no cache -Expect cache -DOWNLOADED 0 0 1 -DOWNLOADED 0 79 80 -DOWNLOADED 0 745 746 -3 -Expect cache -DOWNLOADED 0 0 1 -DOWNLOADED 0 79 80 -DOWNLOADED 0 745 746 -3 -Expect no cache -Expect cache -DOWNLOADED 0 79 80 -DOWNLOADED 0 745 746 -2 -Expect no cache Using storage policy: local_cache 0 Expect cache @@ -50,19 +34,3 @@ DOWNLOADED 0 79 80 DOWNLOADED 0 745 746 2 Expect no cache -Expect cache -DOWNLOADED 0 0 1 -DOWNLOADED 0 79 80 -DOWNLOADED 0 745 746 -3 -Expect cache -DOWNLOADED 0 0 1 -DOWNLOADED 0 79 80 -DOWNLOADED 0 745 746 -3 -Expect no cache -Expect cache -DOWNLOADED 0 79 80 -DOWNLOADED 0 745 746 -2 -Expect no cache diff --git a/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh b/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh index c7dc9fbd961..6a94cffea5a 100755 --- a/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh +++ b/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh @@ -45,33 +45,4 @@ for STORAGE_POLICY in 's3_cache' 'local_cache'; do echo 'Expect no cache' ${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache" - ${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS test_02240_storage_policy_3" - ${CLICKHOUSE_CLIENT} --query "CREATE TABLE test_02240_storage_policy_3 (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='${STORAGE_POLICY}_3', min_bytes_for_wide_part = 1000000, compress_marks=false, compress_primary_key=false" - ${CLICKHOUSE_CLIENT} --enable_filesystem_cache_on_write_operations=0 --query "INSERT INTO test_02240_storage_policy_3 SELECT number, toString(number) FROM numbers(100)" - - echo 'Expect cache' - ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP MARK CACHE" - ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy_3 FORMAT Null" - ${CLICKHOUSE_CLIENT} --query "SELECT state, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_begin, file_segment_range_end, size" - ${CLICKHOUSE_CLIENT} --query "SELECT uniqExact(key) FROM system.filesystem_cache"; - - echo 'Expect cache' - ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP MARK CACHE" - ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy_3 FORMAT Null" - ${CLICKHOUSE_CLIENT} --query "SELECT state, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_begin, file_segment_range_end, size" - ${CLICKHOUSE_CLIENT} --query "SELECT uniqExact(key) FROM system.filesystem_cache"; - - echo 'Expect no cache' - ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP FILESYSTEM CACHE" - ${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache" - - echo 'Expect cache' - ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP MARK CACHE" - ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy_3 FORMAT Null" - ${CLICKHOUSE_CLIENT} --query "SELECT state, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_begin, file_segment_range_end, size" - ${CLICKHOUSE_CLIENT} --query "SELECT uniqExact(key) FROM 
system.filesystem_cache"; - - ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP FILESYSTEM CACHE" - echo 'Expect no cache' - ${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache" done diff --git a/tests/queries/0_stateless/02286_drop_filesystem_cache.reference b/tests/queries/0_stateless/02286_drop_filesystem_cache.reference index 62907a7c81c..279e77e795a 100644 --- a/tests/queries/0_stateless/02286_drop_filesystem_cache.reference +++ b/tests/queries/0_stateless/02286_drop_filesystem_cache.reference @@ -6,9 +6,6 @@ Using storage policy: s3_cache 1 1 1 -0 -2 -0 Using storage policy: local_cache 0 2 @@ -17,6 +14,3 @@ Using storage policy: local_cache 1 1 1 -0 -2 -0 diff --git a/tests/queries/0_stateless/02286_drop_filesystem_cache.sh b/tests/queries/0_stateless/02286_drop_filesystem_cache.sh index a6fa0457078..091bca10bcf 100755 --- a/tests/queries/0_stateless/02286_drop_filesystem_cache.sh +++ b/tests/queries/0_stateless/02286_drop_filesystem_cache.sh @@ -72,13 +72,4 @@ for STORAGE_POLICY in 's3_cache' 'local_cache'; do Engine=MergeTree() ORDER BY key SETTINGS storage_policy='${STORAGE_POLICY}_2', min_bytes_for_wide_part = 10485760" - - $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=0 --query "INSERT INTO test_022862 SELECT number, toString(number) FROM numbers(100)" - $CLICKHOUSE_CLIENT --query "SELECT * FROM test_022862 FORMAT Null" - $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" - - $CLICKHOUSE_CLIENT --query "SYSTEM DROP FILESYSTEM CACHE '${STORAGE_POLICY}_2'" - $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" - - $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test_022862" done From d9d98d6286f1bef423167ef35f0278c08426b3a6 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Mon, 3 Jul 2023 13:18:54 +0000 Subject: [PATCH 1113/1997] Fix all_new_function... test --- .../02415_all_new_functions_must_be_documented.reference | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index b5c133988e6..7eb0c57b362 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -363,6 +363,8 @@ in inIgnoreSet indexHint indexOf +initcap +initcapUTF8 initialQueryID initializeAggregation intDiv From fe1cf294fb9fd239d449b4ed464bf7e1c3e2c207 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Mon, 3 Jul 2023 13:25:16 +0000 Subject: [PATCH 1114/1997] Fix ru docs --- docs/ru/sql-reference/functions/string-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/string-functions.md b/docs/ru/sql-reference/functions/string-functions.md index 62697e5e197..b872200f99b 100644 --- a/docs/ru/sql-reference/functions/string-functions.md +++ b/docs/ru/sql-reference/functions/string-functions.md @@ -1120,7 +1120,7 @@ Do Nothing for 2 Minutes 2:00   ## initcapUTF8 {#initcapUTF8} -Как [initcap](#initcap), предпологая, что строка содержит набор байтов, представляющий текст в кодировке UTF-8. +Как [initcap](#initcap), предполагая, что строка содержит набор байтов, представляющий текст в кодировке UTF-8. Не учитывает язык. То есть, для турецкого языка, результат может быть не совсем верным. 
Если длина UTF-8 последовательности байтов различна для верхнего и нижнего регистра кодовой точки, то для этой кодовой точки результат работы может быть некорректным.
 Если строка содержит набор байтов, не являющийся UTF-8, то поведение не определено.

From 425875a7f203a19c500d3e8d8679f7a276315f12 Mon Sep 17 00:00:00 2001
From: Alexander Sapin
Date: Mon, 3 Jul 2023 15:40:39 +0200
Subject: [PATCH 1115/1997] Remove mmap from Allocator

---
 src/Common/Allocator.cpp                  |  26 +---
 src/Common/Allocator.h                    | 180 ++++------------------
 src/Common/Allocator_fwd.h                |   2 +-
 src/Common/CurrentMetrics.cpp             |   2 -
 src/Common/HashTable/HashTableAllocator.h |   2 +-
 5 files changed, 38 insertions(+), 174 deletions(-)

diff --git a/src/Common/Allocator.cpp b/src/Common/Allocator.cpp
index 6779fee58e6..769df70d71e 100644
--- a/src/Common/Allocator.cpp
+++ b/src/Common/Allocator.cpp
@@ -1,26 +1,4 @@
 #include "Allocator.h"

-/** Keep definition of this constant in cpp file; otherwise its value
- * is inlined into allocator code making it impossible to override it
- * in third-party code.
- *
- * Note: extern may seem redundant, but is actually needed due to bug in GCC.
- * See also: https://gcc.gnu.org/legacy-ml/gcc-help/2017-12/msg00021.html
- */
-#ifdef NDEBUG
-    __attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 64 * (1ULL << 32);
-#else
-    /**
-     * In debug build, use small mmap threshold to reproduce more memory
-     * stomping bugs. Along with ASLR it will hopefully detect more issues than
-     * ASan. The program may fail due to the limit on number of memory mappings.
-     *
-     * Not too small to avoid too quick exhaust of memory mappings.
-     */
-    __attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 16384;
-#endif
-
-template class Allocator<false, false>;
-template class Allocator<true, false>;
-template class Allocator<false, true>;
-template class Allocator<true, true>;
+template class Allocator<false>;
+template class Allocator<true>;
diff --git a/src/Common/Allocator.h b/src/Common/Allocator.h
index 5180fbdaa2d..1e77e988326 100644
--- a/src/Common/Allocator.h
+++ b/src/Common/Allocator.h
@@ -36,51 +36,26 @@
 #include

-/// Required for older Darwin builds, that lack definition of MAP_ANONYMOUS
-#ifndef MAP_ANONYMOUS
-#define MAP_ANONYMOUS MAP_ANON
-#endif
-
-/**
- * Many modern allocators (for example, tcmalloc) do not do a mremap for
- * realloc, even in case of large enough chunks of memory. Although this allows
- * you to increase performance and reduce memory consumption during realloc.
- * To fix this, we do mremap manually if the chunk of memory is large enough.
- * The threshold (64 MB) is chosen quite large, since changing the address
- * space is very slow, especially in the case of a large number of threads. We
- * expect that the set of operations mmap/something to do/mremap can only be
- * performed about 1000 times per second.
- *
- * P.S. This is also required, because tcmalloc can not allocate a chunk of
- * memory greater than 16 GB.
- *
- * P.P.S. Note that MMAP_THRESHOLD symbol is intentionally made weak. It allows
- * to override it during linkage when using ClickHouse as a library in
- * third-party applications which may already use own allocator doing mmaps
- * in the implementation of alloc/realloc.
- */
-extern const size_t MMAP_THRESHOLD;
-
 static constexpr size_t MALLOC_MIN_ALIGNMENT = 8;

-namespace CurrentMetrics
-{
-    extern const Metric MMappedAllocs;
-    extern const Metric MMappedAllocBytes;
-}
-
 namespace DB
 {
+
 namespace ErrorCodes
 {
-    extern const int BAD_ARGUMENTS;
     extern const int CANNOT_ALLOCATE_MEMORY;
-    extern const int CANNOT_MUNMAP;
-    extern const int CANNOT_MREMAP;
     extern const int LOGICAL_ERROR;
 }
+
 }

+/** Previously there was code that tried to use manual mmap and mremap (clickhouse_mremap.h) for large allocations/reallocations (64MB+).
+ * Most modern allocators (including jemalloc) don't use mremap, so the idea was to take advantage of the mremap system call for large reallocs.
+ * Actually jemalloc had support for mremap, but it was intentionally removed from the codebase https://github.com/jemalloc/jemalloc/commit/e2deab7a751c8080c2b2cdcfd7b11887332be1bb.
+ * Our performance tests also show that without manual mmap/mremap/munmap ClickHouse is overall faster by about 1-2% and up to 5-7x for some types of queries.
+ * That is why we don't do manual mmap/mremap/munmap here and completely rely on jemalloc for allocations of any size.
+ */

 /** Responsible for allocating / freeing memory. Used, for example, in PODArray, Arena.
  * Also used in hash tables.
  * The interface is different from std::allocator
  * - passing the size into the `free` method;
  * - by the presence of the `alignment` argument;
  * - the possibility of zeroing memory (used in hash tables);
- * - random hint address for mmap
- * - mmap_threshold for using mmap less or more
  */
-template <bool clear_memory_, bool mmap_populate>
+template <bool clear_memory_>
 class Allocator
 {
 public:
@@ -109,7 +82,7 @@ public:
         try
         {
             checkSize(size);
-            freeNoTrack(buf, size);
+            freeNoTrack(buf);
             CurrentMemoryTracker::free(size);
         }
         catch (...)
@@ -132,49 +105,26 @@ public:
             /// nothing to do.
             /// BTW, it's not possible to change alignment while doing realloc.
         }
-        else if (old_size < MMAP_THRESHOLD && new_size < MMAP_THRESHOLD
-            && alignment <= MALLOC_MIN_ALIGNMENT)
+        else if (alignment <= MALLOC_MIN_ALIGNMENT)
         {
             /// Resize malloc'd memory region with no special alignment requirement.
             CurrentMemoryTracker::realloc(old_size, new_size);

             void * new_buf = ::realloc(buf, new_size);
             if (nullptr == new_buf)
-                DB::throwFromErrno(fmt::format("Allocator: Cannot realloc from {} to {}.", ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
+            {
+                DB::throwFromErrno(
+                    fmt::format("Allocator: Cannot realloc from {} to {}.", ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
+            }

             buf = new_buf;
             if constexpr (clear_memory)
                 if (new_size > old_size)
                     memset(reinterpret_cast<char *>(buf) + old_size, 0, new_size - old_size);
         }
-        else if (old_size >= MMAP_THRESHOLD && new_size >= MMAP_THRESHOLD)
-        {
-            /// Resize mmap'd memory region.
-            CurrentMemoryTracker::realloc(old_size, new_size);
-
-            // On apple and freebsd self-implemented mremap used (common/mremap.h)
-            buf = clickhouse_mremap(buf, old_size, new_size, MREMAP_MAYMOVE,
-                PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
-            if (MAP_FAILED == buf)
-                DB::throwFromErrno(fmt::format("Allocator: Cannot mremap memory chunk from {} to {}.",
-                    ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_MREMAP);
-
-            /// No need for zero-fill, because mmap guarantees it.
-        }
-        else if (new_size < MMAP_THRESHOLD)
-        {
-            /// Small allocs that requires a copy. Assume there's enough memory in system. Call CurrentMemoryTracker once.
- CurrentMemoryTracker::realloc(old_size, new_size); - - void * new_buf = allocNoTrack(new_size, alignment); - memcpy(new_buf, buf, std::min(old_size, new_size)); - freeNoTrack(buf, old_size); - buf = new_buf; - } else { /// Big allocs that requires a copy. MemoryTracker is called inside 'alloc', 'free' methods. - void * new_buf = alloc(new_size, alignment); memcpy(new_buf, buf, std::min(old_size, new_size)); free(buf, old_size); @@ -192,83 +142,38 @@ protected: static constexpr bool clear_memory = clear_memory_; - // Freshly mmapped pages are copy-on-write references to a global zero page. - // On the first write, a page fault occurs, and an actual writable page is - // allocated. If we are going to use this memory soon, such as when resizing - // hash tables, it makes sense to pre-fault the pages by passing - // MAP_POPULATE to mmap(). This takes some time, but should be faster - // overall than having a hot loop interrupted by page faults. - // It is only supported on Linux. - static constexpr int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS -#if defined(OS_LINUX) - | (mmap_populate ? MAP_POPULATE : 0) -#endif - ; - private: void * allocNoTrack(size_t size, size_t alignment) { void * buf; - size_t mmap_min_alignment = ::getPageSize(); - - if (size >= MMAP_THRESHOLD) + if (alignment <= MALLOC_MIN_ALIGNMENT) { - if (alignment > mmap_min_alignment) - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, - "Too large alignment {}: more than page size when allocating {}.", - ReadableSize(alignment), ReadableSize(size)); + if constexpr (clear_memory) + buf = ::calloc(size, 1); + else + buf = ::malloc(size); - buf = mmap(getMmapHint(), size, PROT_READ | PROT_WRITE, - mmap_flags, -1, 0); - if (MAP_FAILED == buf) - DB::throwFromErrno(fmt::format("Allocator: Cannot mmap {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); - /// No need for zero-fill, because mmap guarantees it. 
-
-            CurrentMetrics::add(CurrentMetrics::MMappedAllocs);
-            CurrentMetrics::add(CurrentMetrics::MMappedAllocBytes, size);
+            if (nullptr == buf)
+                DB::throwFromErrno(fmt::format("Allocator: Cannot malloc {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
         }
         else
         {
-            if (alignment <= MALLOC_MIN_ALIGNMENT)
-            {
-                if constexpr (clear_memory)
-                    buf = ::calloc(size, 1);
-                else
-                    buf = ::malloc(size);
+            buf = nullptr;
+            int res = posix_memalign(&buf, alignment, size);

-                if (nullptr == buf)
-                    DB::throwFromErrno(fmt::format("Allocator: Cannot malloc {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
-            }
-            else
-            {
-                buf = nullptr;
-                int res = posix_memalign(&buf, alignment, size);
+            if (0 != res)
+                DB::throwFromErrno(fmt::format("Cannot allocate memory (posix_memalign) {}.", ReadableSize(size)),
+                                   DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, res);

-                if (0 != res)
-                    DB::throwFromErrno(fmt::format("Cannot allocate memory (posix_memalign) {}.", ReadableSize(size)),
-                                       DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, res);
-
-                if constexpr (clear_memory)
-                    memset(buf, 0, size);
-            }
+            if constexpr (clear_memory)
+                memset(buf, 0, size);
         }
         return buf;
     }

-    void freeNoTrack(void * buf, size_t size)
+    void freeNoTrack(void * buf)
     {
-        if (size >= MMAP_THRESHOLD)
-        {
-            if (0 != munmap(buf, size))
-                DB::throwFromErrno(fmt::format("Allocator: Cannot munmap {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_MUNMAP);
-
-            CurrentMetrics::sub(CurrentMetrics::MMappedAllocs);
-            CurrentMetrics::sub(CurrentMetrics::MMappedAllocBytes, size);
-        }
-        else
-        {
-            ::free(buf);
-        }
+        ::free(buf);
     }

     void checkSize(size_t size)
@@ -277,21 +182,6 @@ private:
         if (size >= 0x8000000000000000ULL)
             throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Too large size ({}) passed to allocator. It indicates an error.", size);
     }
-
-#ifndef NDEBUG
-    /// In debug builds, request mmap() at random addresses (a kind of ASLR), to
-    /// reproduce more memory stomping bugs. Note that Linux doesn't do it by
-    /// default. This may lead to worse TLB performance.
-    void * getMmapHint()
-    {
-        return reinterpret_cast<void *>(std::uniform_int_distribution<intptr_t>(0x100000000000UL, 0x700000000000UL)(thread_local_rng));
-    }
-#else
-    void * getMmapHint()
-    {
-        return nullptr;
-    }
-#endif
 };

@@ -367,7 +257,5 @@ constexpr size_t allocatorInitialBytes;

-extern template class Allocator;
-extern template class Allocator;
-extern template class Allocator;
+extern template class Allocator;
+extern template class Allocator;
diff --git a/src/Common/Allocator_fwd.h b/src/Common/Allocator_fwd.h
index a13a4398654..a96bc2a503b 100644
--- a/src/Common/Allocator_fwd.h
+++ b/src/Common/Allocator_fwd.h
@@ -3,7 +3,7 @@
  * This file provides forward declarations for Allocator.
  */
-template <bool clear_memory_, bool mmap_populate>
+template <bool clear_memory_, bool populate>
 class Allocator;

 template
diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp
index f2ddb7a84c0..2f716cfb7ef 100644
--- a/src/Common/CurrentMetrics.cpp
+++ b/src/Common/CurrentMetrics.cpp
@@ -171,8 +171,6 @@
     M(PartsInMemory, "In-memory parts.") \
     M(MMappedFiles, "Total number of mmapped files.") \
     M(MMappedFileBytes, "Sum size of mmapped file regions.") \
-    M(MMappedAllocs, "Total number of mmapped allocations") \
-    M(MMappedAllocBytes, "Sum bytes of mmapped allocations") \
     M(AsynchronousReadWait, "Number of threads waiting for asynchronous read.") \
     M(PendingAsyncInsert, "Number of asynchronous inserts that are waiting for flush.") \
     M(KafkaConsumers, "Number of active Kafka consumers") \
diff --git a/src/Common/HashTable/HashTableAllocator.h b/src/Common/HashTable/HashTableAllocator.h
index 47e3fdfc4b6..8252265111d 100644
--- a/src/Common/HashTable/HashTableAllocator.h
+++ b/src/Common/HashTable/HashTableAllocator.h
@@ -8,7 +8,7 @@
  * table, so it makes sense to pre-fault the pages so that page faults don't
  * interrupt the resize loop. Set the allocator parameter accordingly.
  */
-using HashTableAllocator = Allocator<true /* clear_memory */, true /* mmap_populate */>;
+using HashTableAllocator = Allocator<true /* clear_memory */, true /* populate */>;

 template
 using HashTableAllocatorWithStackMemory = AllocatorWithStackMemory;
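With the mmap path gone, the allocation policy above reduces to two branches: plain malloc/calloc for the default alignment, and posix_memalign otherwise. The following is a minimal standalone sketch of that decision, illustrative only (simplified error handling, hypothetical names, and posix_memalign availability is assumed on POSIX systems); it is not the actual ClickHouse implementation:

```cpp
#include <cstdlib>
#include <cstring>
#include <new>

static constexpr size_t MIN_MALLOC_ALIGNMENT = 8; // assumption: malloc guarantees at least this

// Sketch: allocate `size` bytes, optionally zeroed, honoring `alignment`.
void * alloc_sketch(size_t size, size_t alignment, bool clear_memory)
{
    void * buf = nullptr;
    if (alignment <= MIN_MALLOC_ALIGNMENT)
    {
        // calloc both allocates and zeroes; malloc leaves the contents undefined.
        buf = clear_memory ? ::calloc(size, 1) : ::malloc(size);
        if (!buf)
            throw std::bad_alloc{};
    }
    else
    {
        // posix_memalign requires alignment to be a power of two multiple of sizeof(void *).
        if (::posix_memalign(&buf, alignment, size) != 0)
            throw std::bad_alloc{};
        if (clear_memory)
            ::memset(buf, 0, size); // aligned path has no calloc equivalent, so zero manually
    }
    return buf;
}
```

The same two branches appear in allocNoTrack() above; the tracked alloc/free entry points additionally report the size to the memory tracker.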
From a74bc6190da07904116ffa36f6ee1340afb74c91 Mon Sep 17 00:00:00 2001
From: Alexander Sapin
Date: Mon, 3 Jul 2023 15:46:30 +0200
Subject: [PATCH 1116/1997] Implement suggestion

---
 base/poco/Net/include/Poco/Net/HTTPSession.h |  2 +-
 base/poco/Net/src/HTTPSession.cpp            | 16 ++++------------
 2 files changed, 5 insertions(+), 13 deletions(-)

diff --git a/base/poco/Net/include/Poco/Net/HTTPSession.h b/base/poco/Net/include/Poco/Net/HTTPSession.h
index d0045025f5f..934b34be5d5 100644
--- a/base/poco/Net/include/Poco/Net/HTTPSession.h
+++ b/base/poco/Net/include/Poco/Net/HTTPSession.h
@@ -192,7 +192,7 @@ namespace Net
         HTTPSession & operator=(const HTTPSession &);

         StreamSocket _socket;
-        char * _pBuffer;
+        std::unique_ptr<char[]> _pBuffer;
         char * _pCurrent;
         char * _pEnd;
         bool _keepAlive;
diff --git a/base/poco/Net/src/HTTPSession.cpp b/base/poco/Net/src/HTTPSession.cpp
index 1e15b24ddae..d2663baaf9f 100644
--- a/base/poco/Net/src/HTTPSession.cpp
+++ b/base/poco/Net/src/HTTPSession.cpp
@@ -68,14 +68,6 @@ HTTPSession::HTTPSession(const StreamSocket& socket, bool keepAlive):

 HTTPSession::~HTTPSession()
 {
-    try
-    {
-        if (_pBuffer) delete[] _pBuffer;
-    }
-    catch (...)
-    {
-        poco_unexpected();
-    }
     try
     {
         close();
@@ -177,10 +169,10 @@ void HTTPSession::refill()
 {
     if (!_pBuffer)
     {
-        _pBuffer = new char[HTTP_DEFAULT_BUFFER_SIZE];
+        _pBuffer = std::make_unique<char[]>(HTTP_DEFAULT_BUFFER_SIZE);
     }
-    _pCurrent = _pEnd = _pBuffer;
-    int n = receive(_pBuffer, HTTP_DEFAULT_BUFFER_SIZE);
+    _pCurrent = _pEnd = _pBuffer.get();
+    int n = receive(_pBuffer.get(), HTTP_DEFAULT_BUFFER_SIZE);
     _pEnd += n;
 }

@@ -199,7 +191,7 @@ void HTTPSession::connect(const SocketAddress& address)
     _socket.setNoDelay(true);
     // There may be leftover data from a previous (failed) request in the buffer,
     // so we clear it.
-    _pCurrent = _pEnd = _pBuffer;
+    _pCurrent = _pEnd = _pBuffer.get();
 }
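The patch above swaps HTTPSession's manually managed char buffer for std::unique_ptr<char[]>, which makes the hand-written delete[] (and its try/catch in the destructor) unnecessary. A reduced standalone sketch of the same lazily-allocated-buffer pattern, with placeholder names and size, looks like this:

```cpp
#include <cstddef>
#include <memory>

class BufferedSession
{
public:
    // Allocate the buffer on first use; std::unique_ptr<char[]> calls delete[]
    // automatically, so no hand-written destructor is needed.
    void refill(size_t default_buffer_size = 8192)
    {
        if (!buffer)
            buffer = std::make_unique<char[]>(default_buffer_size);
        current = end = buffer.get();
        // ... read into buffer.get() and advance `end` by the bytes received ...
    }

private:
    std::unique_ptr<char[]> buffer; // owns the storage; raw cursors below only point into it
    char * current = nullptr;
    char * end = nullptr;
};
```

Note the design choice the patch preserves: only the owning pointer changes type, while the read cursors stay raw pointers obtained via get().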
From af603c2cc6455b31aba1a70c967c35b083fe6c0a Mon Sep 17 00:00:00 2001
From: Nikita Mikhaylov
Date: Mon, 3 Jul 2023 16:40:04 +0200
Subject: [PATCH 1117/1997] Fixed stack overflow on Field destruction

---
 ..._function_state_deserialization_fuzzer.cpp |  1 +
 .../fuzzers/delta_decompress_fuzzer.cpp       |  2 +-
 .../double_delta_decompress_fuzzer.cpp        |  2 +-
 .../fuzzers/encrypted_decompress_fuzzer.cpp   |  4 +--
 .../fuzzers/lz4_decompress_fuzzer.cpp         |  4 +--
 src/Core/Field.h                              | 33 ++++++++++++++++++-
 src/DataTypes/DataTypeFactory.cpp             |  2 +-
 src/Functions/DateTimeTransforms.h            |  4 ++-
 8 files changed, 43 insertions(+), 9 deletions(-)

diff --git a/src/AggregateFunctions/fuzzers/aggregate_function_state_deserialization_fuzzer.cpp b/src/AggregateFunctions/fuzzers/aggregate_function_state_deserialization_fuzzer.cpp
index 2ea01e1d5bc..3db1afb7a92 100644
--- a/src/AggregateFunctions/fuzzers/aggregate_function_state_deserialization_fuzzer.cpp
+++ b/src/AggregateFunctions/fuzzers/aggregate_function_state_deserialization_fuzzer.cpp
@@ -8,6 +8,7 @@
 #include
 #include
+#include
 #include

diff --git a/src/Compression/fuzzers/delta_decompress_fuzzer.cpp b/src/Compression/fuzzers/delta_decompress_fuzzer.cpp
index b039777da15..451606843e2 100644
--- a/src/Compression/fuzzers/delta_decompress_fuzzer.cpp
+++ b/src/Compression/fuzzers/delta_decompress_fuzzer.cpp
@@ -34,7 +34,7 @@ try
     DB::Memory<> memory;
     memory.resize(output_buffer_size + codec->getAdditionalSizeAtTheEndOfBuffer());

-    codec->doDecompressData(reinterpret_cast<const char *>(data), size, memory.data(), output_buffer_size);
+    codec->doDecompressData(reinterpret_cast<const char *>(data), static_cast<UInt32>(size), memory.data(), static_cast<UInt32>(output_buffer_size));

     return 0;
 }
diff --git a/src/Compression/fuzzers/double_delta_decompress_fuzzer.cpp b/src/Compression/fuzzers/double_delta_decompress_fuzzer.cpp
index f9822daa3bd..f7e685d68ad 100644
--- a/src/Compression/fuzzers/double_delta_decompress_fuzzer.cpp
+++ b/src/Compression/fuzzers/double_delta_decompress_fuzzer.cpp
@@ -34,7 +34,7 @@ try
     DB::Memory<> memory;
     memory.resize(output_buffer_size + codec->getAdditionalSizeAtTheEndOfBuffer());

-    codec->doDecompressData(reinterpret_cast<const char *>(data), size, memory.data(), output_buffer_size);
+    codec->doDecompressData(reinterpret_cast<const char *>(data), static_cast<UInt32>(size), memory.data(), static_cast<UInt32>(output_buffer_size));

     return 0;
 }
diff --git a/src/Compression/fuzzers/encrypted_decompress_fuzzer.cpp b/src/Compression/fuzzers/encrypted_decompress_fuzzer.cpp
index 3e3d0e164fe..207cce21e3b 100644
--- a/src/Compression/fuzzers/encrypted_decompress_fuzzer.cpp
+++ b/src/Compression/fuzzers/encrypted_decompress_fuzzer.cpp
@@ -292,10 +292,10 @@ try
     DB::Memory<> memory;
     memory.resize(input.size() + codec_128->getAdditionalSizeAtTheEndOfBuffer());

-    codec_128->doDecompressData(input.data(), input.size(), memory.data(), input.size() - 31);
+    codec_128->doDecompressData(input.data(), static_cast<UInt32>(input.size()), memory.data(), static_cast<UInt32>(input.size() - 31));

     memory.resize(input.size() + codec_128->getAdditionalSizeAtTheEndOfBuffer());
-    codec_256->doDecompressData(input.data(), input.size(), memory.data(), input.size() - 31);
+    codec_256->doDecompressData(input.data(), static_cast<UInt32>(input.size()), memory.data(), static_cast<UInt32>(input.size() - 31));

     return 0;
 }
 catch (...)
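The fuzzer changes above and below make the size_t-to-UInt32 narrowing at the doDecompressData call sites explicit. As a hedged aside, an explicit narrowing helper of the following shape can also verify that the value fits; uint32_t stands in for ClickHouse's UInt32, and this helper is not part of the patch:

```cpp
#include <cstddef>
#include <cstdint>
#include <limits>
#include <stdexcept>

// Narrow a size_t to uint32_t, failing loudly instead of silently truncating.
uint32_t narrow_to_u32(size_t value)
{
    if (value > std::numeric_limits<uint32_t>::max())
        throw std::length_error("value does not fit into 32 bits");
    return static_cast<uint32_t>(value);
}
```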
diff --git a/src/Compression/fuzzers/lz4_decompress_fuzzer.cpp b/src/Compression/fuzzers/lz4_decompress_fuzzer.cpp
index 85c4c9bd329..f6d4c51f18b 100644
--- a/src/Compression/fuzzers/lz4_decompress_fuzzer.cpp
+++ b/src/Compression/fuzzers/lz4_decompress_fuzzer.cpp
@@ -24,7 +24,7 @@ try
         return 0;

     const auto * p = reinterpret_cast<const AuxiliaryRandomData *>(data);
-    auto codec = DB::getCompressionCodecLZ4(p->level);
+    auto codec = DB::getCompressionCodecLZ4(static_cast<int>(p->level));

     size_t output_buffer_size = p->decompressed_size % 65536;
     size -= sizeof(AuxiliaryRandomData);
@@ -37,7 +37,7 @@ try
     DB::Memory<> memory;
     memory.resize(output_buffer_size + LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER);

-    codec->doDecompressData(reinterpret_cast<const char *>(data), size, memory.data(), output_buffer_size);
+    codec->doDecompressData(reinterpret_cast<const char *>(data), static_cast<UInt32>(size), memory.data(), static_cast<UInt32>(output_buffer_size));

     return 0;
 }
diff --git a/src/Core/Field.h b/src/Core/Field.h
index ef1bd9a895d..8ee93d08411 100644
--- a/src/Core/Field.h
+++ b/src/Core/Field.h
@@ -28,6 +28,7 @@ namespace ErrorCodes
     extern const int NOT_IMPLEMENTED;
     extern const int LOGICAL_ERROR;
     extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+    extern const int TOO_DEEP_RECURSION;
 }

 constexpr Null NEGATIVE_INFINITY{Null::Value::NegativeInfinity};
@@ -291,6 +292,11 @@ decltype(auto) castToNearestFieldType(T && x)
   */
 #define DBMS_MIN_FIELD_SIZE 32

+#if defined(SANITIZER) || !defined(NDEBUG)
+    #define DBMS_MAX_NESTED_FIELD_DEPTH 64
+#else
+    #define DBMS_MAX_NESTED_FIELD_DEPTH 256
+#endif

 /** Discriminated union of several types.
   * Made for replacement of `boost::variant`
@@ -671,6 +677,27 @@ private:

     Types::Which which;

+    /// Field may contain a Field inside when it stores an Array, Tuple, Map or Object.
+    /// As a result, a stack overflow on destruction is possible,
+    /// and to avoid it we need to count the depth and have a threshold.
+    size_t nested_field_depth = 0;
+
+    /// Check whether T is already a Field with composite underlying type.
+    template <typename Original>
+    size_t calculateAndCheckFieldDepth(Original && x)
+    {
+        size_t result = 0;
+
+        if constexpr (std::is_same_v<std::decay_t<Original>, Array> || std::is_same_v<std::decay_t<Original>, Tuple> || std::is_same_v<std::decay_t<Original>, Map>)
+            std::for_each(x.begin(), x.end(), [this, &x](auto & elem){ nested_field_depth = std::max(nested_field_depth, elem.nested_field_depth); });
+        else if constexpr (std::is_same_v<std::decay_t<Original>, Object>)
+            std::for_each(x.begin(), x.end(), [this, &x](auto & elem){ nested_field_depth = std::max(nested_field_depth, elem.second.nested_field_depth); });
+
+        if (result >= DBMS_MAX_NESTED_FIELD_DEPTH)
+            throw Exception(ErrorCodes::TOO_DEEP_RECURSION, "Too deep Field");
+
+        return result;
+    }

     /// Assuming there was no allocated state or it was deallocated (see destroy).
     template <typename T>
@@ -686,6 +713,8 @@ private:
         using StorageType = NearestFieldType<std::decay_t<T>>;
         new (&storage) StorageType(std::forward<T>(x));
         which = TypeToEnum<StorageType>::value;
+        /// Incrementing the depth since we create a new Field.
+        nested_field_depth = calculateAndCheckFieldDepth(x) + 1;
     }

     /// Assuming same types.
@@ -696,6 +725,8 @@ private:
         using JustT = std::decay_t<T>;
         assert(which == TypeToEnum<JustT>::value);
         JustT * MAY_ALIAS ptr = reinterpret_cast<JustT *>(&storage);
         *ptr = std::forward<T>(x);
+        /// Do not increment the depth, because it is an assignment.
+        nested_field_depth = calculateAndCheckFieldDepth(x);
     }

     template <typename T>
@@ -781,7 +812,7 @@
-    void destroy()
+    ALWAYS_INLINE void destroy()
     {
         T * MAY_ALIAS ptr = reinterpret_cast<T *>(&storage);
         ptr->~T();
diff --git a/src/DataTypes/DataTypeFactory.cpp b/src/DataTypes/DataTypeFactory.cpp
index 415f24d8151..89dacae59ff 100644
--- a/src/DataTypes/DataTypeFactory.cpp
+++ b/src/DataTypes/DataTypeFactory.cpp
@@ -62,7 +62,7 @@ DataTypePtr DataTypeFactory::getImpl(const String & full_name) const
     }
     else
     {
-        ast = parseQuery(parser, full_name.data(), full_name.data() + full_name.size(), "data type", false, data_type_max_parse_depth);
+        ast = parseQuery(parser, full_name.data(), full_name.data() + full_name.size(), "data type", DBMS_DEFAULT_MAX_QUERY_SIZE, data_type_max_parse_depth);
     }

     return getImpl(ast);
diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h
index 019e0c42cde..0aa495dace2 100644
--- a/src/Functions/DateTimeTransforms.h
+++ b/src/Functions/DateTimeTransforms.h
@@ -1449,8 +1449,10 @@ struct Transformer
             if constexpr (std::is_same_v || std::is_same_v)
             {
+# pragma clang diagnostic push
+# pragma clang diagnostic ignored "-Wimplicit-const-int-float-conversion"
                 bool is_valid_input = vec_from[i] >= 0 && vec_from[i] <= 0xFFFFFFFFL;
-
+# pragma clang diagnostic pop
                 if (!is_valid_input)
                 {
                     if constexpr (std::is_same_v)
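The depth accounting added to Field above can be illustrated in isolation: cache one plus the maximum child depth at construction, and refuse to build values past a limit, so the recursive destructor's stack usage stays bounded. This is a simplified model, not the real Field; the limit mirrors DBMS_MAX_NESTED_FIELD_DEPTH as an assumption:

```cpp
#include <algorithm>
#include <cstddef>
#include <stdexcept>
#include <vector>

constexpr size_t MAX_NESTED_DEPTH = 256; // assumption mirroring DBMS_MAX_NESTED_FIELD_DEPTH

struct Node
{
    std::vector<Node> children;
    size_t depth = 1; // a leaf has depth 1

    explicit Node(std::vector<Node> children_ = {}) : children(std::move(children_))
    {
        for (const auto & child : children)
            depth = std::max(depth, child.depth + 1);
        // Reject overly deep values up front: destruction recurses once per level,
        // so bounding the depth bounds the destructor's stack usage.
        if (depth > MAX_NESTED_DEPTH)
            throw std::length_error("too deep value");
    }
};
```

Because every constructed Node already satisfies the bound, no check is needed on the destruction path at all, which is the same design choice the patch makes.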
From 2f287703ddacb833c8cb03c497c65fd87e4f888e Mon Sep 17 00:00:00 2001
From: Alexander Sapin
Date: Mon, 3 Jul 2023 16:59:43 +0200
Subject: [PATCH 1118/1997] Fix test

---
 tests/queries/0_stateless/01778_mmap_cache_infra.reference | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/queries/0_stateless/01778_mmap_cache_infra.reference b/tests/queries/0_stateless/01778_mmap_cache_infra.reference
index ed365028ecc..0e82b277bc1 100644
--- a/tests/queries/0_stateless/01778_mmap_cache_infra.reference
+++ b/tests/queries/0_stateless/01778_mmap_cache_infra.reference
@@ -2,7 +2,5 @@ CreatedReadBufferMMap
 CreatedReadBufferMMapFailed
 MMappedFileCacheHits
 MMappedFileCacheMisses
-MMappedAllocBytes
-MMappedAllocs
 MMappedFileBytes
 MMappedFiles

From ebd7ecb230a101bc5bed7d3ad79c9f08cba523f5 Mon Sep 17 00:00:00 2001
From: vdimir
Date: Thu, 25 May 2023 14:44:24 +0000
Subject: [PATCH 1119/1997] Remove unstable queries from
 performance/join_set_filter

---
 tests/performance/join_set_filter.xml | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/tests/performance/join_set_filter.xml b/tests/performance/join_set_filter.xml
index 7f7804853fc..712b60ae12f 100644
--- a/tests/performance/join_set_filter.xml
+++ b/tests/performance/join_set_filter.xml
@@ -34,9 +34,6 @@
     <query>SELECT * FROM t1 JOIN t2 ON t1.x = t2.x WHERE greater(t1.y, {table_size} - 10000)</query>
     <query>SELECT * FROM t2 JOIN t1 ON t1.x = t2.x WHERE greater(t1.y, {table_size} - 10000)</query>

-    <query>SELECT * FROM t1 JOIN t2 ON t1.x = t2.x WHERE t1.y % 100 = 0</query>
-    <query>SELECT * FROM t2 JOIN t1 ON t1.x = t2.x WHERE t1.y % 100 = 0</query>
-
     <query>SELECT * FROM t1 JOIN t2 ON t1.x = t2.x WHERE t1.y % 1000 = 0</query>
     <query>SELECT * FROM t2 JOIN t1 ON t1.x = t2.x WHERE t1.y % 1000 = 0</query>

From 9ea5d929a5f80466d0be721266302b67c4e54fa0 Mon Sep 17 00:00:00 2001
From: vdimir
Date: Thu, 1 Jun 2023 13:45:58 +0200
Subject: [PATCH 1120/1997] Update tests/performance/join_set_filter.xml

---
 tests/performance/join_set_filter.xml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/performance/join_set_filter.xml b/tests/performance/join_set_filter.xml
index 712b60ae12f..38ff9beb827 100644
--- a/tests/performance/join_set_filter.xml
+++ b/tests/performance/join_set_filter.xml
@@ -33,7 +33,8 @@
     <query>SELECT * FROM t1 JOIN t2 ON t1.x = t2.x WHERE greater(t1.y, {table_size} - 10000)</query>
     <query>SELECT * FROM t2 JOIN t1 ON t1.x = t2.x WHERE greater(t1.y, {table_size} - 10000)</query>
-
+    <query>SELECT * FROM t1 JOIN t2 ON t1.x = t2.x WHERE t1.y % 100 = 0 AND t1.y < 10000</query>
+    <query>SELECT * FROM t2 JOIN t1 ON t1.x = t2.x WHERE t1.y % 100 = 0 AND t1.y < 10000</query>
     <query>SELECT * FROM t1 JOIN t2 ON t1.x = t2.x WHERE t1.y % 1000 = 0</query>
     <query>SELECT * FROM t2 JOIN t1 ON t1.x = t2.x WHERE t1.y % 1000 = 0</query>

From 737cff7e5703941e1e05b4f9c70b18f1f508f2f8 Mon Sep 17 00:00:00 2001
From: vdimir
Date: Mon, 3 Jul 2023 14:42:58 +0200
Subject: [PATCH 1121/1997] Remove whole join_set_filter.xml, will resubmit

---
 tests/performance/join_set_filter.xml | 43 ---------------
 1 file changed, 43 deletions(-)
 delete mode 100644 tests/performance/join_set_filter.xml

diff --git a/tests/performance/join_set_filter.xml b/tests/performance/join_set_filter.xml
deleted file mode 100644
index 38ff9beb827..00000000000
--- a/tests/performance/join_set_filter.xml
+++ /dev/null
@@ -1,43 +0,0 @@
-<test>
-
-    <substitutions>
-        <substitution>
-            <name>table_size</name>
-            <values>
-                <value>100000000</value>
-            </values>
-        </substitution>
-    </substitutions>
-
-    <settings>
-        <join_algorithm>full_sorting_merge</join_algorithm>
-    </settings>
-
-    <create_query>
-        CREATE TABLE t1 (x UInt64, y UInt64) ENGINE = MergeTree ORDER BY y
-        AS SELECT
-            sipHash64(number, 't1_x') % {table_size} AS x,
-            sipHash64(number, 't1_y') % {table_size} AS y
-        FROM numbers({table_size})
-    </create_query>
-
-    <create_query>
-        CREATE TABLE t2 (x UInt64, y UInt64) ENGINE = MergeTree ORDER BY y
-        AS SELECT
-            sipHash64(number, 't2_x') % {table_size} AS x,
-            sipHash64(number, 't2_y') % {table_size} AS y
-        FROM numbers({table_size})
-    </create_query>
-
-    <query>SELECT * FROM t1 JOIN t2 ON t1.x = t2.x WHERE less(t1.y, 10000)</query>
-    <query>SELECT * FROM t2 JOIN t1 ON t1.x = t2.x WHERE less(t1.y, 10000)</query>
-
-    <query>SELECT * FROM t1 JOIN t2 ON t1.x = t2.x WHERE greater(t1.y, {table_size} - 10000)</query>
-    <query>SELECT * FROM t2 JOIN t1 ON t1.x = t2.x WHERE greater(t1.y, {table_size} - 10000)</query>
-    <query>SELECT * FROM t1 JOIN t2 ON t1.x = t2.x WHERE t1.y % 100 = 0 AND t1.y < 10000</query>
-    <query>SELECT * FROM t2 JOIN t1 ON t1.x = t2.x WHERE t1.y % 100 = 0 AND t1.y < 10000</query>
-    <query>SELECT * FROM t1 JOIN t2 ON t1.x = t2.x WHERE t1.y % 1000 = 0</query>
-    <query>SELECT * FROM t2 JOIN t1 ON t1.x = t2.x WHERE t1.y % 1000 = 0</query>
-
-    <drop_query>DROP TABLE IF EXISTS t1</drop_query>
-    <drop_query>DROP TABLE IF EXISTS t2</drop_query>
-</test>

From ccda3c3a6e25a8d9b2245631691e4fe892b21f5a Mon Sep 17 00:00:00 2001
From: Igor Nikonov
Date: Mon, 3 Jul 2023 15:03:40 +0000
Subject: [PATCH 1122/1997] Try to fix logical error #51703

---
 src/Interpreters/GraceHashJoin.cpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/Interpreters/GraceHashJoin.cpp b/src/Interpreters/GraceHashJoin.cpp
index 4218a8ea4e1..4bfe0315138 100644
--- a/src/Interpreters/GraceHashJoin.cpp
+++ b/src/Interpreters/GraceHashJoin.cpp
@@ -638,10 +638,9 @@ void GraceHashJoin::addJoinedBlockImpl(Block block)
     if (current_block.rows() > 0)
     {
         std::lock_guard lock(hash_join_mutex);
-        auto current_buckets = getCurrentBuckets();
-        if (!isPowerOf2(current_buckets.size())) [[unlikely]]
+        if (!isPowerOf2(buckets_snapshot.size())) [[unlikely]]
         {
-            throw Exception(ErrorCodes::LOGICAL_ERROR, "Broken buckets. its size({}) is not power of 2", current_buckets.size());
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "Broken buckets. its size({}) is not power of 2", buckets_snapshot.size());
         }
         if (!hash_join)
             hash_join = makeInMemoryJoin();
@@ -654,7 +653,7 @@
         current_block = {};

         // Must use the latest buckets snapshot in case that it has been rehashed by other threads.
-        buckets_snapshot = rehashBuckets(current_buckets.size() * 2);
+        buckets_snapshot = rehashBuckets(buckets_snapshot.size() * 2);

         auto right_blocks = hash_join->releaseJoinedBlocks(/* restructure */ false);
         hash_join = nullptr;
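The invariant defended in the patch above is that the number of buckets is always a power of two, and that growth doubles it against one consistent snapshot. A single-threaded sketch of that invariant follows (the real code takes a mutex, and the buckets may be rehashed concurrently by other threads, which is exactly why it rereads the snapshot):

```cpp
#include <cstddef>
#include <stdexcept>

// True for 1, 2, 4, 8, ... : a power of two has exactly one bit set.
bool is_power_of_2(size_t n)
{
    return n != 0 && (n & (n - 1)) == 0;
}

size_t grow_buckets(size_t current)
{
    if (!is_power_of_2(current))
        throw std::logic_error("broken buckets: size is not a power of 2");
    return current * 2; // doubling preserves the power-of-two invariant
}
```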
From f316914aed71f2e90caf0a6af707cb5756bdbec8 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Mon, 3 Jul 2023 15:29:57 +0000
Subject: [PATCH 1123/1997] Fix another one key.

---
 .../Optimizations/liftUpFunctions.cpp         |  10 +-
 ...nd_columns_with_same_names_bug_2.reference |   3 +
 ...ting_and_columns_with_same_names_bug_2.sql | 107 ++++++++++++++++++
 3 files changed, 114 insertions(+), 6 deletions(-)
 create mode 100644 tests/integration/../queries/0_stateless/02789_functions_after_sorting_and_columns_with_same_names_bug_2.reference
 create mode 100644 tests/queries/0_stateless/02789_functions_after_sorting_and_columns_with_same_names_bug_2.sql

diff --git a/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp b/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp
index b2c3f3b4a6d..47b4e31ed32 100644
--- a/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp
+++ b/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp
@@ -29,16 +29,14 @@ const DB::DataStream & getChildOutputStream(DB::QueryPlan::Node & node)
 namespace DB::QueryPlanOptimizations
 {

-/// This is a check that output columns with the same name have the same types.
-/// This is ok to have such a situation in DAG, but not for Block.
-/// TODO: we should have a different data structure for headers.
+/// This is a check that output columns do not have the same name.
+/// That is ok for a DAG, but may introduce a bug in a SortingStep, because columns are selected by name.
 static bool areOutputsAreConvertableToBlock(const ActionsDAG::NodeRawConstPtrs & outputs)
 {
-    std::unordered_map name_to_type;
+    std::unordered_set names;
     for (const auto & output : outputs)
     {
-        auto [it, inserted] = name_to_type.emplace(output->result_name, output->result_type.get());
-        if (!inserted && !it->second->equals(*output->result_type))
+        if (!names.emplace(output->result_name).second)
             return false;
     }

diff --git a/tests/queries/0_stateless/02789_functions_after_sorting_and_columns_with_same_names_bug_2.reference b/tests/queries/0_stateless/02789_functions_after_sorting_and_columns_with_same_names_bug_2.reference
new file mode 100644
index 00000000000..bcc55e50958
--- /dev/null
+++ b/tests/queries/0_stateless/02789_functions_after_sorting_and_columns_with_same_names_bug_2.reference
@@ -0,0 +1,3 @@
+20230626 0.3156979034107179 \N \N
+20230626 0.2624629016490004 \N \N
+20230626 0.19390556368960468 \N \N
diff --git a/tests/queries/0_stateless/02789_functions_after_sorting_and_columns_with_same_names_bug_2.sql b/tests/queries/0_stateless/02789_functions_after_sorting_and_columns_with_same_names_bug_2.sql
new file mode 100644
index 00000000000..b0221635fe9
--- /dev/null
+++ b/tests/queries/0_stateless/02789_functions_after_sorting_and_columns_with_same_names_bug_2.sql
@@ -0,0 +1,107 @@
+create table test1 (
+    `pt` String,
+    `brand_name` String,
+    `total_indirect_order_cnt` Float64,
+    `total_indirect_gmv` Float64
+) ENGINE = Memory;
+
+create table test2 (
+    `pt` String,
+    `brand_name` String,
+    `exposure_uv` Float64,
+    `click_uv` Float64
+) ENGINE = Memory;
+
+INSERT INTO test1 (`pt`, `brand_name`, `total_indirect_order_cnt`, `total_indirect_gmv`) VALUES ('20230625', 'LINING', 2232, 1008710), ('20230625', 'adidas', 125, 58820), ('20230625', 'Nike', 1291, 1033020), ('20230626', 'Nike', 1145, 938926), ('20230626', 'LINING', 1904, 853336),
('20230626', 'adidas', 133, 62546), ('20220626', 'LINING', 3747, 1855203), ('20220626', 'Nike', 2295, 1742665), ('20220626', 'adidas', 302, 122388); + +INSERT INTO test2 (`pt`, `brand_name`, `exposure_uv`, `click_uv`) VALUES ('20230625', 'Nike', 2012913, 612831), ('20230625', 'adidas', 480277, 96176), ('20230625', 'LINING', 2474234, 627814), ('20230626', 'Nike', 1934666, 610770), ('20230626', 'adidas', 469904, 91117), ('20230626', 'LINING', 2285142, 599765), ('20220626', 'Nike', 2979656, 937166), ('20220626', 'adidas', 704751, 124250), ('20220626', 'LINING', 3163884, 1010221); + +SELECT * FROM ( + SELECT m0.pt AS pt + ,m0.`uvctr` AS uvctr + ,round(m1.uvctr,4) AS uvctr_hb_last_value + ,round(m2.uvctr,4) AS uvctr_tb_last_value + FROM + ( + SELECT m0.pt AS pt + ,COALESCE(m0.brand_name,m1.brand_name) AS brand_name + ,if(isNaN(`click_uv` / `exposure_uv`) OR isInfinite(`click_uv` / `exposure_uv`),NULL,`click_uv` / `exposure_uv`) AS `uvctr` + FROM + ( + SELECT pt AS pt + ,brand_name AS `brand_name` + ,exposure_uv AS `exposure_uv` + ,click_uv AS `click_uv` + FROM test2 + WHERE pt = '20230626' + ) m0 + FULL JOIN + ( + SELECT pt AS pt + ,brand_name AS `brand_name` + ,total_indirect_order_cnt AS `total_indirect_order_cnt` + ,total_indirect_gmv AS `total_indirect_gmv` + FROM test1 + WHERE pt = '20230626' + ) m1 + ON m0.brand_name = m1.brand_name AND m0.pt = m1.pt + ) m0 + LEFT JOIN + ( + SELECT m0.pt AS pt + ,if(isNaN(`click_uv` / `exposure_uv`) OR isInfinite(`click_uv` / `exposure_uv`),NULL,`click_uv` / `exposure_uv`) AS `uvctr` + ,COALESCE(m0.brand_name,m1.brand_name) AS brand_name + ,`exposure_uv` AS `exposure_uv` + ,`click_uv` + FROM + ( + SELECT pt AS pt + ,brand_name AS `brand_name` + ,exposure_uv AS `exposure_uv` + ,click_uv AS `click_uv` + FROM test2 + WHERE pt = '20230625' + ) m0 + FULL JOIN + ( + SELECT pt AS pt + ,brand_name AS `brand_name` + ,total_indirect_order_cnt AS `total_indirect_order_cnt` + ,total_indirect_gmv AS `total_indirect_gmv` + FROM test1 + WHERE pt = '20230625' + ) m1 + ON m0.brand_name = m1.brand_name AND m0.pt = m1.pt + ) m1 + ON m0.brand_name = m1.brand_name AND m0.pt = m1.pt + LEFT JOIN + ( + SELECT m0.pt AS pt + ,if(isNaN(`click_uv` / `exposure_uv`) OR isInfinite(`click_uv` / `exposure_uv`),NULL,`click_uv` / `exposure_uv`) AS `uvctr` + ,COALESCE(m0.brand_name,m1.brand_name) AS brand_name + ,`exposure_uv` AS `exposure_uv` + ,`click_uv` + FROM + ( + SELECT pt AS pt + ,brand_name AS `brand_name` + ,exposure_uv AS `exposure_uv` + ,click_uv AS `click_uv` + FROM test2 + WHERE pt = '20220626' + ) m0 + FULL JOIN + ( + SELECT pt AS pt + ,brand_name AS `brand_name` + ,total_indirect_order_cnt AS `total_indirect_order_cnt` + ,total_indirect_gmv AS `total_indirect_gmv` + FROM test1 + WHERE pt = '20220626' + ) m1 + ON m0.brand_name = m1.brand_name AND m0.pt = m1.pt + ) m2 + ON m0.brand_name = m2.brand_name AND m0.pt = m2.pt +) c0 +ORDER BY pt ASC, uvctr DESC; + From 66227ce8d3faacd7a60a1cde9c96f55cb6c1b134 Mon Sep 17 00:00:00 2001 From: velavokr Date: Sun, 2 Jul 2023 15:20:59 +0300 Subject: [PATCH 1124/1997] #51292 added default_temporary_table_engine setting --- docs/en/operations/settings/settings.md | 34 ++++++++++++++++++ src/Core/Settings.h | 1 + src/Interpreters/InterpreterCreateQuery.cpp | 35 ++++++++----------- src/Interpreters/InterpreterCreateQuery.h | 2 +- .../02184_default_table_engine.reference | 1 + .../02184_default_table_engine.sql | 4 +++ 6 files changed, 56 insertions(+), 21 deletions(-) diff --git a/docs/en/operations/settings/settings.md 
b/docs/en/operations/settings/settings.md index cff13302cdc..0d5072d5474 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -3201,6 +3201,40 @@ ENGINE = Log └──────────────────────────────────────────────────────────────────────────┘ ``` +## default_temporary_table_engine {#default_temporary_table_engine} + +Same as [default_temporary_table_engine](#default_temporary_table_engine) but for temporary tables. + +Default value: `Memory`. + +In this example, any new temporary table that does not specify an `Engine` will use the `Log` table engine: + +Query: + +```sql +SET default_temporary_table_engine = 'Log'; + +CREATE TEMPORARY TABLE my_table ( + x UInt32, + y UInt32 +); + +SHOW CREATE TEMPORARY TABLE my_table; +``` + +Result: + +```response +┌─statement────────────────────────────────────────────────────────────────┐ +│ CREATE TEMPORARY TABLE default.my_table +( + `x` UInt32, + `y` UInt32 +) +ENGINE = Log +└──────────────────────────────────────────────────────────────────────────┘ +``` + ## data_type_default_nullable {#data_type_default_nullable} Allows data types without explicit modifiers [NULL or NOT NULL](../../sql-reference/statements/create/table.md/#null-modifiers) in column definition will be [Nullable](../../sql-reference/data-types/nullable.md/#data_type-nullable). diff --git a/src/Core/Settings.h b/src/Core/Settings.h index b7d12a518c8..59373df3ece 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -517,6 +517,7 @@ class IColumn; M(Seconds, wait_for_window_view_fire_signal_timeout, 10, "Timeout for waiting for window view fire signal in event time processing", 0) \ M(UInt64, min_free_disk_space_for_temporary_data, 0, "The minimum disk space to keep while writing temporary data used in external sorting and aggregation.", 0) \ \ + M(DefaultTableEngine, default_temporary_table_engine, DefaultTableEngine::Memory, "Default table engine used when ENGINE is not set in CREATE TEMPORARY statement.",0) \ M(DefaultTableEngine, default_table_engine, DefaultTableEngine::None, "Default table engine used when ENGINE is not set in CREATE statement.",0) \ M(Bool, show_table_uuid_in_table_create_query_if_not_nil, false, "For tables in databases with Engine=Atomic show UUID of the table in its CREATE query.", 0) \ M(Bool, database_atomic_wait_for_drop_and_detach_synchronously, false, "When executing DROP or DETACH TABLE in Atomic database, wait for table data to be finally dropped or detached.", 0) \ diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index d0bb3dd389f..1419203b45b 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -911,14 +911,13 @@ String InterpreterCreateQuery::getTableEngineName(DefaultTableEngine default_tab } } -void InterpreterCreateQuery::setDefaultTableEngine(ASTStorage & storage, ContextPtr local_context) +void InterpreterCreateQuery::setDefaultTableEngine(ASTStorage & storage, DefaultTableEngine engine) { - if (local_context->getSettingsRef().default_table_engine.value == DefaultTableEngine::None) + if (engine == DefaultTableEngine::None) throw Exception(ErrorCodes::ENGINE_REQUIRED, "Table engine is not specified in CREATE query"); auto engine_ast = std::make_shared(); - auto default_table_engine = local_context->getSettingsRef().default_table_engine.value; - engine_ast->name = getTableEngineName(default_table_engine); + engine_ast->name = getTableEngineName(engine); engine_ast->no_empty_args = true; 
storage.set(storage.engine, engine_ast); } @@ -943,24 +942,20 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const if (!create.cluster.empty()) throw Exception(ErrorCodes::INCORRECT_QUERY, "Temporary tables cannot be created with ON CLUSTER clause"); - if (create.storage) + if (create.storage && create.storage->engine) { - if (create.storage->engine) - { - if (create.storage->engine->name.starts_with("Replicated") || create.storage->engine->name == "KeeperMap") - throw Exception(ErrorCodes::INCORRECT_QUERY, "Temporary tables cannot be created with Replicated or KeeperMap table engines"); - } - else - throw Exception(ErrorCodes::INCORRECT_QUERY, "Invalid storage definition for temporary table"); + if (create.storage->engine->name.starts_with("Replicated") || create.storage->engine->name == "KeeperMap") + throw Exception(ErrorCodes::INCORRECT_QUERY, "Temporary tables cannot be created with Replicated or KeeperMap table engines"); + return; } else { - auto engine_ast = std::make_shared(); - engine_ast->name = "Memory"; - engine_ast->no_empty_args = true; - auto storage_ast = std::make_shared(); - storage_ast->set(storage_ast->engine, engine_ast); - create.set(create.storage, storage_ast); + if (!create.storage) + { + auto storage_ast = std::make_shared(); + create.set(create.storage, storage_ast); + } + setDefaultTableEngine(*create.storage, getContext()->getSettingsRef().default_temporary_table_engine.value); } return; } @@ -969,7 +964,7 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const { /// Some part of storage definition (such as PARTITION BY) is specified, but ENGINE is not: just set default one. if (!create.storage->engine) - setDefaultTableEngine(*create.storage, getContext()); + setDefaultTableEngine(*create.storage, getContext()->getSettingsRef().default_table_engine.value); return; } @@ -1008,7 +1003,7 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const } create.set(create.storage, std::make_shared()); - setDefaultTableEngine(*create.storage, getContext()); + setDefaultTableEngine(*create.storage, getContext()->getSettingsRef().default_table_engine.value); } static void generateUUIDForTable(ASTCreateQuery & create) diff --git a/src/Interpreters/InterpreterCreateQuery.h b/src/Interpreters/InterpreterCreateQuery.h index a5fa6576091..09a582d6686 100644 --- a/src/Interpreters/InterpreterCreateQuery.h +++ b/src/Interpreters/InterpreterCreateQuery.h @@ -91,7 +91,7 @@ private: TableProperties getTablePropertiesAndNormalizeCreateQuery(ASTCreateQuery & create) const; void validateTableStructure(const ASTCreateQuery & create, const TableProperties & properties) const; static String getTableEngineName(DefaultTableEngine default_table_engine); - static void setDefaultTableEngine(ASTStorage & storage, ContextPtr local_context); + static void setDefaultTableEngine(ASTStorage & storage, DefaultTableEngine engine); void setEngine(ASTCreateQuery & create) const; AccessRightsElements getRequiredAccess() const; diff --git a/tests/queries/0_stateless/02184_default_table_engine.reference b/tests/queries/0_stateless/02184_default_table_engine.reference index 870dff90efa..495b9627acb 100644 --- a/tests/queries/0_stateless/02184_default_table_engine.reference +++ b/tests/queries/0_stateless/02184_default_table_engine.reference @@ -27,3 +27,4 @@ CREATE TABLE default.val2\n(\n `n` Int32\n) AS values(\'n int\', 1, 2) CREATE TABLE default.log\n(\n `n` Int32\n)\nENGINE = Log CREATE TABLE default.kek\n(\n `n` Int32\n)\nENGINE = Memory CREATE TABLE 
default.lol\n(\n `n` Int32\n)\nENGINE = MergeTree\nORDER BY n\nSETTINGS min_bytes_for_wide_part = 123, index_granularity = 8192 +CREATE TEMPORARY TABLE tmp_log\n(\n `n` Int32\n)\nENGINE = Log diff --git a/tests/queries/0_stateless/02184_default_table_engine.sql b/tests/queries/0_stateless/02184_default_table_engine.sql index 109875d53a5..68422f273b0 100644 --- a/tests/queries/0_stateless/02184_default_table_engine.sql +++ b/tests/queries/0_stateless/02184_default_table_engine.sql @@ -128,3 +128,7 @@ SHOW CREATE TABLE kek; SHOW CREATE TABLE lol; DROP TABLE kek; DROP TABLE lol; + +SET default_temporary_table_engine = 'Log'; +CREATE TEMPORARY TABLE tmp_log (n int); +SHOW CREATE TEMPORARY TABLE tmp_log; From 1e10bf5bdf50aac027f0824bad812676988a1eb3 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 3 Jul 2023 13:47:52 +0300 Subject: [PATCH 1125/1997] Update docs/en/operations/settings/settings.md --- docs/en/operations/settings/settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 0d5072d5474..5f6cf98646b 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -3203,7 +3203,7 @@ ENGINE = Log ## default_temporary_table_engine {#default_temporary_table_engine} -Same as [default_temporary_table_engine](#default_temporary_table_engine) but for temporary tables. +Same as [default_table_engine](#default_table_engine) but for temporary tables. Default value: `Memory`. From 2f85d048ae42f0b06658b2acd38271d041be057e Mon Sep 17 00:00:00 2001 From: velavokr Date: Mon, 3 Jul 2023 16:14:19 +0300 Subject: [PATCH 1126/1997] bugfix --- src/Interpreters/InterpreterCreateQuery.cpp | 65 ++++++--------------- src/Interpreters/InterpreterCreateQuery.h | 2 - 2 files changed, 19 insertions(+), 48 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 1419203b45b..72312a33b3d 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -881,45 +881,21 @@ void InterpreterCreateQuery::validateTableStructure(const ASTCreateQuery & creat } } -String InterpreterCreateQuery::getTableEngineName(DefaultTableEngine default_table_engine) -{ - switch (default_table_engine) - { - case DefaultTableEngine::Log: - return "Log"; - - case DefaultTableEngine::StripeLog: - return "StripeLog"; - - case DefaultTableEngine::MergeTree: - return "MergeTree"; - - case DefaultTableEngine::ReplacingMergeTree: - return "ReplacingMergeTree"; - - case DefaultTableEngine::ReplicatedMergeTree: - return "ReplicatedMergeTree"; - - case DefaultTableEngine::ReplicatedReplacingMergeTree: - return "ReplicatedReplacingMergeTree"; - - case DefaultTableEngine::Memory: - return "Memory"; - - default: - throw Exception(ErrorCodes::LOGICAL_ERROR, "default_table_engine is set to unknown value"); +namespace { + void checkTemporaryTableEngineName(const String& name) { + if (name.starts_with("Replicated") || name == "KeeperMap") + throw Exception(ErrorCodes::INCORRECT_QUERY, "Temporary tables cannot be created with Replicated or KeeperMap table engines"); } -} -void InterpreterCreateQuery::setDefaultTableEngine(ASTStorage & storage, DefaultTableEngine engine) -{ - if (engine == DefaultTableEngine::None) - throw Exception(ErrorCodes::ENGINE_REQUIRED, "Table engine is not specified in CREATE query"); + void setDefaultTableEngine(ASTStorage &storage, DefaultTableEngine engine) { + if 
(engine == DefaultTableEngine::None) + throw Exception(ErrorCodes::ENGINE_REQUIRED, "Table engine is not specified in CREATE query"); - auto engine_ast = std::make_shared(); - engine_ast->name = getTableEngineName(engine); - engine_ast->no_empty_args = true; - storage.set(storage.engine, engine_ast); + auto engine_ast = std::make_shared(); + engine_ast->name = SettingFieldDefaultTableEngine(engine).toString(); + engine_ast->no_empty_args = true; + storage.set(storage.engine, engine_ast); + } } void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const @@ -942,21 +918,18 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const if (!create.cluster.empty()) throw Exception(ErrorCodes::INCORRECT_QUERY, "Temporary tables cannot be created with ON CLUSTER clause"); - if (create.storage && create.storage->engine) + if (!create.storage) { - if (create.storage->engine->name.starts_with("Replicated") || create.storage->engine->name == "KeeperMap") - throw Exception(ErrorCodes::INCORRECT_QUERY, "Temporary tables cannot be created with Replicated or KeeperMap table engines"); - return; + auto storage_ast = std::make_shared(); + create.set(create.storage, storage_ast); } - else + + if (!create.storage->engine) { - if (!create.storage) - { - auto storage_ast = std::make_shared(); - create.set(create.storage, storage_ast); - } setDefaultTableEngine(*create.storage, getContext()->getSettingsRef().default_temporary_table_engine.value); } + + checkTemporaryTableEngineName(create.storage->engine->name); return; } diff --git a/src/Interpreters/InterpreterCreateQuery.h b/src/Interpreters/InterpreterCreateQuery.h index 09a582d6686..67339dea928 100644 --- a/src/Interpreters/InterpreterCreateQuery.h +++ b/src/Interpreters/InterpreterCreateQuery.h @@ -90,8 +90,6 @@ private: /// Calculate list of columns, constraints, indices, etc... of table. Rewrite query in canonical way. 
TableProperties getTablePropertiesAndNormalizeCreateQuery(ASTCreateQuery & create) const; void validateTableStructure(const ASTCreateQuery & create, const TableProperties & properties) const; - static String getTableEngineName(DefaultTableEngine default_table_engine); - static void setDefaultTableEngine(ASTStorage & storage, DefaultTableEngine engine); void setEngine(ASTCreateQuery & create) const; AccessRightsElements getRequiredAccess() const; From ae87d43f887376d19f2df3e197bc20ecefa7b012 Mon Sep 17 00:00:00 2001 From: velavokr Date: Mon, 3 Jul 2023 18:28:56 +0300 Subject: [PATCH 1127/1997] test fixes --- src/Interpreters/InterpreterCreateQuery.cpp | 9 ++++++--- tests/queries/0_stateless/02184_default_table_engine.sql | 4 ++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 72312a33b3d..dc95335d3ad 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -881,13 +881,16 @@ void InterpreterCreateQuery::validateTableStructure(const ASTCreateQuery & creat } } -namespace { - void checkTemporaryTableEngineName(const String& name) { +namespace +{ + void checkTemporaryTableEngineName(const String& name) + { if (name.starts_with("Replicated") || name == "KeeperMap") throw Exception(ErrorCodes::INCORRECT_QUERY, "Temporary tables cannot be created with Replicated or KeeperMap table engines"); } - void setDefaultTableEngine(ASTStorage &storage, DefaultTableEngine engine) { + void setDefaultTableEngine(ASTStorage &storage, DefaultTableEngine engine) + { if (engine == DefaultTableEngine::None) throw Exception(ErrorCodes::ENGINE_REQUIRED, "Table engine is not specified in CREATE query"); diff --git a/tests/queries/0_stateless/02184_default_table_engine.sql b/tests/queries/0_stateless/02184_default_table_engine.sql index 68422f273b0..a984ec1b6c9 100644 --- a/tests/queries/0_stateless/02184_default_table_engine.sql +++ b/tests/queries/0_stateless/02184_default_table_engine.sql @@ -83,8 +83,8 @@ CREATE TEMPORARY TABLE tmp (n int); SHOW CREATE TEMPORARY TABLE tmp; CREATE TEMPORARY TABLE tmp1 (n int) ENGINE=Memory; CREATE TEMPORARY TABLE tmp2 (n int) ENGINE=Log; -CREATE TEMPORARY TABLE tmp2 (n int) ORDER BY n; -- {serverError 80} -CREATE TEMPORARY TABLE tmp2 (n int, PRIMARY KEY (n)); -- {serverError 80} +CREATE TEMPORARY TABLE tmp2 (n int) ORDER BY n; -- {serverError 36} +CREATE TEMPORARY TABLE tmp2 (n int, PRIMARY KEY (n)); -- {serverError 36} CREATE TABLE log (n int); SHOW CREATE log; From dcc0076ded42792fd41c7f83bca9ff3e5ce0ed4b Mon Sep 17 00:00:00 2001 From: velavokr Date: Mon, 3 Jul 2023 19:01:35 +0300 Subject: [PATCH 1128/1997] fixed comment --- src/Interpreters/InterpreterCreateQuery.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index dc95335d3ad..55d2449f739 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -914,9 +914,7 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const if (create.temporary) { - /// It's possible if some part of storage definition (such as PARTITION BY) is specified, but ENGINE is not. - /// It makes sense when default_table_engine setting is used, but not for temporary tables. 
- /// For temporary tables we ignore this setting to allow CREATE TEMPORARY TABLE query without specifying ENGINE + /// Some part of storage definition is specified, but ENGINE is not: just set the one from default_temporary_table_engine setting. if (!create.cluster.empty()) throw Exception(ErrorCodes::INCORRECT_QUERY, "Temporary tables cannot be created with ON CLUSTER clause"); From 9941b29fd3031bc12e055d17f18ad2b31ba7973d Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Mon, 3 Jul 2023 18:18:14 +0200 Subject: [PATCH 1129/1997] Better --- src/Core/Field.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Core/Field.h b/src/Core/Field.h index 8ee93d08411..0b3c5b7f48d 100644 --- a/src/Core/Field.h +++ b/src/Core/Field.h @@ -689,9 +689,9 @@ private: size_t result = 0; if constexpr (std::is_same_v || std::is_same_v || std::is_same_v) - std::for_each(x.begin(), x.end(), [this, &x](auto & elem){ nested_field_depth = std::max(nested_field_depth, elem.nested_field_depth); }); + std::for_each(x.begin(), x.end(), [this, &result](auto & elem){ result = std::max(result, elem.nested_field_depth); }); else if constexpr (std::is_same_v) - std::for_each(x.begin(), x.end(), [this, &x](auto & elem){ nested_field_depth = std::max(nested_field_depth, elem.second.nested_field_depth); }); + std::for_each(x.begin(), x.end(), [this, &result](auto & elem){ result = std::max(result, elem.second.nested_field_depth); }); if (result >= DBMS_MAX_NESTED_FIELD_DEPTH) throw Exception(ErrorCodes::TOO_DEEP_RECURSION, "Too deep Field"); @@ -711,10 +711,10 @@ private: // we must initialize the entire wide stored type, and not just the // nominal type. using StorageType = NearestFieldType; - new (&storage) StorageType(std::forward(x)); - which = TypeToEnum::value; /// Incrementing the depth since we create a new Field. nested_field_depth = calculateAndCheckFieldDepth(x) + 1; + new (&storage) StorageType(std::forward(x)); + which = TypeToEnum::value; } /// Assuming same types. @@ -724,9 +724,9 @@ private: using JustT = std::decay_t; assert(which == TypeToEnum::value); JustT * MAY_ALIAS ptr = reinterpret_cast(&storage); - *ptr = std::forward(x); /// Do not increment the depth, because it is an assignment. 
nested_field_depth = calculateAndCheckFieldDepth(x); + *ptr = std::forward(x); } template From edeef107f07f9f12184df55f46ab5b9dc95e6763 Mon Sep 17 00:00:00 2001 From: Zach Naimon Date: Mon, 3 Jul 2023 12:42:51 -0400 Subject: [PATCH 1130/1997] fix authError behavior, fix overlapping chart behavior --- programs/server/dashboard.html | 36 +++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/programs/server/dashboard.html b/programs/server/dashboard.html index 951b7db3aa3..ea818e05e31 100644 --- a/programs/server/dashboard.html +++ b/programs/server/dashboard.html @@ -12,7 +12,8 @@ --chart-background: white; --shadow-color: rgba(0, 0, 0, 0.25); --input-shadow-color: rgba(0, 255, 0, 1); - --error-color: white; + --error-color: red; + --auth-error-color: white; --legend-background: rgba(255, 255, 255, 0.75); --title-color: #666; --text-color: black; @@ -258,7 +259,7 @@ width: 60%; padding: .5rem; - color: var(--error-color); + color: var(--auth-error-color); display: flex; flex-flow: row nowrap; @@ -906,9 +907,9 @@ async function draw(idx, chart, url_params, query) { if (error) { const errorMatch = errorMessages.find(({ regex }) => error.match(regex)) - if (errorMatch) { - const match = error.match(errorMatch.regex) - const message = errorMatch.messageFunc(match) + const match = error.match(errorMatch.regex) + const message = errorMatch.messageFunc(match) + if (message) { const authError = new Error(message) throw authError } @@ -930,7 +931,7 @@ async function draw(idx, chart, url_params, query) { let title_div = chart.querySelector('.title'); if (error) { error_div.firstChild.data = error; - title_div.style.display = 'none'; + title_div.style.display = 'none'; error_div.style.display = 'block'; return false; } else { @@ -1019,13 +1020,15 @@ async function drawAll() { firstLoad = false; } else { enableReloadButton(); + enableRunButton(); } - if (!results.includes(false)) { + if (results.includes(true)) { const element = document.querySelector('.inputs'); element.classList.remove('unconnected'); const add = document.querySelector('#add'); add.style.display = 'block'; - } else { + } + else { const charts = document.querySelector('#charts') charts.style.height = '0px'; } @@ -1050,6 +1053,13 @@ function disableReloadButton() { reloadButton.classList.add('disabled') } +function disableRunButton() { + const runButton = document.getElementById('run') + runButton.value = 'Reloading...' 
+ runButton.disabled = true + runButton.classList.add('disabled') +} + function enableReloadButton() { const reloadButton = document.getElementById('reload') reloadButton.value = 'Reload' @@ -1057,11 +1067,19 @@ function enableReloadButton() { reloadButton.classList.remove('disabled') } +function enableRunButton() { + const runButton = document.getElementById('run') + runButton.value = 'Ok' + runButton.disabled = false + runButton.classList.remove('disabled') +} + function reloadAll() { updateParams(); drawAll(); saveState(); - disableReloadButton() + disableReloadButton(); + disableRunButton(); } document.getElementById('params').onsubmit = function(event) { From 509a0c6f30543b364f805deaca5ba8a025ab8538 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Mon, 3 Jul 2023 13:50:40 -0400 Subject: [PATCH 1131/1997] add missing doc templates back --- docs/_description_templates/template-data-type.md | 1 + docs/_description_templates/template-engine.md | 1 + docs/_description_templates/template-function.md | 1 + docs/_description_templates/template-server-setting.md | 1 + docs/_description_templates/template-setting.md | 1 + docs/_description_templates/template-statement.md | 1 + docs/_description_templates/template-system-table.md | 1 + 7 files changed, 7 insertions(+) create mode 100644 docs/_description_templates/template-data-type.md create mode 100644 docs/_description_templates/template-engine.md create mode 100644 docs/_description_templates/template-function.md create mode 100644 docs/_description_templates/template-server-setting.md create mode 100644 docs/_description_templates/template-setting.md create mode 100644 docs/_description_templates/template-statement.md create mode 100644 docs/_description_templates/template-system-table.md diff --git a/docs/_description_templates/template-data-type.md b/docs/_description_templates/template-data-type.md new file mode 100644 index 00000000000..b66d77c7a77 --- /dev/null +++ b/docs/_description_templates/template-data-type.md @@ -0,0 +1 @@ 
+{"payload":{"allShortcutsEnabled":true,"fileTree":{"docs/_description_templates":{"items":[{"name":"template-data-type.md","path":"docs/_description_templates/template-data-type.md","contentType":"file"},{"name":"template-engine.md","path":"docs/_description_templates/template-engine.md","contentType":"file"},{"name":"template-function.md","path":"docs/_description_templates/template-function.md","contentType":"file"},{"name":"template-server-setting.md","path":"docs/_description_templates/template-server-setting.md","contentType":"file"},{"name":"template-setting.md","path":"docs/_description_templates/template-setting.md","contentType":"file"},{"name":"template-statement.md","path":"docs/_description_templates/template-statement.md","contentType":"file"},{"name":"template-system-table.md","path":"docs/_description_templates/template-system-table.md","contentType":"file"}],"totalCount":7},"docs":{"items":[{"name":"_description_templates","path":"docs/_description_templates","contentType":"directory"},{"name":"_includes","path":"docs/_includes","contentType":"directory"},{"name":"changelogs","path":"docs/changelogs","contentType":"directory"},{"name":"en","path":"docs/en","contentType":"directory"},{"name":"ru","path":"docs/ru","contentType":"directory"},{"name":"tools","path":"docs/tools","contentType":"directory"},{"name":"zh","path":"docs/zh","contentType":"directory"},{"name":".gitignore","path":"docs/.gitignore","contentType":"file"},{"name":"README.md","path":"docs/README.md","contentType":"file"},{"name":"clean","path":"docs/clean","contentType":"file"},{"name":"mkdocs.yml","path":"docs/mkdocs.yml","contentType":"file"},{"name":"redirects.txt","path":"docs/redirects.txt","contentType":"file"}],"totalCount":12},"":{"items":[{"name":".github","path":".github","contentType":"directory"},{"name":"base","path":"base","contentType":"directory"},{"name":"benchmark","path":"benchmark","contentType":"directory"},{"name":"cmake","path":"cmake","contentType":"directory"},{"name":"contrib","path":"contrib","contentType":"directory"},{"name":"docker","path":"docker","contentType":"directory"},{"name":"docs","path":"docs","contentType":"directory"},{"name":"packages","path":"packages","contentType":"directory"},{"name":"programs","path":"programs","contentType":"directory"},{"name":"src","path":"src","contentType":"directory"},{"name":"tests","path":"tests","contentType":"directory"},{"name":"utils","path":"utils","contentType":"directory"},{"name":"website","path":"website","contentType":"directory"},{"name":".clang-format","path":".clang-format","contentType":"file"},{"name":".clang-tidy","path":".clang-tidy","contentType":"file"},{"name":".editorconfig","path":".editorconfig","contentType":"file"},{"name":".gitattributes","path":".gitattributes","contentType":"file"},{"name":".gitignore","path":".gitignore","contentType":"file"},{"name":".gitmodules","path":".gitmodules","contentType":"file"},{"name":".pylintrc","path":".pylintrc","contentType":"file"},{"name":".vimrc","path":".vimrc","contentType":"file"},{"name":".yamllint","path":".yamllint","contentType":"file"},{"name":"AUTHORS","path":"AUTHORS","contentType":"file"},{"name":"CHANGELOG.md","path":"CHANGELOG.md","contentType":"file"},{"name":"CMakeLists.txt","path":"CMakeLists.txt","contentType":"file"},{"name":"CODE_OF_CONDUCT.md","path":"CODE_OF_CONDUCT.md","contentType":"file"},{"name":"CONTRIBUTING.md","path":"CONTRIBUTING.md","contentType":"file"},{"name":"LICENSE","path":"LICENSE","contentType":"file"},{"name":"PreLoad.cmake","path":
"PreLoad.cmake","contentType":"file"},{"name":"README.md","path":"README.md","contentType":"file"},{"name":"SECURITY.md","path":"SECURITY.md","contentType":"file"},{"name":"format_sources","path":"format_sources","contentType":"file"}],"totalCount":32}},"fileTreeProcessingTime":14.564540000000001,"foldersToFetch":[],"reducedMotionEnabled":"system","repo":{"id":60246359,"defaultBranch":"master","name":"ClickHouse","ownerLogin":"ClickHouse","currentUserCanPush":true,"isFork":false,"isEmpty":false,"createdAt":"2016-06-02T04:28:18.000-04:00","ownerAvatar":"https://avatars.githubusercontent.com/u/54801242?v=4","public":true,"private":false,"isOrgOwned":true},"refInfo":{"name":"5f18640215159ed1ad50be1efce2cb996a49fd73","listCacheKey":"v0:1688403108.0","canEdit":false,"refType":"tree","currentOid":"5f18640215159ed1ad50be1efce2cb996a49fd73"},"path":"docs/_description_templates/template-data-type.md","currentUser":{"id":25182304,"login":"DanRoscigno","userEmail":"dan@roscigno.com"},"blob":{"rawBlob":null,"colorizedLines":null,"stylingDirectives":null,"csv":null,"csvError":null,"dependabotInfo":{"showConfigurationBanner":null,"configFilePath":null,"networkDependabotPath":"/ClickHouse/ClickHouse/network/updates","dismissConfigurationNoticePath":"/settings/dismiss-notice/dependabot_configuration_notice","configurationNoticeDismissed":false,"repoAlertsPath":"/ClickHouse/ClickHouse/security/dependabot","repoSecurityAndAnalysisPath":"/ClickHouse/ClickHouse/settings/security_analysis","repoOwnerIsOrg":true,"currentUserCanAdminRepo":false},"displayName":"template-data-type.md","displayUrl":"https://github.com/ClickHouse/ClickHouse/blob/5f18640215159ed1ad50be1efce2cb996a49fd73/docs/_description_templates/template-data-type.md?raw=true","headerInfo":{"blobSize":"540 Bytes","deleteInfo":{"deletePath":null,"deleteTooltip":"You must be on a branch to make or propose changes to this file"},"editInfo":{"editTooltip":"You must be on a branch to make or propose changes to this file"},"ghDesktopPath":null,"gitLfsPath":null,"onBranch":false,"shortPath":"239edb2","siteNavLoginPath":"/login?return_to=https%3A%2F%2Fgithub.com%2FClickHouse%2FClickHouse%2Fblob%2F5f18640215159ed1ad50be1efce2cb996a49fd73%2Fdocs%2F_description_templates%2Ftemplate-data-type.md","isCSV":false,"isRichtext":true,"toc":[{"level":1,"text":"data_type_name {#data_type-name}","anchor":"data_type_name-data_type-name","htmlText":"data_type_name {#data_type-name}"},{"level":2,"text":"Additional Info {#additional-info} (Optional)","anchor":"additional-info-additional-info-optional","htmlText":"Additional Info {#additional-info} 
(Optional)"}],"lineInfo":{"truncatedLoc":"29","truncatedSloc":"17"},"mode":"file"},"image":false,"isCodeownersFile":null,"isValidLegacyIssueTemplate":false,"issueTemplateHelpUrl":"https://docs.github.com/articles/about-issue-and-pull-request-templates","issueTemplate":null,"discussionTemplate":null,"language":"Markdown","large":false,"loggedIn":true,"newDiscussionPath":"/ClickHouse/ClickHouse/discussions/new","newIssuePath":"/ClickHouse/ClickHouse/issues/new","planSupportInfo":{"repoIsFork":null,"repoOwnedByCurrentUser":null,"requestFullPath":"/ClickHouse/ClickHouse/blob/5f18640215159ed1ad50be1efce2cb996a49fd73/docs/_description_templates/template-data-type.md","showFreeOrgGatedFeatureMessage":null,"showPlanSupportBanner":null,"upgradeDataAttributes":null,"upgradePath":null},"publishBannersInfo":{"dismissActionNoticePath":"/settings/dismiss-notice/publish_action_from_dockerfile","dismissStackNoticePath":"/settings/dismiss-notice/publish_stack_from_file","releasePath":"/ClickHouse/ClickHouse/releases/new?marketplace=true","showPublishActionBanner":false,"showPublishStackBanner":false},"renderImageOrRaw":false,"richText":"
\n \n \n \n \n \n \n \n \n
toc_prioritytoc_title
\n\n

data_type_name {#data_type-name}

\n

Description.

\n

Parameters (Optional)

\n\n

Examples

\n
\n

Additional Info {#additional-info} (Optional)

\n

The name of an additional section can be any, for example, Usage.

\n

See Also (Optional)

\n\n

Original article

\n
","renderedFileInfo":null,"tabSize":8,"topBannersInfo":{"overridingGlobalFundingFile":false,"globalPreferredFundingPath":null,"repoOwner":"ClickHouse","repoName":"ClickHouse","showInvalidCitationWarning":false,"citationHelpUrl":"https://docs.github.com/en/github/creating-cloning-and-archiving-repositories/creating-a-repository-on-github/about-citation-files","showDependabotConfigurationBanner":null,"actionsOnboardingTip":null},"truncated":false,"viewable":true,"workflowRedirectUrl":null,"symbols":{"timedOut":false,"notAnalyzed":true,"symbols":[]}},"csrf_tokens":{"/ClickHouse/ClickHouse/branches":{"post":"N-EHr0Rgr9I38z0iGu5sqtyb4o4AwtOW1kjllryDXwFKHQnulJDSASf5e_zz3Uts_O0aS5zN9jEL88ie9T5sww"}}},"title":"ClickHouse/docs/_description_templates/template-data-type.md at 5f18640215159ed1ad50be1efce2cb996a49fd73 · ClickHouse/ClickHouse","locale":"en"} \ No newline at end of file diff --git a/docs/_description_templates/template-engine.md b/docs/_description_templates/template-engine.md new file mode 100644 index 00000000000..c5bb5feb85d --- /dev/null +++ b/docs/_description_templates/template-engine.md @@ -0,0 +1 @@ +{"payload":{"allShortcutsEnabled":true,"fileTree":{"docs/_description_templates":{"items":[{"name":"template-data-type.md","path":"docs/_description_templates/template-data-type.md","contentType":"file"},{"name":"template-engine.md","path":"docs/_description_templates/template-engine.md","contentType":"file"},{"name":"template-function.md","path":"docs/_description_templates/template-function.md","contentType":"file"},{"name":"template-server-setting.md","path":"docs/_description_templates/template-server-setting.md","contentType":"file"},{"name":"template-setting.md","path":"docs/_description_templates/template-setting.md","contentType":"file"},{"name":"template-statement.md","path":"docs/_description_templates/template-statement.md","contentType":"file"},{"name":"template-system-table.md","path":"docs/_description_templates/template-system-table.md","contentType":"file"}],"totalCount":7},"docs":{"items":[{"name":"_description_templates","path":"docs/_description_templates","contentType":"directory"},{"name":"_includes","path":"docs/_includes","contentType":"directory"},{"name":"changelogs","path":"docs/changelogs","contentType":"directory"},{"name":"en","path":"docs/en","contentType":"directory"},{"name":"ru","path":"docs/ru","contentType":"directory"},{"name":"tools","path":"docs/tools","contentType":"directory"},{"name":"zh","path":"docs/zh","contentType":"directory"},{"name":".gitignore","path":"docs/.gitignore","contentType":"file"},{"name":"README.md","path":"docs/README.md","contentType":"file"},{"name":"clean","path":"docs/clean","contentType":"file"},{"name":"mkdocs.yml","path":"docs/mkdocs.yml","contentType":"file"},{"name":"redirects.txt","path":"docs/redirects.txt","contentType":"file"}],"totalCount":12},"":{"items":[{"name":".github","path":".github","contentType":"directory"},{"name":"base","path":"base","contentType":"directory"},{"name":"benchmark","path":"benchmark","contentType":"directory"},{"name":"cmake","path":"cmake","contentType":"directory"},{"name":"contrib","path":"contrib","contentType":"directory"},{"name":"docker","path":"docker","contentType":"directory"},{"name":"docs","path":"docs","contentType":"directory"},{"name":"packages","path":"packages","contentType":"directory"},{"name":"programs","path":"programs","contentType":"directory"},{"name":"src","path":"src","contentType":"directory"},{"name":"tests","path":"tests","contentType":"directory"},{"name"
:"utils","path":"utils","contentType":"directory"},{"name":"website","path":"website","contentType":"directory"},{"name":".clang-format","path":".clang-format","contentType":"file"},{"name":".clang-tidy","path":".clang-tidy","contentType":"file"},{"name":".editorconfig","path":".editorconfig","contentType":"file"},{"name":".gitattributes","path":".gitattributes","contentType":"file"},{"name":".gitignore","path":".gitignore","contentType":"file"},{"name":".gitmodules","path":".gitmodules","contentType":"file"},{"name":".pylintrc","path":".pylintrc","contentType":"file"},{"name":".vimrc","path":".vimrc","contentType":"file"},{"name":".yamllint","path":".yamllint","contentType":"file"},{"name":"AUTHORS","path":"AUTHORS","contentType":"file"},{"name":"CHANGELOG.md","path":"CHANGELOG.md","contentType":"file"},{"name":"CMakeLists.txt","path":"CMakeLists.txt","contentType":"file"},{"name":"CODE_OF_CONDUCT.md","path":"CODE_OF_CONDUCT.md","contentType":"file"},{"name":"CONTRIBUTING.md","path":"CONTRIBUTING.md","contentType":"file"},{"name":"LICENSE","path":"LICENSE","contentType":"file"},{"name":"PreLoad.cmake","path":"PreLoad.cmake","contentType":"file"},{"name":"README.md","path":"README.md","contentType":"file"},{"name":"SECURITY.md","path":"SECURITY.md","contentType":"file"},{"name":"format_sources","path":"format_sources","contentType":"file"}],"totalCount":32}},"fileTreeProcessingTime":10.334819,"foldersToFetch":[],"reducedMotionEnabled":"system","repo":{"id":60246359,"defaultBranch":"master","name":"ClickHouse","ownerLogin":"ClickHouse","currentUserCanPush":true,"isFork":false,"isEmpty":false,"createdAt":"2016-06-02T04:28:18.000-04:00","ownerAvatar":"https://avatars.githubusercontent.com/u/54801242?v=4","public":true,"private":false,"isOrgOwned":true},"refInfo":{"name":"5f18640215159ed1ad50be1efce2cb996a49fd73","listCacheKey":"v0:1688403108.0","canEdit":false,"refType":"tree","currentOid":"5f18640215159ed1ad50be1efce2cb996a49fd73"},"path":"docs/_description_templates/template-engine.md","currentUser":{"id":25182304,"login":"DanRoscigno","userEmail":"dan@roscigno.com"},"blob":{"rawBlob":null,"colorizedLines":null,"stylingDirectives":null,"csv":null,"csvError":null,"dependabotInfo":{"showConfigurationBanner":null,"configFilePath":null,"networkDependabotPath":"/ClickHouse/ClickHouse/network/updates","dismissConfigurationNoticePath":"/settings/dismiss-notice/dependabot_configuration_notice","configurationNoticeDismissed":false,"repoAlertsPath":"/ClickHouse/ClickHouse/security/dependabot","repoSecurityAndAnalysisPath":"/ClickHouse/ClickHouse/settings/security_analysis","repoOwnerIsOrg":true,"currentUserCanAdminRepo":false},"displayName":"template-engine.md","displayUrl":"https://github.com/ClickHouse/ClickHouse/blob/5f18640215159ed1ad50be1efce2cb996a49fd73/docs/_description_templates/template-engine.md?raw=true","headerInfo":{"blobSize":"1.19 KB","deleteInfo":{"deletePath":null,"deleteTooltip":"You must be on a branch to make or propose changes to this file"},"editInfo":{"editTooltip":"You must be on a branch to make or propose changes to this file"},"ghDesktopPath":null,"gitLfsPath":null,"onBranch":false,"shortPath":"392bc59","siteNavLoginPath":"/login?return_to=https%3A%2F%2Fgithub.com%2FClickHouse%2FClickHouse%2Fblob%2F5f18640215159ed1ad50be1efce2cb996a49fd73%2Fdocs%2F_description_templates%2Ftemplate-engine.md","isCSV":false,"isRichtext":true,"toc":[{"level":1,"text":"EngineName {#enginename}","anchor":"enginename-enginename","htmlText":"EngineName {#enginename}"},{"level":2,"text":"Creating 
a Database {#creating-a-database}","anchor":"creating-a-database-creating-a-database","htmlText":"Creating a Database {#creating-a-database}"},{"level":2,"text":"Creating a Table {#creating-a-table}","anchor":"creating-a-table-creating-a-table","htmlText":"Creating a Table {#creating-a-table}"},{"level":2,"text":"Virtual columns {#virtual-columns} (for Table engines only)","anchor":"virtual-columns-virtual-columns-for-table-engines-only","htmlText":"Virtual columns {#virtual-columns} (for Table engines only)"},{"level":2,"text":"Data Types Support {#data_types-support} (for Database engines only)","anchor":"data-types-support-data_types-support-for-database-engines-only","htmlText":"Data Types Support {#data_types-support} (for Database engines only)"},{"level":2,"text":"Specifics and recommendations {#specifics-and-recommendations}","anchor":"specifics-and-recommendations-specifics-and-recommendations","htmlText":"Specifics and recommendations {#specifics-and-recommendations}"},{"level":2,"text":"Usage Example {#usage-example}","anchor":"usage-example-usage-example","htmlText":"Usage Example {#usage-example}"}],"lineInfo":{"truncatedLoc":"63","truncatedSloc":"40"},"mode":"file"},"image":false,"isCodeownersFile":null,"isValidLegacyIssueTemplate":false,"issueTemplateHelpUrl":"https://docs.github.com/articles/about-issue-and-pull-request-templates","issueTemplate":null,"discussionTemplate":null,"language":"Markdown","large":false,"loggedIn":true,"newDiscussionPath":"/ClickHouse/ClickHouse/discussions/new","newIssuePath":"/ClickHouse/ClickHouse/issues/new","planSupportInfo":{"repoIsFork":null,"repoOwnedByCurrentUser":null,"requestFullPath":"/ClickHouse/ClickHouse/blob/5f18640215159ed1ad50be1efce2cb996a49fd73/docs/_description_templates/template-engine.md","showFreeOrgGatedFeatureMessage":null,"showPlanSupportBanner":null,"upgradeDataAttributes":null,"upgradePath":null},"publishBannersInfo":{"dismissActionNoticePath":"/settings/dismiss-notice/publish_action_from_dockerfile","dismissStackNoticePath":"/settings/dismiss-notice/publish_stack_from_file","releasePath":"/ClickHouse/ClickHouse/releases/new?marketplace=true","showPublishActionBanner":false,"showPublishStackBanner":false},"renderImageOrRaw":false,"richText":"

# EngineName {#enginename}

- What the Database/Table engine does.
- Relations with other engines if they exist.

## Creating a Database {#creating-a-database}

```sql
CREATE DATABASE ...
```

or

## Creating a Table {#creating-a-table}

```sql
CREATE TABLE ...
```

**Engine Parameters**

**Query Clauses** (for Table engines only)

## Virtual columns {#virtual-columns} (for Table engines only)

List and virtual columns with description, if they exist.

## Data Types Support {#data_types-support} (for Database engines only)

| EngineName         | ClickHouse             |
|--------------------|------------------------|
| NativeDataTypeName | ClickHouseDataTypeName |

## Specifics and recommendations {#specifics-and-recommendations}

- Algorithms
- Specifics of read and write processes
- Examples of tasks
- Recommendations for usage
- Specifics of data storage

## Usage Example {#usage-example}

The example must show usage and use cases. The following text contains the recommended parts of this section.

Input table:

```text
```

Query:

```sql
```

Result:

```text
```

Follow up with any text to clarify the example.

**See Also**
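For instance, a minimal sketch of a filled-in Usage Example, assuming a hypothetical page about the `File` table engine; the table name, format, and sample rows below are invented for illustration and are not part of the template:

```sql
-- Hypothetical example: create a table with the engine being documented.
CREATE TABLE file_table (n UInt32, s String) ENGINE = File(TabSeparated);

-- Insert a couple of illustrative rows.
INSERT INTO file_table VALUES (1, 'one'), (2, 'two');

-- This query and its output fill the Query: and Result: blocks above.
SELECT * FROM file_table ORDER BY n;
```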
","renderedFileInfo":null,"tabSize":8,"topBannersInfo":{"overridingGlobalFundingFile":false,"globalPreferredFundingPath":null,"repoOwner":"ClickHouse","repoName":"ClickHouse","showInvalidCitationWarning":false,"citationHelpUrl":"https://docs.github.com/en/github/creating-cloning-and-archiving-repositories/creating-a-repository-on-github/about-citation-files","showDependabotConfigurationBanner":null,"actionsOnboardingTip":null},"truncated":false,"viewable":true,"workflowRedirectUrl":null,"symbols":{"timedOut":false,"notAnalyzed":true,"symbols":[]}},"csrf_tokens":{"/ClickHouse/ClickHouse/branches":{"post":"mbr-hnliBQCB5MoHGpQf6OTZtY7x7nHuM4vvq5uev5jkRvDHqZJ405HujNnzpzguxK9NS23hVEnuMMKj0iOMWg"}}},"title":"ClickHouse/docs/_description_templates/template-engine.md at 5f18640215159ed1ad50be1efce2cb996a49fd73 · ClickHouse/ClickHouse","locale":"en"} \ No newline at end of file diff --git a/docs/_description_templates/template-function.md b/docs/_description_templates/template-function.md new file mode 100644 index 00000000000..0891e5d872d --- /dev/null +++ b/docs/_description_templates/template-function.md @@ -0,0 +1 @@ +{"payload":{"allShortcutsEnabled":true,"fileTree":{"docs/_description_templates":{"items":[{"name":"template-data-type.md","path":"docs/_description_templates/template-data-type.md","contentType":"file"},{"name":"template-engine.md","path":"docs/_description_templates/template-engine.md","contentType":"file"},{"name":"template-function.md","path":"docs/_description_templates/template-function.md","contentType":"file"},{"name":"template-server-setting.md","path":"docs/_description_templates/template-server-setting.md","contentType":"file"},{"name":"template-setting.md","path":"docs/_description_templates/template-setting.md","contentType":"file"},{"name":"template-statement.md","path":"docs/_description_templates/template-statement.md","contentType":"file"},{"name":"template-system-table.md","path":"docs/_description_templates/template-system-table.md","contentType":"file"}],"totalCount":7},"docs":{"items":[{"name":"_description_templates","path":"docs/_description_templates","contentType":"directory"},{"name":"_includes","path":"docs/_includes","contentType":"directory"},{"name":"changelogs","path":"docs/changelogs","contentType":"directory"},{"name":"en","path":"docs/en","contentType":"directory"},{"name":"ru","path":"docs/ru","contentType":"directory"},{"name":"tools","path":"docs/tools","contentType":"directory"},{"name":"zh","path":"docs/zh","contentType":"directory"},{"name":".gitignore","path":"docs/.gitignore","contentType":"file"},{"name":"README.md","path":"docs/README.md","contentType":"file"},{"name":"clean","path":"docs/clean","contentType":"file"},{"name":"mkdocs.yml","path":"docs/mkdocs.yml","contentType":"file"},{"name":"redirects.txt","path":"docs/redirects.txt","contentType":"file"}],"totalCount":12},"":{"items":[{"name":".github","path":".github","contentType":"directory"},{"name":"base","path":"base","contentType":"directory"},{"name":"benchmark","path":"benchmark","contentType":"directory"},{"name":"cmake","path":"cmake","contentType":"directory"},{"name":"contrib","path":"contrib","contentType":"directory"},{"name":"docker","path":"docker","contentType":"directory"},{"name":"docs","path":"docs","contentType":"directory"},{"name":"packages","path":"packages","contentType":"directory"},{"name":"programs","path":"programs","contentType":"directory"},{"name":"src","path":"src","contentType":"directory"},{"name":"tests","path":"tests","contentType":"directory"},{"na
me":"utils","path":"utils","contentType":"directory"},{"name":"website","path":"website","contentType":"directory"},{"name":".clang-format","path":".clang-format","contentType":"file"},{"name":".clang-tidy","path":".clang-tidy","contentType":"file"},{"name":".editorconfig","path":".editorconfig","contentType":"file"},{"name":".gitattributes","path":".gitattributes","contentType":"file"},{"name":".gitignore","path":".gitignore","contentType":"file"},{"name":".gitmodules","path":".gitmodules","contentType":"file"},{"name":".pylintrc","path":".pylintrc","contentType":"file"},{"name":".vimrc","path":".vimrc","contentType":"file"},{"name":".yamllint","path":".yamllint","contentType":"file"},{"name":"AUTHORS","path":"AUTHORS","contentType":"file"},{"name":"CHANGELOG.md","path":"CHANGELOG.md","contentType":"file"},{"name":"CMakeLists.txt","path":"CMakeLists.txt","contentType":"file"},{"name":"CODE_OF_CONDUCT.md","path":"CODE_OF_CONDUCT.md","contentType":"file"},{"name":"CONTRIBUTING.md","path":"CONTRIBUTING.md","contentType":"file"},{"name":"LICENSE","path":"LICENSE","contentType":"file"},{"name":"PreLoad.cmake","path":"PreLoad.cmake","contentType":"file"},{"name":"README.md","path":"README.md","contentType":"file"},{"name":"SECURITY.md","path":"SECURITY.md","contentType":"file"},{"name":"format_sources","path":"format_sources","contentType":"file"}],"totalCount":32}},"fileTreeProcessingTime":6.875799,"foldersToFetch":[],"reducedMotionEnabled":"system","repo":{"id":60246359,"defaultBranch":"master","name":"ClickHouse","ownerLogin":"ClickHouse","currentUserCanPush":true,"isFork":false,"isEmpty":false,"createdAt":"2016-06-02T04:28:18.000-04:00","ownerAvatar":"https://avatars.githubusercontent.com/u/54801242?v=4","public":true,"private":false,"isOrgOwned":true},"refInfo":{"name":"5f18640215159ed1ad50be1efce2cb996a49fd73","listCacheKey":"v0:1688403108.0","canEdit":false,"refType":"tree","currentOid":"5f18640215159ed1ad50be1efce2cb996a49fd73"},"path":"docs/_description_templates/template-function.md","currentUser":{"id":25182304,"login":"DanRoscigno","userEmail":"dan@roscigno.com"},"blob":{"rawBlob":null,"colorizedLines":null,"stylingDirectives":null,"csv":null,"csvError":null,"dependabotInfo":{"showConfigurationBanner":null,"configFilePath":null,"networkDependabotPath":"/ClickHouse/ClickHouse/network/updates","dismissConfigurationNoticePath":"/settings/dismiss-notice/dependabot_configuration_notice","configurationNoticeDismissed":false,"repoAlertsPath":"/ClickHouse/ClickHouse/security/dependabot","repoSecurityAndAnalysisPath":"/ClickHouse/ClickHouse/settings/security_analysis","repoOwnerIsOrg":true,"currentUserCanAdminRepo":false},"displayName":"template-function.md","displayUrl":"https://github.com/ClickHouse/ClickHouse/blob/5f18640215159ed1ad50be1efce2cb996a49fd73/docs/_description_templates/template-function.md?raw=true","headerInfo":{"blobSize":"1.12 KB","deleteInfo":{"deletePath":null,"deleteTooltip":"You must be on a branch to make or propose changes to this file"},"editInfo":{"editTooltip":"You must be on a branch to make or propose changes to this file"},"ghDesktopPath":null,"gitLfsPath":null,"onBranch":false,"shortPath":"6bdc764","siteNavLoginPath":"/login?return_to=https%3A%2F%2Fgithub.com%2FClickHouse%2FClickHouse%2Fblob%2F5f18640215159ed1ad50be1efce2cb996a49fd73%2Fdocs%2F_description_templates%2Ftemplate-function.md","isCSV":false,"isRichtext":true,"toc":[{"level":2,"text":"functionName 
{#functionname-in-lower-case}","anchor":"functionname-functionname-in-lower-case","htmlText":"functionName {#functionname-in-lower-case}"}],"lineInfo":{"truncatedLoc":"51","truncatedSloc":"29"},"mode":"file"},"image":false,"isCodeownersFile":null,"isValidLegacyIssueTemplate":false,"issueTemplateHelpUrl":"https://docs.github.com/articles/about-issue-and-pull-request-templates","issueTemplate":null,"discussionTemplate":null,"language":"Markdown","large":false,"loggedIn":true,"newDiscussionPath":"/ClickHouse/ClickHouse/discussions/new","newIssuePath":"/ClickHouse/ClickHouse/issues/new","planSupportInfo":{"repoIsFork":null,"repoOwnedByCurrentUser":null,"requestFullPath":"/ClickHouse/ClickHouse/blob/5f18640215159ed1ad50be1efce2cb996a49fd73/docs/_description_templates/template-function.md","showFreeOrgGatedFeatureMessage":null,"showPlanSupportBanner":null,"upgradeDataAttributes":null,"upgradePath":null},"publishBannersInfo":{"dismissActionNoticePath":"/settings/dismiss-notice/publish_action_from_dockerfile","dismissStackNoticePath":"/settings/dismiss-notice/publish_stack_from_file","releasePath":"/ClickHouse/ClickHouse/releases/new?marketplace=true","showPublishActionBanner":false,"showPublishStackBanner":false},"renderImageOrRaw":false,"richText":"

## functionName {#functionname-in-lower-case}

Short description.

**Syntax** (without SELECT)

```sql
<function syntax>
```

Alias: `<alias name>`. (Optional)

More text (Optional).

**Arguments** (Optional)

- `x` — Description. Optional (only for optional arguments). Possible values: . Default value: . Type name.
- `y` — Description. Optional (only for optional arguments). Possible values: . Default value: . Type name.

**Parameters** (Optional, only for parametric aggregate functions)

- `z` — Description. Optional (only for optional parameters). Possible values: . Default value: . Type name.

**Returned value(s)**

- Returned values list.

Type: Type name.

**Example**

The example must show usage and/or use cases. The following text contains recommended parts of an example.

Input table (Optional):

```text
```

Query:

```sql
```

Result:

```text
```

**See Also** (Optional)
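As a sketch of how the Example section might be filled in, assuming a hypothetical page for the `length` function; the query and the rendered result are illustrative only:

```sql
-- Query: a self-contained call of the documented function.
SELECT length('ClickHouse') AS len;

-- Result, rendered the way the Result block expects:
-- ┌─len─┐
-- │  10 │
-- └─────┘
```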
","renderedFileInfo":null,"tabSize":8,"topBannersInfo":{"overridingGlobalFundingFile":false,"globalPreferredFundingPath":null,"repoOwner":"ClickHouse","repoName":"ClickHouse","showInvalidCitationWarning":false,"citationHelpUrl":"https://docs.github.com/en/github/creating-cloning-and-archiving-repositories/creating-a-repository-on-github/about-citation-files","showDependabotConfigurationBanner":null,"actionsOnboardingTip":null},"truncated":false,"viewable":true,"workflowRedirectUrl":null,"symbols":{"timedOut":false,"notAnalyzed":true,"symbols":[]}},"csrf_tokens":{"/ClickHouse/ClickHouse/branches":{"post":"LBpdgwbUgHW-H_hinQl7e32v5vocOJKA0kZyCn6oG8tR5lPC1iT9pq4Vvrx0Oly9XdkeP4A3tycP_V8CNxUoCQ"}}},"title":"ClickHouse/docs/_description_templates/template-function.md at 5f18640215159ed1ad50be1efce2cb996a49fd73 · ClickHouse/ClickHouse","locale":"en"} \ No newline at end of file diff --git a/docs/_description_templates/template-server-setting.md b/docs/_description_templates/template-server-setting.md new file mode 100644 index 00000000000..fc474059f05 --- /dev/null +++ b/docs/_description_templates/template-server-setting.md @@ -0,0 +1 @@ +{"payload":{"allShortcutsEnabled":true,"fileTree":{"docs/_description_templates":{"items":[{"name":"template-data-type.md","path":"docs/_description_templates/template-data-type.md","contentType":"file"},{"name":"template-engine.md","path":"docs/_description_templates/template-engine.md","contentType":"file"},{"name":"template-function.md","path":"docs/_description_templates/template-function.md","contentType":"file"},{"name":"template-server-setting.md","path":"docs/_description_templates/template-server-setting.md","contentType":"file"},{"name":"template-setting.md","path":"docs/_description_templates/template-setting.md","contentType":"file"},{"name":"template-statement.md","path":"docs/_description_templates/template-statement.md","contentType":"file"},{"name":"template-system-table.md","path":"docs/_description_templates/template-system-table.md","contentType":"file"}],"totalCount":7},"docs":{"items":[{"name":"_description_templates","path":"docs/_description_templates","contentType":"directory"},{"name":"_includes","path":"docs/_includes","contentType":"directory"},{"name":"changelogs","path":"docs/changelogs","contentType":"directory"},{"name":"en","path":"docs/en","contentType":"directory"},{"name":"ru","path":"docs/ru","contentType":"directory"},{"name":"tools","path":"docs/tools","contentType":"directory"},{"name":"zh","path":"docs/zh","contentType":"directory"},{"name":".gitignore","path":"docs/.gitignore","contentType":"file"},{"name":"README.md","path":"docs/README.md","contentType":"file"},{"name":"clean","path":"docs/clean","contentType":"file"},{"name":"mkdocs.yml","path":"docs/mkdocs.yml","contentType":"file"},{"name":"redirects.txt","path":"docs/redirects.txt","contentType":"file"}],"totalCount":12},"":{"items":[{"name":".github","path":".github","contentType":"directory"},{"name":"base","path":"base","contentType":"directory"},{"name":"benchmark","path":"benchmark","contentType":"directory"},{"name":"cmake","path":"cmake","contentType":"directory"},{"name":"contrib","path":"contrib","contentType":"directory"},{"name":"docker","path":"docker","contentType":"directory"},{"name":"docs","path":"docs","contentType":"directory"},{"name":"packages","path":"packages","contentType":"directory"},{"name":"programs","path":"programs","contentType":"directory"},{"name":"src","path":"src","contentType":"directory"},{"name":"tests","path":"tests","contentTyp
e":"directory"},{"name":"utils","path":"utils","contentType":"directory"},{"name":"website","path":"website","contentType":"directory"},{"name":".clang-format","path":".clang-format","contentType":"file"},{"name":".clang-tidy","path":".clang-tidy","contentType":"file"},{"name":".editorconfig","path":".editorconfig","contentType":"file"},{"name":".gitattributes","path":".gitattributes","contentType":"file"},{"name":".gitignore","path":".gitignore","contentType":"file"},{"name":".gitmodules","path":".gitmodules","contentType":"file"},{"name":".pylintrc","path":".pylintrc","contentType":"file"},{"name":".vimrc","path":".vimrc","contentType":"file"},{"name":".yamllint","path":".yamllint","contentType":"file"},{"name":"AUTHORS","path":"AUTHORS","contentType":"file"},{"name":"CHANGELOG.md","path":"CHANGELOG.md","contentType":"file"},{"name":"CMakeLists.txt","path":"CMakeLists.txt","contentType":"file"},{"name":"CODE_OF_CONDUCT.md","path":"CODE_OF_CONDUCT.md","contentType":"file"},{"name":"CONTRIBUTING.md","path":"CONTRIBUTING.md","contentType":"file"},{"name":"LICENSE","path":"LICENSE","contentType":"file"},{"name":"PreLoad.cmake","path":"PreLoad.cmake","contentType":"file"},{"name":"README.md","path":"README.md","contentType":"file"},{"name":"SECURITY.md","path":"SECURITY.md","contentType":"file"},{"name":"format_sources","path":"format_sources","contentType":"file"}],"totalCount":32}},"fileTreeProcessingTime":7.7234929999999995,"foldersToFetch":[],"reducedMotionEnabled":"system","repo":{"id":60246359,"defaultBranch":"master","name":"ClickHouse","ownerLogin":"ClickHouse","currentUserCanPush":true,"isFork":false,"isEmpty":false,"createdAt":"2016-06-02T04:28:18.000-04:00","ownerAvatar":"https://avatars.githubusercontent.com/u/54801242?v=4","public":true,"private":false,"isOrgOwned":true},"refInfo":{"name":"5f18640215159ed1ad50be1efce2cb996a49fd73","listCacheKey":"v0:1688403108.0","canEdit":false,"refType":"tree","currentOid":"5f18640215159ed1ad50be1efce2cb996a49fd73"},"path":"docs/_description_templates/template-server-setting.md","currentUser":{"id":25182304,"login":"DanRoscigno","userEmail":"dan@roscigno.com"},"blob":{"rawBlob":null,"colorizedLines":null,"stylingDirectives":null,"csv":null,"csvError":null,"dependabotInfo":{"showConfigurationBanner":null,"configFilePath":null,"networkDependabotPath":"/ClickHouse/ClickHouse/network/updates","dismissConfigurationNoticePath":"/settings/dismiss-notice/dependabot_configuration_notice","configurationNoticeDismissed":false,"repoAlertsPath":"/ClickHouse/ClickHouse/security/dependabot","repoSecurityAndAnalysisPath":"/ClickHouse/ClickHouse/settings/security_analysis","repoOwnerIsOrg":true,"currentUserCanAdminRepo":false},"displayName":"template-server-setting.md","displayUrl":"https://github.com/ClickHouse/ClickHouse/blob/5f18640215159ed1ad50be1efce2cb996a49fd73/docs/_description_templates/template-server-setting.md?raw=true","headerInfo":{"blobSize":"629 Bytes","deleteInfo":{"deletePath":null,"deleteTooltip":"You must be on a branch to make or propose changes to this file"},"editInfo":{"editTooltip":"You must be on a branch to make or propose changes to this file"},"ghDesktopPath":null,"gitLfsPath":null,"onBranch":false,"shortPath":"0b37d46","siteNavLoginPath":"/login?return_to=https%3A%2F%2Fgithub.com%2FClickHouse%2FClickHouse%2Fblob%2F5f18640215159ed1ad50be1efce2cb996a49fd73%2Fdocs%2F_description_templates%2Ftemplate-server-setting.md","isCSV":false,"isRichtext":true,"toc":[{"level":2,"text":"server_setting_name 
{#server_setting_name}","anchor":"server_setting_name-server_setting_name","htmlText":"server_setting_name {#server_setting_name}"}],"lineInfo":{"truncatedLoc":"33","truncatedSloc":"20"},"mode":"file"},"image":false,"isCodeownersFile":null,"isValidLegacyIssueTemplate":false,"issueTemplateHelpUrl":"https://docs.github.com/articles/about-issue-and-pull-request-templates","issueTemplate":null,"discussionTemplate":null,"language":"Markdown","large":false,"loggedIn":true,"newDiscussionPath":"/ClickHouse/ClickHouse/discussions/new","newIssuePath":"/ClickHouse/ClickHouse/issues/new","planSupportInfo":{"repoIsFork":null,"repoOwnedByCurrentUser":null,"requestFullPath":"/ClickHouse/ClickHouse/blob/5f18640215159ed1ad50be1efce2cb996a49fd73/docs/_description_templates/template-server-setting.md","showFreeOrgGatedFeatureMessage":null,"showPlanSupportBanner":null,"upgradeDataAttributes":null,"upgradePath":null},"publishBannersInfo":{"dismissActionNoticePath":"/settings/dismiss-notice/publish_action_from_dockerfile","dismissStackNoticePath":"/settings/dismiss-notice/publish_stack_from_file","releasePath":"/ClickHouse/ClickHouse/releases/new?marketplace=true","showPublishActionBanner":false,"showPublishStackBanner":false},"renderImageOrRaw":false,"richText":"

## server_setting_name {#server_setting_name}

Description.

Describe what is configured in this section of settings.

Possible value: ...

Default value: ...

**Settings** (Optional)

If the section contains several settings, list them here. Specify possible values and default values:

- setting_1 — Description.
- setting_2 — Description.

**Example**

```xml
<server_setting_name>
    <setting_1> ... </setting_1>
    <setting_2> ... </setting_2>
</server_setting_name>
```

**Additional Info** (Optional)

The name of an additional section can be any, for example, **Usage**.

**See Also** (Optional)
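A page built from this template may also show how to inspect the effective value at runtime; a minimal sketch, assuming the `system.server_settings` table is available in the server version being documented and using `max_concurrent_queries` purely as an illustrative setting name:

```sql
-- Hypothetical check of a server setting's effective value.
SELECT name, value, changed
FROM system.server_settings
WHERE name = 'max_concurrent_queries';
```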
","renderedFileInfo":null,"tabSize":8,"topBannersInfo":{"overridingGlobalFundingFile":false,"globalPreferredFundingPath":null,"repoOwner":"ClickHouse","repoName":"ClickHouse","showInvalidCitationWarning":false,"citationHelpUrl":"https://docs.github.com/en/github/creating-cloning-and-archiving-repositories/creating-a-repository-on-github/about-citation-files","showDependabotConfigurationBanner":null,"actionsOnboardingTip":null},"truncated":false,"viewable":true,"workflowRedirectUrl":null,"symbols":{"timedOut":false,"notAnalyzed":true,"symbols":[]}},"csrf_tokens":{"/ClickHouse/ClickHouse/branches":{"post":"a6lijUe_lOiTRf_mzmSRAi4tMYD_1HdAhwqytaKk2q8WVWzMl0_pO4NPuTgnV7bEDlvJRWPbUudasZ-96xnpbQ"}}},"title":"ClickHouse/docs/_description_templates/template-server-setting.md at 5f18640215159ed1ad50be1efce2cb996a49fd73 · ClickHouse/ClickHouse","locale":"en"} \ No newline at end of file diff --git a/docs/_description_templates/template-setting.md b/docs/_description_templates/template-setting.md new file mode 100644 index 00000000000..df51a7e8241 --- /dev/null +++ b/docs/_description_templates/template-setting.md @@ -0,0 +1 @@ +{"payload":{"allShortcutsEnabled":true,"fileTree":{"docs/_description_templates":{"items":[{"name":"template-data-type.md","path":"docs/_description_templates/template-data-type.md","contentType":"file"},{"name":"template-engine.md","path":"docs/_description_templates/template-engine.md","contentType":"file"},{"name":"template-function.md","path":"docs/_description_templates/template-function.md","contentType":"file"},{"name":"template-server-setting.md","path":"docs/_description_templates/template-server-setting.md","contentType":"file"},{"name":"template-setting.md","path":"docs/_description_templates/template-setting.md","contentType":"file"},{"name":"template-statement.md","path":"docs/_description_templates/template-statement.md","contentType":"file"},{"name":"template-system-table.md","path":"docs/_description_templates/template-system-table.md","contentType":"file"}],"totalCount":7},"docs":{"items":[{"name":"_description_templates","path":"docs/_description_templates","contentType":"directory"},{"name":"_includes","path":"docs/_includes","contentType":"directory"},{"name":"changelogs","path":"docs/changelogs","contentType":"directory"},{"name":"en","path":"docs/en","contentType":"directory"},{"name":"ru","path":"docs/ru","contentType":"directory"},{"name":"tools","path":"docs/tools","contentType":"directory"},{"name":"zh","path":"docs/zh","contentType":"directory"},{"name":".gitignore","path":"docs/.gitignore","contentType":"file"},{"name":"README.md","path":"docs/README.md","contentType":"file"},{"name":"clean","path":"docs/clean","contentType":"file"},{"name":"mkdocs.yml","path":"docs/mkdocs.yml","contentType":"file"},{"name":"redirects.txt","path":"docs/redirects.txt","contentType":"file"}],"totalCount":12},"":{"items":[{"name":".github","path":".github","contentType":"directory"},{"name":"base","path":"base","contentType":"directory"},{"name":"benchmark","path":"benchmark","contentType":"directory"},{"name":"cmake","path":"cmake","contentType":"directory"},{"name":"contrib","path":"contrib","contentType":"directory"},{"name":"docker","path":"docker","contentType":"directory"},{"name":"docs","path":"docs","contentType":"directory"},{"name":"packages","path":"packages","contentType":"directory"},{"name":"programs","path":"programs","contentType":"directory"},{"name":"src","path":"src","contentType":"directory"},{"name":"tests","path":"tests","contentType":"directory"}
,{"name":"utils","path":"utils","contentType":"directory"},{"name":"website","path":"website","contentType":"directory"},{"name":".clang-format","path":".clang-format","contentType":"file"},{"name":".clang-tidy","path":".clang-tidy","contentType":"file"},{"name":".editorconfig","path":".editorconfig","contentType":"file"},{"name":".gitattributes","path":".gitattributes","contentType":"file"},{"name":".gitignore","path":".gitignore","contentType":"file"},{"name":".gitmodules","path":".gitmodules","contentType":"file"},{"name":".pylintrc","path":".pylintrc","contentType":"file"},{"name":".vimrc","path":".vimrc","contentType":"file"},{"name":".yamllint","path":".yamllint","contentType":"file"},{"name":"AUTHORS","path":"AUTHORS","contentType":"file"},{"name":"CHANGELOG.md","path":"CHANGELOG.md","contentType":"file"},{"name":"CMakeLists.txt","path":"CMakeLists.txt","contentType":"file"},{"name":"CODE_OF_CONDUCT.md","path":"CODE_OF_CONDUCT.md","contentType":"file"},{"name":"CONTRIBUTING.md","path":"CONTRIBUTING.md","contentType":"file"},{"name":"LICENSE","path":"LICENSE","contentType":"file"},{"name":"PreLoad.cmake","path":"PreLoad.cmake","contentType":"file"},{"name":"README.md","path":"README.md","contentType":"file"},{"name":"SECURITY.md","path":"SECURITY.md","contentType":"file"},{"name":"format_sources","path":"format_sources","contentType":"file"}],"totalCount":32}},"fileTreeProcessingTime":9.96412,"foldersToFetch":[],"reducedMotionEnabled":"system","repo":{"id":60246359,"defaultBranch":"master","name":"ClickHouse","ownerLogin":"ClickHouse","currentUserCanPush":true,"isFork":false,"isEmpty":false,"createdAt":"2016-06-02T04:28:18.000-04:00","ownerAvatar":"https://avatars.githubusercontent.com/u/54801242?v=4","public":true,"private":false,"isOrgOwned":true},"refInfo":{"name":"5f18640215159ed1ad50be1efce2cb996a49fd73","listCacheKey":"v0:1688403108.0","canEdit":false,"refType":"tree","currentOid":"5f18640215159ed1ad50be1efce2cb996a49fd73"},"path":"docs/_description_templates/template-setting.md","currentUser":{"id":25182304,"login":"DanRoscigno","userEmail":"dan@roscigno.com"},"blob":{"rawBlob":null,"colorizedLines":null,"stylingDirectives":null,"csv":null,"csvError":null,"dependabotInfo":{"showConfigurationBanner":null,"configFilePath":null,"networkDependabotPath":"/ClickHouse/ClickHouse/network/updates","dismissConfigurationNoticePath":"/settings/dismiss-notice/dependabot_configuration_notice","configurationNoticeDismissed":false,"repoAlertsPath":"/ClickHouse/ClickHouse/security/dependabot","repoSecurityAndAnalysisPath":"/ClickHouse/ClickHouse/settings/security_analysis","repoOwnerIsOrg":true,"currentUserCanAdminRepo":false},"displayName":"template-setting.md","displayUrl":"https://github.com/ClickHouse/ClickHouse/blob/5f18640215159ed1ad50be1efce2cb996a49fd73/docs/_description_templates/template-setting.md?raw=true","headerInfo":{"blobSize":"503 Bytes","deleteInfo":{"deletePath":null,"deleteTooltip":"You must be on a branch to make or propose changes to this file"},"editInfo":{"editTooltip":"You must be on a branch to make or propose changes to this file"},"ghDesktopPath":null,"gitLfsPath":null,"onBranch":false,"shortPath":"fc912ab","siteNavLoginPath":"/login?return_to=https%3A%2F%2Fgithub.com%2FClickHouse%2FClickHouse%2Fblob%2F5f18640215159ed1ad50be1efce2cb996a49fd73%2Fdocs%2F_description_templates%2Ftemplate-setting.md","isCSV":false,"isRichtext":true,"toc":[{"level":2,"text":"setting_name {#setting_name}","anchor":"setting_name-setting_name","htmlText":"setting_name 
{#setting_name}"}],"lineInfo":{"truncatedLoc":"27","truncatedSloc":"15"},"mode":"file"},"image":false,"isCodeownersFile":null,"isValidLegacyIssueTemplate":false,"issueTemplateHelpUrl":"https://docs.github.com/articles/about-issue-and-pull-request-templates","issueTemplate":null,"discussionTemplate":null,"language":"Markdown","large":false,"loggedIn":true,"newDiscussionPath":"/ClickHouse/ClickHouse/discussions/new","newIssuePath":"/ClickHouse/ClickHouse/issues/new","planSupportInfo":{"repoIsFork":null,"repoOwnedByCurrentUser":null,"requestFullPath":"/ClickHouse/ClickHouse/blob/5f18640215159ed1ad50be1efce2cb996a49fd73/docs/_description_templates/template-setting.md","showFreeOrgGatedFeatureMessage":null,"showPlanSupportBanner":null,"upgradeDataAttributes":null,"upgradePath":null},"publishBannersInfo":{"dismissActionNoticePath":"/settings/dismiss-notice/publish_action_from_dockerfile","dismissStackNoticePath":"/settings/dismiss-notice/publish_stack_from_file","releasePath":"/ClickHouse/ClickHouse/releases/new?marketplace=true","showPublishActionBanner":false,"showPublishStackBanner":false},"renderImageOrRaw":false,"richText":"

## setting_name {#setting_name}

Description.

For the switch setting, use the typical phrase: “Enables or disables something …”.

Possible values:

For switcher setting:

- 0 — Disabled.
- 1 — Enabled.

For another setting (typical phrases):

- Positive integer.
- 0 — Disabled or unlimited or something else.

Default value: value.

**Additional Info** (Optional)

The name of an additional section can be any, for example, **Usage**.

**See Also** (Optional)
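A short sketch of how a documented switch setting is typically demonstrated; `optimize_skip_unused_shards` is used here only as an assumed example of a 0/1 setting:

```sql
-- Inspect the current value and whether it differs from the default.
SELECT name, value, changed
FROM system.settings
WHERE name = 'optimize_skip_unused_shards';

-- Enable the setting for the current session.
SET optimize_skip_unused_shards = 1;
```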
","renderedFileInfo":null,"tabSize":8,"topBannersInfo":{"overridingGlobalFundingFile":false,"globalPreferredFundingPath":null,"repoOwner":"ClickHouse","repoName":"ClickHouse","showInvalidCitationWarning":false,"citationHelpUrl":"https://docs.github.com/en/github/creating-cloning-and-archiving-repositories/creating-a-repository-on-github/about-citation-files","showDependabotConfigurationBanner":null,"actionsOnboardingTip":null},"truncated":false,"viewable":true,"workflowRedirectUrl":null,"symbols":{"timedOut":false,"notAnalyzed":true,"symbols":[]}},"csrf_tokens":{"/ClickHouse/ClickHouse/branches":{"post":"gFv6-keA6Z1eYZATbMVq8_sVPc-l2b8pTaH03K1R_zf9p_S7l3CUTk5r1s2F9k0122PFCjnWmo6QGtnU5OzM9Q"}}},"title":"ClickHouse/docs/_description_templates/template-setting.md at 5f18640215159ed1ad50be1efce2cb996a49fd73 · ClickHouse/ClickHouse","locale":"en"} \ No newline at end of file diff --git a/docs/_description_templates/template-statement.md b/docs/_description_templates/template-statement.md new file mode 100644 index 00000000000..b5ae0d9b26b --- /dev/null +++ b/docs/_description_templates/template-statement.md @@ -0,0 +1 @@ +{"payload":{"allShortcutsEnabled":true,"fileTree":{"docs/_description_templates":{"items":[{"name":"template-data-type.md","path":"docs/_description_templates/template-data-type.md","contentType":"file"},{"name":"template-engine.md","path":"docs/_description_templates/template-engine.md","contentType":"file"},{"name":"template-function.md","path":"docs/_description_templates/template-function.md","contentType":"file"},{"name":"template-server-setting.md","path":"docs/_description_templates/template-server-setting.md","contentType":"file"},{"name":"template-setting.md","path":"docs/_description_templates/template-setting.md","contentType":"file"},{"name":"template-statement.md","path":"docs/_description_templates/template-statement.md","contentType":"file"},{"name":"template-system-table.md","path":"docs/_description_templates/template-system-table.md","contentType":"file"}],"totalCount":7},"docs":{"items":[{"name":"_description_templates","path":"docs/_description_templates","contentType":"directory"},{"name":"_includes","path":"docs/_includes","contentType":"directory"},{"name":"changelogs","path":"docs/changelogs","contentType":"directory"},{"name":"en","path":"docs/en","contentType":"directory"},{"name":"ru","path":"docs/ru","contentType":"directory"},{"name":"tools","path":"docs/tools","contentType":"directory"},{"name":"zh","path":"docs/zh","contentType":"directory"},{"name":".gitignore","path":"docs/.gitignore","contentType":"file"},{"name":"README.md","path":"docs/README.md","contentType":"file"},{"name":"clean","path":"docs/clean","contentType":"file"},{"name":"mkdocs.yml","path":"docs/mkdocs.yml","contentType":"file"},{"name":"redirects.txt","path":"docs/redirects.txt","contentType":"file"}],"totalCount":12},"":{"items":[{"name":".github","path":".github","contentType":"directory"},{"name":"base","path":"base","contentType":"directory"},{"name":"benchmark","path":"benchmark","contentType":"directory"},{"name":"cmake","path":"cmake","contentType":"directory"},{"name":"contrib","path":"contrib","contentType":"directory"},{"name":"docker","path":"docker","contentType":"directory"},{"name":"docs","path":"docs","contentType":"directory"},{"name":"packages","path":"packages","contentType":"directory"},{"name":"programs","path":"programs","contentType":"directory"},{"name":"src","path":"src","contentType":"directory"},{"name":"tests","path":"tests","contentType":"directory"},
{"name":"utils","path":"utils","contentType":"directory"},{"name":"website","path":"website","contentType":"directory"},{"name":".clang-format","path":".clang-format","contentType":"file"},{"name":".clang-tidy","path":".clang-tidy","contentType":"file"},{"name":".editorconfig","path":".editorconfig","contentType":"file"},{"name":".gitattributes","path":".gitattributes","contentType":"file"},{"name":".gitignore","path":".gitignore","contentType":"file"},{"name":".gitmodules","path":".gitmodules","contentType":"file"},{"name":".pylintrc","path":".pylintrc","contentType":"file"},{"name":".vimrc","path":".vimrc","contentType":"file"},{"name":".yamllint","path":".yamllint","contentType":"file"},{"name":"AUTHORS","path":"AUTHORS","contentType":"file"},{"name":"CHANGELOG.md","path":"CHANGELOG.md","contentType":"file"},{"name":"CMakeLists.txt","path":"CMakeLists.txt","contentType":"file"},{"name":"CODE_OF_CONDUCT.md","path":"CODE_OF_CONDUCT.md","contentType":"file"},{"name":"CONTRIBUTING.md","path":"CONTRIBUTING.md","contentType":"file"},{"name":"LICENSE","path":"LICENSE","contentType":"file"},{"name":"PreLoad.cmake","path":"PreLoad.cmake","contentType":"file"},{"name":"README.md","path":"README.md","contentType":"file"},{"name":"SECURITY.md","path":"SECURITY.md","contentType":"file"},{"name":"format_sources","path":"format_sources","contentType":"file"}],"totalCount":32}},"fileTreeProcessingTime":9.954742000000001,"foldersToFetch":[],"reducedMotionEnabled":"system","repo":{"id":60246359,"defaultBranch":"master","name":"ClickHouse","ownerLogin":"ClickHouse","currentUserCanPush":true,"isFork":false,"isEmpty":false,"createdAt":"2016-06-02T04:28:18.000-04:00","ownerAvatar":"https://avatars.githubusercontent.com/u/54801242?v=4","public":true,"private":false,"isOrgOwned":true},"refInfo":{"name":"5f18640215159ed1ad50be1efce2cb996a49fd73","listCacheKey":"v0:1688403108.0","canEdit":false,"refType":"tree","currentOid":"5f18640215159ed1ad50be1efce2cb996a49fd73"},"path":"docs/_description_templates/template-statement.md","currentUser":{"id":25182304,"login":"DanRoscigno","userEmail":"dan@roscigno.com"},"blob":{"rawBlob":null,"colorizedLines":null,"stylingDirectives":null,"csv":null,"csvError":null,"dependabotInfo":{"showConfigurationBanner":null,"configFilePath":null,"networkDependabotPath":"/ClickHouse/ClickHouse/network/updates","dismissConfigurationNoticePath":"/settings/dismiss-notice/dependabot_configuration_notice","configurationNoticeDismissed":false,"repoAlertsPath":"/ClickHouse/ClickHouse/security/dependabot","repoSecurityAndAnalysisPath":"/ClickHouse/ClickHouse/settings/security_analysis","repoOwnerIsOrg":true,"currentUserCanAdminRepo":false},"displayName":"template-statement.md","displayUrl":"https://github.com/ClickHouse/ClickHouse/blob/5f18640215159ed1ad50be1efce2cb996a49fd73/docs/_description_templates/template-statement.md?raw=true","headerInfo":{"blobSize":"575 Bytes","deleteInfo":{"deletePath":null,"deleteTooltip":"You must be on a branch to make or propose changes to this file"},"editInfo":{"editTooltip":"You must be on a branch to make or propose changes to this file"},"ghDesktopPath":null,"gitLfsPath":null,"onBranch":false,"shortPath":"238570c","siteNavLoginPath":"/login?return_to=https%3A%2F%2Fgithub.com%2FClickHouse%2FClickHouse%2Fblob%2F5f18640215159ed1ad50be1efce2cb996a49fd73%2Fdocs%2F_description_templates%2Ftemplate-statement.md","isCSV":false,"isRichtext":true,"toc":[{"level":1,"text":"Statement name (for example, SHOW USER) 
{#statement-name-in-lower-case}","anchor":"statement-name-for-example-show-user-statement-name-in-lower-case","htmlText":"Statement name (for example, SHOW USER) {#statement-name-in-lower-case}"},{"level":2,"text":"Other necessary sections of the description (Optional) {#anchor}","anchor":"other-necessary-sections-of-the-description-optional-anchor","htmlText":"Other necessary sections of the description (Optional) {#anchor}"}],"lineInfo":{"truncatedLoc":"24","truncatedSloc":"14"},"mode":"file"},"image":false,"isCodeownersFile":null,"isValidLegacyIssueTemplate":false,"issueTemplateHelpUrl":"https://docs.github.com/articles/about-issue-and-pull-request-templates","issueTemplate":null,"discussionTemplate":null,"language":"Markdown","large":false,"loggedIn":true,"newDiscussionPath":"/ClickHouse/ClickHouse/discussions/new","newIssuePath":"/ClickHouse/ClickHouse/issues/new","planSupportInfo":{"repoIsFork":null,"repoOwnedByCurrentUser":null,"requestFullPath":"/ClickHouse/ClickHouse/blob/5f18640215159ed1ad50be1efce2cb996a49fd73/docs/_description_templates/template-statement.md","showFreeOrgGatedFeatureMessage":null,"showPlanSupportBanner":null,"upgradeDataAttributes":null,"upgradePath":null},"publishBannersInfo":{"dismissActionNoticePath":"/settings/dismiss-notice/publish_action_from_dockerfile","dismissStackNoticePath":"/settings/dismiss-notice/publish_stack_from_file","releasePath":"/ClickHouse/ClickHouse/releases/new?marketplace=true","showPublishActionBanner":false,"showPublishStackBanner":false},"renderImageOrRaw":false,"richText":"

# Statement name (for example, SHOW USER) {#statement-name-in-lower-case}

Brief description of what the statement does.

**Syntax**

```sql
Syntax of the statement.
```

## Other necessary sections of the description (Optional) {#anchor}

Examples of descriptions with a complicated structure:

**See Also** (Optional)

Links to related topics as a list.
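A minimal sketch of the Syntax and example parts, assuming a hypothetical page for the `SHOW USERS` statement:

```sql
-- Syntax of the statement being documented.
SHOW USERS;

-- A short usage example could follow, e.g. inspecting one user's definition.
SHOW CREATE USER CURRENT_USER;
```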
","renderedFileInfo":null,"tabSize":8,"topBannersInfo":{"overridingGlobalFundingFile":false,"globalPreferredFundingPath":null,"repoOwner":"ClickHouse","repoName":"ClickHouse","showInvalidCitationWarning":false,"citationHelpUrl":"https://docs.github.com/en/github/creating-cloning-and-archiving-repositories/creating-a-repository-on-github/about-citation-files","showDependabotConfigurationBanner":null,"actionsOnboardingTip":null},"truncated":false,"viewable":true,"workflowRedirectUrl":null,"symbols":{"timedOut":false,"notAnalyzed":true,"symbols":[]}},"csrf_tokens":{"/ClickHouse/ClickHouse/branches":{"post":"xohFK8TmjwjnIXPER5IKigBaxPXoFkjI0tOrZrMZen-7dEtqFBby2_crNRquoS1MICw8MHQZbW8PaIZu-qRJvQ"}}},"title":"ClickHouse/docs/_description_templates/template-statement.md at 5f18640215159ed1ad50be1efce2cb996a49fd73 · ClickHouse/ClickHouse","locale":"en"} \ No newline at end of file diff --git a/docs/_description_templates/template-system-table.md b/docs/_description_templates/template-system-table.md new file mode 100644 index 00000000000..02d622a52cf --- /dev/null +++ b/docs/_description_templates/template-system-table.md @@ -0,0 +1 @@ +{"payload":{"allShortcutsEnabled":true,"fileTree":{"docs/_description_templates":{"items":[{"name":"template-data-type.md","path":"docs/_description_templates/template-data-type.md","contentType":"file"},{"name":"template-engine.md","path":"docs/_description_templates/template-engine.md","contentType":"file"},{"name":"template-function.md","path":"docs/_description_templates/template-function.md","contentType":"file"},{"name":"template-server-setting.md","path":"docs/_description_templates/template-server-setting.md","contentType":"file"},{"name":"template-setting.md","path":"docs/_description_templates/template-setting.md","contentType":"file"},{"name":"template-statement.md","path":"docs/_description_templates/template-statement.md","contentType":"file"},{"name":"template-system-table.md","path":"docs/_description_templates/template-system-table.md","contentType":"file"}],"totalCount":7},"docs":{"items":[{"name":"_description_templates","path":"docs/_description_templates","contentType":"directory"},{"name":"_includes","path":"docs/_includes","contentType":"directory"},{"name":"changelogs","path":"docs/changelogs","contentType":"directory"},{"name":"en","path":"docs/en","contentType":"directory"},{"name":"ru","path":"docs/ru","contentType":"directory"},{"name":"tools","path":"docs/tools","contentType":"directory"},{"name":"zh","path":"docs/zh","contentType":"directory"},{"name":".gitignore","path":"docs/.gitignore","contentType":"file"},{"name":"README.md","path":"docs/README.md","contentType":"file"},{"name":"clean","path":"docs/clean","contentType":"file"},{"name":"mkdocs.yml","path":"docs/mkdocs.yml","contentType":"file"},{"name":"redirects.txt","path":"docs/redirects.txt","contentType":"file"}],"totalCount":12},"":{"items":[{"name":".github","path":".github","contentType":"directory"},{"name":"base","path":"base","contentType":"directory"},{"name":"benchmark","path":"benchmark","contentType":"directory"},{"name":"cmake","path":"cmake","contentType":"directory"},{"name":"contrib","path":"contrib","contentType":"directory"},{"name":"docker","path":"docker","contentType":"directory"},{"name":"docs","path":"docs","contentType":"directory"},{"name":"packages","path":"packages","contentType":"directory"},{"name":"programs","path":"programs","contentType":"directory"},{"name":"src","path":"src","contentType":"directory"},{"name":"tests","path":"tests","contentType":"d
irectory"},{"name":"utils","path":"utils","contentType":"directory"},{"name":"website","path":"website","contentType":"directory"},{"name":".clang-format","path":".clang-format","contentType":"file"},{"name":".clang-tidy","path":".clang-tidy","contentType":"file"},{"name":".editorconfig","path":".editorconfig","contentType":"file"},{"name":".gitattributes","path":".gitattributes","contentType":"file"},{"name":".gitignore","path":".gitignore","contentType":"file"},{"name":".gitmodules","path":".gitmodules","contentType":"file"},{"name":".pylintrc","path":".pylintrc","contentType":"file"},{"name":".vimrc","path":".vimrc","contentType":"file"},{"name":".yamllint","path":".yamllint","contentType":"file"},{"name":"AUTHORS","path":"AUTHORS","contentType":"file"},{"name":"CHANGELOG.md","path":"CHANGELOG.md","contentType":"file"},{"name":"CMakeLists.txt","path":"CMakeLists.txt","contentType":"file"},{"name":"CODE_OF_CONDUCT.md","path":"CODE_OF_CONDUCT.md","contentType":"file"},{"name":"CONTRIBUTING.md","path":"CONTRIBUTING.md","contentType":"file"},{"name":"LICENSE","path":"LICENSE","contentType":"file"},{"name":"PreLoad.cmake","path":"PreLoad.cmake","contentType":"file"},{"name":"README.md","path":"README.md","contentType":"file"},{"name":"SECURITY.md","path":"SECURITY.md","contentType":"file"},{"name":"format_sources","path":"format_sources","contentType":"file"}],"totalCount":32}},"fileTreeProcessingTime":8.697185,"foldersToFetch":[],"reducedMotionEnabled":"system","repo":{"id":60246359,"defaultBranch":"master","name":"ClickHouse","ownerLogin":"ClickHouse","currentUserCanPush":true,"isFork":false,"isEmpty":false,"createdAt":"2016-06-02T04:28:18.000-04:00","ownerAvatar":"https://avatars.githubusercontent.com/u/54801242?v=4","public":true,"private":false,"isOrgOwned":true},"refInfo":{"name":"5f18640215159ed1ad50be1efce2cb996a49fd73","listCacheKey":"v0:1688403108.0","canEdit":false,"refType":"tree","currentOid":"5f18640215159ed1ad50be1efce2cb996a49fd73"},"path":"docs/_description_templates/template-system-table.md","currentUser":{"id":25182304,"login":"DanRoscigno","userEmail":"dan@roscigno.com"},"blob":{"rawBlob":null,"colorizedLines":null,"stylingDirectives":null,"csv":null,"csvError":null,"dependabotInfo":{"showConfigurationBanner":null,"configFilePath":null,"networkDependabotPath":"/ClickHouse/ClickHouse/network/updates","dismissConfigurationNoticePath":"/settings/dismiss-notice/dependabot_configuration_notice","configurationNoticeDismissed":false,"repoAlertsPath":"/ClickHouse/ClickHouse/security/dependabot","repoSecurityAndAnalysisPath":"/ClickHouse/ClickHouse/settings/security_analysis","repoOwnerIsOrg":true,"currentUserCanAdminRepo":false},"displayName":"template-system-table.md","displayUrl":"https://github.com/ClickHouse/ClickHouse/blob/5f18640215159ed1ad50be1efce2cb996a49fd73/docs/_description_templates/template-system-table.md?raw=true","headerInfo":{"blobSize":"381 Bytes","deleteInfo":{"deletePath":null,"deleteTooltip":"You must be on a branch to make or propose changes to this file"},"editInfo":{"editTooltip":"You must be on a branch to make or propose changes to this file"},"ghDesktopPath":null,"gitLfsPath":null,"onBranch":false,"shortPath":"f2decc4","siteNavLoginPath":"/login?return_to=https%3A%2F%2Fgithub.com%2FClickHouse%2FClickHouse%2Fblob%2F5f18640215159ed1ad50be1efce2cb996a49fd73%2Fdocs%2F_description_templates%2Ftemplate-system-table.md","isCSV":false,"isRichtext":true,"toc":[{"level":1,"text":"system.table_name 
{#system-tables_table-name}","anchor":"systemtable_name-system-tables_table-name","htmlText":"system.table_name {#system-tables_table-name}"}],"lineInfo":{"truncatedLoc":"25","truncatedSloc":"15"},"mode":"file"},"image":false,"isCodeownersFile":null,"isValidLegacyIssueTemplate":false,"issueTemplateHelpUrl":"https://docs.github.com/articles/about-issue-and-pull-request-templates","issueTemplate":null,"discussionTemplate":null,"language":"Markdown","large":false,"loggedIn":true,"newDiscussionPath":"/ClickHouse/ClickHouse/discussions/new","newIssuePath":"/ClickHouse/ClickHouse/issues/new","planSupportInfo":{"repoIsFork":null,"repoOwnedByCurrentUser":null,"requestFullPath":"/ClickHouse/ClickHouse/blob/5f18640215159ed1ad50be1efce2cb996a49fd73/docs/_description_templates/template-system-table.md","showFreeOrgGatedFeatureMessage":null,"showPlanSupportBanner":null,"upgradeDataAttributes":null,"upgradePath":null},"publishBannersInfo":{"dismissActionNoticePath":"/settings/dismiss-notice/publish_action_from_dockerfile","dismissStackNoticePath":"/settings/dismiss-notice/publish_stack_from_file","releasePath":"/ClickHouse/ClickHouse/releases/new?marketplace=true","showPublishActionBanner":false,"showPublishStackBanner":false},"renderImageOrRaw":false,"richText":"

# system.table_name {#system-tables_table-name}

Description.

Columns:

**Example**

Query:

```sql
SELECT * FROM system.table_name
```

Result:

```text
Some output. It shouldn't be too long.
```

**See Also**

- Article name — Some words about referenced information.
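A sketch of a filled-in example, assuming `system.tables` as the documented table; the `WHERE` and `LIMIT` clauses keep the output short, as the Result block above recommends:

```sql
-- Hypothetical example query for a system-table page.
SELECT database, name, engine
FROM system.tables
WHERE database = 'system'
LIMIT 2;
```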
","renderedFileInfo":null,"tabSize":8,"topBannersInfo":{"overridingGlobalFundingFile":false,"globalPreferredFundingPath":null,"repoOwner":"ClickHouse","repoName":"ClickHouse","showInvalidCitationWarning":false,"citationHelpUrl":"https://docs.github.com/en/github/creating-cloning-and-archiving-repositories/creating-a-repository-on-github/about-citation-files","showDependabotConfigurationBanner":null,"actionsOnboardingTip":null},"truncated":false,"viewable":true,"workflowRedirectUrl":null,"symbols":{"timedOut":false,"notAnalyzed":true,"symbols":[]}},"csrf_tokens":{"/ClickHouse/ClickHouse/branches":{"post":"9K5xHq3WERnrVDYaCTfS2yzVWybDSHIH0WqtxpxdRGaJUn9ffSZsyvtecMTgBPUdDKOj419HV6AM0YDO1eB3pA"}}},"title":"ClickHouse/docs/_description_templates/template-system-table.md at 5f18640215159ed1ad50be1efce2cb996a49fd73 · ClickHouse/ClickHouse","locale":"en"} \ No newline at end of file From 584b46c5ca358d077db30777a0050cf690166a08 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Fri, 30 Jun 2023 14:38:59 +0200 Subject: [PATCH 1132/1997] Add documentation for building in docker --- docs/en/development/build.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/docs/en/development/build.md b/docs/en/development/build.md index 83a4550df88..ab47ee8aac5 100644 --- a/docs/en/development/build.md +++ b/docs/en/development/build.md @@ -13,6 +13,20 @@ Supported platforms: - AArch64 - Power9 (experimental) +## Building in docker +We use the docker image `clickhouse/binary-builder` for our CI builds. It contains everything necessary to build the binary and packages. There is a script `docker/packager/packager` to ease the image usage: + +```bash +# define a directory for the output artifacts +output_dir="build_results" +# a simplest build +./docker/packager/packager --package-type=binary --output-dir "$output_dir" +# build debian packages +./docker/packager/packager --package-type=deb --output-dir "$output_dir" +# by default, debian packages use thin LTO, so we can override it to speed up the build +CMAKE_FLAGS='-DENABLE_THINLTO=' ./docker/packager/packager --package-type=deb --output-dir "$output_dir" +``` + ## Building on Ubuntu The following tutorial is based on Ubuntu Linux. From c715ee5cef3646e9ce71a26c46d72fb91ff3d551 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Mon, 3 Jul 2023 17:28:48 +0200 Subject: [PATCH 1133/1997] Replace `--build-type=debug` by `--debug-build` --- docker/packager/README.md | 2 +- docker/packager/packager | 16 ++++++++-------- tests/ci/build_check.py | 6 +++--- tests/ci/build_report_check.py | 4 ++-- tests/ci/ci_config.py | 34 +++++++++++++++++----------------- tests/ci/report.py | 6 +++--- 6 files changed, 34 insertions(+), 34 deletions(-) diff --git a/docker/packager/README.md b/docker/packager/README.md index a78feb8d7fc..3a91f9a63f0 100644 --- a/docker/packager/README.md +++ b/docker/packager/README.md @@ -6,7 +6,7 @@ Usage: Build deb package with `clang-14` in `debug` mode: ``` $ mkdir deb/test_output -$ ./packager --output-dir deb/test_output/ --package-type deb --compiler=clang-14 --build-type=debug +$ ./packager --output-dir deb/test_output/ --package-type deb --compiler=clang-14 --debug-build $ ls -l deb/test_output -rw-r--r-- 1 root root 3730 clickhouse-client_22.2.2+debug_all.deb -rw-r--r-- 1 root root 84221888 clickhouse-common-static_22.2.2+debug_amd64.deb diff --git a/docker/packager/packager b/docker/packager/packager index 1b3df858cd2..3c3304165b3 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -112,12 +112,12 @@ def run_docker_image_with_env( subprocess.check_call(cmd, shell=True) -def is_release_build(build_type: str, package_type: str, sanitizer: str) -> bool: - return build_type == "" and package_type == "deb" and sanitizer == "" +def is_release_build(debug_build: bool, package_type: str, sanitizer: str) -> bool: + return not debug_build and package_type == "deb" and sanitizer == "" def parse_env_variables( - build_type: str, + debug_build: bool, compiler: str, sanitizer: str, package_type: str, @@ -233,7 +233,7 @@ def parse_env_variables( build_target = ( f"{build_target} clickhouse-odbc-bridge clickhouse-library-bridge" ) - if is_release_build(build_type, package_type, sanitizer): + if is_release_build(debug_build, package_type, sanitizer): cmake_flags.append("-DSPLIT_DEBUG_SYMBOLS=ON") result.append("WITH_PERFORMANCE=1") if is_cross_arm: @@ -253,8 +253,8 @@ def parse_env_variables( if sanitizer: result.append(f"SANITIZER={sanitizer}") - if build_type: - result.append(f"BUILD_TYPE={build_type.capitalize()}") + if debug_build: + result.append("BUILD_TYPE=DEBUG") else: result.append("BUILD_TYPE=None") @@ -359,7 +359,7 @@ def parse_args() -> argparse.Namespace: help="ClickHouse git repository", ) parser.add_argument("--output-dir", type=dir_name, required=True) - parser.add_argument("--build-type", choices=("debug", ""), default="") + parser.add_argument("--debug-build", action="store_true") parser.add_argument( "--compiler", @@ -464,7 +464,7 @@ def main(): build_image(image_with_version, dockerfile) env_prepared = parse_env_variables( - args.build_type, + args.debug_build, args.compiler, args.sanitizer, args.package_type, diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py index 35b98a7c3bb..2a636faf967 100644 --- a/tests/ci/build_check.py +++ b/tests/ci/build_check.py @@ -45,7 +45,7 @@ def _can_export_binaries(build_config: BuildConfig) -> bool: return False if build_config["sanitizer"] != "": return True - if build_config["build_type"] != "": + if build_config["debug_build"]: return True return False @@ -66,8 +66,8 @@ def get_packager_cmd( f"--package-type={package_type} --compiler={comp}" ) - if build_config["build_type"]: - cmd += f" --build-type={build_config['build_type']}" + if build_config["debug_build"]: + cmd += " --debug-build" if 
build_config["sanitizer"]: cmd += f" --sanitizer={build_config['sanitizer']}" if build_config["tidy"] == "enable": diff --git a/tests/ci/build_report_check.py b/tests/ci/build_report_check.py index 1362f3c8934..295b6cf9740 100644 --- a/tests/ci/build_report_check.py +++ b/tests/ci/build_report_check.py @@ -70,7 +70,7 @@ def get_failed_report( message = f"{job_name} failed" build_result = BuildResult( compiler="unknown", - build_type="unknown", + debug_build=False, sanitizer="unknown", status=message, elapsed_seconds=0, @@ -85,7 +85,7 @@ def process_report( build_config = build_report["build_config"] build_result = BuildResult( compiler=build_config["compiler"], - build_type=build_config["build_type"], + debug_build=build_config["debug_build"], sanitizer=build_config["sanitizer"], status="success" if build_report["status"] else "failure", elapsed_seconds=build_report["elapsed_seconds"], diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index c680b5810fc..875c5a3c8bd 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -10,7 +10,7 @@ CI_CONFIG = { "build_config": { "package_release": { "compiler": "clang-16", - "build_type": "", + "debug_build": False, "sanitizer": "", "package_type": "deb", "static_binary_name": "amd64", @@ -21,7 +21,7 @@ CI_CONFIG = { }, "coverity": { "compiler": "clang-16", - "build_type": "", + "debug_build": False, "sanitizer": "", "package_type": "coverity", "tidy": "disable", @@ -31,7 +31,7 @@ CI_CONFIG = { }, "package_aarch64": { "compiler": "clang-16-aarch64", - "build_type": "", + "debug_build": False, "sanitizer": "", "package_type": "deb", "static_binary_name": "aarch64", @@ -42,7 +42,7 @@ CI_CONFIG = { }, "package_asan": { "compiler": "clang-16", - "build_type": "", + "debug_build": False, "sanitizer": "address", "package_type": "deb", "tidy": "disable", @@ -51,7 +51,7 @@ CI_CONFIG = { }, "package_ubsan": { "compiler": "clang-16", - "build_type": "", + "debug_build": False, "sanitizer": "undefined", "package_type": "deb", "tidy": "disable", @@ -60,7 +60,7 @@ CI_CONFIG = { }, "package_tsan": { "compiler": "clang-16", - "build_type": "", + "debug_build": False, "sanitizer": "thread", "package_type": "deb", "tidy": "disable", @@ -69,7 +69,7 @@ CI_CONFIG = { }, "package_msan": { "compiler": "clang-16", - "build_type": "", + "debug_build": False, "sanitizer": "memory", "package_type": "deb", "tidy": "disable", @@ -78,7 +78,7 @@ CI_CONFIG = { }, "package_debug": { "compiler": "clang-16", - "build_type": "debug", + "debug_build": True, "sanitizer": "", "package_type": "deb", "tidy": "disable", @@ -87,7 +87,7 @@ CI_CONFIG = { }, "binary_release": { "compiler": "clang-16", - "build_type": "", + "debug_build": False, "sanitizer": "", "package_type": "binary", "tidy": "disable", @@ -96,7 +96,7 @@ CI_CONFIG = { }, "binary_tidy": { "compiler": "clang-16", - "build_type": "debug", + "debug_build": True, "sanitizer": "", "package_type": "binary", "static_binary_name": "debug-amd64", @@ -106,7 +106,7 @@ CI_CONFIG = { }, "binary_darwin": { "compiler": "clang-16-darwin", - "build_type": "", + "debug_build": False, "sanitizer": "", "package_type": "binary", "static_binary_name": "macos", @@ -116,7 +116,7 @@ CI_CONFIG = { }, "binary_aarch64": { "compiler": "clang-16-aarch64", - "build_type": "", + "debug_build": False, "sanitizer": "", "package_type": "binary", "tidy": "disable", @@ -125,7 +125,7 @@ CI_CONFIG = { }, "binary_aarch64_v80compat": { "compiler": "clang-16-aarch64-v80compat", - "build_type": "", + "debug_build": False, "sanitizer": "", 
"package_type": "binary", "static_binary_name": "aarch64v80compat", @@ -135,7 +135,7 @@ CI_CONFIG = { }, "binary_freebsd": { "compiler": "clang-16-freebsd", - "build_type": "", + "debug_build": False, "sanitizer": "", "package_type": "binary", "static_binary_name": "freebsd", @@ -145,7 +145,7 @@ CI_CONFIG = { }, "binary_darwin_aarch64": { "compiler": "clang-16-darwin-aarch64", - "build_type": "", + "debug_build": False, "sanitizer": "", "package_type": "binary", "static_binary_name": "macos-aarch64", @@ -155,7 +155,7 @@ CI_CONFIG = { }, "binary_ppc64le": { "compiler": "clang-16-ppc64le", - "build_type": "", + "debug_build": False, "sanitizer": "", "package_type": "binary", "static_binary_name": "powerpc64le", @@ -165,7 +165,7 @@ CI_CONFIG = { }, "binary_amd64_compat": { "compiler": "clang-16-amd64-compat", - "build_type": "", + "debug_build": False, "sanitizer": "", "package_type": "binary", "static_binary_name": "amd64compat", diff --git a/tests/ci/report.py b/tests/ci/report.py index a9014acec12..0f84fbcaeb2 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -239,7 +239,7 @@ def read_test_results(results_path: Path, with_raw_logs: bool = True) -> TestRes @dataclass class BuildResult: compiler: str - build_type: str + debug_build: bool sanitizer: str status: str elapsed_seconds: int @@ -484,8 +484,8 @@ def create_build_html_report( ): row = "" row += f"{build_result.compiler}" - if build_result.build_type: - row += f"{build_result.build_type}" + if build_result.debug_build: + row += "debug" else: row += "relwithdebuginfo" if build_result.sanitizer: From 5b85e1ce8aa6c79ac8cb74be492ed06060ec2e73 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Mon, 3 Jul 2023 18:09:08 +0000 Subject: [PATCH 1134/1997] Special build check fix --- src/Functions/initcap.cpp | 2 ++ src/Functions/initcapUTF8.cpp | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Functions/initcap.cpp b/src/Functions/initcap.cpp index 7d0749ecb12..5460ee06792 100644 --- a/src/Functions/initcap.cpp +++ b/src/Functions/initcap.cpp @@ -14,6 +14,8 @@ struct InitcapImpl ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) { + if (data.empty()) + return; res_data.resize(data.size()); res_offsets.assign(offsets); array(data.data(), data.data() + data.size(), res_data.data()); diff --git a/src/Functions/initcapUTF8.cpp b/src/Functions/initcapUTF8.cpp index 333ebe266d3..076dcff6622 100644 --- a/src/Functions/initcapUTF8.cpp +++ b/src/Functions/initcapUTF8.cpp @@ -81,7 +81,7 @@ private: static void array(const UInt8 * src, const UInt8 * src_end, const ColumnString::Offsets & offsets, UInt8 * dst) { - auto offset_it = offsets.begin(); + const auto * offset_it = offsets.begin(); const UInt8 * begin = src; /// handle remaining symbols, row by row (to avoid influence of bad UTF8 symbols from one row, to another) From 4191e3eb952bdcc136da29213f8a74d895a12ee6 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Mon, 3 Jul 2023 14:15:20 -0400 Subject: [PATCH 1135/1997] add missing doc templates back --- .../template-data-type.md | 30 ++++++++- .../_description_templates/template-engine.md | 64 ++++++++++++++++++- .../template-function.md | 52 ++++++++++++++- .../template-server-setting.md | 34 +++++++++- .../template-setting.md | 28 +++++++- .../template-statement.md | 25 +++++++- .../template-system-table.md | 26 +++++++- 7 files changed, 252 insertions(+), 7 deletions(-) diff --git a/docs/_description_templates/template-data-type.md b/docs/_description_templates/template-data-type.md index 
From 5b85e1ce8aa6c79ac8cb74be492ed06060ec2e73 Mon Sep 17 00:00:00 2001
From: Dmitry Kardymon
Date: Mon, 3 Jul 2023 18:09:08 +0000
Subject: [PATCH 1134/1997] Special build check fix

---
 src/Functions/initcap.cpp     | 2 ++
 src/Functions/initcapUTF8.cpp | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/Functions/initcap.cpp b/src/Functions/initcap.cpp
index 7d0749ecb12..5460ee06792 100644
--- a/src/Functions/initcap.cpp
+++ b/src/Functions/initcap.cpp
@@ -14,6 +14,8 @@ struct InitcapImpl
         ColumnString::Chars & res_data,
         ColumnString::Offsets & res_offsets)
     {
+        if (data.empty())
+            return;
         res_data.resize(data.size());
         res_offsets.assign(offsets);
         array(data.data(), data.data() + data.size(), res_data.data());
diff --git a/src/Functions/initcapUTF8.cpp b/src/Functions/initcapUTF8.cpp
index 333ebe266d3..076dcff6622 100644
--- a/src/Functions/initcapUTF8.cpp
+++ b/src/Functions/initcapUTF8.cpp
@@ -81,7 +81,7 @@ private:
 
     static void array(const UInt8 * src, const UInt8 * src_end, const ColumnString::Offsets & offsets, UInt8 * dst)
     {
-        auto offset_it = offsets.begin();
+        const auto * offset_it = offsets.begin();
         const UInt8 * begin = src;
 
         /// handle remaining symbols, row by row (to avoid influence of bad UTF8 symbols from one row, to another)
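For context, `initcap` upper-cases the first letter of each word and lower-cases the rest; the added guard only matters for blocks that carry no character data, where the old code would call `array()` over empty buffers. A small illustration (hedged: behavior as described for these functions, not re-verified against this exact revision):

```sql
-- word-initial letters upper-cased, the rest lower-cased
SELECT initcap('building CLICKHOUSE packages');  -- 'Building Clickhouse Packages'

-- a zero-row block arrives with empty data/offsets buffers;
-- the new early return makes this case a no-op instead of touching empty memory
SELECT initcap(s) FROM (SELECT 'x' AS s) WHERE 0;
```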
","renderedFileInfo":null,"tabSize":8,"topBannersInfo":{"overridingGlobalFundingFile":false,"globalPreferredFundingPath":null,"repoOwner":"ClickHouse","repoName":"ClickHouse","showInvalidCitationWarning":false,"citationHelpUrl":"https://docs.github.com/en/github/creating-cloning-and-archiving-repositories/creating-a-repository-on-github/about-citation-files","showDependabotConfigurationBanner":null,"actionsOnboardingTip":null},"truncated":false,"viewable":true,"workflowRedirectUrl":null,"symbols":{"timedOut":false,"notAnalyzed":true,"symbols":[]}},"csrf_tokens":{"/ClickHouse/ClickHouse/branches":{"post":"N-EHr0Rgr9I38z0iGu5sqtyb4o4AwtOW1kjllryDXwFKHQnulJDSASf5e_zz3Uts_O0aS5zN9jEL88ie9T5sww"}}},"title":"ClickHouse/docs/_description_templates/template-data-type.md at 5f18640215159ed1ad50be1efce2cb996a49fd73 · ClickHouse/ClickHouse","locale":"en"} \ No newline at end of file +--- +toc_priority: +toc_title: +--- + +# data_type_name {#data_type-name} + +Description. + +**Parameters** (Optional) + +- `x` — Description. [Type name](relative/path/to/type/dscr.md#type). +- `y` — Description. [Type name](relative/path/to/type/dscr.md#type). + +**Examples** + +```sql + +``` + +## Additional Info {#additional-info} (Optional) + +The name of an additional section can be any, for example, **Usage**. + +**See Also** (Optional) + +- [link](#) + +[Original article](https://clickhouse.com/docs/en/data-types//) diff --git a/docs/_description_templates/template-engine.md b/docs/_description_templates/template-engine.md index c5bb5feb85d..392bc59ed33 100644 --- a/docs/_description_templates/template-engine.md +++ b/docs/_description_templates/template-engine.md @@ -1 +1,63 @@ -{"payload":{"allShortcutsEnabled":true,"fileTree":{"docs/_description_templates":{"items":[{"name":"template-data-type.md","path":"docs/_description_templates/template-data-type.md","contentType":"file"},{"name":"template-engine.md","path":"docs/_description_templates/template-engine.md","contentType":"file"},{"name":"template-function.md","path":"docs/_description_templates/template-function.md","contentType":"file"},{"name":"template-server-setting.md","path":"docs/_description_templates/template-server-setting.md","contentType":"file"},{"name":"template-setting.md","path":"docs/_description_templates/template-setting.md","contentType":"file"},{"name":"template-statement.md","path":"docs/_description_templates/template-statement.md","contentType":"file"},{"name":"template-system-table.md","path":"docs/_description_templates/template-system-table.md","contentType":"file"}],"totalCount":7},"docs":{"items":[{"name":"_description_templates","path":"docs/_description_templates","contentType":"directory"},{"name":"_includes","path":"docs/_includes","contentType":"directory"},{"name":"changelogs","path":"docs/changelogs","contentType":"directory"},{"name":"en","path":"docs/en","contentType":"directory"},{"name":"ru","path":"docs/ru","contentType":"directory"},{"name":"tools","path":"docs/tools","contentType":"directory"},{"name":"zh","path":"docs/zh","contentType":"directory"},{"name":".gitignore","path":"docs/.gitignore","contentType":"file"},{"name":"README.md","path":"docs/README.md","contentType":"file"},{"name":"clean","path":"docs/clean","contentType":"file"},{"name":"mkdocs.yml","path":"docs/mkdocs.yml","contentType":"file"},{"name":"redirects.txt","path":"docs/redirects.txt","contentType":"file"}],"totalCount":12},"":{"items":[{"name":".github","path":".github","contentType":"directory"},{"name":"base","path":"base","contentType":"dire
ctory"},{"name":"benchmark","path":"benchmark","contentType":"directory"},{"name":"cmake","path":"cmake","contentType":"directory"},{"name":"contrib","path":"contrib","contentType":"directory"},{"name":"docker","path":"docker","contentType":"directory"},{"name":"docs","path":"docs","contentType":"directory"},{"name":"packages","path":"packages","contentType":"directory"},{"name":"programs","path":"programs","contentType":"directory"},{"name":"src","path":"src","contentType":"directory"},{"name":"tests","path":"tests","contentType":"directory"},{"name":"utils","path":"utils","contentType":"directory"},{"name":"website","path":"website","contentType":"directory"},{"name":".clang-format","path":".clang-format","contentType":"file"},{"name":".clang-tidy","path":".clang-tidy","contentType":"file"},{"name":".editorconfig","path":".editorconfig","contentType":"file"},{"name":".gitattributes","path":".gitattributes","contentType":"file"},{"name":".gitignore","path":".gitignore","contentType":"file"},{"name":".gitmodules","path":".gitmodules","contentType":"file"},{"name":".pylintrc","path":".pylintrc","contentType":"file"},{"name":".vimrc","path":".vimrc","contentType":"file"},{"name":".yamllint","path":".yamllint","contentType":"file"},{"name":"AUTHORS","path":"AUTHORS","contentType":"file"},{"name":"CHANGELOG.md","path":"CHANGELOG.md","contentType":"file"},{"name":"CMakeLists.txt","path":"CMakeLists.txt","contentType":"file"},{"name":"CODE_OF_CONDUCT.md","path":"CODE_OF_CONDUCT.md","contentType":"file"},{"name":"CONTRIBUTING.md","path":"CONTRIBUTING.md","contentType":"file"},{"name":"LICENSE","path":"LICENSE","contentType":"file"},{"name":"PreLoad.cmake","path":"PreLoad.cmake","contentType":"file"},{"name":"README.md","path":"README.md","contentType":"file"},{"name":"SECURITY.md","path":"SECURITY.md","contentType":"file"},{"name":"format_sources","path":"format_sources","contentType":"file"}],"totalCount":32}},"fileTreeProcessingTime":10.334819,"foldersToFetch":[],"reducedMotionEnabled":"system","repo":{"id":60246359,"defaultBranch":"master","name":"ClickHouse","ownerLogin":"ClickHouse","currentUserCanPush":true,"isFork":false,"isEmpty":false,"createdAt":"2016-06-02T04:28:18.000-04:00","ownerAvatar":"https://avatars.githubusercontent.com/u/54801242?v=4","public":true,"private":false,"isOrgOwned":true},"refInfo":{"name":"5f18640215159ed1ad50be1efce2cb996a49fd73","listCacheKey":"v0:1688403108.0","canEdit":false,"refType":"tree","currentOid":"5f18640215159ed1ad50be1efce2cb996a49fd73"},"path":"docs/_description_templates/template-engine.md","currentUser":{"id":25182304,"login":"DanRoscigno","userEmail":"dan@roscigno.com"},"blob":{"rawBlob":null,"colorizedLines":null,"stylingDirectives":null,"csv":null,"csvError":null,"dependabotInfo":{"showConfigurationBanner":null,"configFilePath":null,"networkDependabotPath":"/ClickHouse/ClickHouse/network/updates","dismissConfigurationNoticePath":"/settings/dismiss-notice/dependabot_configuration_notice","configurationNoticeDismissed":false,"repoAlertsPath":"/ClickHouse/ClickHouse/security/dependabot","repoSecurityAndAnalysisPath":"/ClickHouse/ClickHouse/settings/security_analysis","repoOwnerIsOrg":true,"currentUserCanAdminRepo":false},"displayName":"template-engine.md","displayUrl":"https://github.com/ClickHouse/ClickHouse/blob/5f18640215159ed1ad50be1efce2cb996a49fd73/docs/_description_templates/template-engine.md?raw=true","headerInfo":{"blobSize":"1.19 KB","deleteInfo":{"deletePath":null,"deleteTooltip":"You must be on a branch to make or propose changes to 
this file"},"editInfo":{"editTooltip":"You must be on a branch to make or propose changes to this file"},"ghDesktopPath":null,"gitLfsPath":null,"onBranch":false,"shortPath":"392bc59","siteNavLoginPath":"/login?return_to=https%3A%2F%2Fgithub.com%2FClickHouse%2FClickHouse%2Fblob%2F5f18640215159ed1ad50be1efce2cb996a49fd73%2Fdocs%2F_description_templates%2Ftemplate-engine.md","isCSV":false,"isRichtext":true,"toc":[{"level":1,"text":"EngineName {#enginename}","anchor":"enginename-enginename","htmlText":"EngineName {#enginename}"},{"level":2,"text":"Creating a Database {#creating-a-database}","anchor":"creating-a-database-creating-a-database","htmlText":"Creating a Database {#creating-a-database}"},{"level":2,"text":"Creating a Table {#creating-a-table}","anchor":"creating-a-table-creating-a-table","htmlText":"Creating a Table {#creating-a-table}"},{"level":2,"text":"Virtual columns {#virtual-columns} (for Table engines only)","anchor":"virtual-columns-virtual-columns-for-table-engines-only","htmlText":"Virtual columns {#virtual-columns} (for Table engines only)"},{"level":2,"text":"Data Types Support {#data_types-support} (for Database engines only)","anchor":"data-types-support-data_types-support-for-database-engines-only","htmlText":"Data Types Support {#data_types-support} (for Database engines only)"},{"level":2,"text":"Specifics and recommendations {#specifics-and-recommendations}","anchor":"specifics-and-recommendations-specifics-and-recommendations","htmlText":"Specifics and recommendations {#specifics-and-recommendations}"},{"level":2,"text":"Usage Example {#usage-example}","anchor":"usage-example-usage-example","htmlText":"Usage Example {#usage-example}"}],"lineInfo":{"truncatedLoc":"63","truncatedSloc":"40"},"mode":"file"},"image":false,"isCodeownersFile":null,"isValidLegacyIssueTemplate":false,"issueTemplateHelpUrl":"https://docs.github.com/articles/about-issue-and-pull-request-templates","issueTemplate":null,"discussionTemplate":null,"language":"Markdown","large":false,"loggedIn":true,"newDiscussionPath":"/ClickHouse/ClickHouse/discussions/new","newIssuePath":"/ClickHouse/ClickHouse/issues/new","planSupportInfo":{"repoIsFork":null,"repoOwnedByCurrentUser":null,"requestFullPath":"/ClickHouse/ClickHouse/blob/5f18640215159ed1ad50be1efce2cb996a49fd73/docs/_description_templates/template-engine.md","showFreeOrgGatedFeatureMessage":null,"showPlanSupportBanner":null,"upgradeDataAttributes":null,"upgradePath":null},"publishBannersInfo":{"dismissActionNoticePath":"/settings/dismiss-notice/publish_action_from_dockerfile","dismissStackNoticePath":"/settings/dismiss-notice/publish_stack_from_file","releasePath":"/ClickHouse/ClickHouse/releases/new?marketplace=true","showPublishActionBanner":false,"showPublishStackBanner":false},"renderImageOrRaw":false,"richText":"

EngineName {#enginename}

\n
    \n
  • What the Database/Table engine does.
  • \n
  • Relations with other engines if they exist.
  • \n
\n

Creating a Database {#creating-a-database}

\n
    CREATE DATABASE ...
\n

or

\n

Creating a Table {#creating-a-table}

\n
    CREATE TABLE ...
\n

Engine Parameters

\n

Query Clauses (for Table engines only)

\n

Virtual columns {#virtual-columns} (for Table engines only)

\n

List and virtual columns with description, if they exist.

\n

Data Types Support {#data_types-support} (for Database engines only)

\n\n\n\n\n\n\n\n\n\n\n\n\n\n
EngineNameClickHouse
NativeDataTypeNameClickHouseDataTypeName
\n

Specifics and recommendations {#specifics-and-recommendations}

\n

Algorithms\nSpecifics of read and write processes\nExamples of tasks\nRecommendations for usage\nSpecifics of data storage

\n

Usage Example {#usage-example}

\n

The example must show usage and use cases. The following text contains the recommended parts of this section.

\n

Input table:

\n
\n

Query:

\n
\n

Result:

\n
\n

Follow up with any text to clarify the example.

\n

See Also

\n\n
","renderedFileInfo":null,"tabSize":8,"topBannersInfo":{"overridingGlobalFundingFile":false,"globalPreferredFundingPath":null,"repoOwner":"ClickHouse","repoName":"ClickHouse","showInvalidCitationWarning":false,"citationHelpUrl":"https://docs.github.com/en/github/creating-cloning-and-archiving-repositories/creating-a-repository-on-github/about-citation-files","showDependabotConfigurationBanner":null,"actionsOnboardingTip":null},"truncated":false,"viewable":true,"workflowRedirectUrl":null,"symbols":{"timedOut":false,"notAnalyzed":true,"symbols":[]}},"csrf_tokens":{"/ClickHouse/ClickHouse/branches":{"post":"mbr-hnliBQCB5MoHGpQf6OTZtY7x7nHuM4vvq5uev5jkRvDHqZJ405HujNnzpzguxK9NS23hVEnuMMKj0iOMWg"}}},"title":"ClickHouse/docs/_description_templates/template-engine.md at 5f18640215159ed1ad50be1efce2cb996a49fd73 · ClickHouse/ClickHouse","locale":"en"} \ No newline at end of file +# EngineName {#enginename} + +- What the Database/Table engine does. +- Relations with other engines if they exist. + +## Creating a Database {#creating-a-database} +``` sql + CREATE DATABASE ... +``` +or + +## Creating a Table {#creating-a-table} +``` sql + CREATE TABLE ... +``` + +**Engine Parameters** + +**Query Clauses** (for Table engines only) + +## Virtual columns {#virtual-columns} (for Table engines only) + +List and virtual columns with description, if they exist. + +## Data Types Support {#data_types-support} (for Database engines only) + +| EngineName | ClickHouse | +|-----------------------|------------------------------------| +| NativeDataTypeName | [ClickHouseDataTypeName](link#) | + + +## Specifics and recommendations {#specifics-and-recommendations} + +Algorithms +Specifics of read and write processes +Examples of tasks +Recommendations for usage +Specifics of data storage + +## Usage Example {#usage-example} + +The example must show usage and use cases. The following text contains the recommended parts of this section. + +Input table: + +``` text +``` + +Query: + +``` sql +``` + +Result: + +``` text +``` + +Follow up with any text to clarify the example. 
+ +**See Also** + +- [link](#) diff --git a/docs/_description_templates/template-function.md b/docs/_description_templates/template-function.md index 0891e5d872d..6bdc764c449 100644 --- a/docs/_description_templates/template-function.md +++ b/docs/_description_templates/template-function.md @@ -1 +1,51 @@ -{"payload":{"allShortcutsEnabled":true,"fileTree":{"docs/_description_templates":{"items":[{"name":"template-data-type.md","path":"docs/_description_templates/template-data-type.md","contentType":"file"},{"name":"template-engine.md","path":"docs/_description_templates/template-engine.md","contentType":"file"},{"name":"template-function.md","path":"docs/_description_templates/template-function.md","contentType":"file"},{"name":"template-server-setting.md","path":"docs/_description_templates/template-server-setting.md","contentType":"file"},{"name":"template-setting.md","path":"docs/_description_templates/template-setting.md","contentType":"file"},{"name":"template-statement.md","path":"docs/_description_templates/template-statement.md","contentType":"file"},{"name":"template-system-table.md","path":"docs/_description_templates/template-system-table.md","contentType":"file"}],"totalCount":7},"docs":{"items":[{"name":"_description_templates","path":"docs/_description_templates","contentType":"directory"},{"name":"_includes","path":"docs/_includes","contentType":"directory"},{"name":"changelogs","path":"docs/changelogs","contentType":"directory"},{"name":"en","path":"docs/en","contentType":"directory"},{"name":"ru","path":"docs/ru","contentType":"directory"},{"name":"tools","path":"docs/tools","contentType":"directory"},{"name":"zh","path":"docs/zh","contentType":"directory"},{"name":".gitignore","path":"docs/.gitignore","contentType":"file"},{"name":"README.md","path":"docs/README.md","contentType":"file"},{"name":"clean","path":"docs/clean","contentType":"file"},{"name":"mkdocs.yml","path":"docs/mkdocs.yml","contentType":"file"},{"name":"redirects.txt","path":"docs/redirects.txt","contentType":"file"}],"totalCount":12},"":{"items":[{"name":".github","path":".github","contentType":"directory"},{"name":"base","path":"base","contentType":"directory"},{"name":"benchmark","path":"benchmark","contentType":"directory"},{"name":"cmake","path":"cmake","contentType":"directory"},{"name":"contrib","path":"contrib","contentType":"directory"},{"name":"docker","path":"docker","contentType":"directory"},{"name":"docs","path":"docs","contentType":"directory"},{"name":"packages","path":"packages","contentType":"directory"},{"name":"programs","path":"programs","contentType":"directory"},{"name":"src","path":"src","contentType":"directory"},{"name":"tests","path":"tests","contentType":"directory"},{"name":"utils","path":"utils","contentType":"directory"},{"name":"website","path":"website","contentType":"directory"},{"name":".clang-format","path":".clang-format","contentType":"file"},{"name":".clang-tidy","path":".clang-tidy","contentType":"file"},{"name":".editorconfig","path":".editorconfig","contentType":"file"},{"name":".gitattributes","path":".gitattributes","contentType":"file"},{"name":".gitignore","path":".gitignore","contentType":"file"},{"name":".gitmodules","path":".gitmodules","contentType":"file"},{"name":".pylintrc","path":".pylintrc","contentType":"file"},{"name":".vimrc","path":".vimrc","contentType":"file"},{"name":".yamllint","path":".yamllint","contentType":"file"},{"name":"AUTHORS","path":"AUTHORS","contentType":"file"},{"name":"CHANGELOG.md","path":"CHANGELOG.md","contentType":"file"},{"na
me":"CMakeLists.txt","path":"CMakeLists.txt","contentType":"file"},{"name":"CODE_OF_CONDUCT.md","path":"CODE_OF_CONDUCT.md","contentType":"file"},{"name":"CONTRIBUTING.md","path":"CONTRIBUTING.md","contentType":"file"},{"name":"LICENSE","path":"LICENSE","contentType":"file"},{"name":"PreLoad.cmake","path":"PreLoad.cmake","contentType":"file"},{"name":"README.md","path":"README.md","contentType":"file"},{"name":"SECURITY.md","path":"SECURITY.md","contentType":"file"},{"name":"format_sources","path":"format_sources","contentType":"file"}],"totalCount":32}},"fileTreeProcessingTime":6.875799,"foldersToFetch":[],"reducedMotionEnabled":"system","repo":{"id":60246359,"defaultBranch":"master","name":"ClickHouse","ownerLogin":"ClickHouse","currentUserCanPush":true,"isFork":false,"isEmpty":false,"createdAt":"2016-06-02T04:28:18.000-04:00","ownerAvatar":"https://avatars.githubusercontent.com/u/54801242?v=4","public":true,"private":false,"isOrgOwned":true},"refInfo":{"name":"5f18640215159ed1ad50be1efce2cb996a49fd73","listCacheKey":"v0:1688403108.0","canEdit":false,"refType":"tree","currentOid":"5f18640215159ed1ad50be1efce2cb996a49fd73"},"path":"docs/_description_templates/template-function.md","currentUser":{"id":25182304,"login":"DanRoscigno","userEmail":"dan@roscigno.com"},"blob":{"rawBlob":null,"colorizedLines":null,"stylingDirectives":null,"csv":null,"csvError":null,"dependabotInfo":{"showConfigurationBanner":null,"configFilePath":null,"networkDependabotPath":"/ClickHouse/ClickHouse/network/updates","dismissConfigurationNoticePath":"/settings/dismiss-notice/dependabot_configuration_notice","configurationNoticeDismissed":false,"repoAlertsPath":"/ClickHouse/ClickHouse/security/dependabot","repoSecurityAndAnalysisPath":"/ClickHouse/ClickHouse/settings/security_analysis","repoOwnerIsOrg":true,"currentUserCanAdminRepo":false},"displayName":"template-function.md","displayUrl":"https://github.com/ClickHouse/ClickHouse/blob/5f18640215159ed1ad50be1efce2cb996a49fd73/docs/_description_templates/template-function.md?raw=true","headerInfo":{"blobSize":"1.12 KB","deleteInfo":{"deletePath":null,"deleteTooltip":"You must be on a branch to make or propose changes to this file"},"editInfo":{"editTooltip":"You must be on a branch to make or propose changes to this file"},"ghDesktopPath":null,"gitLfsPath":null,"onBranch":false,"shortPath":"6bdc764","siteNavLoginPath":"/login?return_to=https%3A%2F%2Fgithub.com%2FClickHouse%2FClickHouse%2Fblob%2F5f18640215159ed1ad50be1efce2cb996a49fd73%2Fdocs%2F_description_templates%2Ftemplate-function.md","isCSV":false,"isRichtext":true,"toc":[{"level":2,"text":"functionName {#functionname-in-lower-case}","anchor":"functionname-functionname-in-lower-case","htmlText":"functionName 
{#functionname-in-lower-case}"}],"lineInfo":{"truncatedLoc":"51","truncatedSloc":"29"},"mode":"file"},"image":false,"isCodeownersFile":null,"isValidLegacyIssueTemplate":false,"issueTemplateHelpUrl":"https://docs.github.com/articles/about-issue-and-pull-request-templates","issueTemplate":null,"discussionTemplate":null,"language":"Markdown","large":false,"loggedIn":true,"newDiscussionPath":"/ClickHouse/ClickHouse/discussions/new","newIssuePath":"/ClickHouse/ClickHouse/issues/new","planSupportInfo":{"repoIsFork":null,"repoOwnedByCurrentUser":null,"requestFullPath":"/ClickHouse/ClickHouse/blob/5f18640215159ed1ad50be1efce2cb996a49fd73/docs/_description_templates/template-function.md","showFreeOrgGatedFeatureMessage":null,"showPlanSupportBanner":null,"upgradeDataAttributes":null,"upgradePath":null},"publishBannersInfo":{"dismissActionNoticePath":"/settings/dismiss-notice/publish_action_from_dockerfile","dismissStackNoticePath":"/settings/dismiss-notice/publish_stack_from_file","releasePath":"/ClickHouse/ClickHouse/releases/new?marketplace=true","showPublishActionBanner":false,"showPublishStackBanner":false},"renderImageOrRaw":false,"richText":"

functionName {#functionname-in-lower-case}

\n

Short description.

\n

Syntax (without SELECT)

\n
<function syntax>
\n

Alias: <alias name>. (Optional)

\n

More text (Optional).

\n

Arguments (Optional)

\n
    \n
  • x — Description. Optional (only for optional arguments). Possible values: . Default value: . Type name.
  • \n
  • y — Description. Optional (only for optional arguments). Possible values: .Default value: . Type name.
  • \n
\n

Parameters (Optional, only for parametric aggregate functions)

\n
    \n
  • z — Description. Optional (only for optional parameters). Possible values: . Default value: . Type name.
  • \n
\n

Returned value(s)

\n
    \n
  • Returned values list.
  • \n
\n

Type: Type name.

\n

Example

\n

The example must show usage and/or a use cases. The following text contains recommended parts of an example.

\n

Input table (Optional):

\n
\n

Query:

\n
\n

Result:

\n
\n

See Also (Optional)

\n\n
","renderedFileInfo":null,"tabSize":8,"topBannersInfo":{"overridingGlobalFundingFile":false,"globalPreferredFundingPath":null,"repoOwner":"ClickHouse","repoName":"ClickHouse","showInvalidCitationWarning":false,"citationHelpUrl":"https://docs.github.com/en/github/creating-cloning-and-archiving-repositories/creating-a-repository-on-github/about-citation-files","showDependabotConfigurationBanner":null,"actionsOnboardingTip":null},"truncated":false,"viewable":true,"workflowRedirectUrl":null,"symbols":{"timedOut":false,"notAnalyzed":true,"symbols":[]}},"csrf_tokens":{"/ClickHouse/ClickHouse/branches":{"post":"LBpdgwbUgHW-H_hinQl7e32v5vocOJKA0kZyCn6oG8tR5lPC1iT9pq4Vvrx0Oly9XdkeP4A3tycP_V8CNxUoCQ"}}},"title":"ClickHouse/docs/_description_templates/template-function.md at 5f18640215159ed1ad50be1efce2cb996a49fd73 · ClickHouse/ClickHouse","locale":"en"} \ No newline at end of file +## functionName {#functionname-in-lower-case} + +Short description. + +**Syntax** (without SELECT) + +``` sql + +``` + +Alias: ``. (Optional) + +More text (Optional). + +**Arguments** (Optional) + +- `x` — Description. Optional (only for optional arguments). Possible values: . Default value: . [Type name](relative/path/to/type/dscr.md#type). +- `y` — Description. Optional (only for optional arguments). Possible values: .Default value: . [Type name](relative/path/to/type/dscr.md#type). + +**Parameters** (Optional, only for parametric aggregate functions) + +- `z` — Description. Optional (only for optional parameters). Possible values: . Default value: . [Type name](relative/path/to/type/dscr.md#type). + +**Returned value(s)** + +- Returned values list. + +Type: [Type name](relative/path/to/type/dscr.md#type). + +**Example** + +The example must show usage and/or a use cases. The following text contains recommended parts of an example. 
+ +Input table (Optional): + +``` text +``` + +Query: + +``` sql +``` + +Result: + +``` text +``` + +**See Also** (Optional) + +- [link](#) diff --git a/docs/_description_templates/template-server-setting.md b/docs/_description_templates/template-server-setting.md index fc474059f05..0b37d46cf41 100644 --- a/docs/_description_templates/template-server-setting.md +++ b/docs/_description_templates/template-server-setting.md @@ -1 +1,33 @@ -{"payload":{"allShortcutsEnabled":true,"fileTree":{"docs/_description_templates":{"items":[{"name":"template-data-type.md","path":"docs/_description_templates/template-data-type.md","contentType":"file"},{"name":"template-engine.md","path":"docs/_description_templates/template-engine.md","contentType":"file"},{"name":"template-function.md","path":"docs/_description_templates/template-function.md","contentType":"file"},{"name":"template-server-setting.md","path":"docs/_description_templates/template-server-setting.md","contentType":"file"},{"name":"template-setting.md","path":"docs/_description_templates/template-setting.md","contentType":"file"},{"name":"template-statement.md","path":"docs/_description_templates/template-statement.md","contentType":"file"},{"name":"template-system-table.md","path":"docs/_description_templates/template-system-table.md","contentType":"file"}],"totalCount":7},"docs":{"items":[{"name":"_description_templates","path":"docs/_description_templates","contentType":"directory"},{"name":"_includes","path":"docs/_includes","contentType":"directory"},{"name":"changelogs","path":"docs/changelogs","contentType":"directory"},{"name":"en","path":"docs/en","contentType":"directory"},{"name":"ru","path":"docs/ru","contentType":"directory"},{"name":"tools","path":"docs/tools","contentType":"directory"},{"name":"zh","path":"docs/zh","contentType":"directory"},{"name":".gitignore","path":"docs/.gitignore","contentType":"file"},{"name":"README.md","path":"docs/README.md","contentType":"file"},{"name":"clean","path":"docs/clean","contentType":"file"},{"name":"mkdocs.yml","path":"docs/mkdocs.yml","contentType":"file"},{"name":"redirects.txt","path":"docs/redirects.txt","contentType":"file"}],"totalCount":12},"":{"items":[{"name":".github","path":".github","contentType":"directory"},{"name":"base","path":"base","contentType":"directory"},{"name":"benchmark","path":"benchmark","contentType":"directory"},{"name":"cmake","path":"cmake","contentType":"directory"},{"name":"contrib","path":"contrib","contentType":"directory"},{"name":"docker","path":"docker","contentType":"directory"},{"name":"docs","path":"docs","contentType":"directory"},{"name":"packages","path":"packages","contentType":"directory"},{"name":"programs","path":"programs","contentType":"directory"},{"name":"src","path":"src","contentType":"directory"},{"name":"tests","path":"tests","contentType":"directory"},{"name":"utils","path":"utils","contentType":"directory"},{"name":"website","path":"website","contentType":"directory"},{"name":".clang-format","path":".clang-format","contentType":"file"},{"name":".clang-tidy","path":".clang-tidy","contentType":"file"},{"name":".editorconfig","path":".editorconfig","contentType":"file"},{"name":".gitattributes","path":".gitattributes","contentType":"file"},{"name":".gitignore","path":".gitignore","contentType":"file"},{"name":".gitmodules","path":".gitmodules","contentType":"file"},{"name":".pylintrc","path":".pylintrc","contentType":"file"},{"name":".vimrc","path":".vimrc","contentType":"file"},{"name":".yamllint","path":".yamllint","contentType":"fi
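And a hypothetical instantiation of the function template, to show the intended final shape; the function, its argument, and the link targets are invented placeholders:

```markdown
## myLength {#mylength-in-lower-case}

Returns the length of the input (placeholder description).

**Syntax**

myLength(x)

**Arguments**

- `x` — The input string. [Type name](relative/path/to/type/dscr.md#type).

**Returned value(s)**

- The number of bytes in `x`.

Type: [Type name](relative/path/to/type/dscr.md#type).
```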
le"},{"name":"AUTHORS","path":"AUTHORS","contentType":"file"},{"name":"CHANGELOG.md","path":"CHANGELOG.md","contentType":"file"},{"name":"CMakeLists.txt","path":"CMakeLists.txt","contentType":"file"},{"name":"CODE_OF_CONDUCT.md","path":"CODE_OF_CONDUCT.md","contentType":"file"},{"name":"CONTRIBUTING.md","path":"CONTRIBUTING.md","contentType":"file"},{"name":"LICENSE","path":"LICENSE","contentType":"file"},{"name":"PreLoad.cmake","path":"PreLoad.cmake","contentType":"file"},{"name":"README.md","path":"README.md","contentType":"file"},{"name":"SECURITY.md","path":"SECURITY.md","contentType":"file"},{"name":"format_sources","path":"format_sources","contentType":"file"}],"totalCount":32}},"fileTreeProcessingTime":7.7234929999999995,"foldersToFetch":[],"reducedMotionEnabled":"system","repo":{"id":60246359,"defaultBranch":"master","name":"ClickHouse","ownerLogin":"ClickHouse","currentUserCanPush":true,"isFork":false,"isEmpty":false,"createdAt":"2016-06-02T04:28:18.000-04:00","ownerAvatar":"https://avatars.githubusercontent.com/u/54801242?v=4","public":true,"private":false,"isOrgOwned":true},"refInfo":{"name":"5f18640215159ed1ad50be1efce2cb996a49fd73","listCacheKey":"v0:1688403108.0","canEdit":false,"refType":"tree","currentOid":"5f18640215159ed1ad50be1efce2cb996a49fd73"},"path":"docs/_description_templates/template-server-setting.md","currentUser":{"id":25182304,"login":"DanRoscigno","userEmail":"dan@roscigno.com"},"blob":{"rawBlob":null,"colorizedLines":null,"stylingDirectives":null,"csv":null,"csvError":null,"dependabotInfo":{"showConfigurationBanner":null,"configFilePath":null,"networkDependabotPath":"/ClickHouse/ClickHouse/network/updates","dismissConfigurationNoticePath":"/settings/dismiss-notice/dependabot_configuration_notice","configurationNoticeDismissed":false,"repoAlertsPath":"/ClickHouse/ClickHouse/security/dependabot","repoSecurityAndAnalysisPath":"/ClickHouse/ClickHouse/settings/security_analysis","repoOwnerIsOrg":true,"currentUserCanAdminRepo":false},"displayName":"template-server-setting.md","displayUrl":"https://github.com/ClickHouse/ClickHouse/blob/5f18640215159ed1ad50be1efce2cb996a49fd73/docs/_description_templates/template-server-setting.md?raw=true","headerInfo":{"blobSize":"629 Bytes","deleteInfo":{"deletePath":null,"deleteTooltip":"You must be on a branch to make or propose changes to this file"},"editInfo":{"editTooltip":"You must be on a branch to make or propose changes to this file"},"ghDesktopPath":null,"gitLfsPath":null,"onBranch":false,"shortPath":"0b37d46","siteNavLoginPath":"/login?return_to=https%3A%2F%2Fgithub.com%2FClickHouse%2FClickHouse%2Fblob%2F5f18640215159ed1ad50be1efce2cb996a49fd73%2Fdocs%2F_description_templates%2Ftemplate-server-setting.md","isCSV":false,"isRichtext":true,"toc":[{"level":2,"text":"server_setting_name {#server_setting_name}","anchor":"server_setting_name-server_setting_name","htmlText":"server_setting_name 
{#server_setting_name}"}],"lineInfo":{"truncatedLoc":"33","truncatedSloc":"20"},"mode":"file"},"image":false,"isCodeownersFile":null,"isValidLegacyIssueTemplate":false,"issueTemplateHelpUrl":"https://docs.github.com/articles/about-issue-and-pull-request-templates","issueTemplate":null,"discussionTemplate":null,"language":"Markdown","large":false,"loggedIn":true,"newDiscussionPath":"/ClickHouse/ClickHouse/discussions/new","newIssuePath":"/ClickHouse/ClickHouse/issues/new","planSupportInfo":{"repoIsFork":null,"repoOwnedByCurrentUser":null,"requestFullPath":"/ClickHouse/ClickHouse/blob/5f18640215159ed1ad50be1efce2cb996a49fd73/docs/_description_templates/template-server-setting.md","showFreeOrgGatedFeatureMessage":null,"showPlanSupportBanner":null,"upgradeDataAttributes":null,"upgradePath":null},"publishBannersInfo":{"dismissActionNoticePath":"/settings/dismiss-notice/publish_action_from_dockerfile","dismissStackNoticePath":"/settings/dismiss-notice/publish_stack_from_file","releasePath":"/ClickHouse/ClickHouse/releases/new?marketplace=true","showPublishActionBanner":false,"showPublishStackBanner":false},"renderImageOrRaw":false,"richText":"

server_setting_name {#server_setting_name}

\n

Description.

\n

Describe what is configured in this section of settings.

\n

Possible value: ...

\n

Default value: ...

\n

Settings (Optional)

\n

If the section contains several settings, list them here. Specify possible values and default values:

\n
    \n
  • setting_1 — Description.
  • \n
  • setting_2 — Description.
  • \n
\n

Example

\n
<server_setting_name>\n    <setting_1> ... </setting_1>\n    <setting_2> ... </setting_2>\n</server_setting_name>
\n

Additional Info (Optional)

\n

The name of an additional section can be any, for example, Usage.

\n

See Also (Optional)

\n\n
","renderedFileInfo":null,"tabSize":8,"topBannersInfo":{"overridingGlobalFundingFile":false,"globalPreferredFundingPath":null,"repoOwner":"ClickHouse","repoName":"ClickHouse","showInvalidCitationWarning":false,"citationHelpUrl":"https://docs.github.com/en/github/creating-cloning-and-archiving-repositories/creating-a-repository-on-github/about-citation-files","showDependabotConfigurationBanner":null,"actionsOnboardingTip":null},"truncated":false,"viewable":true,"workflowRedirectUrl":null,"symbols":{"timedOut":false,"notAnalyzed":true,"symbols":[]}},"csrf_tokens":{"/ClickHouse/ClickHouse/branches":{"post":"a6lijUe_lOiTRf_mzmSRAi4tMYD_1HdAhwqytaKk2q8WVWzMl0_pO4NPuTgnV7bEDlvJRWPbUudasZ-96xnpbQ"}}},"title":"ClickHouse/docs/_description_templates/template-server-setting.md at 5f18640215159ed1ad50be1efce2cb996a49fd73 · ClickHouse/ClickHouse","locale":"en"} \ No newline at end of file +## server_setting_name {#server_setting_name} + +Description. + +Describe what is configured in this section of settings. + +Possible value: ... + +Default value: ... + +**Settings** (Optional) + +If the section contains several settings, list them here. Specify possible values and default values: + +- setting_1 — Description. +- setting_2 — Description. + +**Example** + +```xml + + ... + ... + +``` + +**Additional Info** (Optional) + +The name of an additional section can be any, for example, **Usage**. + +**See Also** (Optional) + +- [link](#) diff --git a/docs/_description_templates/template-setting.md b/docs/_description_templates/template-setting.md index df51a7e8241..fc912aba3e1 100644 --- a/docs/_description_templates/template-setting.md +++ b/docs/_description_templates/template-setting.md @@ -1 +1,27 @@ -{"payload":{"allShortcutsEnabled":true,"fileTree":{"docs/_description_templates":{"items":[{"name":"template-data-type.md","path":"docs/_description_templates/template-data-type.md","contentType":"file"},{"name":"template-engine.md","path":"docs/_description_templates/template-engine.md","contentType":"file"},{"name":"template-function.md","path":"docs/_description_templates/template-function.md","contentType":"file"},{"name":"template-server-setting.md","path":"docs/_description_templates/template-server-setting.md","contentType":"file"},{"name":"template-setting.md","path":"docs/_description_templates/template-setting.md","contentType":"file"},{"name":"template-statement.md","path":"docs/_description_templates/template-statement.md","contentType":"file"},{"name":"template-system-table.md","path":"docs/_description_templates/template-system-table.md","contentType":"file"}],"totalCount":7},"docs":{"items":[{"name":"_description_templates","path":"docs/_description_templates","contentType":"directory"},{"name":"_includes","path":"docs/_includes","contentType":"directory"},{"name":"changelogs","path":"docs/changelogs","contentType":"directory"},{"name":"en","path":"docs/en","contentType":"directory"},{"name":"ru","path":"docs/ru","contentType":"directory"},{"name":"tools","path":"docs/tools","contentType":"directory"},{"name":"zh","path":"docs/zh","contentType":"directory"},{"name":".gitignore","path":"docs/.gitignore","contentType":"file"},{"name":"README.md","path":"docs/README.md","contentType":"file"},{"name":"clean","path":"docs/clean","contentType":"file"},{"name":"mkdocs.yml","path":"docs/mkdocs.yml","contentType":"file"},{"name":"redirects.txt","path":"docs/redirects.txt","contentType":"file"}],"totalCount":12},"":{"items":[{"name":".github","path":".github","contentType":"directory"},{"name":"base","p
ath":"base","contentType":"directory"},{"name":"benchmark","path":"benchmark","contentType":"directory"},{"name":"cmake","path":"cmake","contentType":"directory"},{"name":"contrib","path":"contrib","contentType":"directory"},{"name":"docker","path":"docker","contentType":"directory"},{"name":"docs","path":"docs","contentType":"directory"},{"name":"packages","path":"packages","contentType":"directory"},{"name":"programs","path":"programs","contentType":"directory"},{"name":"src","path":"src","contentType":"directory"},{"name":"tests","path":"tests","contentType":"directory"},{"name":"utils","path":"utils","contentType":"directory"},{"name":"website","path":"website","contentType":"directory"},{"name":".clang-format","path":".clang-format","contentType":"file"},{"name":".clang-tidy","path":".clang-tidy","contentType":"file"},{"name":".editorconfig","path":".editorconfig","contentType":"file"},{"name":".gitattributes","path":".gitattributes","contentType":"file"},{"name":".gitignore","path":".gitignore","contentType":"file"},{"name":".gitmodules","path":".gitmodules","contentType":"file"},{"name":".pylintrc","path":".pylintrc","contentType":"file"},{"name":".vimrc","path":".vimrc","contentType":"file"},{"name":".yamllint","path":".yamllint","contentType":"file"},{"name":"AUTHORS","path":"AUTHORS","contentType":"file"},{"name":"CHANGELOG.md","path":"CHANGELOG.md","contentType":"file"},{"name":"CMakeLists.txt","path":"CMakeLists.txt","contentType":"file"},{"name":"CODE_OF_CONDUCT.md","path":"CODE_OF_CONDUCT.md","contentType":"file"},{"name":"CONTRIBUTING.md","path":"CONTRIBUTING.md","contentType":"file"},{"name":"LICENSE","path":"LICENSE","contentType":"file"},{"name":"PreLoad.cmake","path":"PreLoad.cmake","contentType":"file"},{"name":"README.md","path":"README.md","contentType":"file"},{"name":"SECURITY.md","path":"SECURITY.md","contentType":"file"},{"name":"format_sources","path":"format_sources","contentType":"file"}],"totalCount":32}},"fileTreeProcessingTime":9.96412,"foldersToFetch":[],"reducedMotionEnabled":"system","repo":{"id":60246359,"defaultBranch":"master","name":"ClickHouse","ownerLogin":"ClickHouse","currentUserCanPush":true,"isFork":false,"isEmpty":false,"createdAt":"2016-06-02T04:28:18.000-04:00","ownerAvatar":"https://avatars.githubusercontent.com/u/54801242?v=4","public":true,"private":false,"isOrgOwned":true},"refInfo":{"name":"5f18640215159ed1ad50be1efce2cb996a49fd73","listCacheKey":"v0:1688403108.0","canEdit":false,"refType":"tree","currentOid":"5f18640215159ed1ad50be1efce2cb996a49fd73"},"path":"docs/_description_templates/template-setting.md","currentUser":{"id":25182304,"login":"DanRoscigno","userEmail":"dan@roscigno.com"},"blob":{"rawBlob":null,"colorizedLines":null,"stylingDirectives":null,"csv":null,"csvError":null,"dependabotInfo":{"showConfigurationBanner":null,"configFilePath":null,"networkDependabotPath":"/ClickHouse/ClickHouse/network/updates","dismissConfigurationNoticePath":"/settings/dismiss-notice/dependabot_configuration_notice","configurationNoticeDismissed":false,"repoAlertsPath":"/ClickHouse/ClickHouse/security/dependabot","repoSecurityAndAnalysisPath":"/ClickHouse/ClickHouse/settings/security_analysis","repoOwnerIsOrg":true,"currentUserCanAdminRepo":false},"displayName":"template-setting.md","displayUrl":"https://github.com/ClickHouse/ClickHouse/blob/5f18640215159ed1ad50be1efce2cb996a49fd73/docs/_description_templates/template-setting.md?raw=true","headerInfo":{"blobSize":"503 Bytes","deleteInfo":{"deletePath":null,"deleteTooltip":"You must be on a 
branch to make or propose changes to this file"},"editInfo":{"editTooltip":"You must be on a branch to make or propose changes to this file"},"ghDesktopPath":null,"gitLfsPath":null,"onBranch":false,"shortPath":"fc912ab","siteNavLoginPath":"/login?return_to=https%3A%2F%2Fgithub.com%2FClickHouse%2FClickHouse%2Fblob%2F5f18640215159ed1ad50be1efce2cb996a49fd73%2Fdocs%2F_description_templates%2Ftemplate-setting.md","isCSV":false,"isRichtext":true,"toc":[{"level":2,"text":"setting_name {#setting_name}","anchor":"setting_name-setting_name","htmlText":"setting_name {#setting_name}"}],"lineInfo":{"truncatedLoc":"27","truncatedSloc":"15"},"mode":"file"},"image":false,"isCodeownersFile":null,"isValidLegacyIssueTemplate":false,"issueTemplateHelpUrl":"https://docs.github.com/articles/about-issue-and-pull-request-templates","issueTemplate":null,"discussionTemplate":null,"language":"Markdown","large":false,"loggedIn":true,"newDiscussionPath":"/ClickHouse/ClickHouse/discussions/new","newIssuePath":"/ClickHouse/ClickHouse/issues/new","planSupportInfo":{"repoIsFork":null,"repoOwnedByCurrentUser":null,"requestFullPath":"/ClickHouse/ClickHouse/blob/5f18640215159ed1ad50be1efce2cb996a49fd73/docs/_description_templates/template-setting.md","showFreeOrgGatedFeatureMessage":null,"showPlanSupportBanner":null,"upgradeDataAttributes":null,"upgradePath":null},"publishBannersInfo":{"dismissActionNoticePath":"/settings/dismiss-notice/publish_action_from_dockerfile","dismissStackNoticePath":"/settings/dismiss-notice/publish_stack_from_file","releasePath":"/ClickHouse/ClickHouse/releases/new?marketplace=true","showPublishActionBanner":false,"showPublishStackBanner":false},"renderImageOrRaw":false,"richText":"

setting_name {#setting_name}

\n

Description.

\n

For the switch setting, use the typical phrase: “Enables or disables something …”.

\n

Possible values:

\n

For switcher setting:

\n
    \n
  • 0 — Disabled.
  • \n
  • 1 — Enabled.
  • \n
\n

For another setting (typical phrases):

\n
    \n
  • Positive integer.
  • \n
  • 0 — Disabled or unlimited or something else.
  • \n
\n

Default value: value.

\n

Additional Info (Optional)

\n

The name of an additional section can be any, for example, Usage.

\n

See Also (Optional)

\n\n
","renderedFileInfo":null,"tabSize":8,"topBannersInfo":{"overridingGlobalFundingFile":false,"globalPreferredFundingPath":null,"repoOwner":"ClickHouse","repoName":"ClickHouse","showInvalidCitationWarning":false,"citationHelpUrl":"https://docs.github.com/en/github/creating-cloning-and-archiving-repositories/creating-a-repository-on-github/about-citation-files","showDependabotConfigurationBanner":null,"actionsOnboardingTip":null},"truncated":false,"viewable":true,"workflowRedirectUrl":null,"symbols":{"timedOut":false,"notAnalyzed":true,"symbols":[]}},"csrf_tokens":{"/ClickHouse/ClickHouse/branches":{"post":"gFv6-keA6Z1eYZATbMVq8_sVPc-l2b8pTaH03K1R_zf9p_S7l3CUTk5r1s2F9k0122PFCjnWmo6QGtnU5OzM9Q"}}},"title":"ClickHouse/docs/_description_templates/template-setting.md at 5f18640215159ed1ad50be1efce2cb996a49fd73 · ClickHouse/ClickHouse","locale":"en"} \ No newline at end of file +## setting_name {#setting_name} + +Description. + +For the switch setting, use the typical phrase: “Enables or disables something …”. + +Possible values: + +*For switcher setting:* + +- 0 — Disabled. +- 1 — Enabled. + +*For another setting (typical phrases):* + +- Positive integer. +- 0 — Disabled or unlimited or something else. + +Default value: `value`. + +**Additional Info** (Optional) + +The name of an additional section can be any, for example, **Usage**. + +**See Also** (Optional) + +- [link](#) diff --git a/docs/_description_templates/template-statement.md b/docs/_description_templates/template-statement.md index b5ae0d9b26b..238570c2217 100644 --- a/docs/_description_templates/template-statement.md +++ b/docs/_description_templates/template-statement.md @@ -1 +1,24 @@ -{"payload":{"allShortcutsEnabled":true,"fileTree":{"docs/_description_templates":{"items":[{"name":"template-data-type.md","path":"docs/_description_templates/template-data-type.md","contentType":"file"},{"name":"template-engine.md","path":"docs/_description_templates/template-engine.md","contentType":"file"},{"name":"template-function.md","path":"docs/_description_templates/template-function.md","contentType":"file"},{"name":"template-server-setting.md","path":"docs/_description_templates/template-server-setting.md","contentType":"file"},{"name":"template-setting.md","path":"docs/_description_templates/template-setting.md","contentType":"file"},{"name":"template-statement.md","path":"docs/_description_templates/template-statement.md","contentType":"file"},{"name":"template-system-table.md","path":"docs/_description_templates/template-system-table.md","contentType":"file"}],"totalCount":7},"docs":{"items":[{"name":"_description_templates","path":"docs/_description_templates","contentType":"directory"},{"name":"_includes","path":"docs/_includes","contentType":"directory"},{"name":"changelogs","path":"docs/changelogs","contentType":"directory"},{"name":"en","path":"docs/en","contentType":"directory"},{"name":"ru","path":"docs/ru","contentType":"directory"},{"name":"tools","path":"docs/tools","contentType":"directory"},{"name":"zh","path":"docs/zh","contentType":"directory"},{"name":".gitignore","path":"docs/.gitignore","contentType":"file"},{"name":"README.md","path":"docs/README.md","contentType":"file"},{"name":"clean","path":"docs/clean","contentType":"file"},{"name":"mkdocs.yml","path":"docs/mkdocs.yml","contentType":"file"},{"name":"redirects.txt","path":"docs/redirects.txt","contentType":"file"}],"totalCount":12},"":{"items":[{"name":".github","path":".github","contentType":"directory"},{"name":"base","path":"base","contentType":"directory"},{"nam
e":"benchmark","path":"benchmark","contentType":"directory"},{"name":"cmake","path":"cmake","contentType":"directory"},{"name":"contrib","path":"contrib","contentType":"directory"},{"name":"docker","path":"docker","contentType":"directory"},{"name":"docs","path":"docs","contentType":"directory"},{"name":"packages","path":"packages","contentType":"directory"},{"name":"programs","path":"programs","contentType":"directory"},{"name":"src","path":"src","contentType":"directory"},{"name":"tests","path":"tests","contentType":"directory"},{"name":"utils","path":"utils","contentType":"directory"},{"name":"website","path":"website","contentType":"directory"},{"name":".clang-format","path":".clang-format","contentType":"file"},{"name":".clang-tidy","path":".clang-tidy","contentType":"file"},{"name":".editorconfig","path":".editorconfig","contentType":"file"},{"name":".gitattributes","path":".gitattributes","contentType":"file"},{"name":".gitignore","path":".gitignore","contentType":"file"},{"name":".gitmodules","path":".gitmodules","contentType":"file"},{"name":".pylintrc","path":".pylintrc","contentType":"file"},{"name":".vimrc","path":".vimrc","contentType":"file"},{"name":".yamllint","path":".yamllint","contentType":"file"},{"name":"AUTHORS","path":"AUTHORS","contentType":"file"},{"name":"CHANGELOG.md","path":"CHANGELOG.md","contentType":"file"},{"name":"CMakeLists.txt","path":"CMakeLists.txt","contentType":"file"},{"name":"CODE_OF_CONDUCT.md","path":"CODE_OF_CONDUCT.md","contentType":"file"},{"name":"CONTRIBUTING.md","path":"CONTRIBUTING.md","contentType":"file"},{"name":"LICENSE","path":"LICENSE","contentType":"file"},{"name":"PreLoad.cmake","path":"PreLoad.cmake","contentType":"file"},{"name":"README.md","path":"README.md","contentType":"file"},{"name":"SECURITY.md","path":"SECURITY.md","contentType":"file"},{"name":"format_sources","path":"format_sources","contentType":"file"}],"totalCount":32}},"fileTreeProcessingTime":9.954742000000001,"foldersToFetch":[],"reducedMotionEnabled":"system","repo":{"id":60246359,"defaultBranch":"master","name":"ClickHouse","ownerLogin":"ClickHouse","currentUserCanPush":true,"isFork":false,"isEmpty":false,"createdAt":"2016-06-02T04:28:18.000-04:00","ownerAvatar":"https://avatars.githubusercontent.com/u/54801242?v=4","public":true,"private":false,"isOrgOwned":true},"refInfo":{"name":"5f18640215159ed1ad50be1efce2cb996a49fd73","listCacheKey":"v0:1688403108.0","canEdit":false,"refType":"tree","currentOid":"5f18640215159ed1ad50be1efce2cb996a49fd73"},"path":"docs/_description_templates/template-statement.md","currentUser":{"id":25182304,"login":"DanRoscigno","userEmail":"dan@roscigno.com"},"blob":{"rawBlob":null,"colorizedLines":null,"stylingDirectives":null,"csv":null,"csvError":null,"dependabotInfo":{"showConfigurationBanner":null,"configFilePath":null,"networkDependabotPath":"/ClickHouse/ClickHouse/network/updates","dismissConfigurationNoticePath":"/settings/dismiss-notice/dependabot_configuration_notice","configurationNoticeDismissed":false,"repoAlertsPath":"/ClickHouse/ClickHouse/security/dependabot","repoSecurityAndAnalysisPath":"/ClickHouse/ClickHouse/settings/security_analysis","repoOwnerIsOrg":true,"currentUserCanAdminRepo":false},"displayName":"template-statement.md","displayUrl":"https://github.com/ClickHouse/ClickHouse/blob/5f18640215159ed1ad50be1efce2cb996a49fd73/docs/_description_templates/template-statement.md?raw=true","headerInfo":{"blobSize":"575 Bytes","deleteInfo":{"deletePath":null,"deleteTooltip":"You must be on a branch to make or propose changes 
to this file"},"editInfo":{"editTooltip":"You must be on a branch to make or propose changes to this file"},"ghDesktopPath":null,"gitLfsPath":null,"onBranch":false,"shortPath":"238570c","siteNavLoginPath":"/login?return_to=https%3A%2F%2Fgithub.com%2FClickHouse%2FClickHouse%2Fblob%2F5f18640215159ed1ad50be1efce2cb996a49fd73%2Fdocs%2F_description_templates%2Ftemplate-statement.md","isCSV":false,"isRichtext":true,"toc":[{"level":1,"text":"Statement name (for example, SHOW USER) {#statement-name-in-lower-case}","anchor":"statement-name-for-example-show-user-statement-name-in-lower-case","htmlText":"Statement name (for example, SHOW USER) {#statement-name-in-lower-case}"},{"level":2,"text":"Other necessary sections of the description (Optional) {#anchor}","anchor":"other-necessary-sections-of-the-description-optional-anchor","htmlText":"Other necessary sections of the description (Optional) {#anchor}"}],"lineInfo":{"truncatedLoc":"24","truncatedSloc":"14"},"mode":"file"},"image":false,"isCodeownersFile":null,"isValidLegacyIssueTemplate":false,"issueTemplateHelpUrl":"https://docs.github.com/articles/about-issue-and-pull-request-templates","issueTemplate":null,"discussionTemplate":null,"language":"Markdown","large":false,"loggedIn":true,"newDiscussionPath":"/ClickHouse/ClickHouse/discussions/new","newIssuePath":"/ClickHouse/ClickHouse/issues/new","planSupportInfo":{"repoIsFork":null,"repoOwnedByCurrentUser":null,"requestFullPath":"/ClickHouse/ClickHouse/blob/5f18640215159ed1ad50be1efce2cb996a49fd73/docs/_description_templates/template-statement.md","showFreeOrgGatedFeatureMessage":null,"showPlanSupportBanner":null,"upgradeDataAttributes":null,"upgradePath":null},"publishBannersInfo":{"dismissActionNoticePath":"/settings/dismiss-notice/publish_action_from_dockerfile","dismissStackNoticePath":"/settings/dismiss-notice/publish_stack_from_file","releasePath":"/ClickHouse/ClickHouse/releases/new?marketplace=true","showPublishActionBanner":false,"showPublishStackBanner":false},"renderImageOrRaw":false,"richText":"

Statement name (for example, SHOW USER) {#statement-name-in-lower-case}

\n

Brief description of what the statement does.

\n

Syntax

\n
Syntax of the statement.
\n

Other necessary sections of the description (Optional) {#anchor}

\n

Examples of descriptions with a complicated structure:

\n\n

See Also (Optional)

\n

Links to related topics as a list.

\n\n
","renderedFileInfo":null,"tabSize":8,"topBannersInfo":{"overridingGlobalFundingFile":false,"globalPreferredFundingPath":null,"repoOwner":"ClickHouse","repoName":"ClickHouse","showInvalidCitationWarning":false,"citationHelpUrl":"https://docs.github.com/en/github/creating-cloning-and-archiving-repositories/creating-a-repository-on-github/about-citation-files","showDependabotConfigurationBanner":null,"actionsOnboardingTip":null},"truncated":false,"viewable":true,"workflowRedirectUrl":null,"symbols":{"timedOut":false,"notAnalyzed":true,"symbols":[]}},"csrf_tokens":{"/ClickHouse/ClickHouse/branches":{"post":"xohFK8TmjwjnIXPER5IKigBaxPXoFkjI0tOrZrMZen-7dEtqFBby2_crNRquoS1MICw8MHQZbW8PaIZu-qRJvQ"}}},"title":"ClickHouse/docs/_description_templates/template-statement.md at 5f18640215159ed1ad50be1efce2cb996a49fd73 · ClickHouse/ClickHouse","locale":"en"} \ No newline at end of file +# Statement name (for example, SHOW USER) {#statement-name-in-lower-case} + +Brief description of what the statement does. + +**Syntax** + +```sql +Syntax of the statement. +``` + +## Other necessary sections of the description (Optional) {#anchor} + +Examples of descriptions with a complicated structure: + +- https://clickhouse.com/docs/en/sql-reference/statements/grant/ +- https://clickhouse.com/docs/en/sql-reference/statements/revoke/ +- https://clickhouse.com/docs/en/sql-reference/statements/select/join/ + + +**See Also** (Optional) + +Links to related topics as a list. + +- [link](#) diff --git a/docs/_description_templates/template-system-table.md b/docs/_description_templates/template-system-table.md index 02d622a52cf..f2decc4bb6d 100644 --- a/docs/_description_templates/template-system-table.md +++ b/docs/_description_templates/template-system-table.md @@ -1 +1,25 @@ -{"payload":{"allShortcutsEnabled":true,"fileTree":{"docs/_description_templates":{"items":[{"name":"template-data-type.md","path":"docs/_description_templates/template-data-type.md","contentType":"file"},{"name":"template-engine.md","path":"docs/_description_templates/template-engine.md","contentType":"file"},{"name":"template-function.md","path":"docs/_description_templates/template-function.md","contentType":"file"},{"name":"template-server-setting.md","path":"docs/_description_templates/template-server-setting.md","contentType":"file"},{"name":"template-setting.md","path":"docs/_description_templates/template-setting.md","contentType":"file"},{"name":"template-statement.md","path":"docs/_description_templates/template-statement.md","contentType":"file"},{"name":"template-system-table.md","path":"docs/_description_templates/template-system-table.md","contentType":"file"}],"totalCount":7},"docs":{"items":[{"name":"_description_templates","path":"docs/_description_templates","contentType":"directory"},{"name":"_includes","path":"docs/_includes","contentType":"directory"},{"name":"changelogs","path":"docs/changelogs","contentType":"directory"},{"name":"en","path":"docs/en","contentType":"directory"},{"name":"ru","path":"docs/ru","contentType":"directory"},{"name":"tools","path":"docs/tools","contentType":"directory"},{"name":"zh","path":"docs/zh","contentType":"directory"},{"name":".gitignore","path":"docs/.gitignore","contentType":"file"},{"name":"README.md","path":"docs/README.md","contentType":"file"},{"name":"clean","path":"docs/clean","contentType":"file"},{"name":"mkdocs.yml","path":"docs/mkdocs.yml","contentType":"file"},{"name":"redirects.txt","path":"docs/redirects.txt","contentType":"file"}],"totalCount":12},"":{"items":[{"name":".github","p
ath":".github","contentType":"directory"},{"name":"base","path":"base","contentType":"directory"},{"name":"benchmark","path":"benchmark","contentType":"directory"},{"name":"cmake","path":"cmake","contentType":"directory"},{"name":"contrib","path":"contrib","contentType":"directory"},{"name":"docker","path":"docker","contentType":"directory"},{"name":"docs","path":"docs","contentType":"directory"},{"name":"packages","path":"packages","contentType":"directory"},{"name":"programs","path":"programs","contentType":"directory"},{"name":"src","path":"src","contentType":"directory"},{"name":"tests","path":"tests","contentType":"directory"},{"name":"utils","path":"utils","contentType":"directory"},{"name":"website","path":"website","contentType":"directory"},{"name":".clang-format","path":".clang-format","contentType":"file"},{"name":".clang-tidy","path":".clang-tidy","contentType":"file"},{"name":".editorconfig","path":".editorconfig","contentType":"file"},{"name":".gitattributes","path":".gitattributes","contentType":"file"},{"name":".gitignore","path":".gitignore","contentType":"file"},{"name":".gitmodules","path":".gitmodules","contentType":"file"},{"name":".pylintrc","path":".pylintrc","contentType":"file"},{"name":".vimrc","path":".vimrc","contentType":"file"},{"name":".yamllint","path":".yamllint","contentType":"file"},{"name":"AUTHORS","path":"AUTHORS","contentType":"file"},{"name":"CHANGELOG.md","path":"CHANGELOG.md","contentType":"file"},{"name":"CMakeLists.txt","path":"CMakeLists.txt","contentType":"file"},{"name":"CODE_OF_CONDUCT.md","path":"CODE_OF_CONDUCT.md","contentType":"file"},{"name":"CONTRIBUTING.md","path":"CONTRIBUTING.md","contentType":"file"},{"name":"LICENSE","path":"LICENSE","contentType":"file"},{"name":"PreLoad.cmake","path":"PreLoad.cmake","contentType":"file"},{"name":"README.md","path":"README.md","contentType":"file"},{"name":"SECURITY.md","path":"SECURITY.md","contentType":"file"},{"name":"format_sources","path":"format_sources","contentType":"file"}],"totalCount":32}},"fileTreeProcessingTime":8.697185,"foldersToFetch":[],"reducedMotionEnabled":"system","repo":{"id":60246359,"defaultBranch":"master","name":"ClickHouse","ownerLogin":"ClickHouse","currentUserCanPush":true,"isFork":false,"isEmpty":false,"createdAt":"2016-06-02T04:28:18.000-04:00","ownerAvatar":"https://avatars.githubusercontent.com/u/54801242?v=4","public":true,"private":false,"isOrgOwned":true},"refInfo":{"name":"5f18640215159ed1ad50be1efce2cb996a49fd73","listCacheKey":"v0:1688403108.0","canEdit":false,"refType":"tree","currentOid":"5f18640215159ed1ad50be1efce2cb996a49fd73"},"path":"docs/_description_templates/template-system-table.md","currentUser":{"id":25182304,"login":"DanRoscigno","userEmail":"dan@roscigno.com"},"blob":{"rawBlob":null,"colorizedLines":null,"stylingDirectives":null,"csv":null,"csvError":null,"dependabotInfo":{"showConfigurationBanner":null,"configFilePath":null,"networkDependabotPath":"/ClickHouse/ClickHouse/network/updates","dismissConfigurationNoticePath":"/settings/dismiss-notice/dependabot_configuration_notice","configurationNoticeDismissed":false,"repoAlertsPath":"/ClickHouse/ClickHouse/security/dependabot","repoSecurityAndAnalysisPath":"/ClickHouse/ClickHouse/settings/security_analysis","repoOwnerIsOrg":true,"currentUserCanAdminRepo":false},"displayName":"template-system-table.md","displayUrl":"https://github.com/ClickHouse/ClickHouse/blob/5f18640215159ed1ad50be1efce2cb996a49fd73/docs/_description_templates/template-system-table.md?raw=true","headerInfo":{"blobSize":"381 
Bytes","deleteInfo":{"deletePath":null,"deleteTooltip":"You must be on a branch to make or propose changes to this file"},"editInfo":{"editTooltip":"You must be on a branch to make or propose changes to this file"},"ghDesktopPath":null,"gitLfsPath":null,"onBranch":false,"shortPath":"f2decc4","siteNavLoginPath":"/login?return_to=https%3A%2F%2Fgithub.com%2FClickHouse%2FClickHouse%2Fblob%2F5f18640215159ed1ad50be1efce2cb996a49fd73%2Fdocs%2F_description_templates%2Ftemplate-system-table.md","isCSV":false,"isRichtext":true,"toc":[{"level":1,"text":"system.table_name {#system-tables_table-name}","anchor":"systemtable_name-system-tables_table-name","htmlText":"system.table_name {#system-tables_table-name}"}],"lineInfo":{"truncatedLoc":"25","truncatedSloc":"15"},"mode":"file"},"image":false,"isCodeownersFile":null,"isValidLegacyIssueTemplate":false,"issueTemplateHelpUrl":"https://docs.github.com/articles/about-issue-and-pull-request-templates","issueTemplate":null,"discussionTemplate":null,"language":"Markdown","large":false,"loggedIn":true,"newDiscussionPath":"/ClickHouse/ClickHouse/discussions/new","newIssuePath":"/ClickHouse/ClickHouse/issues/new","planSupportInfo":{"repoIsFork":null,"repoOwnedByCurrentUser":null,"requestFullPath":"/ClickHouse/ClickHouse/blob/5f18640215159ed1ad50be1efce2cb996a49fd73/docs/_description_templates/template-system-table.md","showFreeOrgGatedFeatureMessage":null,"showPlanSupportBanner":null,"upgradeDataAttributes":null,"upgradePath":null},"publishBannersInfo":{"dismissActionNoticePath":"/settings/dismiss-notice/publish_action_from_dockerfile","dismissStackNoticePath":"/settings/dismiss-notice/publish_stack_from_file","releasePath":"/ClickHouse/ClickHouse/releases/new?marketplace=true","showPublishActionBanner":false,"showPublishStackBanner":false},"renderImageOrRaw":false,"richText":"

system.table_name {#system-tables_table-name}

\n

Description.

\n

Columns:

\n\n

Example

\n

Query:

\n
SELECT * FROM system.table_name
\n

Result:

\n
Some output. It shouldn't be too long.\n
\n

See Also

\n
    \n
  • Article name — Some words about referenced information.
  • \n
\n
","renderedFileInfo":null,"tabSize":8,"topBannersInfo":{"overridingGlobalFundingFile":false,"globalPreferredFundingPath":null,"repoOwner":"ClickHouse","repoName":"ClickHouse","showInvalidCitationWarning":false,"citationHelpUrl":"https://docs.github.com/en/github/creating-cloning-and-archiving-repositories/creating-a-repository-on-github/about-citation-files","showDependabotConfigurationBanner":null,"actionsOnboardingTip":null},"truncated":false,"viewable":true,"workflowRedirectUrl":null,"symbols":{"timedOut":false,"notAnalyzed":true,"symbols":[]}},"csrf_tokens":{"/ClickHouse/ClickHouse/branches":{"post":"9K5xHq3WERnrVDYaCTfS2yzVWybDSHIH0WqtxpxdRGaJUn9ffSZsyvtecMTgBPUdDKOj419HV6AM0YDO1eB3pA"}}},"title":"ClickHouse/docs/_description_templates/template-system-table.md at 5f18640215159ed1ad50be1efce2cb996a49fd73 · ClickHouse/ClickHouse","locale":"en"} \ No newline at end of file +# system.table_name {#system-tables_table-name} + +Description. + +Columns: + +- `column_name` ([data_type_name](path/to/data_type.md)) — Description. + +**Example** + +Query: + +``` sql +SELECT * FROM system.table_name +``` + +Result: + +``` text +Some output. It shouldn't be too long. +``` + +**See Also** + +- [Article name](path/to/article_name.md) — Some words about referenced information. From 9660291ab312ad0639ffc3058ad28015d76d08aa Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 3 Jul 2023 18:33:59 +0000 Subject: [PATCH 1136/1997] my fixes --- .../functions/array-functions.md | 2 +- src/Functions/array/arrayJaccardIndex.cpp | 9 ++-- .../0_stateless/02737_arrayJaccardIndex.sql | 41 +++++++------------ 3 files changed, 21 insertions(+), 31 deletions(-) diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 921e9765080..862ecc42158 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -998,7 +998,7 @@ SELECT ## arrayJaccardIndex -Returns the jaccard similarity between two arrays. +Returns the [Jaccard index](https://en.wikipedia.org/wiki/Jaccard_index) of two arrays. 
**Example** diff --git a/src/Functions/array/arrayJaccardIndex.cpp b/src/Functions/array/arrayJaccardIndex.cpp index 211680092b3..078687a6431 100644 --- a/src/Functions/array/arrayJaccardIndex.cpp +++ b/src/Functions/array/arrayJaccardIndex.cpp @@ -37,6 +37,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ + // XXX {"array_1", [](const IDataType & type) { return isArray(type.getPtr()); }, nullptr, "Array"}, {"array_2", [](const IDataType & type) { return isArray(type.getPtr()); }, nullptr, "Array"}, }; @@ -45,7 +46,7 @@ public: } template - static inline void getArraySize(const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, size_t & left_size, size_t & right_size, const size_t & i) + static void getArraySize(const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, size_t & left_size, size_t & right_size, const size_t & i) { if constexpr (is_const_left) left_size = left_offsets[0]; @@ -58,7 +59,7 @@ public: } template - static inline void vector(const ColumnArray::Offsets & intersect_offsets, const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, PaddedPODArray & res) + static void vector(const ColumnArray::Offsets & intersect_offsets, const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, PaddedPODArray & res) { size_t left_size; size_t right_size; @@ -71,14 +72,14 @@ public: } template - static inline void vectorWithEmptyIntersect(const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, PaddedPODArray & res) + static void vectorWithEmptyIntersect(const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, PaddedPODArray & res) { size_t left_size; size_t right_size; for (size_t i = 0; i < res.size(); ++i) { getArraySize(left_offsets, right_offsets, left_size, right_size, i); - if (unlikely(!left_size && !right_size)) + if ((!left_size && !right_size)) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "array aggregate functions cannot be performed on two empty arrays"); res[i] = 0; } diff --git a/tests/queries/0_stateless/02737_arrayJaccardIndex.sql b/tests/queries/0_stateless/02737_arrayJaccardIndex.sql index 000106e93b7..ba5a93f1658 100644 --- a/tests/queries/0_stateless/02737_arrayJaccardIndex.sql +++ b/tests/queries/0_stateless/02737_arrayJaccardIndex.sql @@ -1,31 +1,20 @@ -drop table if exists array_jaccard_index; +SELECT ['a'] AS arr_1, ['a', 'aa', 'aaa'] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); +SELECT [1, 1.1, 2.2] AS arr_1, [2.2, 3.3, 444] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); +SELECT [toUInt16(1)] AS arr_1, [toUInt32(1)] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); +SELECT [1,2] AS arr_1, [1,2,3,4] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); +SELECT [[1,2], [3,4]] AS arr_1, [[1,2], [3,5]] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); -create table array_jaccard_index (arr Array(UInt8)) engine = MergeTree order by arr; +DROP TABLE IF EXISTS array_jaccard_index; -insert into array_jaccard_index values ([1,2,3]); +CREATE TABLE array_jaccard_index (arr Array(UInt8)) engine = MergeTree ORDER BY arr; +INSERT INTO array_jaccard_index values ([1,2,3]); +INSERT INTO array_jaccard_index values ([1,2]); +INSERT INTO array_jaccard_index values ([1]); -insert into array_jaccard_index values ([1,2]); - -insert into array_jaccard_index values ([1]); - -select arr as arr_1, 
[1,2] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) from array_jaccard_index order by arr; - -select arr as arr_1, [] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) from array_jaccard_index order by arr; - -select [] as arr_1, arr as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) from array_jaccard_index order by arr; - -select [1,2] as arr_1, arr as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) from array_jaccard_index order by arr; - -select arr as arr_1, arr as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) from array_jaccard_index order by arr; +SELECT arr AS arr_1, [1,2] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) FROM array_jaccard_index ORDER BY arr; +SELECT arr AS arr_1, [] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) FROM array_jaccard_index ORDER BY arr; +SELECT [] AS arr_1, arr AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) FROM array_jaccard_index ORDER BY arr; +SELECT [1,2] AS arr_1, arr AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) FROM array_jaccard_index ORDER BY arr; +SELECT arr AS arr_1, arr AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) FROM array_jaccard_index ORDER BY arr; drop table array_jaccard_index; - -select ['a'] as arr_1, ['a', 'aa', 'aaa'] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); - -select [1, 1.1, 2.2] as arr_1, [2.2, 3.3, 444] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); - -select [toUInt16(1)] as arr_1, [toUInt32(1)] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); - -select [1,2] as arr_1, [1,2,3,4] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); - -select [[1,2], [3,4]] as arr_1, [[1,2], [3,5]] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); From 29f93bd06dc7cb7ba6a768644bc5a0eda79126d4 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 3 Jul 2023 20:25:51 +0000 Subject: [PATCH 1137/1997] More fixes, pt. 
II --- src/DataTypes/IDataType.h | 34 +++-- src/Functions/array/arrayJaccardIndex.cpp | 137 ++++++++++-------- .../02737_arrayJaccardIndex.reference | 19 ++- .../0_stateless/02737_arrayJaccardIndex.sql | 32 ++-- ...2737_arrayJaccardIndex_exception.reference | 2 - .../02737_arrayJaccardIndex_exception.sh | 11 -- 6 files changed, 126 insertions(+), 109 deletions(-) delete mode 100644 tests/queries/0_stateless/02737_arrayJaccardIndex_exception.reference delete mode 100755 tests/queries/0_stateless/02737_arrayJaccardIndex_exception.sh diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index bfc4a71083d..4adafe5d212 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -410,21 +410,29 @@ inline bool isDateTime(const T & data_type) { return WhichDataType(data_type).is template inline bool isDateTime64(const T & data_type) { return WhichDataType(data_type).isDateTime64(); } -inline bool isEnum(const DataTypePtr & data_type) { return WhichDataType(data_type).isEnum(); } -inline bool isDecimal(const DataTypePtr & data_type) { return WhichDataType(data_type).isDecimal(); } -inline bool isTuple(const DataTypePtr & data_type) { return WhichDataType(data_type).isTuple(); } -inline bool isArray(const DataTypePtr & data_type) { return WhichDataType(data_type).isArray(); } -inline bool isMap(const DataTypePtr & data_type) {return WhichDataType(data_type).isMap(); } -inline bool isInterval(const DataTypePtr & data_type) {return WhichDataType(data_type).isInterval(); } -inline bool isNothing(const DataTypePtr & data_type) { return WhichDataType(data_type).isNothing(); } -inline bool isUUID(const DataTypePtr & data_type) { return WhichDataType(data_type).isUUID(); } -inline bool isIPv4(const DataTypePtr & data_type) { return WhichDataType(data_type).isIPv4(); } -inline bool isIPv6(const DataTypePtr & data_type) { return WhichDataType(data_type).isIPv6(); } +template +inline bool isEnum(const T & data_type) { return WhichDataType(data_type).isEnum(); } +template +inline bool isDecimal(const T & data_type) { return WhichDataType(data_type).isDecimal(); } +template +inline bool isTuple(const T & data_type) { return WhichDataType(data_type).isTuple(); } +template +inline bool isArray(const T & data_type) { return WhichDataType(data_type).isArray(); } +template +inline bool isMap(const T & data_type) {return WhichDataType(data_type).isMap(); } +template +inline bool isInterval(const T & data_type) {return WhichDataType(data_type).isInterval(); } +template +inline bool isNothing(const T & data_type) { return WhichDataType(data_type).isNothing(); } +template +inline bool isUUID(const T & data_type) { return WhichDataType(data_type).isUUID(); } +template +inline bool isIPv4(const T & data_type) { return WhichDataType(data_type).isIPv4(); } +template +inline bool isIPv6(const T & data_type) { return WhichDataType(data_type).isIPv6(); } template -inline bool isObject(const T & data_type) -{ - return WhichDataType(data_type).isObject(); +inline bool isObject(const T & data_type) { return WhichDataType(data_type).isObject(); } template diff --git a/src/Functions/array/arrayJaccardIndex.cpp b/src/Functions/array/arrayJaccardIndex.cpp index 078687a6431..755e0f8278f 100644 --- a/src/Functions/array/arrayJaccardIndex.cpp +++ b/src/Functions/array/arrayJaccardIndex.cpp @@ -24,8 +24,58 @@ namespace ErrorCodes class FunctionArrayJaccardIndex : public IFunction { -public: +private: using ResultType = Float64; + + struct LeftAndRightSizes + { + size_t left_size; + size_t right_size; + }; + + 
template + static LeftAndRightSizes getArraySizes(const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, size_t i) + { + size_t left_size; + size_t right_size; + + if constexpr (left_is_const) + left_size = left_offsets[0]; + else + left_size = left_offsets[i] - left_offsets[i - 1]; + + if constexpr (right_is_const) + right_size = right_offsets[0]; + else + right_size = right_offsets[i] - right_offsets[i - 1]; + + return {left_size, right_size}; + } + + template + static void vector(const ColumnArray::Offsets & intersect_offsets, const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, PaddedPODArray & res) + { + for (size_t i = 0; i < res.size(); ++i) + { + LeftAndRightSizes sizes = getArraySizes(left_offsets, right_offsets, i); + size_t intersect_size = intersect_offsets[i] - intersect_offsets[i - 1]; + res[i] = static_cast(intersect_size) / (sizes.left_size + sizes.right_size - intersect_size); + } + } + + template + static void vectorWithEmptyIntersect(const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, PaddedPODArray & res) + { + for (size_t i = 0; i < res.size(); ++i) + { + LeftAndRightSizes sizes = getArraySizes(left_offsets, right_offsets, i); + if (sizes.left_size == 0 && sizes.right_size == 0) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "array aggregate functions cannot be performed on two empty arrays"); + res[i] = 0; + } + } + +public: static constexpr auto name = "arrayJaccardIndex"; String getName() const override { return name; } static FunctionPtr create(ContextPtr context_) { return std::make_shared(context_); } @@ -37,100 +87,59 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - // XXX - {"array_1", [](const IDataType & type) { return isArray(type.getPtr()); }, nullptr, "Array"}, - {"array_2", [](const IDataType & type) { return isArray(type.getPtr()); }, nullptr, "Array"}, + {"array_1", &isArray, nullptr, "Array"}, + {"array_2", &isArray, nullptr, "Array"}, }; validateFunctionArgumentTypes(*this, arguments, args); return std::make_shared>(); } - template - static void getArraySize(const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, size_t & left_size, size_t & right_size, const size_t & i) - { - if constexpr (is_const_left) - left_size = left_offsets[0]; - else - left_size = left_offsets[i] - left_offsets[i - 1]; - if constexpr (is_const_right) - right_size = right_offsets[0]; - else - right_size = right_offsets[i] - right_offsets[i - 1]; - } - - template - static void vector(const ColumnArray::Offsets & intersect_offsets, const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, PaddedPODArray & res) - { - size_t left_size; - size_t right_size; - for (size_t i = 0; i < res.size(); ++i) - { - getArraySize(left_offsets, right_offsets, left_size, right_size, i); - size_t intersect_size = intersect_offsets[i] - intersect_offsets[i - 1]; - res[i] = static_cast(intersect_size) / (left_size + right_size - intersect_size); - } - } - - template - static void vectorWithEmptyIntersect(const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, PaddedPODArray & res) - { - size_t left_size; - size_t right_size; - for (size_t i = 0; i < res.size(); ++i) - { - getArraySize(left_offsets, right_offsets, left_size, right_size, i); - if ((!left_size && !right_size)) - throw 
Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "array aggregate functions cannot be performed on two empty arrays"); - res[i] = 0; - } - } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - auto cast_array = [&](const ColumnWithTypeAndName & col) -> std::pair + auto cast_to_array = [&](const ColumnWithTypeAndName & col) -> std::pair { - const ColumnArray * res; - bool is_const = false; if (const ColumnConst * col_const = typeid_cast(col.column.get())) { - res = checkAndGetColumn(col_const->getDataColumnPtr().get()); - is_const = true; + const ColumnArray * col_const_array = checkAndGetColumn(col_const->getDataColumnPtr().get()); + return {col_const_array, true}; } - else if (!(res = checkAndGetColumn(col.column.get()))) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Argument for function {} must be array but it has type {}.", - col.column->getName(), getName()); - return {res, is_const}; + else if (const ColumnArray * col_non_const_array = checkAndGetColumn(col.column.get())) + return {col_non_const_array, false}; + else + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Argument for function {} must be array but it has type {}.", col.column->getName(), getName()); }; - const auto & [left_array, is_const_left] = cast_array(arguments[0]); - const auto & [right_array, is_const_right] = cast_array(arguments[1]); + const auto & [left_array, left_is_const] = cast_to_array(arguments[0]); + const auto & [right_array, right_is_const] = cast_to_array(arguments[1]); auto intersect_array = FunctionFactory::instance().get("arrayIntersect", context)->build(arguments); + ColumnWithTypeAndName intersect_column; intersect_column.type = intersect_array->getResultType(); intersect_column.column = intersect_array->execute(arguments, intersect_column.type, input_rows_count); - const auto * return_type_intersect = checkAndGetDataType(intersect_column.type.get()); - if (!return_type_intersect) + + const auto * intersect_column_type = checkAndGetDataType(intersect_column.type.get()); + if (!intersect_column_type) throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected return type for function arrayIntersect"); auto col_res = ColumnVector::create(); typename ColumnVector::Container & vec_res = col_res->getData(); vec_res.resize(input_rows_count); -#define EXECUTE_VECTOR(is_const_left, is_const_right) \ - if (typeid_cast(return_type_intersect->getNestedType().get())) \ - vectorWithEmptyIntersect(left_array->getOffsets(), right_array->getOffsets(), vec_res); \ +#define EXECUTE_VECTOR(left_is_const, right_is_const) \ + if (typeid_cast(intersect_column_type->getNestedType().get())) \ + vectorWithEmptyIntersect(left_array->getOffsets(), right_array->getOffsets(), vec_res); \ else \ { \ const ColumnArray * intersect_column_array = checkAndGetColumn(intersect_column.column.get()); \ - vector(intersect_column_array->getOffsets(), left_array->getOffsets(), right_array->getOffsets(), vec_res); \ + vector(intersect_column_array->getOffsets(), left_array->getOffsets(), right_array->getOffsets(), vec_res); \ } - if (!is_const_left && !is_const_right) + if (!left_is_const && !right_is_const) EXECUTE_VECTOR(false, false) - else if (!is_const_left && is_const_right) + else if (!left_is_const && right_is_const) EXECUTE_VECTOR(false, true) - else if (is_const_left && !is_const_right) + else if (left_is_const && !right_is_const) EXECUTE_VECTOR(true, false) else EXECUTE_VECTOR(true, true) diff --git 
a/tests/queries/0_stateless/02737_arrayJaccardIndex.reference b/tests/queries/0_stateless/02737_arrayJaccardIndex.reference index 0b7969889c0..62a51ec0ab2 100644 --- a/tests/queries/0_stateless/02737_arrayJaccardIndex.reference +++ b/tests/queries/0_stateless/02737_arrayJaccardIndex.reference @@ -1,20 +1,23 @@ +negative tests +const arguments +[1,2] [1,2,3,4] 0.5 +[1,1.1,2.2] [2.2,3.3,444] 0.2 +[1] [1] 1 +['a'] ['a','aa','aaa'] 0.33 +[[1,2],[3,4]] [[1,2],[3,5]] 0.33 +non-const arguments [1] [1,2] 0.5 [1,2] [1,2] 1 [1,2,3] [1,2] 0.67 [1] [] 0 [1,2] [] 0 [1,2,3] [] 0 -[] [1] 0 -[] [1,2] 0 -[] [1,2,3] 0 [1,2] [1] 0.5 [1,2] [1,2] 1 [1,2] [1,2,3] 0.67 +[] [1] 0 +[] [1,2] 0 +[] [1,2,3] 0 [1] [1] 1 [1,2] [1,2] 1 [1,2,3] [1,2,3] 1 -['a'] ['a','aa','aaa'] 0.33 -[1,1.1,2.2] [2.2,3.3,444] 0.2 -[1] [1] 1 -[1,2] [1,2,3,4] 0.5 -[[1,2],[3,4]] [[1,2],[3,5]] 0.33 diff --git a/tests/queries/0_stateless/02737_arrayJaccardIndex.sql b/tests/queries/0_stateless/02737_arrayJaccardIndex.sql index ba5a93f1658..499debd94b7 100644 --- a/tests/queries/0_stateless/02737_arrayJaccardIndex.sql +++ b/tests/queries/0_stateless/02737_arrayJaccardIndex.sql @@ -1,8 +1,18 @@ -SELECT ['a'] AS arr_1, ['a', 'aa', 'aaa'] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); -SELECT [1, 1.1, 2.2] AS arr_1, [2.2, 3.3, 444] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); -SELECT [toUInt16(1)] AS arr_1, [toUInt32(1)] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); -SELECT [1,2] AS arr_1, [1,2,3,4] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); -SELECT [[1,2], [3,4]] AS arr_1, [[1,2], [3,5]] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); +SELECT 'negative tests'; + +SELECT 'a' AS arr1, 2 AS arr2, round(arrayJaccardIndex(arr1, arr2), 2); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT [] AS arr1, [] AS arr2, round(arrayJaccardIndex(arr1, arr2), 2); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT ['1', '2'] AS arr1, [1,2] AS arr2, round(arrayJaccardIndex(arr1, arr2), 2); -- { serverError NO_COMMON_TYPE } + +SELECT 'const arguments'; + +SELECT [1,2] AS arr1, [1,2,3,4] AS arr2, round(arrayJaccardIndex(arr1, arr2), 2); +SELECT [1, 1.1, 2.2] AS arr1, [2.2, 3.3, 444] AS arr2, round(arrayJaccardIndex(arr1, arr2), 2); +SELECT [toUInt16(1)] AS arr1, [toUInt32(1)] AS arr2, round(arrayJaccardIndex(arr1, arr2), 2); +SELECT ['a'] AS arr1, ['a', 'aa', 'aaa'] AS arr2, round(arrayJaccardIndex(arr1, arr2), 2); +SELECT [[1,2], [3,4]] AS arr1, [[1,2], [3,5]] AS arr2, round(arrayJaccardIndex(arr1, arr2), 2); + +SELECT 'non-const arguments'; DROP TABLE IF EXISTS array_jaccard_index; @@ -11,10 +21,10 @@ INSERT INTO array_jaccard_index values ([1,2,3]); INSERT INTO array_jaccard_index values ([1,2]); INSERT INTO array_jaccard_index values ([1]); -SELECT arr AS arr_1, [1,2] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) FROM array_jaccard_index ORDER BY arr; -SELECT arr AS arr_1, [] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) FROM array_jaccard_index ORDER BY arr; -SELECT [] AS arr_1, arr AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) FROM array_jaccard_index ORDER BY arr; -SELECT [1,2] AS arr_1, arr AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) FROM array_jaccard_index ORDER BY arr; -SELECT arr AS arr_1, arr AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) FROM array_jaccard_index ORDER BY arr; +SELECT arr, [1,2] AS other, round(arrayJaccardIndex(arr, other), 2) FROM array_jaccard_index ORDER BY arr; +SELECT arr, [] AS other, round(arrayJaccardIndex(arr, other), 2) FROM array_jaccard_index ORDER BY arr; 
+SELECT [1,2] AS other, arr, round(arrayJaccardIndex(other, arr), 2) FROM array_jaccard_index ORDER BY arr; +SELECT [] AS other, arr, round(arrayJaccardIndex(other, arr), 2) FROM array_jaccard_index ORDER BY arr; +SELECT arr, arr, round(arrayJaccardIndex(arr, arr), 2) FROM array_jaccard_index ORDER BY arr; -drop table array_jaccard_index; +DROP TABLE array_jaccard_index; diff --git a/tests/queries/0_stateless/02737_arrayJaccardIndex_exception.reference b/tests/queries/0_stateless/02737_arrayJaccardIndex_exception.reference deleted file mode 100644 index 307d9a195b0..00000000000 --- a/tests/queries/0_stateless/02737_arrayJaccardIndex_exception.reference +++ /dev/null @@ -1,2 +0,0 @@ -Code: 43 -Code: 386 diff --git a/tests/queries/0_stateless/02737_arrayJaccardIndex_exception.sh b/tests/queries/0_stateless/02737_arrayJaccardIndex_exception.sh deleted file mode 100755 index 49e80e06cba..00000000000 --- a/tests/queries/0_stateless/02737_arrayJaccardIndex_exception.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env bash - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -# Code: 43. DB::Exception: Received from localhost:9000. DB::Exception: array aggregate functions cannot be performed on two empty arrays: While processing arrayJaccardIndex([], []). (ILLEGAL_TYPE_OF_ARGUMENT) -$CLICKHOUSE_CLIENT -q "SELECT arrayJaccardIndex([], [])" |& grep -o "Code: 43" - -# Code: 386. DB::Exception: Received from localhost:9000. DB::Exception: There is no subtype for types UInt8, String because some of them are String/FixedString and some of them are not: While processing [1, 2] AS arr_1, ['1', '2'] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2). (NO_COMMON_TYPE) -$CLICKHOUSE_CLIENT -q "select [1,2] as arr_1, ['1','2'] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2)" |& grep -o "Code: 386" From ec5c9a013744baee5f3e501eadbb98cd596b73e0 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 3 Jul 2023 23:43:38 +0300 Subject: [PATCH 1138/1997] Revert "Fix: Invalid number of rows in Chunk column Object" --- src/Processors/QueryPlan/AggregatingStep.cpp | 2 -- .../0_stateless/02789_object_type_invalid_num_of_rows.reference | 1 - .../0_stateless/02789_object_type_invalid_num_of_rows.sql | 2 -- 3 files changed, 5 deletions(-) delete mode 100644 tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference delete mode 100644 tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.sql diff --git a/src/Processors/QueryPlan/AggregatingStep.cpp b/src/Processors/QueryPlan/AggregatingStep.cpp index eebbfc04304..4ac972e2a79 100644 --- a/src/Processors/QueryPlan/AggregatingStep.cpp +++ b/src/Processors/QueryPlan/AggregatingStep.cpp @@ -319,8 +319,6 @@ void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const B { auto column_with_default = col.column->cloneEmpty(); col.type->insertDefaultInto(*column_with_default); - column_with_default->finalize(); - auto column = ColumnConst::create(std::move(column_with_default), 0); const auto * node = &dag->addColumn({ColumnPtr(std::move(column)), col.type, col.name}); node = &dag->materializeNode(*node); diff --git a/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference b/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference deleted file mode 100644 index 7dec35f7acb..00000000000 --- a/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference +++ /dev/null @@ -1 +0,0 @@ -0.02 diff --git 
a/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.sql b/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.sql deleted file mode 100644 index a9c8a844aa0..00000000000 --- a/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.sql +++ /dev/null @@ -1,2 +0,0 @@ -set allow_experimental_object_type=1; -SELECT '0.02' GROUP BY GROUPING SETS (('6553.6'), (CAST('{"x" : 1}', 'Object(\'json\')'))) settings max_threads=1; -- { serverError NOT_IMPLEMENTED } From 29200341addefeeece3a437a740cacf249f35a61 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 3 Jul 2023 21:05:13 +0000 Subject: [PATCH 1139/1997] Add SonarCloud to README --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index eac036c2d9b..a1147f81077 100644 --- a/README.md +++ b/README.md @@ -16,8 +16,9 @@ curl https://clickhouse.com/ | sh * [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format. * [Slack](https://clickhouse.com/slack) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time. * [Blog](https://clickhouse.com/blog/) contains various ClickHouse-related articles, as well as announcements and reports about events. -* [Code Browser (Woboq)](https://clickhouse.com/codebrowser/ClickHouse/index.html) with syntax highlight and navigation. -* [Code Browser (github.dev)](https://github.dev/ClickHouse/ClickHouse) with syntax highlight, powered by github.dev. +* [Code Browser (Woboq)](https://clickhouse.com/codebrowser/ClickHouse/index.html) with syntax highlighting and navigation. +* [Code Browser (github.dev)](https://github.dev/ClickHouse/ClickHouse) with syntax highlighting, powered by github.dev. +* [Static Analysis (SonarCloud)](https://sonarcloud.io/project/issues?resolved=false&id=ClickHouse_ClickHouse) proposes C++ quality improvements. * [Contacts](https://clickhouse.com/company/contact) can help to get your questions answered if there are any. ## Upcoming Events From d263b6bf1739f4cd3431c469e60643e29dd10fe7 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 3 Jul 2023 21:17:13 +0000 Subject: [PATCH 1140/1997] Docs: Mention homebrew as an alternative and non-production install method --- docs/en/getting-started/install.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md index d44dc861888..d2e7ab30478 100644 --- a/docs/en/getting-started/install.md +++ b/docs/en/getting-started/install.md @@ -378,6 +378,10 @@ request](https://github.com/ClickHouse/ClickHouse/commits/master) and find CI ch https://s3.amazonaws.com/clickhouse/builds/PRs/.../.../binary_aarch64_v80compat/clickhouse". You can then click the link to download the build. +### macOS-only: Install with Homebrew + +To install ClickHouse using the popular `brew` package manager, follow the instructions listed in the [ClickHouse Homebrew tap](https://github.com/ClickHouse/homebrew-clickhouse). 
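
The tap's README carries the authoritative commands; assuming it follows the usual Homebrew tap conventions, an install would look roughly like this sketch (the tap and formula names below are assumptions, not taken from this patch):

```bash
# Hypothetical commands; consult the ClickHouse Homebrew tap README for the exact names.
brew tap clickhouse/clickhouse   # register the third-party tap
brew install clickhouse          # install the formula/cask it provides
```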
+ ## Launch {#launch} To start the server as a daemon, run: From 6742432fd2801380350df489dd882ab538598a7e Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Mon, 3 Jul 2023 21:58:56 +0000 Subject: [PATCH 1141/1997] Number of bucket always increased by 2, so there is no reason provide it as parameter --- src/Interpreters/GraceHashJoin.cpp | 20 +++++++++----------- src/Interpreters/GraceHashJoin.h | 2 +- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/src/Interpreters/GraceHashJoin.cpp b/src/Interpreters/GraceHashJoin.cpp index 4bfe0315138..f5b2386fd1e 100644 --- a/src/Interpreters/GraceHashJoin.cpp +++ b/src/Interpreters/GraceHashJoin.cpp @@ -356,16 +356,16 @@ bool GraceHashJoin::hasMemoryOverflow(const InMemoryJoinPtr & hash_join_) const return hasMemoryOverflow(total_rows, total_bytes); } -GraceHashJoin::Buckets GraceHashJoin::rehashBuckets(size_t to_size) +GraceHashJoin::Buckets GraceHashJoin::rehashBuckets() { std::unique_lock lock(rehash_mutex); + + if (!isPowerOf2(buckets.size())) [[unlikely]] + throw Exception(ErrorCodes::LOGICAL_ERROR, "Number of buckets should be power of 2 but it's {}", buckets.size()); + + const size_t to_size = buckets.size() * 2; size_t current_size = buckets.size(); - if (to_size <= current_size) - return buckets; - - chassert(isPowerOf2(to_size)); - if (to_size > max_num_buckets) { throw Exception(ErrorCodes::LIMIT_EXCEEDED, @@ -623,6 +623,8 @@ Block GraceHashJoin::prepareRightBlock(const Block & block) void GraceHashJoin::addJoinedBlockImpl(Block block) { + LOG_ERROR(&Poco::Logger::get(__PRETTY_FUNCTION__), ""); + block = prepareRightBlock(block); Buckets buckets_snapshot = getCurrentBuckets(); size_t bucket_index = current_bucket->idx; @@ -638,10 +640,6 @@ void GraceHashJoin::addJoinedBlockImpl(Block block) if (current_block.rows() > 0) { std::lock_guard lock(hash_join_mutex); - if (!isPowerOf2(buckets_snapshot.size())) [[unlikely]] - { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Broken buckets. its size({}) is not power of 2", buckets_snapshot.size()); - } if (!hash_join) hash_join = makeInMemoryJoin(); @@ -653,7 +651,7 @@ void GraceHashJoin::addJoinedBlockImpl(Block block) current_block = {}; // Must use the latest buckets snapshot in case that it has been rehashed by other threads. - buckets_snapshot = rehashBuckets(buckets_snapshot.size() * 2); + buckets_snapshot = rehashBuckets(); auto right_blocks = hash_join->releaseJoinedBlocks(/* restructure */ false); hash_join = nullptr; diff --git a/src/Interpreters/GraceHashJoin.h b/src/Interpreters/GraceHashJoin.h index b8d83f4cad0..fd3397ba15e 100644 --- a/src/Interpreters/GraceHashJoin.h +++ b/src/Interpreters/GraceHashJoin.h @@ -109,7 +109,7 @@ private: /// /// NB: after @rehashBuckets there may be rows that are written to the buckets that they do not belong to. /// It is fine; these rows will be written to the corresponding buckets during the third stage. - Buckets rehashBuckets(size_t to_size); + Buckets rehashBuckets(); /// Perform some bookkeeping after all calls to @joinBlock. 
void startReadingDelayedBlocks(); From 2e245f4a438544f2270ef2be9336c8fb6e4aa6ed Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 4 Jul 2023 00:23:00 +0200 Subject: [PATCH 1142/1997] Even better --- src/Core/Field.h | 58 +- .../fuzzers/codegen_fuzzer/CMakeLists.txt | 2 +- .../fuzzers/codegen_fuzzer/clickhouse.g | 1592 ++++ src/Parsers/fuzzers/codegen_fuzzer/out.cpp | 6461 +++++++++++++++++ src/Parsers/fuzzers/codegen_fuzzer/out.proto | 1587 ++++ 5 files changed, 9686 insertions(+), 14 deletions(-) create mode 100644 src/Parsers/fuzzers/codegen_fuzzer/clickhouse.g create mode 100644 src/Parsers/fuzzers/codegen_fuzzer/out.cpp create mode 100644 src/Parsers/fuzzers/codegen_fuzzer/out.proto diff --git a/src/Core/Field.h b/src/Core/Field.h index 0b3c5b7f48d..97a32ab5bb1 100644 --- a/src/Core/Field.h +++ b/src/Core/Field.h @@ -42,10 +42,13 @@ using FieldVector = std::vector>; /// construct a Field of Array or a Tuple type. An alternative approach would be /// to construct both of these types from FieldVector, and have the caller /// specify the desired Field type explicitly. +/// As the result stack overflow on destruction is possible +/// and to avoid it we need to count the depth and have a threshold. #define DEFINE_FIELD_VECTOR(X) \ struct X : public FieldVector \ { \ using FieldVector::FieldVector; \ + size_t nested_field_depth = 0; \ } DEFINE_FIELD_VECTOR(Array); @@ -62,6 +65,7 @@ using FieldMap = std::map, AllocatorWithMemoryTrackin struct X : public FieldMap \ { \ using FieldMap::FieldMap; \ + size_t nested_field_depth = 0; \ } DEFINE_FIELD_MAP(Object); @@ -677,21 +681,43 @@ private: Types::Which which; - /// Field may contain a Field inside in case when Field stores Array, Tuple, Map or Object. - /// As the result stack overflow on destruction is possible - /// and to avoid it we need to count the depth and have a threshold. - size_t nested_field_depth = 0; - - /// Check whether T is already a Field with composite underlying type. + /// StorageType and Original are the same for Array, Tuple, Map, Object template size_t calculateAndCheckFieldDepth(Original && x) { size_t result = 0; - if constexpr (std::is_same_v || std::is_same_v || std::is_same_v) - std::for_each(x.begin(), x.end(), [this, &result](auto & elem){ result = std::max(result, elem.nested_field_depth); }); - else if constexpr (std::is_same_v) - std::for_each(x.begin(), x.end(), [this, &result](auto & elem){ result = std::max(result, elem.second.nested_field_depth); }); + if constexpr (std::is_same_v + || std::is_same_v + || std::is_same_v + || std::is_same_v) + { + result = x.nested_field_depth; + + auto calculate_max = [](const Field & elem, size_t result) + { + switch (elem.which) + { + case Types::Array: + return std::max(result, elem.template get().nested_field_depth); + case Types::Tuple: + return std::max(result, elem.template get().nested_field_depth); + case Types::Map: + return std::max(result, elem.template get().nested_field_depth); + case Types::Object: + return std::max(result, elem.template get().nested_field_depth); + default: + return result; + } + }; + + if constexpr (std::is_same_v) + for (auto & [_, value] : x) + result = calculate_max(value, result); + else + for (auto & value : x) + result = calculate_max(value, result); + } if (result >= DBMS_MAX_NESTED_FIELD_DEPTH) throw Exception(ErrorCodes::TOO_DEEP_RECURSION, "Too deep Field"); @@ -711,9 +737,17 @@ private: // we must initialize the entire wide stored type, and not just the // nominal type. 
using StorageType = NearestFieldType; + /// Incrementing the depth since we create a new Field. - nested_field_depth = calculateAndCheckFieldDepth(x) + 1; + auto depth = calculateAndCheckFieldDepth(x) + 1; new (&storage) StorageType(std::forward(x)); + + if constexpr (std::is_same_v + || std::is_same_v + || std::is_same_v + || std::is_same_v) + reinterpret_cast(&storage)->nested_field_depth = depth + 1; + which = TypeToEnum::value; } @@ -724,8 +758,6 @@ private: using JustT = std::decay_t; assert(which == TypeToEnum::value); JustT * MAY_ALIAS ptr = reinterpret_cast(&storage); - /// Do not increment the depth, because it is an assignment. - nested_field_depth = calculateAndCheckFieldDepth(x); *ptr = std::forward(x); } diff --git a/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt b/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt index 727c49cfc4d..30f0e91a75b 100644 --- a/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt +++ b/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt @@ -42,4 +42,4 @@ clickhouse_add_executable(codegen_select_fuzzer ${FUZZER_SRCS}) set_source_files_properties("${PROTO_SRCS}" "out.cpp" PROPERTIES COMPILE_FLAGS "-Wno-reserved-identifier") target_include_directories(codegen_select_fuzzer SYSTEM BEFORE PRIVATE "${CMAKE_CURRENT_BINARY_DIR}") -target_link_libraries(codegen_select_fuzzer PRIVATE ch_contrib::protobuf_mutator ch_contrib::protoc dbms ${LIB_FUZZING_ENGINE}) +target_link_libraries(codegen_select_fuzzer PRIVATE ch_contrib::protobuf ch_contrib::protobuf_mutator ch_contrib::protoc dbms ${LIB_FUZZING_ENGINE}) diff --git a/src/Parsers/fuzzers/codegen_fuzzer/clickhouse.g b/src/Parsers/fuzzers/codegen_fuzzer/clickhouse.g new file mode 100644 index 00000000000..0ae74055eda --- /dev/null +++ b/src/Parsers/fuzzers/codegen_fuzzer/clickhouse.g @@ -0,0 +1,1592 @@ +" "; +" "; +" "; +";"; + + +"(" $1 ")"; +"(" $1 ", " $2 ")"; +"(" $1 ", " $2 ", " $3 ")"; + +$1 ", " $2 ; +$1 ", " $2 ", " $3 ; +$1 ", " $2 ", " $3 ", " $4 ; +$1 ", " $2 ", " $3 ", " $4 ", " $5 ; + +"[" $1 ", " $2 "]"; +"[" $1 ", " $2 ", " $3 "]"; +"[" $1 ", " $2 ", " $3 ", " $4 "]"; +"[" $1 ", " $2 ", " $3 ", " $4 ", " $5 "]"; + +$0 "(" $1 ")"; +$0 "(" $1 ", " $2 ")"; +$0 "(" $1 ", " $2 ", " $3 ")"; + +$1 " as " $2 ; + + +// TODO: add more clickhouse specific stuff +"SELECT " $1 " FROM " $2 " WHERE " $3 ; +"SELECT " $1 " FROM " $2 " GROUP BY " $3 ; +"SELECT " $1 " FROM " $2 " SORT BY " $3 ; +"SELECT " $1 " FROM " $2 " LIMIT " $3 ; +"SELECT " $1 " FROM " $2 " JOIN " $3 ; +"SELECT " $1 " FROM " $2 " ARRAY JOIN " $3 ; +"SELECT " $1 " FROM " $2 " JOIN " $3 " ON " $4 ; +"SELECT " $1 " FROM " $2 " JOIN " $3 " USING " $5 ; +"SELECT " $1 " INTO OUTFILE " $2 ; + +"WITH " $1 " AS " $2 ; + +"{" $1 ":" $2 "}"; +"[" $1 "," $2 "]"; +"[]"; + + +" x "; +"x"; +" `x` "; +"`x`"; + +" \"value\" "; +"\"value\""; +" 0 "; +"0"; +"1"; +"2"; +"123123123123123123"; +"182374019873401982734091873420923123123123123123"; +"1e-1"; +"1.1"; +"\"\""; +" '../../../../../../../../../etc/passwd' "; + +"/"; +"="; +"=="; +"!="; +"<>"; +"<"; +"<="; +">"; +">="; +"<<"; +"|<<"; +"&"; +"|"; +"||"; +"<|"; +"|>"; +"+"; +"-"; +"~"; +"*"; +"/"; +"\\"; +"%"; +""; +"."; +","; +","; +","; +","; +","; +","; +"("; +")"; +"("; +")"; +"("; +")"; +"("; +")"; +"("; +")"; +"("; +")"; +"?"; +":"; +"@"; +"@@"; +"$"; +"\""; +"`"; +"{"; +"}"; +"^"; +"::"; +"->"; +"]"; +"["; + +" abs "; +" accurate_Cast "; +" accurateCast "; +" accurate_CastOrNull "; +" accurateCastOrNull "; +" acos "; +" acosh "; +" ADD "; +" ADD COLUMN "; +" ADD CONSTRAINT "; +" addDays "; +" addHours 
"; +" ADD INDEX "; +" addMinutes "; +" addMonths "; +" addQuarters "; +" addressToLine "; +" addressToSymbol "; +" addSeconds "; +" addWeeks "; +" addYears "; +" aes_decrypt_mysql "; +" aes_encrypt_mysql "; +" AFTER "; +" AggregateFunction "; +" aggThrow "; +" ALIAS "; +" ALL "; +" alphaTokens "; +" ALTER "; +" ALTER LIVE VIEW "; +" ALTER TABLE "; +" and "; +" AND "; +" ANTI "; +" any "; +" ANY "; +" anyHeavy "; +" anyLast "; +" appendTrailingCharIfAbsent "; +" argMax "; +" argMin "; +" array "; +" Array "; +" ARRAY "; +" arrayAll "; +" arrayAUC "; +" arrayAvg "; +" arrayCompact "; +" arrayConcat "; +" arrayCount "; +" arrayCumSum "; +" arrayCumSumNonNegative "; +" arrayDifference "; +" arrayDistinct "; +" arrayElement "; +" arrayEnumerate "; +" arrayEnumerateDense "; +" arrayEnumerateDenseRanked "; +" arrayEnumerateUniq "; +" arrayEnumerateUniqRanked "; +" arrayExists "; +" arrayFill "; +" arrayFilter "; +" arrayFirst "; +" arrayFirstIndex "; +" arrayFlatten "; +" arrayIntersect "; +" arrayJoin "; +" ARRAY JOIN "; +" arrayMap "; +" arrayMax "; +" arrayMin "; +" arrayPartialReverseSort "; +" arrayPartialShuffle "; +" arrayPartialSort "; +" arrayPopBack "; +" arrayPopFront "; +" arrayProduct "; +" arrayPushBack "; +" arrayPushFront "; +" arrayReduce "; +" arrayReduceInRanges "; +" arrayResize "; +" arrayReverse "; +" arrayReverseFill "; +" arrayReverseSort "; +" arrayReverseSplit "; +" arrayShuffle "; +" arraySlice "; +" arraySort "; +" arraySplit "; +" arrayStringConcat "; +" arraySum "; +" arrayUniq "; +" arrayWithConstant "; +" arrayZip "; +" AS "; +" ASC "; +" ASCENDING "; +" asin "; +" asinh "; +" ASOF "; +" assumeNotNull "; +" AST "; +" ASYNC "; +" atan "; +" atan2 "; +" atanh "; +" ATTACH "; +" ATTACH PART "; +" ATTACH PARTITION "; +" avg "; +" avgWeighted "; +" bar "; +" base64Decode "; +" base64Encode "; +" basename "; +" bayesAB "; +" BETWEEN "; +" BIGINT "; +" BIGINT SIGNED "; +" BIGINT UNSIGNED "; +" bin "; +" BINARY "; +" BINARY LARGE OBJECT "; +" BINARY VARYING "; +" bitAnd "; +" BIT_AND "; +" __bitBoolMaskAnd "; +" __bitBoolMaskOr "; +" bitCount "; +" bitHammingDistance "; +" bitmapAnd "; +" bitmapAndCardinality "; +" bitmapAndnot "; +" bitmapAndnotCardinality "; +" bitmapBuild "; +" bitmapCardinality "; +" bitmapContains "; +" bitmapHasAll "; +" bitmapHasAny "; +" bitmapMax "; +" bitmapMin "; +" bitmapOr "; +" bitmapOrCardinality "; +" bitmapSubsetInRange "; +" bitmapSubsetLimit "; +" bitmapToArray "; +" bitmapTransform "; +" bitmapXor "; +" bitmapXorCardinality "; +" bitmaskToArray "; +" bitmaskToList "; +" bitNot "; +" bitOr "; +" BIT_OR "; +" bitPositionsToArray "; +" bitRotateLeft "; +" bitRotateRight "; +" bitShiftLeft "; +" bitShiftRight "; +" __bitSwapLastTwo "; +" bitTest "; +" bitTestAll "; +" bitTestAny "; +" __bitWrapperFunc "; +" bitXor "; +" BIT_XOR "; +" BLOB "; +" blockNumber "; +" blockSerializedSize "; +" blockSize "; +" BOOL "; +" BOOLEAN "; +" BOTH "; +" boundingRatio "; +" buildId "; +" BY "; +" BYTE "; +" BYTEA "; +" byteSize "; +" CASE "; +" caseWithExpr "; +" caseWithExpression "; +" caseWithoutExpr "; +" caseWithoutExpression "; +" _CAST "; +" CAST "; +" categoricalInformationValue "; +" cbrt "; +" ceil "; +" ceiling "; +" char "; +" CHAR "; +" CHARACTER "; +" CHARACTER LARGE OBJECT "; +" CHARACTER_LENGTH "; +" CHARACTER VARYING "; +" CHAR LARGE OBJECT "; +" CHAR_LENGTH "; +" CHAR VARYING "; +" CHECK "; +" CHECK TABLE "; +" cityHash64 "; +" CLEAR "; +" CLEAR COLUMN "; +" CLEAR INDEX "; +" CLOB "; +" CLUSTER "; +" coalesce "; +" CODEC "; +" COLLATE "; 
+" COLUMN "; +" COLUMNS "; +" COMMENT "; +" COMMENT COLUMN "; +" concat "; +" concatAssumeInjective "; +" connection_id "; +" connectionid "; +" connectionId "; +" CONSTRAINT "; +" convertCharset "; +" corr "; +" corrStable "; +" cos "; +" cosh "; +" count "; +" countDigits "; +" countEqual "; +" countMatches "; +" countMatchesCaseInsensitive "; +" countSubstrings "; +" countSubstringsCaseInsensitive "; +" countSubstringsCaseInsensitiveUTF8 "; +" covarPop "; +" COVAR_POP "; +" covarPopStable "; +" covarSamp "; +" COVAR_SAMP "; +" covarSampStable "; +" CRC32 "; +" CRC32IEEE "; +" CRC64 "; +" CREATE "; +" CROSS "; +" CUBE "; +" currentDatabase "; +" currentProfiles "; +" currentRoles "; +" currentUser "; +" cutFragment "; +" cutIPv6 "; +" cutQueryString "; +" cutQueryStringAndFragment "; +" cutToFirstSignificantSubdomain "; +" cutToFirstSignificantSubdomainCustom "; +" cutToFirstSignificantSubdomainCustomWithWWW "; +" cutToFirstSignificantSubdomainWithWWW "; +" cutURLParameter "; +" cutWWW "; +" D "; +" DATABASE "; +" DATABASES "; +" Date "; +" DATE "; +" Date32 "; +" DATE_ADD "; +" DATEADD "; +" dateDiff "; +" DATE_DIFF "; +" DATEDIFF "; +" dateName "; +" DATE_SUB "; +" DATESUB "; +" DateTime "; +" DateTime32 "; +" DateTime64 "; +" dateTime64ToSnowflake "; +" dateTimeToSnowflake "; +" date_trunc "; +" dateTrunc "; +" DAY "; +" DAYOFMONTH "; +" DAYOFWEEK "; +" DAYOFYEAR "; +" DD "; +" DEC "; +" Decimal "; +" Decimal128 "; +" Decimal256 "; +" Decimal32 "; +" Decimal64 "; +" decodeURLComponent "; +" decodeXMLComponent "; +" decrypt "; +" DEDUPLICATE "; +" DEFAULT "; +" defaultProfiles "; +" defaultRoles "; +" defaultValueOfArgumentType "; +" defaultValueOfTypeName "; +" DELAY "; +" DELETE "; +" DELETE WHERE "; +" deltaSum "; +" deltaSumTimestamp "; +" demangle "; +" dense_rank "; +" DESC "; +" DESCENDING "; +" DESCRIBE "; +" DETACH "; +" DETACH PARTITION "; +" dictGet "; +" dictGetChildren "; +" dictGetDate "; +" dictGetDateOrDefault "; +" dictGetDateTime "; +" dictGetDateTimeOrDefault "; +" dictGetDescendants "; +" dictGetFloat32 "; +" dictGetFloat32OrDefault "; +" dictGetFloat64 "; +" dictGetFloat64OrDefault "; +" dictGetHierarchy "; +" dictGetInt16 "; +" dictGetInt16OrDefault "; +" dictGetInt32 "; +" dictGetInt32OrDefault "; +" dictGetInt64 "; +" dictGetInt64OrDefault "; +" dictGetInt8 "; +" dictGetInt8OrDefault "; +" dictGetOrDefault "; +" dictGetOrNull "; +" dictGetString "; +" dictGetStringOrDefault "; +" dictGetUInt16 "; +" dictGetUInt16OrDefault "; +" dictGetUInt32 "; +" dictGetUInt32OrDefault "; +" dictGetUInt64 "; +" dictGetUInt64OrDefault "; +" dictGetUInt8 "; +" dictGetUInt8OrDefault "; +" dictGetUUID "; +" dictGetUUIDOrDefault "; +" dictHas "; +" DICTIONARIES "; +" DICTIONARY "; +" dictIsIn "; +" DISK "; +" DISTINCT "; +" DISTRIBUTED "; +" divide "; +" domain "; +" domainWithoutWWW "; +" DOUBLE "; +" DOUBLE PRECISION "; +" DROP "; +" DROP COLUMN "; +" DROP CONSTRAINT "; +" DROP DETACHED PART "; +" DROP DETACHED PARTITION "; +" DROP INDEX "; +" DROP PARTITION "; +" dumpColumnStructure "; +" e "; +" ELSE "; +" empty "; +" emptyArrayDate "; +" emptyArrayDateTime "; +" emptyArrayFloat32 "; +" emptyArrayFloat64 "; +" emptyArrayInt16 "; +" emptyArrayInt32 "; +" emptyArrayInt64 "; +" emptyArrayInt8 "; +" emptyArrayString "; +" emptyArrayToSingle "; +" emptyArrayUInt16 "; +" emptyArrayUInt32 "; +" emptyArrayUInt64 "; +" emptyArrayUInt8 "; +" enabledProfiles "; +" enabledRoles "; +" encodeXMLComponent "; +" encrypt "; +" END "; +" endsWith "; +" ENGINE "; +" entropy "; +" Enum "; +" ENUM 
"; +" Enum16 "; +" Enum8 "; +" equals "; +" erf "; +" erfc "; +" errorCodeToName "; +" evalMLMethod "; +" EVENTS "; +" EXCHANGE TABLES "; +" EXISTS "; +" exp "; +" exp10 "; +" exp2 "; +" EXPLAIN "; +" exponentialMovingAverage "; +" EXPRESSION "; +" extract "; +" EXTRACT "; +" extractAll "; +" extractAllGroups "; +" extractAllGroupsHorizontal "; +" extractAllGroupsVertical "; +" extractGroups "; +" extractTextFromHTML "; +" extractURLParameter "; +" extractURLParameterNames "; +" extractURLParameters "; +" farmFingerprint64 "; +" farmHash64 "; +" FETCHES "; +" FETCH PART "; +" FETCH PARTITION "; +" file "; +" filesystemAvailable "; +" filesystemCapacity "; +" filesystemFree "; +" FINAL "; +" finalizeAggregation "; +" FIRST "; +" firstSignificantSubdomain "; +" firstSignificantSubdomainCustom "; +" first_value "; +" FIXED "; +" FixedString "; +" flatten "; +" FLOAT "; +" Float32 "; +" Float64 "; +" floor "; +" FLUSH "; +" FOR "; +" ForEach "; +" format "; +" FORMAT "; +" formatDateTime "; +" formatReadableQuantity "; +" formatReadableDecimalSize "; +" formatReadableSize "; +" formatReadableTimeDelta "; +" formatRow "; +" formatRowNoNewline "; +" FQDN "; +" fragment "; +" FREEZE "; +" FROM "; +" FROM_BASE64 "; +" fromModifiedJulianDay "; +" fromModifiedJulianDayOrNull "; +" FROM_UNIXTIME "; +" fromUnixTimestamp "; +" fromUnixTimestamp64Micro "; +" fromUnixTimestamp64Milli "; +" fromUnixTimestamp64Nano "; +" FULL "; +" fullHostName "; +" FUNCTION "; +" fuzzBits "; +" gccMurmurHash "; +" gcd "; +" generateUUIDv4 "; +" geoDistance "; +" geohashDecode "; +" geohashEncode "; +" geohashesInBox "; +" geoToH3 "; +" geoToS2 "; +" getMacro "; +" __getScalar "; +" getServerPort "; +" getSetting "; +" getSizeOfEnumType "; +" GLOBAL "; +" globalIn "; +" globalInIgnoreSet "; +" globalNotIn "; +" globalNotInIgnoreSet "; +" globalNotNullIn "; +" globalNotNullInIgnoreSet "; +" globalNullIn "; +" globalNullInIgnoreSet "; +" globalVariable "; +" GRANULARITY "; +" greatCircleAngle "; +" greatCircleDistance "; +" greater "; +" greaterOrEquals "; +" greatest "; +" GROUP "; +" groupArray "; +" groupArrayInsertAt "; +" groupArrayMovingAvg "; +" groupArrayMovingSum "; +" groupArraySample "; +" groupBitAnd "; +" groupBitmap "; +" groupBitmapAnd "; +" groupBitmapOr "; +" groupBitmapXor "; +" groupBitOr "; +" groupBitXor "; +" GROUP BY "; +" groupUniqArray "; +" h3EdgeAngle "; +" h3EdgeLengthM "; +" h3GetBaseCell "; +" h3GetFaces "; +" h3GetResolution "; +" h3HexAreaM2 "; +" h3IndexesAreNeighbors "; +" h3IsPentagon "; +" h3IsResClassIII "; +" h3IsValid "; +" h3kRing "; +" h3ToChildren "; +" h3ToGeo "; +" h3ToGeoBoundary "; +" h3ToParent "; +" h3ToString "; +" halfMD5 "; +" has "; +" hasAll "; +" hasAny "; +" hasColumnInTable "; +" hasSubstr "; +" hasThreadFuzzer "; +" hasToken "; +" hasTokenCaseInsensitive "; +" HAVING "; +" hex "; +" HH "; +" HIERARCHICAL "; +" histogram "; +" hiveHash "; +" hostname "; +" hostName "; +" HOUR "; +" hypot "; +" ID "; +" identity "; +" if "; +" IF "; +" IF EXISTS "; +" IF NOT EXISTS "; +" ifNotFinite "; +" ifNull "; +" ignore "; +" ilike "; +" ILIKE "; +" in "; +" IN "; +" INDEX "; +" indexHint "; +" indexOf "; +" INET4 "; +" INET6 "; +" INET6_ATON "; +" INET6_NTOA "; +" INET_ATON "; +" INET_NTOA "; +" INF "; +" inIgnoreSet "; +" initializeAggregation "; +" initial_query_id "; +" initialQueryID "; +" INJECTIVE "; +" INNER "; +" IN PARTITION "; +" INSERT "; +" INSERT INTO "; +" INT "; +" INT1 "; +" Int128 "; +" Int16 "; +" INT1 SIGNED "; +" INT1 UNSIGNED "; +" Int256 "; +" Int32 "; +" 
Int64 "; +" Int8 "; +" intDiv "; +" intDivOrZero "; +" INTEGER "; +" INTEGER SIGNED "; +" INTEGER UNSIGNED "; +" INTERVAL "; +" IntervalDay "; +" IntervalHour "; +" intervalLengthSum "; +" IntervalMinute "; +" IntervalMonth "; +" IntervalQuarter "; +" IntervalSecond "; +" IntervalWeek "; +" IntervalYear "; +" intExp10 "; +" intExp2 "; +" intHash32 "; +" intHash64 "; +" INTO "; +" INTO OUTFILE "; +" INT SIGNED "; +" INT UNSIGNED "; +" IPv4 "; +" IPv4CIDRToRange "; +" IPv4NumToString "; +" IPv4NumToStringClassC "; +" IPv4StringToNum "; +" IPv4ToIPv6 "; +" IPv6 "; +" IPv6CIDRToRange "; +" IPv6NumToString "; +" IPv6StringToNum "; +" IS "; +" isConstant "; +" isDecimalOverflow "; +" isFinite "; +" isInfinite "; +" isIPAddressInRange "; +" isIPv4String "; +" isIPv6String "; +" isNaN "; +" isNotNull "; +" isNull "; +" IS_OBJECT_ID "; +" isValidJSON "; +" isValidUTF8 "; +" isZeroOrNull "; +" javaHash "; +" javaHashUTF16LE "; +" JOIN "; +" joinGet "; +" joinGetOrNull "; +" JSON_EXISTS "; +" JSONExtract "; +" JSONExtractArrayRaw "; +" JSONExtractBool "; +" JSONExtractFloat "; +" JSONExtractInt "; +" JSONExtractKeysAndValues "; +" JSONExtractKeysAndValuesRaw "; +" JSONExtractKeys "; +" JSONExtractRaw "; +" JSONExtractString "; +" JSONExtractUInt "; +" JSONHas "; +" JSONKey "; +" JSONLength "; +" JSON_QUERY "; +" JSONType "; +" JSON_VALUE "; +" jumpConsistentHash "; +" KEY "; +" KILL "; +" kurtPop "; +" kurtSamp "; +" lagInFrame "; +" LAST "; +" last_value "; +" LAYOUT "; +" lcase "; +" lcm "; +" leadInFrame "; +" LEADING "; +" least "; +" LEFT "; +" LEFT ARRAY JOIN "; +" leftPad "; +" leftPadUTF8 "; +" lemmatize "; +" length "; +" lengthUTF8 "; +" less "; +" lessOrEquals "; +" lgamma "; +" LIFETIME "; +" like "; +" LIKE "; +" LIMIT "; +" LIVE "; +" ln "; +" LOCAL "; +" locate "; +" log "; +" log10 "; +" log1p "; +" log2 "; +" LOGS "; +" logTrace "; +" LONGBLOB "; +" LONGTEXT "; +" LowCardinality "; +" lowCardinalityIndices "; +" lowCardinalityKeys "; +" lower "; +" lowerUTF8 "; +" lpad "; +" LTRIM "; +" M "; +" MACNumToString "; +" MACStringToNum "; +" MACStringToOUI "; +" mannWhitneyUTest "; +" map "; +" Map "; +" mapAdd "; +" mapContains "; +" mapKeys "; +" mapPopulateSeries "; +" mapSubtract "; +" mapValues "; +" match "; +" materialize "; +" MATERIALIZE "; +" MATERIALIZED "; +" MATERIALIZE INDEX "; +" MATERIALIZE TTL "; +" max "; +" MAX "; +" maxIntersections "; +" maxIntersectionsPosition "; +" maxMap "; +" MD4 "; +" MD5 "; +" median "; +" medianBFloat16 "; +" medianBFloat16Weighted "; +" medianDeterministic "; +" medianExact "; +" medianExactHigh "; +" medianExactLow "; +" medianExactWeighted "; +" medianTDigest "; +" medianTDigestWeighted "; +" medianTiming "; +" medianTimingWeighted "; +" MEDIUMBLOB "; +" MEDIUMINT "; +" MEDIUMINT SIGNED "; +" MEDIUMINT UNSIGNED "; +" MEDIUMTEXT "; +" Merge "; +" MERGES "; +" metroHash64 "; +" MI "; +" mid "; +" min "; +" MIN "; +" minMap "; +" minus "; +" MINUTE "; +" MM "; +" mod "; +" MODIFY "; +" MODIFY COLUMN "; +" MODIFY ORDER BY "; +" MODIFY QUERY "; +" MODIFY SETTING "; +" MODIFY TTL "; +" modulo "; +" moduloLegacy "; +" moduloOrZero "; +" MONTH "; +" MOVE "; +" MOVE PART "; +" MOVE PARTITION "; +" movingXXX "; +" multiFuzzyMatchAllIndices "; +" multiFuzzyMatchAny "; +" multiFuzzyMatchAnyIndex "; +" multiIf "; +" multiMatchAllIndices "; +" multiMatchAny "; +" multiMatchAnyIndex "; +" multiply "; +" MultiPolygon "; +" multiSearchAllPositions "; +" multiSearchAllPositionsCaseInsensitive "; +" multiSearchAllPositionsCaseInsensitiveUTF8 "; +" 
multiSearchAllPositionsUTF8 "; +" multiSearchAny "; +" multiSearchAnyCaseInsensitive "; +" multiSearchAnyCaseInsensitiveUTF8 "; +" multiSearchAnyUTF8 "; +" multiSearchFirstIndex "; +" multiSearchFirstIndexCaseInsensitive "; +" multiSearchFirstIndexCaseInsensitiveUTF8 "; +" multiSearchFirstIndexUTF8 "; +" multiSearchFirstPosition "; +" multiSearchFirstPositionCaseInsensitive "; +" multiSearchFirstPositionCaseInsensitiveUTF8 "; +" multiSearchFirstPositionUTF8 "; +" murmurHash2_32 "; +" murmurHash2_64 "; +" murmurHash3_128 "; +" murmurHash3_32 "; +" murmurHash3_64 "; +" MUTATION "; +" N "; +" NAME "; +" NAN_SQL "; +" NATIONAL CHAR "; +" NATIONAL CHARACTER "; +" NATIONAL CHARACTER LARGE OBJECT "; +" NATIONAL CHARACTER VARYING "; +" NATIONAL CHAR VARYING "; +" NCHAR "; +" NCHAR LARGE OBJECT "; +" NCHAR VARYING "; +" negate "; +" neighbor "; +" Nested "; +" netloc "; +" ngramDistance "; +" ngramDistanceCaseInsensitive "; +" ngramDistanceCaseInsensitiveUTF8 "; +" ngramDistanceUTF8 "; +" ngramMinHash "; +" ngramMinHashArg "; +" ngramMinHashArgCaseInsensitive "; +" ngramMinHashArgCaseInsensitiveUTF8 "; +" ngramMinHashArgUTF8 "; +" ngramMinHashCaseInsensitive "; +" ngramMinHashCaseInsensitiveUTF8 "; +" ngramMinHashUTF8 "; +" ngramSearch "; +" ngramSearchCaseInsensitive "; +" ngramSearchCaseInsensitiveUTF8 "; +" ngramSearchUTF8 "; +" ngramSimHash "; +" ngramSimHashCaseInsensitive "; +" ngramSimHashCaseInsensitiveUTF8 "; +" ngramSimHashUTF8 "; +" NO "; +" NO DELAY "; +" NONE "; +" normalizedQueryHash "; +" normalizedQueryHashKeepNames "; +" normalizeQuery "; +" normalizeQueryKeepNames "; +" not "; +" NOT "; +" notEmpty "; +" notEquals "; +" nothing "; +" Nothing "; +" notILike "; +" notIn "; +" notInIgnoreSet "; +" notLike "; +" notNullIn "; +" notNullInIgnoreSet "; +" now "; +" now64 "; +" Null "; +" Nullable "; +" nullIf "; +" nullIn "; +" nullInIgnoreSet "; +" NULLS "; +" NULL_SQL "; +" NUMERIC "; +" NVARCHAR "; +" OFFSET "; +" ON "; +" ONLY "; +" OPTIMIZE "; +" OPTIMIZE TABLE "; +" or "; +" OR "; +" ORDER "; +" ORDER BY "; +" OR REPLACE "; +" OUTER "; +" OUTFILE "; +" parseDateTime32BestEffort "; +" parseDateTime32BestEffortOrNull "; +" parseDateTime32BestEffortOrZero "; +" parseDateTime64BestEffort "; +" parseDateTime64BestEffortOrNull "; +" parseDateTime64BestEffortOrZero "; +" parseDateTimeBestEffort "; +" parseDateTimeBestEffortOrNull "; +" parseDateTimeBestEffortOrZero "; +" parseDateTimeBestEffortUS "; +" parseDateTimeBestEffortUSOrNull "; +" parseDateTimeBestEffortUSOrZero "; +" parseTimeDelta "; +" PARTITION "; +" PARTITION BY "; +" partitionId "; +" path "; +" pathFull "; +" pi "; +" plus "; +" Point "; +" pointInEllipses "; +" pointInPolygon "; +" Polygon "; +" polygonAreaCartesian "; +" polygonAreaSpherical "; +" polygonConvexHullCartesian "; +" polygonPerimeterCartesian "; +" polygonPerimeterSpherical "; +" polygonsDistanceCartesian "; +" polygonsDistanceSpherical "; +" polygonsEqualsCartesian "; +" polygonsIntersectionCartesian "; +" polygonsIntersectionSpherical "; +" polygonsSymDifferenceCartesian "; +" polygonsSymDifferenceSpherical "; +" polygonsUnionCartesian "; +" polygonsUnionSpherical "; +" polygonsWithinCartesian "; +" polygonsWithinSpherical "; +" POPULATE "; +" port "; +" position "; +" positionCaseInsensitive "; +" positionCaseInsensitiveUTF8 "; +" positionUTF8 "; +" pow "; +" power "; +" PREWHERE "; +" PRIMARY "; +" PRIMARY KEY "; +" PROJECTION "; +" protocol "; +" Q "; +" QQ "; +" quantile "; +" quantileBFloat16 "; +" quantileBFloat16Weighted "; +" quantileDeterministic 
"; +" quantileExact "; +" quantileExactExclusive "; +" quantileExactHigh "; +" quantileExactInclusive "; +" quantileExactLow "; +" quantileExactWeighted "; +" quantiles "; +" quantilesBFloat16 "; +" quantilesBFloat16Weighted "; +" quantilesDeterministic "; +" quantilesExact "; +" quantilesExactExclusive "; +" quantilesExactHigh "; +" quantilesExactInclusive "; +" quantilesExactLow "; +" quantilesExactWeighted "; +" quantilesTDigest "; +" quantilesTDigestWeighted "; +" quantilesTiming "; +" quantilesTimingWeighted "; +" quantileTDigest "; +" quantileTDigestWeighted "; +" quantileTiming "; +" quantileTimingWeighted "; +" QUARTER "; +" query_id "; +" queryID "; +" queryString "; +" queryStringAndFragment "; +" rand "; +" rand32 "; +" rand64 "; +" randConstant "; +" randomFixedString "; +" randomPrintableASCII "; +" randomString "; +" randomStringUTF8 "; +" range "; +" RANGE "; +" rank "; +" rankCorr "; +" readWKTMultiPolygon "; +" readWKTPoint "; +" readWKTPolygon "; +" readWKTRing "; +" REAL "; +" REFRESH "; +" regexpQuoteMeta "; +" regionHierarchy "; +" regionIn "; +" regionToArea "; +" regionToCity "; +" regionToContinent "; +" regionToCountry "; +" regionToDistrict "; +" regionToName "; +" regionToPopulation "; +" regionToTopContinent "; +" reinterpret "; +" reinterpretAsDate "; +" reinterpretAsDateTime "; +" reinterpretAsFixedString "; +" reinterpretAsFloat32 "; +" reinterpretAsFloat64 "; +" reinterpretAsInt128 "; +" reinterpretAsInt16 "; +" reinterpretAsInt256 "; +" reinterpretAsInt32 "; +" reinterpretAsInt64 "; +" reinterpretAsInt8 "; +" reinterpretAsString "; +" reinterpretAsUInt128 "; +" reinterpretAsUInt16 "; +" reinterpretAsUInt256 "; +" reinterpretAsUInt32 "; +" reinterpretAsUInt64 "; +" reinterpretAsUInt8 "; +" reinterpretAsUUID "; +" RELOAD "; +" REMOVE "; +" RENAME "; +" RENAME COLUMN "; +" RENAME TABLE "; +" repeat "; +" replace "; +" REPLACE "; +" replaceAll "; +" replaceOne "; +" REPLACE PARTITION "; +" replaceRegexpAll "; +" replaceRegexpOne "; +" REPLICA "; +" replicate "; +" REPLICATED "; +" Resample "; +" RESUME "; +" retention "; +" reverse "; +" reverseUTF8 "; +" RIGHT "; +" rightPad "; +" rightPadUTF8 "; +" Ring "; +" ROLLUP "; +" round "; +" roundAge "; +" roundBankers "; +" roundDown "; +" roundDuration "; +" roundToExp2 "; +" row_number "; +" rowNumberInAllBlocks "; +" rowNumberInBlock "; +" rpad "; +" RTRIM "; +" runningAccumulate "; +" runningConcurrency "; +" runningDifference "; +" runningDifferenceStartingWithFirstValue "; +" S "; +" s2CapContains "; +" s2CapUnion "; +" s2CellsIntersect "; +" s2GetNeighbors "; +" s2RectAdd "; +" s2RectContains "; +" s2RectIntersection "; +" s2RectUnion "; +" s2ToGeo "; +" SAMPLE "; +" SAMPLE BY "; +" SECOND "; +" SELECT "; +" SEMI "; +" SENDS "; +" sequenceCount "; +" sequenceMatch "; +" sequenceNextNode "; +" serverUUID "; +" SET "; +" SETTINGS "; +" SHA1 "; +" SHA224 "; +" SHA256 "; +" SHA384 "; +" SHA512 "; +" shardCount "; +" shardNum "; +" SHOW "; +" SHOW PROCESSLIST "; +" sigmoid "; +" sign "; +" SimpleAggregateFunction "; +" simpleJSONExtractBool "; +" simpleJSONExtractFloat "; +" simpleJSONExtractInt "; +" simpleJSONExtractRaw "; +" simpleJSONExtractString "; +" simpleJSONExtractUInt "; +" simpleJSONHas "; +" simpleLinearRegression "; +" sin "; +" SINGLE "; +" singleValueOrNull "; +" sinh "; +" sipHash128 "; +" sipHash64 "; +" skewPop "; +" skewSamp "; +" sleep "; +" sleepEachRow "; +" SMALLINT "; +" SMALLINT SIGNED "; +" SMALLINT UNSIGNED "; +" snowflakeToDateTime "; +" snowflakeToDateTime64 "; +" SOURCE "; +" 
sparkbar "; +" splitByChar "; +" splitByNonAlpha "; +" splitByRegexp "; +" splitByString "; +" splitByWhitespace "; +" SQL_TSI_DAY "; +" SQL_TSI_HOUR "; +" SQL_TSI_MINUTE "; +" SQL_TSI_MONTH "; +" SQL_TSI_QUARTER "; +" SQL_TSI_SECOND "; +" SQL_TSI_WEEK "; +" SQL_TSI_YEAR "; +" sqrt "; +" SS "; +" START "; +" startsWith "; +" State "; +" stddevPop "; +" STDDEV_POP "; +" stddevPopStable "; +" stddevSamp "; +" STDDEV_SAMP "; +" stddevSampStable "; +" stem "; +" STEP "; +" stochasticLinearRegression "; +" stochasticLogisticRegression "; +" STOP "; +" String "; +" stringToH3 "; +" studentTTest "; +" subBitmap "; +" substr "; +" substring "; +" SUBSTRING "; +" substringUTF8 "; +" subtractDays "; +" subtractHours "; +" subtractMinutes "; +" subtractMonths "; +" subtractQuarters "; +" subtractSeconds "; +" subtractWeeks "; +" subtractYears "; +" sum "; +" sumCount "; +" sumKahan "; +" sumMap "; +" sumMapFiltered "; +" sumMapFilteredWithOverflow "; +" sumMapWithOverflow "; +" sumWithOverflow "; +" SUSPEND "; +" svg "; +" SVG "; +" SYNC "; +" synonyms "; +" SYNTAX "; +" SYSTEM "; +" TABLE "; +" TABLES "; +" tan "; +" tanh "; +" tcpPort "; +" TEMPORARY "; +" TEST "; +" TEXT "; +" tgamma "; +" THEN "; +" throwIf "; +" tid "; +" TIES "; +" TIMEOUT "; +" timeSlot "; +" timeSlots "; +" TIMESTAMP "; +" TIMESTAMP_ADD "; +" TIMESTAMPADD "; +" TIMESTAMP_DIFF "; +" TIMESTAMPDIFF "; +" TIMESTAMP_SUB "; +" TIMESTAMPSUB "; +" timezone "; +" timeZone "; +" timezoneOf "; +" timeZoneOf "; +" timezoneOffset "; +" timeZoneOffset "; +" TINYBLOB "; +" TINYINT "; +" TINYINT SIGNED "; +" TINYINT UNSIGNED "; +" TINYTEXT "; +" TO "; +" TO_BASE64 "; +" toColumnTypeName "; +" toDate "; +" toDate32 "; +" toDate32OrNull "; +" toDate32OrZero "; +" toDateOrNull "; +" toDateOrZero "; +" toDateTime "; +" toDateTime32 "; +" toDateTime64 "; +" toDateTime64OrNull "; +" toDateTime64OrZero "; +" toDateTimeOrNull "; +" toDateTimeOrZero "; +" today "; +" toDayOfMonth "; +" toDayOfWeek "; +" toDayOfYear "; +" toDecimal128 "; +" toDecimal128OrNull "; +" toDecimal128OrZero "; +" toDecimal256 "; +" toDecimal256OrNull "; +" toDecimal256OrZero "; +" toDecimal32 "; +" toDecimal32OrNull "; +" toDecimal32OrZero "; +" toDecimal64 "; +" toDecimal64OrNull "; +" toDecimal64OrZero "; +" TO DISK "; +" toFixedString "; +" toFloat32 "; +" toFloat32OrNull "; +" toFloat32OrZero "; +" toFloat64 "; +" toFloat64OrNull "; +" toFloat64OrZero "; +" toHour "; +" toInt128 "; +" toInt128OrNull "; +" toInt128OrZero "; +" toInt16 "; +" toInt16OrNull "; +" toInt16OrZero "; +" toInt256 "; +" toInt256OrNull "; +" toInt256OrZero "; +" toInt32 "; +" toInt32OrNull "; +" toInt32OrZero "; +" toInt64 "; +" toInt64OrNull "; +" toInt64OrZero "; +" toInt8 "; +" toInt8OrNull "; +" toInt8OrZero "; +" toIntervalDay "; +" toIntervalHour "; +" toIntervalMinute "; +" toIntervalMonth "; +" toIntervalQuarter "; +" toIntervalSecond "; +" toIntervalWeek "; +" toIntervalYear "; +" toIPv4 "; +" toIPv6 "; +" toISOWeek "; +" toISOYear "; +" toJSONString "; +" toLowCardinality "; +" toMinute "; +" toModifiedJulianDay "; +" toModifiedJulianDayOrNull "; +" toMonday "; +" toMonth "; +" toNullable "; +" TOP "; +" topK "; +" topKWeighted "; +" topLevelDomain "; +" toQuarter "; +" toRelativeDayNum "; +" toRelativeHourNum "; +" toRelativeMinuteNum "; +" toRelativeMonthNum "; +" toRelativeQuarterNum "; +" toRelativeSecondNum "; +" toRelativeWeekNum "; +" toRelativeYearNum "; +" toSecond "; +" toStartOfDay "; +" toStartOfFifteenMinutes "; +" toStartOfFiveMinutes "; +" toStartOfHour "; +" 
toStartOfInterval "; +" toStartOfISOYear "; +" toStartOfMinute "; +" toStartOfMonth "; +" toStartOfQuarter "; +" toStartOfSecond "; +" toStartOfTenMinutes "; +" toStartOfWeek "; +" toStartOfYear "; +" toString "; +" toStringCutToZero "; +" TO TABLE "; +" TOTALS "; +" toTime "; +" toTimezone "; +" toTimeZone "; +" toTypeName "; +" toUInt128 "; +" toUInt128OrNull "; +" toUInt128OrZero "; +" toUInt16 "; +" toUInt16OrNull "; +" toUInt16OrZero "; +" toUInt256 "; +" toUInt256OrNull "; +" toUInt256OrZero "; +" toUInt32 "; +" toUInt32OrNull "; +" toUInt32OrZero "; +" toUInt64 "; +" toUInt64OrNull "; +" toUInt64OrZero "; +" toUInt8 "; +" toUInt8OrNull "; +" toUInt8OrZero "; +" toUnixTimestamp "; +" toUnixTimestamp64Micro "; +" toUnixTimestamp64Milli "; +" toUnixTimestamp64Nano "; +" toUUID "; +" toUUIDOrNull "; +" toUUIDOrZero "; +" toValidUTF8 "; +" TO VOLUME "; +" toWeek "; +" toYear "; +" toYearWeek "; +" toYYYYMM "; +" toYYYYMMDD "; +" toYYYYMMDDhhmmss "; +" TRAILING "; +" transform "; +" TRIM "; +" trimBoth "; +" trimLeft "; +" trimRight "; +" trunc "; +" truncate "; +" TRUNCATE "; +" tryBase64Decode "; +" TTL "; +" tuple "; +" Tuple "; +" tupleElement "; +" tupleHammingDistance "; +" tupleToNameValuePairs "; +" TYPE "; +" ucase "; +" UInt128 "; +" UInt16 "; +" UInt256 "; +" UInt32 "; +" UInt64 "; +" UInt8 "; +" unbin "; +" unhex "; +" UNION "; +" uniq "; +" uniqCombined "; +" uniqCombined64 "; +" uniqExact "; +" uniqHLL12 "; +" uniqTheta "; +" uniqUpTo "; +" UPDATE "; +" upper "; +" upperUTF8 "; +" uptime "; +" URLHash "; +" URLHierarchy "; +" URLPathHierarchy "; +" USE "; +" user "; +" USING "; +" UUID "; +" UUIDNumToString "; +" UUIDStringToNum "; +" validateNestedArraySizes "; +" VALUES "; +" VARCHAR "; +" VARCHAR2 "; +" varPop "; +" VAR_POP "; +" varPopStable "; +" varSamp "; +" VAR_SAMP "; +" varSampStable "; +" version "; +" VIEW "; +" visibleWidth "; +" visitParamExtractBool "; +" visitParamExtractFloat "; +" visitParamExtractInt "; +" visitParamExtractRaw "; +" visitParamExtractString "; +" visitParamExtractUInt "; +" visitParamHas "; +" VOLUME "; +" WATCH "; +" week "; +" WEEK "; +" welchTTest "; +" WHEN "; +" WHERE "; +" windowFunnel "; +" WITH "; +" WITH FILL "; +" WITH TIES "; +" WK "; +" wkt "; +" wordShingleMinHash "; +" wordShingleMinHashArg "; +" wordShingleMinHashArgCaseInsensitive "; +" wordShingleMinHashArgCaseInsensitiveUTF8 "; +" wordShingleMinHashArgUTF8 "; +" wordShingleMinHashCaseInsensitive "; +" wordShingleMinHashCaseInsensitiveUTF8 "; +" wordShingleMinHashUTF8 "; +" wordShingleSimHash "; +" wordShingleSimHashCaseInsensitive "; +" wordShingleSimHashCaseInsensitiveUTF8 "; +" wordShingleSimHashUTF8 "; +" WW "; +" xor "; +" xxHash32 "; +" xxHash64 "; +" kostikConsistentHash "; +" YEAR "; +" yearweek "; +" yesterday "; +" YY "; +" YYYY "; +" zookeeperSessionUptime "; diff --git a/src/Parsers/fuzzers/codegen_fuzzer/out.cpp b/src/Parsers/fuzzers/codegen_fuzzer/out.cpp new file mode 100644 index 00000000000..29168751d71 --- /dev/null +++ b/src/Parsers/fuzzers/codegen_fuzzer/out.cpp @@ -0,0 +1,6461 @@ +#include +#include +#include + +#include + +#include "out.pb.h" + +void GenerateWord(const Word&, std::string&, int); + +void GenerateSentence(const Sentence& stc, std::string &s, int depth) { + for (int i = 0; i < stc.words_size(); i++ ) { + GenerateWord(stc.words(i), s, ++depth); + } +} +void GenerateWord(const Word& word, std::string &s, int depth) { + if (depth > 5) return; + + switch (word.value()) { + case 0: { + s += " "; + break; + } + case 1: { + s += " "; + break; + 
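+ // Each case below appends one token to s: cases 0-3 emit whitespace and ';'; cases 4-30 recurse into word.inner() to compose bracketed lists, function calls, and SELECT-style statements; cases 32-104 emit identifiers, literals, and operators; cases 105 onward emit ClickHouse keyword and function-name tokens. Recursion is cut off once depth exceeds 5.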
} + case 2: { + s += " "; + break; + } + case 3: { + s += ";"; + break; + } + case 4: { + s += "("; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += ")"; + break; + } + case 5: { + s += "("; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += ", "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += ")"; + break; + } + case 6: { + s += "("; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += ", "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += ", "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + s += ")"; + break; + } + case 7: { + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += ", "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + break; + } + case 8: { + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += ", "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += ", "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + break; + } + case 9: { + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += ", "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += ", "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + s += ", "; + if (word.inner().words_size() > 3) GenerateWord(word.inner().words(3), s, ++depth); + break; + } + case 10: { + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += ", "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += ", "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + s += ", "; + if (word.inner().words_size() > 3) GenerateWord(word.inner().words(3), s, ++depth); + s += ", "; + if (word.inner().words_size() > 4) GenerateWord(word.inner().words(4), s, ++depth); + break; + } + case 11: { + s += "["; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += ", "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += "]"; + break; + } + case 12: { + s += "["; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += ", "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += ", "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + s += "]"; + break; + } + case 13: { + s += "["; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += ", "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += ", "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + s += ", "; + if (word.inner().words_size() > 3) GenerateWord(word.inner().words(3), s, ++depth); + s += "]"; + break; + } + case 14: { + s += "["; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += ", "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += ", "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + s += ", "; + if 
(word.inner().words_size() > 3) GenerateWord(word.inner().words(3), s, ++depth); + s += ", "; + if (word.inner().words_size() > 4) GenerateWord(word.inner().words(4), s, ++depth); + s += "]"; + break; + } + case 15: { + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += "("; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += ")"; + break; + } + case 16: { + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += "("; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += ", "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + s += ")"; + break; + } + case 17: { + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += "("; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += ", "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + s += ", "; + if (word.inner().words_size() > 3) GenerateWord(word.inner().words(3), s, ++depth); + s += ")"; + break; + } + case 18: { + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += " as "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + break; + } + case 19: { + s += "SELECT "; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += " FROM "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += " WHERE "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + break; + } + case 20: { + s += "SELECT "; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += " FROM "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += " GROUP BY "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + break; + } + case 21: { + s += "SELECT "; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += " FROM "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += " SORT BY "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + break; + } + case 22: { + s += "SELECT "; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += " FROM "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += " LIMIT "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + break; + } + case 23: { + s += "SELECT "; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += " FROM "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += " JOIN "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + break; + } + case 24: { + s += "SELECT "; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += " FROM "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += " ARRAY JOIN "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + break; + } + case 25: { + s += "SELECT "; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += " FROM "; + if 
(word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += " JOIN "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + s += " ON "; + if (word.inner().words_size() > 3) GenerateWord(word.inner().words(3), s, ++depth); + break; + } + case 26: { + s += "SELECT "; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += " FROM "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += " JOIN "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + s += " USING "; + if (word.inner().words_size() > 3) GenerateWord(word.inner().words(3), s, ++depth); + break; + } + case 27: { + s += "SELECT "; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += " INTO OUTFILE "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + break; + } + case 28: { + s += "WITH "; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += " AS "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + break; + } + case 29: { + s += "{"; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += ":"; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += "}"; + break; + } + case 30: { + s += "["; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += ","; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += "]"; + break; + } + case 31: { + s += "[]"; + break; + } + case 32: { + s += " x "; + break; + } + case 33: { + s += "x"; + break; + } + case 34: { + s += " `x` "; + break; + } + case 35: { + s += "`x`"; + break; + } + case 36: { + s += " \"value\" "; + break; + } + case 37: { + s += "\"value\""; + break; + } + case 38: { + s += " 0 "; + break; + } + case 39: { + s += "0"; + break; + } + case 40: { + s += "1"; + break; + } + case 41: { + s += "2"; + break; + } + case 42: { + s += "123123123123123123"; + break; + } + case 43: { + s += "182374019873401982734091873420923123123123123123"; + break; + } + case 44: { + s += "1e-1"; + break; + } + case 45: { + s += "1.1"; + break; + } + case 46: { + s += "\"\""; + break; + } + case 47: { + s += " '../../../../../../../../../etc/passwd' "; + break; + } + case 48: { + s += "/"; + break; + } + case 49: { + s += "="; + break; + } + case 50: { + s += "=="; + break; + } + case 51: { + s += "!="; + break; + } + case 52: { + s += "<>"; + break; + } + case 53: { + s += "<"; + break; + } + case 54: { + s += "<="; + break; + } + case 55: { + s += ">"; + break; + } + case 56: { + s += ">="; + break; + } + case 57: { + s += "<<"; + break; + } + case 58: { + s += "|<<"; + break; + } + case 59: { + s += "&"; + break; + } + case 60: { + s += "|"; + break; + } + case 61: { + s += "||"; + break; + } + case 62: { + s += "<|"; + break; + } + case 63: { + s += "|>"; + break; + } + case 64: { + s += "+"; + break; + } + case 65: { + s += "-"; + break; + } + case 66: { + s += "~"; + break; + } + case 67: { + s += "*"; + break; + } + case 68: { + s += "/"; + break; + } + case 69: { + s += "\\"; + break; + } + case 70: { + s += "%"; + break; + } + case 71: { + s += ""; + break; + } + case 72: { + s += "."; + break; + } + case 73: { + s += ","; + break; + } + case 74: { + s += ","; + break; + } + case 75: { + s += ","; + break; + } + case 76: { + s 
+= ","; + break; + } + case 77: { + s += ","; + break; + } + case 78: { + s += ","; + break; + } + case 79: { + s += "("; + break; + } + case 80: { + s += ")"; + break; + } + case 81: { + s += "("; + break; + } + case 82: { + s += ")"; + break; + } + case 83: { + s += "("; + break; + } + case 84: { + s += ")"; + break; + } + case 85: { + s += "("; + break; + } + case 86: { + s += ")"; + break; + } + case 87: { + s += "("; + break; + } + case 88: { + s += ")"; + break; + } + case 89: { + s += "("; + break; + } + case 90: { + s += ")"; + break; + } + case 91: { + s += "?"; + break; + } + case 92: { + s += ":"; + break; + } + case 93: { + s += "@"; + break; + } + case 94: { + s += "@@"; + break; + } + case 95: { + s += "$"; + break; + } + case 96: { + s += "\""; + break; + } + case 97: { + s += "`"; + break; + } + case 98: { + s += "{"; + break; + } + case 99: { + s += "}"; + break; + } + case 100: { + s += "^"; + break; + } + case 101: { + s += "::"; + break; + } + case 102: { + s += "->"; + break; + } + case 103: { + s += "]"; + break; + } + case 104: { + s += "["; + break; + } + case 105: { + s += " abs "; + break; + } + case 106: { + s += " accurate_Cast "; + break; + } + case 107: { + s += " accurateCast "; + break; + } + case 108: { + s += " accurate_CastOrNull "; + break; + } + case 109: { + s += " accurateCastOrNull "; + break; + } + case 110: { + s += " acos "; + break; + } + case 111: { + s += " acosh "; + break; + } + case 112: { + s += " ADD "; + break; + } + case 113: { + s += " ADD COLUMN "; + break; + } + case 114: { + s += " ADD CONSTRAINT "; + break; + } + case 115: { + s += " addDays "; + break; + } + case 116: { + s += " addHours "; + break; + } + case 117: { + s += " ADD INDEX "; + break; + } + case 118: { + s += " addMinutes "; + break; + } + case 119: { + s += " addMonths "; + break; + } + case 120: { + s += " addQuarters "; + break; + } + case 121: { + s += " addressToLine "; + break; + } + case 122: { + s += " addressToSymbol "; + break; + } + case 123: { + s += " addSeconds "; + break; + } + case 124: { + s += " addWeeks "; + break; + } + case 125: { + s += " addYears "; + break; + } + case 126: { + s += " aes_decrypt_mysql "; + break; + } + case 127: { + s += " aes_encrypt_mysql "; + break; + } + case 128: { + s += " AFTER "; + break; + } + case 129: { + s += " AggregateFunction "; + break; + } + case 130: { + s += " aggThrow "; + break; + } + case 131: { + s += " ALIAS "; + break; + } + case 132: { + s += " ALL "; + break; + } + case 133: { + s += " alphaTokens "; + break; + } + case 134: { + s += " ALTER "; + break; + } + case 135: { + s += " ALTER LIVE VIEW "; + break; + } + case 136: { + s += " ALTER TABLE "; + break; + } + case 137: { + s += " and "; + break; + } + case 138: { + s += " AND "; + break; + } + case 139: { + s += " ANTI "; + break; + } + case 140: { + s += " any "; + break; + } + case 141: { + s += " ANY "; + break; + } + case 142: { + s += " anyHeavy "; + break; + } + case 143: { + s += " anyLast "; + break; + } + case 144: { + s += " appendTrailingCharIfAbsent "; + break; + } + case 145: { + s += " argMax "; + break; + } + case 146: { + s += " argMin "; + break; + } + case 147: { + s += " array "; + break; + } + case 148: { + s += " Array "; + break; + } + case 149: { + s += " ARRAY "; + break; + } + case 150: { + s += " arrayAll "; + break; + } + case 151: { + s += " arrayAUC "; + break; + } + case 152: { + s += " arrayAvg "; + break; + } + case 153: { + s += " arrayCompact "; + break; + } + case 154: { + s += " arrayConcat "; + break; + } + case 
155: { + s += " arrayCount "; + break; + } + case 156: { + s += " arrayCumSum "; + break; + } + case 157: { + s += " arrayCumSumNonNegative "; + break; + } + case 158: { + s += " arrayDifference "; + break; + } + case 159: { + s += " arrayDistinct "; + break; + } + case 160: { + s += " arrayElement "; + break; + } + case 161: { + s += " arrayEnumerate "; + break; + } + case 162: { + s += " arrayEnumerateDense "; + break; + } + case 163: { + s += " arrayEnumerateDenseRanked "; + break; + } + case 164: { + s += " arrayEnumerateUniq "; + break; + } + case 165: { + s += " arrayEnumerateUniqRanked "; + break; + } + case 166: { + s += " arrayExists "; + break; + } + case 167: { + s += " arrayFill "; + break; + } + case 168: { + s += " arrayFilter "; + break; + } + case 169: { + s += " arrayFirst "; + break; + } + case 170: { + s += " arrayFirstIndex "; + break; + } + case 171: { + s += " arrayFlatten "; + break; + } + case 172: { + s += " arrayIntersect "; + break; + } + case 173: { + s += " arrayJoin "; + break; + } + case 174: { + s += " ARRAY JOIN "; + break; + } + case 175: { + s += " arrayMap "; + break; + } + case 176: { + s += " arrayMax "; + break; + } + case 177: { + s += " arrayMin "; + break; + } + case 178: { + s += " arrayPartialReverseSort "; + break; + } + case 179: { + s += " arrayPartialShuffle "; + break; + } + case 180: { + s += " arrayPartialSort "; + break; + } + case 181: { + s += " arrayPopBack "; + break; + } + case 182: { + s += " arrayPopFront "; + break; + } + case 183: { + s += " arrayProduct "; + break; + } + case 184: { + s += " arrayPushBack "; + break; + } + case 185: { + s += " arrayPushFront "; + break; + } + case 186: { + s += " arrayReduce "; + break; + } + case 187: { + s += " arrayReduceInRanges "; + break; + } + case 188: { + s += " arrayResize "; + break; + } + case 189: { + s += " arrayReverse "; + break; + } + case 190: { + s += " arrayReverseFill "; + break; + } + case 191: { + s += " arrayReverseSort "; + break; + } + case 192: { + s += " arrayReverseSplit "; + break; + } + case 193: { + s += " arrayShuffle "; + break; + } + case 194: { + s += " arraySlice "; + break; + } + case 195: { + s += " arraySort "; + break; + } + case 196: { + s += " arraySplit "; + break; + } + case 197: { + s += " arrayStringConcat "; + break; + } + case 198: { + s += " arraySum "; + break; + } + case 199: { + s += " arrayUniq "; + break; + } + case 200: { + s += " arrayWithConstant "; + break; + } + case 201: { + s += " arrayZip "; + break; + } + case 202: { + s += " AS "; + break; + } + case 203: { + s += " ASC "; + break; + } + case 204: { + s += " ASCENDING "; + break; + } + case 205: { + s += " asin "; + break; + } + case 206: { + s += " asinh "; + break; + } + case 207: { + s += " ASOF "; + break; + } + case 208: { + s += " assumeNotNull "; + break; + } + case 209: { + s += " AST "; + break; + } + case 210: { + s += " ASYNC "; + break; + } + case 211: { + s += " atan "; + break; + } + case 212: { + s += " atan2 "; + break; + } + case 213: { + s += " atanh "; + break; + } + case 214: { + s += " ATTACH "; + break; + } + case 215: { + s += " ATTACH PART "; + break; + } + case 216: { + s += " ATTACH PARTITION "; + break; + } + case 217: { + s += " avg "; + break; + } + case 218: { + s += " avgWeighted "; + break; + } + case 219: { + s += " bar "; + break; + } + case 220: { + s += " base64Decode "; + break; + } + case 221: { + s += " base64Encode "; + break; + } + case 222: { + s += " basename "; + break; + } + case 223: { + s += " bayesAB "; + break; + } + case 224: { + s 
+= " BETWEEN "; + break; + } + case 225: { + s += " BIGINT "; + break; + } + case 226: { + s += " BIGINT SIGNED "; + break; + } + case 227: { + s += " BIGINT UNSIGNED "; + break; + } + case 228: { + s += " bin "; + break; + } + case 229: { + s += " BINARY "; + break; + } + case 230: { + s += " BINARY LARGE OBJECT "; + break; + } + case 231: { + s += " BINARY VARYING "; + break; + } + case 232: { + s += " bitAnd "; + break; + } + case 233: { + s += " BIT_AND "; + break; + } + case 234: { + s += " __bitBoolMaskAnd "; + break; + } + case 235: { + s += " __bitBoolMaskOr "; + break; + } + case 236: { + s += " bitCount "; + break; + } + case 237: { + s += " bitHammingDistance "; + break; + } + case 238: { + s += " bitmapAnd "; + break; + } + case 239: { + s += " bitmapAndCardinality "; + break; + } + case 240: { + s += " bitmapAndnot "; + break; + } + case 241: { + s += " bitmapAndnotCardinality "; + break; + } + case 242: { + s += " bitmapBuild "; + break; + } + case 243: { + s += " bitmapCardinality "; + break; + } + case 244: { + s += " bitmapContains "; + break; + } + case 245: { + s += " bitmapHasAll "; + break; + } + case 246: { + s += " bitmapHasAny "; + break; + } + case 247: { + s += " bitmapMax "; + break; + } + case 248: { + s += " bitmapMin "; + break; + } + case 249: { + s += " bitmapOr "; + break; + } + case 250: { + s += " bitmapOrCardinality "; + break; + } + case 251: { + s += " bitmapSubsetInRange "; + break; + } + case 252: { + s += " bitmapSubsetLimit "; + break; + } + case 253: { + s += " bitmapToArray "; + break; + } + case 254: { + s += " bitmapTransform "; + break; + } + case 255: { + s += " bitmapXor "; + break; + } + case 256: { + s += " bitmapXorCardinality "; + break; + } + case 257: { + s += " bitmaskToArray "; + break; + } + case 258: { + s += " bitmaskToList "; + break; + } + case 259: { + s += " bitNot "; + break; + } + case 260: { + s += " bitOr "; + break; + } + case 261: { + s += " BIT_OR "; + break; + } + case 262: { + s += " bitPositionsToArray "; + break; + } + case 263: { + s += " bitRotateLeft "; + break; + } + case 264: { + s += " bitRotateRight "; + break; + } + case 265: { + s += " bitShiftLeft "; + break; + } + case 266: { + s += " bitShiftRight "; + break; + } + case 267: { + s += " __bitSwapLastTwo "; + break; + } + case 268: { + s += " bitTest "; + break; + } + case 269: { + s += " bitTestAll "; + break; + } + case 270: { + s += " bitTestAny "; + break; + } + case 271: { + s += " __bitWrapperFunc "; + break; + } + case 272: { + s += " bitXor "; + break; + } + case 273: { + s += " BIT_XOR "; + break; + } + case 274: { + s += " BLOB "; + break; + } + case 275: { + s += " blockNumber "; + break; + } + case 276: { + s += " blockSerializedSize "; + break; + } + case 277: { + s += " blockSize "; + break; + } + case 278: { + s += " BOOL "; + break; + } + case 279: { + s += " BOOLEAN "; + break; + } + case 280: { + s += " BOTH "; + break; + } + case 281: { + s += " boundingRatio "; + break; + } + case 282: { + s += " buildId "; + break; + } + case 283: { + s += " BY "; + break; + } + case 284: { + s += " BYTE "; + break; + } + case 285: { + s += " BYTEA "; + break; + } + case 286: { + s += " byteSize "; + break; + } + case 287: { + s += " CASE "; + break; + } + case 288: { + s += " caseWithExpr "; + break; + } + case 289: { + s += " caseWithExpression "; + break; + } + case 290: { + s += " caseWithoutExpr "; + break; + } + case 291: { + s += " caseWithoutExpression "; + break; + } + case 292: { + s += " _CAST "; + break; + } + case 293: { + s += " CAST "; + 
break; + } + case 294: { + s += " categoricalInformationValue "; + break; + } + case 295: { + s += " cbrt "; + break; + } + case 296: { + s += " ceil "; + break; + } + case 297: { + s += " ceiling "; + break; + } + case 298: { + s += " char "; + break; + } + case 299: { + s += " CHAR "; + break; + } + case 300: { + s += " CHARACTER "; + break; + } + case 301: { + s += " CHARACTER LARGE OBJECT "; + break; + } + case 302: { + s += " CHARACTER_LENGTH "; + break; + } + case 303: { + s += " CHARACTER VARYING "; + break; + } + case 304: { + s += " CHAR LARGE OBJECT "; + break; + } + case 305: { + s += " CHAR_LENGTH "; + break; + } + case 306: { + s += " CHAR VARYING "; + break; + } + case 307: { + s += " CHECK "; + break; + } + case 308: { + s += " CHECK TABLE "; + break; + } + case 309: { + s += " cityHash64 "; + break; + } + case 310: { + s += " CLEAR "; + break; + } + case 311: { + s += " CLEAR COLUMN "; + break; + } + case 312: { + s += " CLEAR INDEX "; + break; + } + case 313: { + s += " CLOB "; + break; + } + case 314: { + s += " CLUSTER "; + break; + } + case 315: { + s += " coalesce "; + break; + } + case 316: { + s += " CODEC "; + break; + } + case 317: { + s += " COLLATE "; + break; + } + case 318: { + s += " COLUMN "; + break; + } + case 319: { + s += " COLUMNS "; + break; + } + case 320: { + s += " COMMENT "; + break; + } + case 321: { + s += " COMMENT COLUMN "; + break; + } + case 322: { + s += " concat "; + break; + } + case 323: { + s += " concatAssumeInjective "; + break; + } + case 324: { + s += " connection_id "; + break; + } + case 325: { + s += " connectionid "; + break; + } + case 326: { + s += " connectionId "; + break; + } + case 327: { + s += " CONSTRAINT "; + break; + } + case 328: { + s += " convertCharset "; + break; + } + case 329: { + s += " corr "; + break; + } + case 330: { + s += " corrStable "; + break; + } + case 331: { + s += " cos "; + break; + } + case 332: { + s += " cosh "; + break; + } + case 333: { + s += " count "; + break; + } + case 334: { + s += " countDigits "; + break; + } + case 335: { + s += " countEqual "; + break; + } + case 336: { + s += " countMatches "; + break; + } + case 337: { + s += " countMatchesCaseInsensitive "; + break; + } + case 338: { + s += " countSubstrings "; + break; + } + case 339: { + s += " countSubstringsCaseInsensitive "; + break; + } + case 340: { + s += " countSubstringsCaseInsensitiveUTF8 "; + break; + } + case 341: { + s += " covarPop "; + break; + } + case 342: { + s += " COVAR_POP "; + break; + } + case 343: { + s += " covarPopStable "; + break; + } + case 344: { + s += " covarSamp "; + break; + } + case 345: { + s += " COVAR_SAMP "; + break; + } + case 346: { + s += " covarSampStable "; + break; + } + case 347: { + s += " CRC32 "; + break; + } + case 348: { + s += " CRC32IEEE "; + break; + } + case 349: { + s += " CRC64 "; + break; + } + case 350: { + s += " CREATE "; + break; + } + case 351: { + s += " CROSS "; + break; + } + case 352: { + s += " CUBE "; + break; + } + case 353: { + s += " currentDatabase "; + break; + } + case 354: { + s += " currentProfiles "; + break; + } + case 355: { + s += " currentRoles "; + break; + } + case 356: { + s += " currentUser "; + break; + } + case 357: { + s += " cutFragment "; + break; + } + case 358: { + s += " cutIPv6 "; + break; + } + case 359: { + s += " cutQueryString "; + break; + } + case 360: { + s += " cutQueryStringAndFragment "; + break; + } + case 361: { + s += " cutToFirstSignificantSubdomain "; + break; + } + case 362: { + s += " 
cutToFirstSignificantSubdomainCustom "; + break; + } + case 363: { + s += " cutToFirstSignificantSubdomainCustomWithWWW "; + break; + } + case 364: { + s += " cutToFirstSignificantSubdomainWithWWW "; + break; + } + case 365: { + s += " cutURLParameter "; + break; + } + case 366: { + s += " cutWWW "; + break; + } + case 367: { + s += " D "; + break; + } + case 368: { + s += " DATABASE "; + break; + } + case 369: { + s += " DATABASES "; + break; + } + case 370: { + s += " Date "; + break; + } + case 371: { + s += " DATE "; + break; + } + case 372: { + s += " Date32 "; + break; + } + case 373: { + s += " DATE_ADD "; + break; + } + case 374: { + s += " DATEADD "; + break; + } + case 375: { + s += " dateDiff "; + break; + } + case 376: { + s += " DATE_DIFF "; + break; + } + case 377: { + s += " DATEDIFF "; + break; + } + case 378: { + s += " dateName "; + break; + } + case 379: { + s += " DATE_SUB "; + break; + } + case 380: { + s += " DATESUB "; + break; + } + case 381: { + s += " DateTime "; + break; + } + case 382: { + s += " DateTime32 "; + break; + } + case 383: { + s += " DateTime64 "; + break; + } + case 384: { + s += " dateTime64ToSnowflake "; + break; + } + case 385: { + s += " dateTimeToSnowflake "; + break; + } + case 386: { + s += " date_trunc "; + break; + } + case 387: { + s += " dateTrunc "; + break; + } + case 388: { + s += " DAY "; + break; + } + case 389: { + s += " DAYOFMONTH "; + break; + } + case 390: { + s += " DAYOFWEEK "; + break; + } + case 391: { + s += " DAYOFYEAR "; + break; + } + case 392: { + s += " DD "; + break; + } + case 393: { + s += " DEC "; + break; + } + case 394: { + s += " Decimal "; + break; + } + case 395: { + s += " Decimal128 "; + break; + } + case 396: { + s += " Decimal256 "; + break; + } + case 397: { + s += " Decimal32 "; + break; + } + case 398: { + s += " Decimal64 "; + break; + } + case 399: { + s += " decodeURLComponent "; + break; + } + case 400: { + s += " decodeXMLComponent "; + break; + } + case 401: { + s += " decrypt "; + break; + } + case 402: { + s += " DEDUPLICATE "; + break; + } + case 403: { + s += " DEFAULT "; + break; + } + case 404: { + s += " defaultProfiles "; + break; + } + case 405: { + s += " defaultRoles "; + break; + } + case 406: { + s += " defaultValueOfArgumentType "; + break; + } + case 407: { + s += " defaultValueOfTypeName "; + break; + } + case 408: { + s += " DELAY "; + break; + } + case 409: { + s += " DELETE "; + break; + } + case 410: { + s += " DELETE WHERE "; + break; + } + case 411: { + s += " deltaSum "; + break; + } + case 412: { + s += " deltaSumTimestamp "; + break; + } + case 413: { + s += " demangle "; + break; + } + case 414: { + s += " dense_rank "; + break; + } + case 415: { + s += " DESC "; + break; + } + case 416: { + s += " DESCENDING "; + break; + } + case 417: { + s += " DESCRIBE "; + break; + } + case 418: { + s += " DETACH "; + break; + } + case 419: { + s += " DETACH PARTITION "; + break; + } + case 420: { + s += " dictGet "; + break; + } + case 421: { + s += " dictGetChildren "; + break; + } + case 422: { + s += " dictGetDate "; + break; + } + case 423: { + s += " dictGetDateOrDefault "; + break; + } + case 424: { + s += " dictGetDateTime "; + break; + } + case 425: { + s += " dictGetDateTimeOrDefault "; + break; + } + case 426: { + s += " dictGetDescendants "; + break; + } + case 427: { + s += " dictGetFloat32 "; + break; + } + case 428: { + s += " dictGetFloat32OrDefault "; + break; + } + case 429: { + s += " dictGetFloat64 "; + break; + } + case 430: { + s += " dictGetFloat64OrDefault "; 
+ break; + } + case 431: { + s += " dictGetHierarchy "; + break; + } + case 432: { + s += " dictGetInt16 "; + break; + } + case 433: { + s += " dictGetInt16OrDefault "; + break; + } + case 434: { + s += " dictGetInt32 "; + break; + } + case 435: { + s += " dictGetInt32OrDefault "; + break; + } + case 436: { + s += " dictGetInt64 "; + break; + } + case 437: { + s += " dictGetInt64OrDefault "; + break; + } + case 438: { + s += " dictGetInt8 "; + break; + } + case 439: { + s += " dictGetInt8OrDefault "; + break; + } + case 440: { + s += " dictGetOrDefault "; + break; + } + case 441: { + s += " dictGetOrNull "; + break; + } + case 442: { + s += " dictGetString "; + break; + } + case 443: { + s += " dictGetStringOrDefault "; + break; + } + case 444: { + s += " dictGetUInt16 "; + break; + } + case 445: { + s += " dictGetUInt16OrDefault "; + break; + } + case 446: { + s += " dictGetUInt32 "; + break; + } + case 447: { + s += " dictGetUInt32OrDefault "; + break; + } + case 448: { + s += " dictGetUInt64 "; + break; + } + case 449: { + s += " dictGetUInt64OrDefault "; + break; + } + case 450: { + s += " dictGetUInt8 "; + break; + } + case 451: { + s += " dictGetUInt8OrDefault "; + break; + } + case 452: { + s += " dictGetUUID "; + break; + } + case 453: { + s += " dictGetUUIDOrDefault "; + break; + } + case 454: { + s += " dictHas "; + break; + } + case 455: { + s += " DICTIONARIES "; + break; + } + case 456: { + s += " DICTIONARY "; + break; + } + case 457: { + s += " dictIsIn "; + break; + } + case 458: { + s += " DISK "; + break; + } + case 459: { + s += " DISTINCT "; + break; + } + case 460: { + s += " DISTRIBUTED "; + break; + } + case 461: { + s += " divide "; + break; + } + case 462: { + s += " domain "; + break; + } + case 463: { + s += " domainWithoutWWW "; + break; + } + case 464: { + s += " DOUBLE "; + break; + } + case 465: { + s += " DOUBLE PRECISION "; + break; + } + case 466: { + s += " DROP "; + break; + } + case 467: { + s += " DROP COLUMN "; + break; + } + case 468: { + s += " DROP CONSTRAINT "; + break; + } + case 469: { + s += " DROP DETACHED PART "; + break; + } + case 470: { + s += " DROP DETACHED PARTITION "; + break; + } + case 471: { + s += " DROP INDEX "; + break; + } + case 472: { + s += " DROP PARTITION "; + break; + } + case 473: { + s += " dumpColumnStructure "; + break; + } + case 474: { + s += " e "; + break; + } + case 475: { + s += " ELSE "; + break; + } + case 476: { + s += " empty "; + break; + } + case 477: { + s += " emptyArrayDate "; + break; + } + case 478: { + s += " emptyArrayDateTime "; + break; + } + case 479: { + s += " emptyArrayFloat32 "; + break; + } + case 480: { + s += " emptyArrayFloat64 "; + break; + } + case 481: { + s += " emptyArrayInt16 "; + break; + } + case 482: { + s += " emptyArrayInt32 "; + break; + } + case 483: { + s += " emptyArrayInt64 "; + break; + } + case 484: { + s += " emptyArrayInt8 "; + break; + } + case 485: { + s += " emptyArrayString "; + break; + } + case 486: { + s += " emptyArrayToSingle "; + break; + } + case 487: { + s += " emptyArrayUInt16 "; + break; + } + case 488: { + s += " emptyArrayUInt32 "; + break; + } + case 489: { + s += " emptyArrayUInt64 "; + break; + } + case 490: { + s += " emptyArrayUInt8 "; + break; + } + case 491: { + s += " enabledProfiles "; + break; + } + case 492: { + s += " enabledRoles "; + break; + } + case 493: { + s += " encodeXMLComponent "; + break; + } + case 494: { + s += " encrypt "; + break; + } + case 495: { + s += " END "; + break; + } + case 496: { + s += " endsWith "; + break; + } + 
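+ // Keyword and function-name tokens carry a space on each side, which keeps adjacent tokens from fusing into a single identifier when the generated fragments are concatenated.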
case 497: { + s += " ENGINE "; + break; + } + case 498: { + s += " entropy "; + break; + } + case 499: { + s += " Enum "; + break; + } + case 500: { + s += " ENUM "; + break; + } + case 501: { + s += " Enum16 "; + break; + } + case 502: { + s += " Enum8 "; + break; + } + case 503: { + s += " equals "; + break; + } + case 504: { + s += " erf "; + break; + } + case 505: { + s += " erfc "; + break; + } + case 506: { + s += " errorCodeToName "; + break; + } + case 507: { + s += " evalMLMethod "; + break; + } + case 508: { + s += " EVENTS "; + break; + } + case 509: { + s += " EXCHANGE TABLES "; + break; + } + case 510: { + s += " EXISTS "; + break; + } + case 511: { + s += " exp "; + break; + } + case 512: { + s += " exp10 "; + break; + } + case 513: { + s += " exp2 "; + break; + } + case 514: { + s += " EXPLAIN "; + break; + } + case 515: { + s += " exponentialMovingAverage "; + break; + } + case 516: { + s += " EXPRESSION "; + break; + } + case 517: { + s += " extract "; + break; + } + case 518: { + s += " EXTRACT "; + break; + } + case 519: { + s += " extractAll "; + break; + } + case 520: { + s += " extractAllGroups "; + break; + } + case 521: { + s += " extractAllGroupsHorizontal "; + break; + } + case 522: { + s += " extractAllGroupsVertical "; + break; + } + case 523: { + s += " extractGroups "; + break; + } + case 524: { + s += " extractTextFromHTML "; + break; + } + case 525: { + s += " extractURLParameter "; + break; + } + case 526: { + s += " extractURLParameterNames "; + break; + } + case 527: { + s += " extractURLParameters "; + break; + } + case 528: { + s += " farmFingerprint64 "; + break; + } + case 529: { + s += " farmHash64 "; + break; + } + case 530: { + s += " FETCHES "; + break; + } + case 531: { + s += " FETCH PART "; + break; + } + case 532: { + s += " FETCH PARTITION "; + break; + } + case 533: { + s += " file "; + break; + } + case 534: { + s += " filesystemAvailable "; + break; + } + case 535: { + s += " filesystemCapacity "; + break; + } + case 536: { + s += " filesystemFree "; + break; + } + case 537: { + s += " FINAL "; + break; + } + case 538: { + s += " finalizeAggregation "; + break; + } + case 539: { + s += " FIRST "; + break; + } + case 540: { + s += " firstSignificantSubdomain "; + break; + } + case 541: { + s += " firstSignificantSubdomainCustom "; + break; + } + case 542: { + s += " first_value "; + break; + } + case 543: { + s += " FIXED "; + break; + } + case 544: { + s += " FixedString "; + break; + } + case 545: { + s += " flatten "; + break; + } + case 546: { + s += " FLOAT "; + break; + } + case 547: { + s += " Float32 "; + break; + } + case 548: { + s += " Float64 "; + break; + } + case 549: { + s += " floor "; + break; + } + case 550: { + s += " FLUSH "; + break; + } + case 551: { + s += " FOR "; + break; + } + case 552: { + s += " ForEach "; + break; + } + case 553: { + s += " format "; + break; + } + case 554: { + s += " FORMAT "; + break; + } + case 555: { + s += " formatDateTime "; + break; + } + case 556: { + s += " formatReadableQuantity "; + break; + } + case 557: { + s += " formatReadableDecimalSize "; + break; + } + case 558: { + s += " formatReadableSize "; + break; + } + case 559: { + s += " formatReadableTimeDelta "; + break; + } + case 560: { + s += " formatRow "; + break; + } + case 561: { + s += " formatRowNoNewline "; + break; + } + case 562: { + s += " FQDN "; + break; + } + case 563: { + s += " fragment "; + break; + } + case 564: { + s += " FREEZE "; + break; + } + case 565: { + s += " FROM "; + break; + } + case 566: { + s += " 
FROM_BASE64 "; + break; + } + case 567: { + s += " fromModifiedJulianDay "; + break; + } + case 568: { + s += " fromModifiedJulianDayOrNull "; + break; + } + case 569: { + s += " FROM_UNIXTIME "; + break; + } + case 570: { + s += " fromUnixTimestamp "; + break; + } + case 571: { + s += " fromUnixTimestamp64Micro "; + break; + } + case 572: { + s += " fromUnixTimestamp64Milli "; + break; + } + case 573: { + s += " fromUnixTimestamp64Nano "; + break; + } + case 574: { + s += " FULL "; + break; + } + case 575: { + s += " fullHostName "; + break; + } + case 576: { + s += " FUNCTION "; + break; + } + case 577: { + s += " fuzzBits "; + break; + } + case 578: { + s += " gccMurmurHash "; + break; + } + case 579: { + s += " gcd "; + break; + } + case 580: { + s += " generateUUIDv4 "; + break; + } + case 581: { + s += " geoDistance "; + break; + } + case 582: { + s += " geohashDecode "; + break; + } + case 583: { + s += " geohashEncode "; + break; + } + case 584: { + s += " geohashesInBox "; + break; + } + case 585: { + s += " geoToH3 "; + break; + } + case 586: { + s += " geoToS2 "; + break; + } + case 587: { + s += " getMacro "; + break; + } + case 588: { + s += " __getScalar "; + break; + } + case 589: { + s += " getServerPort "; + break; + } + case 590: { + s += " getSetting "; + break; + } + case 591: { + s += " getSizeOfEnumType "; + break; + } + case 592: { + s += " GLOBAL "; + break; + } + case 593: { + s += " globalIn "; + break; + } + case 594: { + s += " globalInIgnoreSet "; + break; + } + case 595: { + s += " globalNotIn "; + break; + } + case 596: { + s += " globalNotInIgnoreSet "; + break; + } + case 597: { + s += " globalNotNullIn "; + break; + } + case 598: { + s += " globalNotNullInIgnoreSet "; + break; + } + case 599: { + s += " globalNullIn "; + break; + } + case 600: { + s += " globalNullInIgnoreSet "; + break; + } + case 601: { + s += " globalVariable "; + break; + } + case 602: { + s += " GRANULARITY "; + break; + } + case 603: { + s += " greatCircleAngle "; + break; + } + case 604: { + s += " greatCircleDistance "; + break; + } + case 605: { + s += " greater "; + break; + } + case 606: { + s += " greaterOrEquals "; + break; + } + case 607: { + s += " greatest "; + break; + } + case 608: { + s += " GROUP "; + break; + } + case 609: { + s += " groupArray "; + break; + } + case 610: { + s += " groupArrayInsertAt "; + break; + } + case 611: { + s += " groupArrayMovingAvg "; + break; + } + case 612: { + s += " groupArrayMovingSum "; + break; + } + case 613: { + s += " groupArraySample "; + break; + } + case 614: { + s += " groupBitAnd "; + break; + } + case 615: { + s += " groupBitmap "; + break; + } + case 616: { + s += " groupBitmapAnd "; + break; + } + case 617: { + s += " groupBitmapOr "; + break; + } + case 618: { + s += " groupBitmapXor "; + break; + } + case 619: { + s += " groupBitOr "; + break; + } + case 620: { + s += " groupBitXor "; + break; + } + case 621: { + s += " GROUP BY "; + break; + } + case 622: { + s += " groupUniqArray "; + break; + } + case 623: { + s += " h3EdgeAngle "; + break; + } + case 624: { + s += " h3EdgeLengthM "; + break; + } + case 625: { + s += " h3GetBaseCell "; + break; + } + case 626: { + s += " h3GetFaces "; + break; + } + case 627: { + s += " h3GetResolution "; + break; + } + case 628: { + s += " h3HexAreaM2 "; + break; + } + case 629: { + s += " h3IndexesAreNeighbors "; + break; + } + case 630: { + s += " h3IsPentagon "; + break; + } + case 631: { + s += " h3IsResClassIII "; + break; + } + case 632: { + s += " h3IsValid "; + break; + } + 
case 633: { + s += " h3kRing "; + break; + } + case 634: { + s += " h3ToChildren "; + break; + } + case 635: { + s += " h3ToGeo "; + break; + } + case 636: { + s += " h3ToGeoBoundary "; + break; + } + case 637: { + s += " h3ToParent "; + break; + } + case 638: { + s += " h3ToString "; + break; + } + case 639: { + s += " halfMD5 "; + break; + } + case 640: { + s += " has "; + break; + } + case 641: { + s += " hasAll "; + break; + } + case 642: { + s += " hasAny "; + break; + } + case 643: { + s += " hasColumnInTable "; + break; + } + case 644: { + s += " hasSubstr "; + break; + } + case 645: { + s += " hasThreadFuzzer "; + break; + } + case 646: { + s += " hasToken "; + break; + } + case 647: { + s += " hasTokenCaseInsensitive "; + break; + } + case 648: { + s += " HAVING "; + break; + } + case 649: { + s += " hex "; + break; + } + case 650: { + s += " HH "; + break; + } + case 651: { + s += " HIERARCHICAL "; + break; + } + case 652: { + s += " histogram "; + break; + } + case 653: { + s += " hiveHash "; + break; + } + case 654: { + s += " hostname "; + break; + } + case 655: { + s += " hostName "; + break; + } + case 656: { + s += " HOUR "; + break; + } + case 657: { + s += " hypot "; + break; + } + case 658: { + s += " ID "; + break; + } + case 659: { + s += " identity "; + break; + } + case 660: { + s += " if "; + break; + } + case 661: { + s += " IF "; + break; + } + case 662: { + s += " IF EXISTS "; + break; + } + case 663: { + s += " IF NOT EXISTS "; + break; + } + case 664: { + s += " ifNotFinite "; + break; + } + case 665: { + s += " ifNull "; + break; + } + case 666: { + s += " ignore "; + break; + } + case 667: { + s += " ilike "; + break; + } + case 668: { + s += " ILIKE "; + break; + } + case 669: { + s += " in "; + break; + } + case 670: { + s += " IN "; + break; + } + case 671: { + s += " INDEX "; + break; + } + case 672: { + s += " indexHint "; + break; + } + case 673: { + s += " indexOf "; + break; + } + case 674: { + s += " INET4 "; + break; + } + case 675: { + s += " INET6 "; + break; + } + case 676: { + s += " INET6_ATON "; + break; + } + case 677: { + s += " INET6_NTOA "; + break; + } + case 678: { + s += " INET_ATON "; + break; + } + case 679: { + s += " INET_NTOA "; + break; + } + case 680: { + s += " INF "; + break; + } + case 681: { + s += " inIgnoreSet "; + break; + } + case 682: { + s += " initializeAggregation "; + break; + } + case 683: { + s += " initial_query_id "; + break; + } + case 684: { + s += " initialQueryID "; + break; + } + case 685: { + s += " INJECTIVE "; + break; + } + case 686: { + s += " INNER "; + break; + } + case 687: { + s += " IN PARTITION "; + break; + } + case 688: { + s += " INSERT "; + break; + } + case 689: { + s += " INSERT INTO "; + break; + } + case 690: { + s += " INT "; + break; + } + case 691: { + s += " INT1 "; + break; + } + case 692: { + s += " Int128 "; + break; + } + case 693: { + s += " Int16 "; + break; + } + case 694: { + s += " INT1 SIGNED "; + break; + } + case 695: { + s += " INT1 UNSIGNED "; + break; + } + case 696: { + s += " Int256 "; + break; + } + case 697: { + s += " Int32 "; + break; + } + case 698: { + s += " Int64 "; + break; + } + case 699: { + s += " Int8 "; + break; + } + case 700: { + s += " intDiv "; + break; + } + case 701: { + s += " intDivOrZero "; + break; + } + case 702: { + s += " INTEGER "; + break; + } + case 703: { + s += " INTEGER SIGNED "; + break; + } + case 704: { + s += " INTEGER UNSIGNED "; + break; + } + case 705: { + s += " INTERVAL "; + break; + } + case 706: { + s += " IntervalDay "; + 
break; + } + case 707: { + s += " IntervalHour "; + break; + } + case 708: { + s += " intervalLengthSum "; + break; + } + case 709: { + s += " IntervalMinute "; + break; + } + case 710: { + s += " IntervalMonth "; + break; + } + case 711: { + s += " IntervalQuarter "; + break; + } + case 712: { + s += " IntervalSecond "; + break; + } + case 713: { + s += " IntervalWeek "; + break; + } + case 714: { + s += " IntervalYear "; + break; + } + case 715: { + s += " intExp10 "; + break; + } + case 716: { + s += " intExp2 "; + break; + } + case 717: { + s += " intHash32 "; + break; + } + case 718: { + s += " intHash64 "; + break; + } + case 719: { + s += " INTO "; + break; + } + case 720: { + s += " INTO OUTFILE "; + break; + } + case 721: { + s += " INT SIGNED "; + break; + } + case 722: { + s += " INT UNSIGNED "; + break; + } + case 723: { + s += " IPv4 "; + break; + } + case 724: { + s += " IPv4CIDRToRange "; + break; + } + case 725: { + s += " IPv4NumToString "; + break; + } + case 726: { + s += " IPv4NumToStringClassC "; + break; + } + case 727: { + s += " IPv4StringToNum "; + break; + } + case 728: { + s += " IPv4ToIPv6 "; + break; + } + case 729: { + s += " IPv6 "; + break; + } + case 730: { + s += " IPv6CIDRToRange "; + break; + } + case 731: { + s += " IPv6NumToString "; + break; + } + case 732: { + s += " IPv6StringToNum "; + break; + } + case 733: { + s += " IS "; + break; + } + case 734: { + s += " isConstant "; + break; + } + case 735: { + s += " isDecimalOverflow "; + break; + } + case 736: { + s += " isFinite "; + break; + } + case 737: { + s += " isInfinite "; + break; + } + case 738: { + s += " isIPAddressInRange "; + break; + } + case 739: { + s += " isIPv4String "; + break; + } + case 740: { + s += " isIPv6String "; + break; + } + case 741: { + s += " isNaN "; + break; + } + case 742: { + s += " isNotNull "; + break; + } + case 743: { + s += " isNull "; + break; + } + case 744: { + s += " IS_OBJECT_ID "; + break; + } + case 745: { + s += " isValidJSON "; + break; + } + case 746: { + s += " isValidUTF8 "; + break; + } + case 747: { + s += " isZeroOrNull "; + break; + } + case 748: { + s += " javaHash "; + break; + } + case 749: { + s += " javaHashUTF16LE "; + break; + } + case 750: { + s += " JOIN "; + break; + } + case 751: { + s += " joinGet "; + break; + } + case 752: { + s += " joinGetOrNull "; + break; + } + case 753: { + s += " JSON_EXISTS "; + break; + } + case 754: { + s += " JSONExtract "; + break; + } + case 755: { + s += " JSONExtractArrayRaw "; + break; + } + case 756: { + s += " JSONExtractBool "; + break; + } + case 757: { + s += " JSONExtractFloat "; + break; + } + case 758: { + s += " JSONExtractInt "; + break; + } + case 759: { + s += " JSONExtractKeysAndValues "; + break; + } + case 760: { + s += " JSONExtractKeysAndValuesRaw "; + break; + } + case 761: { + s += " JSONExtractKeys "; + break; + } + case 762: { + s += " JSONExtractRaw "; + break; + } + case 763: { + s += " JSONExtractString "; + break; + } + case 764: { + s += " JSONExtractUInt "; + break; + } + case 765: { + s += " JSONHas "; + break; + } + case 766: { + s += " JSONKey "; + break; + } + case 767: { + s += " JSONLength "; + break; + } + case 768: { + s += " JSON_QUERY "; + break; + } + case 769: { + s += " JSONType "; + break; + } + case 770: { + s += " JSON_VALUE "; + break; + } + case 771: { + s += " jumpConsistentHash "; + break; + } + case 772: { + s += " KEY "; + break; + } + case 773: { + s += " KILL "; + break; + } + case 774: { + s += " kurtPop "; + break; + } + case 775: { + s += " 
kurtSamp "; + break; + } + case 776: { + s += " lagInFrame "; + break; + } + case 777: { + s += " LAST "; + break; + } + case 778: { + s += " last_value "; + break; + } + case 779: { + s += " LAYOUT "; + break; + } + case 780: { + s += " lcase "; + break; + } + case 781: { + s += " lcm "; + break; + } + case 782: { + s += " leadInFrame "; + break; + } + case 783: { + s += " LEADING "; + break; + } + case 784: { + s += " least "; + break; + } + case 785: { + s += " LEFT "; + break; + } + case 786: { + s += " LEFT ARRAY JOIN "; + break; + } + case 787: { + s += " leftPad "; + break; + } + case 788: { + s += " leftPadUTF8 "; + break; + } + case 789: { + s += " lemmatize "; + break; + } + case 790: { + s += " length "; + break; + } + case 791: { + s += " lengthUTF8 "; + break; + } + case 792: { + s += " less "; + break; + } + case 793: { + s += " lessOrEquals "; + break; + } + case 794: { + s += " lgamma "; + break; + } + case 795: { + s += " LIFETIME "; + break; + } + case 796: { + s += " like "; + break; + } + case 797: { + s += " LIKE "; + break; + } + case 798: { + s += " LIMIT "; + break; + } + case 799: { + s += " LIVE "; + break; + } + case 800: { + s += " ln "; + break; + } + case 801: { + s += " LOCAL "; + break; + } + case 802: { + s += " locate "; + break; + } + case 803: { + s += " log "; + break; + } + case 804: { + s += " log10 "; + break; + } + case 805: { + s += " log1p "; + break; + } + case 806: { + s += " log2 "; + break; + } + case 807: { + s += " LOGS "; + break; + } + case 808: { + s += " logTrace "; + break; + } + case 809: { + s += " LONGBLOB "; + break; + } + case 810: { + s += " LONGTEXT "; + break; + } + case 811: { + s += " LowCardinality "; + break; + } + case 812: { + s += " lowCardinalityIndices "; + break; + } + case 813: { + s += " lowCardinalityKeys "; + break; + } + case 814: { + s += " lower "; + break; + } + case 815: { + s += " lowerUTF8 "; + break; + } + case 816: { + s += " lpad "; + break; + } + case 817: { + s += " LTRIM "; + break; + } + case 818: { + s += " M "; + break; + } + case 819: { + s += " MACNumToString "; + break; + } + case 820: { + s += " MACStringToNum "; + break; + } + case 821: { + s += " MACStringToOUI "; + break; + } + case 822: { + s += " mannWhitneyUTest "; + break; + } + case 823: { + s += " map "; + break; + } + case 824: { + s += " Map "; + break; + } + case 825: { + s += " mapAdd "; + break; + } + case 826: { + s += " mapContains "; + break; + } + case 827: { + s += " mapKeys "; + break; + } + case 828: { + s += " mapPopulateSeries "; + break; + } + case 829: { + s += " mapSubtract "; + break; + } + case 830: { + s += " mapValues "; + break; + } + case 831: { + s += " match "; + break; + } + case 832: { + s += " materialize "; + break; + } + case 833: { + s += " MATERIALIZE "; + break; + } + case 834: { + s += " MATERIALIZED "; + break; + } + case 835: { + s += " MATERIALIZE INDEX "; + break; + } + case 836: { + s += " MATERIALIZE TTL "; + break; + } + case 837: { + s += " max "; + break; + } + case 838: { + s += " MAX "; + break; + } + case 839: { + s += " maxIntersections "; + break; + } + case 840: { + s += " maxIntersectionsPosition "; + break; + } + case 841: { + s += " maxMap "; + break; + } + case 842: { + s += " MD4 "; + break; + } + case 843: { + s += " MD5 "; + break; + } + case 844: { + s += " median "; + break; + } + case 845: { + s += " medianBFloat16 "; + break; + } + case 846: { + s += " medianBFloat16Weighted "; + break; + } + case 847: { + s += " medianDeterministic "; + break; + } + case 848: { + s += " 
medianExact "; + break; + } + case 849: { + s += " medianExactHigh "; + break; + } + case 850: { + s += " medianExactLow "; + break; + } + case 851: { + s += " medianExactWeighted "; + break; + } + case 852: { + s += " medianTDigest "; + break; + } + case 853: { + s += " medianTDigestWeighted "; + break; + } + case 854: { + s += " medianTiming "; + break; + } + case 855: { + s += " medianTimingWeighted "; + break; + } + case 856: { + s += " MEDIUMBLOB "; + break; + } + case 857: { + s += " MEDIUMINT "; + break; + } + case 858: { + s += " MEDIUMINT SIGNED "; + break; + } + case 859: { + s += " MEDIUMINT UNSIGNED "; + break; + } + case 860: { + s += " MEDIUMTEXT "; + break; + } + case 861: { + s += " Merge "; + break; + } + case 862: { + s += " MERGES "; + break; + } + case 863: { + s += " metroHash64 "; + break; + } + case 864: { + s += " MI "; + break; + } + case 865: { + s += " mid "; + break; + } + case 866: { + s += " min "; + break; + } + case 867: { + s += " MIN "; + break; + } + case 868: { + s += " minMap "; + break; + } + case 869: { + s += " minus "; + break; + } + case 870: { + s += " MINUTE "; + break; + } + case 871: { + s += " MM "; + break; + } + case 872: { + s += " mod "; + break; + } + case 873: { + s += " MODIFY "; + break; + } + case 874: { + s += " MODIFY COLUMN "; + break; + } + case 875: { + s += " MODIFY ORDER BY "; + break; + } + case 876: { + s += " MODIFY QUERY "; + break; + } + case 877: { + s += " MODIFY SETTING "; + break; + } + case 878: { + s += " MODIFY TTL "; + break; + } + case 879: { + s += " modulo "; + break; + } + case 880: { + s += " moduloLegacy "; + break; + } + case 881: { + s += " moduloOrZero "; + break; + } + case 882: { + s += " MONTH "; + break; + } + case 883: { + s += " MOVE "; + break; + } + case 884: { + s += " MOVE PART "; + break; + } + case 885: { + s += " MOVE PARTITION "; + break; + } + case 886: { + s += " movingXXX "; + break; + } + case 887: { + s += " multiFuzzyMatchAllIndices "; + break; + } + case 888: { + s += " multiFuzzyMatchAny "; + break; + } + case 889: { + s += " multiFuzzyMatchAnyIndex "; + break; + } + case 890: { + s += " multiIf "; + break; + } + case 891: { + s += " multiMatchAllIndices "; + break; + } + case 892: { + s += " multiMatchAny "; + break; + } + case 893: { + s += " multiMatchAnyIndex "; + break; + } + case 894: { + s += " multiply "; + break; + } + case 895: { + s += " MultiPolygon "; + break; + } + case 896: { + s += " multiSearchAllPositions "; + break; + } + case 897: { + s += " multiSearchAllPositionsCaseInsensitive "; + break; + } + case 898: { + s += " multiSearchAllPositionsCaseInsensitiveUTF8 "; + break; + } + case 899: { + s += " multiSearchAllPositionsUTF8 "; + break; + } + case 900: { + s += " multiSearchAny "; + break; + } + case 901: { + s += " multiSearchAnyCaseInsensitive "; + break; + } + case 902: { + s += " multiSearchAnyCaseInsensitiveUTF8 "; + break; + } + case 903: { + s += " multiSearchAnyUTF8 "; + break; + } + case 904: { + s += " multiSearchFirstIndex "; + break; + } + case 905: { + s += " multiSearchFirstIndexCaseInsensitive "; + break; + } + case 906: { + s += " multiSearchFirstIndexCaseInsensitiveUTF8 "; + break; + } + case 907: { + s += " multiSearchFirstIndexUTF8 "; + break; + } + case 908: { + s += " multiSearchFirstPosition "; + break; + } + case 909: { + s += " multiSearchFirstPositionCaseInsensitive "; + break; + } + case 910: { + s += " multiSearchFirstPositionCaseInsensitiveUTF8 "; + break; + } + case 911: { + s += " multiSearchFirstPositionUTF8 "; + break; + } + case 
912: { + s += " murmurHash2_32 "; + break; + } + case 913: { + s += " murmurHash2_64 "; + break; + } + case 914: { + s += " murmurHash3_128 "; + break; + } + case 915: { + s += " murmurHash3_32 "; + break; + } + case 916: { + s += " murmurHash3_64 "; + break; + } + case 917: { + s += " MUTATION "; + break; + } + case 918: { + s += " N "; + break; + } + case 919: { + s += " NAME "; + break; + } + case 920: { + s += " NAN_SQL "; + break; + } + case 921: { + s += " NATIONAL CHAR "; + break; + } + case 922: { + s += " NATIONAL CHARACTER "; + break; + } + case 923: { + s += " NATIONAL CHARACTER LARGE OBJECT "; + break; + } + case 924: { + s += " NATIONAL CHARACTER VARYING "; + break; + } + case 925: { + s += " NATIONAL CHAR VARYING "; + break; + } + case 926: { + s += " NCHAR "; + break; + } + case 927: { + s += " NCHAR LARGE OBJECT "; + break; + } + case 928: { + s += " NCHAR VARYING "; + break; + } + case 929: { + s += " negate "; + break; + } + case 930: { + s += " neighbor "; + break; + } + case 931: { + s += " Nested "; + break; + } + case 932: { + s += " netloc "; + break; + } + case 933: { + s += " ngramDistance "; + break; + } + case 934: { + s += " ngramDistanceCaseInsensitive "; + break; + } + case 935: { + s += " ngramDistanceCaseInsensitiveUTF8 "; + break; + } + case 936: { + s += " ngramDistanceUTF8 "; + break; + } + case 937: { + s += " ngramMinHash "; + break; + } + case 938: { + s += " ngramMinHashArg "; + break; + } + case 939: { + s += " ngramMinHashArgCaseInsensitive "; + break; + } + case 940: { + s += " ngramMinHashArgCaseInsensitiveUTF8 "; + break; + } + case 941: { + s += " ngramMinHashArgUTF8 "; + break; + } + case 942: { + s += " ngramMinHashCaseInsensitive "; + break; + } + case 943: { + s += " ngramMinHashCaseInsensitiveUTF8 "; + break; + } + case 944: { + s += " ngramMinHashUTF8 "; + break; + } + case 945: { + s += " ngramSearch "; + break; + } + case 946: { + s += " ngramSearchCaseInsensitive "; + break; + } + case 947: { + s += " ngramSearchCaseInsensitiveUTF8 "; + break; + } + case 948: { + s += " ngramSearchUTF8 "; + break; + } + case 949: { + s += " ngramSimHash "; + break; + } + case 950: { + s += " ngramSimHashCaseInsensitive "; + break; + } + case 951: { + s += " ngramSimHashCaseInsensitiveUTF8 "; + break; + } + case 952: { + s += " ngramSimHashUTF8 "; + break; + } + case 953: { + s += " NO "; + break; + } + case 954: { + s += " NO DELAY "; + break; + } + case 955: { + s += " NONE "; + break; + } + case 956: { + s += " normalizedQueryHash "; + break; + } + case 957: { + s += " normalizedQueryHashKeepNames "; + break; + } + case 958: { + s += " normalizeQuery "; + break; + } + case 959: { + s += " normalizeQueryKeepNames "; + break; + } + case 960: { + s += " not "; + break; + } + case 961: { + s += " NOT "; + break; + } + case 962: { + s += " notEmpty "; + break; + } + case 963: { + s += " notEquals "; + break; + } + case 964: { + s += " nothing "; + break; + } + case 965: { + s += " Nothing "; + break; + } + case 966: { + s += " notILike "; + break; + } + case 967: { + s += " notIn "; + break; + } + case 968: { + s += " notInIgnoreSet "; + break; + } + case 969: { + s += " notLike "; + break; + } + case 970: { + s += " notNullIn "; + break; + } + case 971: { + s += " notNullInIgnoreSet "; + break; + } + case 972: { + s += " now "; + break; + } + case 973: { + s += " now64 "; + break; + } + case 974: { + s += " Null "; + break; + } + case 975: { + s += " Nullable "; + break; + } + case 976: { + s += " nullIf "; + break; + } + case 977: { + s += " nullIn "; + 
break; + } + case 978: { + s += " nullInIgnoreSet "; + break; + } + case 979: { + s += " NULLS "; + break; + } + case 980: { + s += " NULL_SQL "; + break; + } + case 981: { + s += " NUMERIC "; + break; + } + case 982: { + s += " NVARCHAR "; + break; + } + case 983: { + s += " OFFSET "; + break; + } + case 984: { + s += " ON "; + break; + } + case 985: { + s += " ONLY "; + break; + } + case 986: { + s += " OPTIMIZE "; + break; + } + case 987: { + s += " OPTIMIZE TABLE "; + break; + } + case 988: { + s += " or "; + break; + } + case 989: { + s += " OR "; + break; + } + case 990: { + s += " ORDER "; + break; + } + case 991: { + s += " ORDER BY "; + break; + } + case 992: { + s += " OR REPLACE "; + break; + } + case 993: { + s += " OUTER "; + break; + } + case 994: { + s += " OUTFILE "; + break; + } + case 995: { + s += " parseDateTime32BestEffort "; + break; + } + case 996: { + s += " parseDateTime32BestEffortOrNull "; + break; + } + case 997: { + s += " parseDateTime32BestEffortOrZero "; + break; + } + case 998: { + s += " parseDateTime64BestEffort "; + break; + } + case 999: { + s += " parseDateTime64BestEffortOrNull "; + break; + } + case 1000: { + s += " parseDateTime64BestEffortOrZero "; + break; + } + case 1001: { + s += " parseDateTimeBestEffort "; + break; + } + case 1002: { + s += " parseDateTimeBestEffortOrNull "; + break; + } + case 1003: { + s += " parseDateTimeBestEffortOrZero "; + break; + } + case 1004: { + s += " parseDateTimeBestEffortUS "; + break; + } + case 1005: { + s += " parseDateTimeBestEffortUSOrNull "; + break; + } + case 1006: { + s += " parseDateTimeBestEffortUSOrZero "; + break; + } + case 1007: { + s += " parseTimeDelta "; + break; + } + case 1008: { + s += " PARTITION "; + break; + } + case 1009: { + s += " PARTITION BY "; + break; + } + case 1010: { + s += " partitionId "; + break; + } + case 1011: { + s += " path "; + break; + } + case 1012: { + s += " pathFull "; + break; + } + case 1013: { + s += " pi "; + break; + } + case 1014: { + s += " plus "; + break; + } + case 1015: { + s += " Point "; + break; + } + case 1016: { + s += " pointInEllipses "; + break; + } + case 1017: { + s += " pointInPolygon "; + break; + } + case 1018: { + s += " Polygon "; + break; + } + case 1019: { + s += " polygonAreaCartesian "; + break; + } + case 1020: { + s += " polygonAreaSpherical "; + break; + } + case 1021: { + s += " polygonConvexHullCartesian "; + break; + } + case 1022: { + s += " polygonPerimeterCartesian "; + break; + } + case 1023: { + s += " polygonPerimeterSpherical "; + break; + } + case 1024: { + s += " polygonsDistanceCartesian "; + break; + } + case 1025: { + s += " polygonsDistanceSpherical "; + break; + } + case 1026: { + s += " polygonsEqualsCartesian "; + break; + } + case 1027: { + s += " polygonsIntersectionCartesian "; + break; + } + case 1028: { + s += " polygonsIntersectionSpherical "; + break; + } + case 1029: { + s += " polygonsSymDifferenceCartesian "; + break; + } + case 1030: { + s += " polygonsSymDifferenceSpherical "; + break; + } + case 1031: { + s += " polygonsUnionCartesian "; + break; + } + case 1032: { + s += " polygonsUnionSpherical "; + break; + } + case 1033: { + s += " polygonsWithinCartesian "; + break; + } + case 1034: { + s += " polygonsWithinSpherical "; + break; + } + case 1035: { + s += " POPULATE "; + break; + } + case 1036: { + s += " port "; + break; + } + case 1037: { + s += " position "; + break; + } + case 1038: { + s += " positionCaseInsensitive "; + break; + } + case 1039: { + s += " positionCaseInsensitiveUTF8 "; + 
break; + } + case 1040: { + s += " positionUTF8 "; + break; + } + case 1041: { + s += " pow "; + break; + } + case 1042: { + s += " power "; + break; + } + case 1043: { + s += " PREWHERE "; + break; + } + case 1044: { + s += " PRIMARY "; + break; + } + case 1045: { + s += " PRIMARY KEY "; + break; + } + case 1046: { + s += " PROJECTION "; + break; + } + case 1047: { + s += " protocol "; + break; + } + case 1048: { + s += " Q "; + break; + } + case 1049: { + s += " QQ "; + break; + } + case 1050: { + s += " quantile "; + break; + } + case 1051: { + s += " quantileBFloat16 "; + break; + } + case 1052: { + s += " quantileBFloat16Weighted "; + break; + } + case 1053: { + s += " quantileDeterministic "; + break; + } + case 1054: { + s += " quantileExact "; + break; + } + case 1055: { + s += " quantileExactExclusive "; + break; + } + case 1056: { + s += " quantileExactHigh "; + break; + } + case 1057: { + s += " quantileExactInclusive "; + break; + } + case 1058: { + s += " quantileExactLow "; + break; + } + case 1059: { + s += " quantileExactWeighted "; + break; + } + case 1060: { + s += " quantiles "; + break; + } + case 1061: { + s += " quantilesBFloat16 "; + break; + } + case 1062: { + s += " quantilesBFloat16Weighted "; + break; + } + case 1063: { + s += " quantilesDeterministic "; + break; + } + case 1064: { + s += " quantilesExact "; + break; + } + case 1065: { + s += " quantilesExactExclusive "; + break; + } + case 1066: { + s += " quantilesExactHigh "; + break; + } + case 1067: { + s += " quantilesExactInclusive "; + break; + } + case 1068: { + s += " quantilesExactLow "; + break; + } + case 1069: { + s += " quantilesExactWeighted "; + break; + } + case 1070: { + s += " quantilesTDigest "; + break; + } + case 1071: { + s += " quantilesTDigestWeighted "; + break; + } + case 1072: { + s += " quantilesTiming "; + break; + } + case 1073: { + s += " quantilesTimingWeighted "; + break; + } + case 1074: { + s += " quantileTDigest "; + break; + } + case 1075: { + s += " quantileTDigestWeighted "; + break; + } + case 1076: { + s += " quantileTiming "; + break; + } + case 1077: { + s += " quantileTimingWeighted "; + break; + } + case 1078: { + s += " QUARTER "; + break; + } + case 1079: { + s += " query_id "; + break; + } + case 1080: { + s += " queryID "; + break; + } + case 1081: { + s += " queryString "; + break; + } + case 1082: { + s += " queryStringAndFragment "; + break; + } + case 1083: { + s += " rand "; + break; + } + case 1084: { + s += " rand32 "; + break; + } + case 1085: { + s += " rand64 "; + break; + } + case 1086: { + s += " randConstant "; + break; + } + case 1087: { + s += " randomFixedString "; + break; + } + case 1088: { + s += " randomPrintableASCII "; + break; + } + case 1089: { + s += " randomString "; + break; + } + case 1090: { + s += " randomStringUTF8 "; + break; + } + case 1091: { + s += " range "; + break; + } + case 1092: { + s += " RANGE "; + break; + } + case 1093: { + s += " rank "; + break; + } + case 1094: { + s += " rankCorr "; + break; + } + case 1095: { + s += " readWKTMultiPolygon "; + break; + } + case 1096: { + s += " readWKTPoint "; + break; + } + case 1097: { + s += " readWKTPolygon "; + break; + } + case 1098: { + s += " readWKTRing "; + break; + } + case 1099: { + s += " REAL "; + break; + } + case 1100: { + s += " REFRESH "; + break; + } + case 1101: { + s += " regexpQuoteMeta "; + break; + } + case 1102: { + s += " regionHierarchy "; + break; + } + case 1103: { + s += " regionIn "; + break; + } + case 1104: { + s += " regionToArea "; + break; + } + 
case 1105: { + s += " regionToCity "; + break; + } + case 1106: { + s += " regionToContinent "; + break; + } + case 1107: { + s += " regionToCountry "; + break; + } + case 1108: { + s += " regionToDistrict "; + break; + } + case 1109: { + s += " regionToName "; + break; + } + case 1110: { + s += " regionToPopulation "; + break; + } + case 1111: { + s += " regionToTopContinent "; + break; + } + case 1112: { + s += " reinterpret "; + break; + } + case 1113: { + s += " reinterpretAsDate "; + break; + } + case 1114: { + s += " reinterpretAsDateTime "; + break; + } + case 1115: { + s += " reinterpretAsFixedString "; + break; + } + case 1116: { + s += " reinterpretAsFloat32 "; + break; + } + case 1117: { + s += " reinterpretAsFloat64 "; + break; + } + case 1118: { + s += " reinterpretAsInt128 "; + break; + } + case 1119: { + s += " reinterpretAsInt16 "; + break; + } + case 1120: { + s += " reinterpretAsInt256 "; + break; + } + case 1121: { + s += " reinterpretAsInt32 "; + break; + } + case 1122: { + s += " reinterpretAsInt64 "; + break; + } + case 1123: { + s += " reinterpretAsInt8 "; + break; + } + case 1124: { + s += " reinterpretAsString "; + break; + } + case 1125: { + s += " reinterpretAsUInt128 "; + break; + } + case 1126: { + s += " reinterpretAsUInt16 "; + break; + } + case 1127: { + s += " reinterpretAsUInt256 "; + break; + } + case 1128: { + s += " reinterpretAsUInt32 "; + break; + } + case 1129: { + s += " reinterpretAsUInt64 "; + break; + } + case 1130: { + s += " reinterpretAsUInt8 "; + break; + } + case 1131: { + s += " reinterpretAsUUID "; + break; + } + case 1132: { + s += " RELOAD "; + break; + } + case 1133: { + s += " REMOVE "; + break; + } + case 1134: { + s += " RENAME "; + break; + } + case 1135: { + s += " RENAME COLUMN "; + break; + } + case 1136: { + s += " RENAME TABLE "; + break; + } + case 1137: { + s += " repeat "; + break; + } + case 1138: { + s += " replace "; + break; + } + case 1139: { + s += " REPLACE "; + break; + } + case 1140: { + s += " replaceAll "; + break; + } + case 1141: { + s += " replaceOne "; + break; + } + case 1142: { + s += " REPLACE PARTITION "; + break; + } + case 1143: { + s += " replaceRegexpAll "; + break; + } + case 1144: { + s += " replaceRegexpOne "; + break; + } + case 1145: { + s += " REPLICA "; + break; + } + case 1146: { + s += " replicate "; + break; + } + case 1147: { + s += " REPLICATED "; + break; + } + case 1148: { + s += " Resample "; + break; + } + case 1149: { + s += " RESUME "; + break; + } + case 1150: { + s += " retention "; + break; + } + case 1151: { + s += " reverse "; + break; + } + case 1152: { + s += " reverseUTF8 "; + break; + } + case 1153: { + s += " RIGHT "; + break; + } + case 1154: { + s += " rightPad "; + break; + } + case 1155: { + s += " rightPadUTF8 "; + break; + } + case 1156: { + s += " Ring "; + break; + } + case 1157: { + s += " ROLLUP "; + break; + } + case 1158: { + s += " round "; + break; + } + case 1159: { + s += " roundAge "; + break; + } + case 1160: { + s += " roundBankers "; + break; + } + case 1161: { + s += " roundDown "; + break; + } + case 1162: { + s += " roundDuration "; + break; + } + case 1163: { + s += " roundToExp2 "; + break; + } + case 1164: { + s += " row_number "; + break; + } + case 1165: { + s += " rowNumberInAllBlocks "; + break; + } + case 1166: { + s += " rowNumberInBlock "; + break; + } + case 1167: { + s += " rpad "; + break; + } + case 1168: { + s += " RTRIM "; + break; + } + case 1169: { + s += " runningAccumulate "; + break; + } + case 1170: { + s += " runningConcurrency 
"; + break; + } + case 1171: { + s += " runningDifference "; + break; + } + case 1172: { + s += " runningDifferenceStartingWithFirstValue "; + break; + } + case 1173: { + s += " S "; + break; + } + case 1174: { + s += " s2CapContains "; + break; + } + case 1175: { + s += " s2CapUnion "; + break; + } + case 1176: { + s += " s2CellsIntersect "; + break; + } + case 1177: { + s += " s2GetNeighbors "; + break; + } + case 1178: { + s += " s2RectAdd "; + break; + } + case 1179: { + s += " s2RectContains "; + break; + } + case 1180: { + s += " s2RectIntersection "; + break; + } + case 1181: { + s += " s2RectUnion "; + break; + } + case 1182: { + s += " s2ToGeo "; + break; + } + case 1183: { + s += " SAMPLE "; + break; + } + case 1184: { + s += " SAMPLE BY "; + break; + } + case 1185: { + s += " SECOND "; + break; + } + case 1186: { + s += " SELECT "; + break; + } + case 1187: { + s += " SEMI "; + break; + } + case 1188: { + s += " SENDS "; + break; + } + case 1189: { + s += " sequenceCount "; + break; + } + case 1190: { + s += " sequenceMatch "; + break; + } + case 1191: { + s += " sequenceNextNode "; + break; + } + case 1192: { + s += " serverUUID "; + break; + } + case 1193: { + s += " SET "; + break; + } + case 1194: { + s += " SETTINGS "; + break; + } + case 1195: { + s += " SHA1 "; + break; + } + case 1196: { + s += " SHA224 "; + break; + } + case 1197: { + s += " SHA256 "; + break; + } + case 1198: { + s += " SHA384 "; + break; + } + case 1199: { + s += " SHA512 "; + break; + } + case 1200: { + s += " shardCount "; + break; + } + case 1201: { + s += " shardNum "; + break; + } + case 1202: { + s += " SHOW "; + break; + } + case 1203: { + s += " SHOW PROCESSLIST "; + break; + } + case 1204: { + s += " sigmoid "; + break; + } + case 1205: { + s += " sign "; + break; + } + case 1206: { + s += " SimpleAggregateFunction "; + break; + } + case 1207: { + s += " simpleJSONExtractBool "; + break; + } + case 1208: { + s += " simpleJSONExtractFloat "; + break; + } + case 1209: { + s += " simpleJSONExtractInt "; + break; + } + case 1210: { + s += " simpleJSONExtractRaw "; + break; + } + case 1211: { + s += " simpleJSONExtractString "; + break; + } + case 1212: { + s += " simpleJSONExtractUInt "; + break; + } + case 1213: { + s += " simpleJSONHas "; + break; + } + case 1214: { + s += " simpleLinearRegression "; + break; + } + case 1215: { + s += " sin "; + break; + } + case 1216: { + s += " SINGLE "; + break; + } + case 1217: { + s += " singleValueOrNull "; + break; + } + case 1218: { + s += " sinh "; + break; + } + case 1219: { + s += " sipHash128 "; + break; + } + case 1220: { + s += " sipHash64 "; + break; + } + case 1221: { + s += " skewPop "; + break; + } + case 1222: { + s += " skewSamp "; + break; + } + case 1223: { + s += " sleep "; + break; + } + case 1224: { + s += " sleepEachRow "; + break; + } + case 1225: { + s += " SMALLINT "; + break; + } + case 1226: { + s += " SMALLINT SIGNED "; + break; + } + case 1227: { + s += " SMALLINT UNSIGNED "; + break; + } + case 1228: { + s += " snowflakeToDateTime "; + break; + } + case 1229: { + s += " snowflakeToDateTime64 "; + break; + } + case 1230: { + s += " SOURCE "; + break; + } + case 1231: { + s += " sparkbar "; + break; + } + case 1232: { + s += " splitByChar "; + break; + } + case 1233: { + s += " splitByNonAlpha "; + break; + } + case 1234: { + s += " splitByRegexp "; + break; + } + case 1235: { + s += " splitByString "; + break; + } + case 1236: { + s += " splitByWhitespace "; + break; + } + case 1237: { + s += " SQL_TSI_DAY "; + break; + } + case 
1238: { + s += " SQL_TSI_HOUR "; + break; + } + case 1239: { + s += " SQL_TSI_MINUTE "; + break; + } + case 1240: { + s += " SQL_TSI_MONTH "; + break; + } + case 1241: { + s += " SQL_TSI_QUARTER "; + break; + } + case 1242: { + s += " SQL_TSI_SECOND "; + break; + } + case 1243: { + s += " SQL_TSI_WEEK "; + break; + } + case 1244: { + s += " SQL_TSI_YEAR "; + break; + } + case 1245: { + s += " sqrt "; + break; + } + case 1246: { + s += " SS "; + break; + } + case 1247: { + s += " START "; + break; + } + case 1248: { + s += " startsWith "; + break; + } + case 1249: { + s += " State "; + break; + } + case 1250: { + s += " stddevPop "; + break; + } + case 1251: { + s += " STDDEV_POP "; + break; + } + case 1252: { + s += " stddevPopStable "; + break; + } + case 1253: { + s += " stddevSamp "; + break; + } + case 1254: { + s += " STDDEV_SAMP "; + break; + } + case 1255: { + s += " stddevSampStable "; + break; + } + case 1256: { + s += " stem "; + break; + } + case 1257: { + s += " STEP "; + break; + } + case 1258: { + s += " stochasticLinearRegression "; + break; + } + case 1259: { + s += " stochasticLogisticRegression "; + break; + } + case 1260: { + s += " STOP "; + break; + } + case 1261: { + s += " String "; + break; + } + case 1262: { + s += " stringToH3 "; + break; + } + case 1263: { + s += " studentTTest "; + break; + } + case 1264: { + s += " subBitmap "; + break; + } + case 1265: { + s += " substr "; + break; + } + case 1266: { + s += " substring "; + break; + } + case 1267: { + s += " SUBSTRING "; + break; + } + case 1268: { + s += " substringUTF8 "; + break; + } + case 1269: { + s += " subtractDays "; + break; + } + case 1270: { + s += " subtractHours "; + break; + } + case 1271: { + s += " subtractMinutes "; + break; + } + case 1272: { + s += " subtractMonths "; + break; + } + case 1273: { + s += " subtractQuarters "; + break; + } + case 1274: { + s += " subtractSeconds "; + break; + } + case 1275: { + s += " subtractWeeks "; + break; + } + case 1276: { + s += " subtractYears "; + break; + } + case 1277: { + s += " sum "; + break; + } + case 1278: { + s += " sumCount "; + break; + } + case 1279: { + s += " sumKahan "; + break; + } + case 1280: { + s += " sumMap "; + break; + } + case 1281: { + s += " sumMapFiltered "; + break; + } + case 1282: { + s += " sumMapFilteredWithOverflow "; + break; + } + case 1283: { + s += " sumMapWithOverflow "; + break; + } + case 1284: { + s += " sumWithOverflow "; + break; + } + case 1285: { + s += " SUSPEND "; + break; + } + case 1286: { + s += " svg "; + break; + } + case 1287: { + s += " SVG "; + break; + } + case 1288: { + s += " SYNC "; + break; + } + case 1289: { + s += " synonyms "; + break; + } + case 1290: { + s += " SYNTAX "; + break; + } + case 1291: { + s += " SYSTEM "; + break; + } + case 1292: { + s += " TABLE "; + break; + } + case 1293: { + s += " TABLES "; + break; + } + case 1294: { + s += " tan "; + break; + } + case 1295: { + s += " tanh "; + break; + } + case 1296: { + s += " tcpPort "; + break; + } + case 1297: { + s += " TEMPORARY "; + break; + } + case 1298: { + s += " TEST "; + break; + } + case 1299: { + s += " TEXT "; + break; + } + case 1300: { + s += " tgamma "; + break; + } + case 1301: { + s += " THEN "; + break; + } + case 1302: { + s += " throwIf "; + break; + } + case 1303: { + s += " tid "; + break; + } + case 1304: { + s += " TIES "; + break; + } + case 1305: { + s += " TIMEOUT "; + break; + } + case 1306: { + s += " timeSlot "; + break; + } + case 1307: { + s += " timeSlots "; + break; + } + case 1308: { + s += " 
TIMESTAMP "; + break; + } + case 1309: { + s += " TIMESTAMP_ADD "; + break; + } + case 1310: { + s += " TIMESTAMPADD "; + break; + } + case 1311: { + s += " TIMESTAMP_DIFF "; + break; + } + case 1312: { + s += " TIMESTAMPDIFF "; + break; + } + case 1313: { + s += " TIMESTAMP_SUB "; + break; + } + case 1314: { + s += " TIMESTAMPSUB "; + break; + } + case 1315: { + s += " timezone "; + break; + } + case 1316: { + s += " timeZone "; + break; + } + case 1317: { + s += " timezoneOf "; + break; + } + case 1318: { + s += " timeZoneOf "; + break; + } + case 1319: { + s += " timezoneOffset "; + break; + } + case 1320: { + s += " timeZoneOffset "; + break; + } + case 1321: { + s += " TINYBLOB "; + break; + } + case 1322: { + s += " TINYINT "; + break; + } + case 1323: { + s += " TINYINT SIGNED "; + break; + } + case 1324: { + s += " TINYINT UNSIGNED "; + break; + } + case 1325: { + s += " TINYTEXT "; + break; + } + case 1326: { + s += " TO "; + break; + } + case 1327: { + s += " TO_BASE64 "; + break; + } + case 1328: { + s += " toColumnTypeName "; + break; + } + case 1329: { + s += " toDate "; + break; + } + case 1330: { + s += " toDate32 "; + break; + } + case 1331: { + s += " toDate32OrNull "; + break; + } + case 1332: { + s += " toDate32OrZero "; + break; + } + case 1333: { + s += " toDateOrNull "; + break; + } + case 1334: { + s += " toDateOrZero "; + break; + } + case 1335: { + s += " toDateTime "; + break; + } + case 1336: { + s += " toDateTime32 "; + break; + } + case 1337: { + s += " toDateTime64 "; + break; + } + case 1338: { + s += " toDateTime64OrNull "; + break; + } + case 1339: { + s += " toDateTime64OrZero "; + break; + } + case 1340: { + s += " toDateTimeOrNull "; + break; + } + case 1341: { + s += " toDateTimeOrZero "; + break; + } + case 1342: { + s += " today "; + break; + } + case 1343: { + s += " toDayOfMonth "; + break; + } + case 1344: { + s += " toDayOfWeek "; + break; + } + case 1345: { + s += " toDayOfYear "; + break; + } + case 1346: { + s += " toDecimal128 "; + break; + } + case 1347: { + s += " toDecimal128OrNull "; + break; + } + case 1348: { + s += " toDecimal128OrZero "; + break; + } + case 1349: { + s += " toDecimal256 "; + break; + } + case 1350: { + s += " toDecimal256OrNull "; + break; + } + case 1351: { + s += " toDecimal256OrZero "; + break; + } + case 1352: { + s += " toDecimal32 "; + break; + } + case 1353: { + s += " toDecimal32OrNull "; + break; + } + case 1354: { + s += " toDecimal32OrZero "; + break; + } + case 1355: { + s += " toDecimal64 "; + break; + } + case 1356: { + s += " toDecimal64OrNull "; + break; + } + case 1357: { + s += " toDecimal64OrZero "; + break; + } + case 1358: { + s += " TO DISK "; + break; + } + case 1359: { + s += " toFixedString "; + break; + } + case 1360: { + s += " toFloat32 "; + break; + } + case 1361: { + s += " toFloat32OrNull "; + break; + } + case 1362: { + s += " toFloat32OrZero "; + break; + } + case 1363: { + s += " toFloat64 "; + break; + } + case 1364: { + s += " toFloat64OrNull "; + break; + } + case 1365: { + s += " toFloat64OrZero "; + break; + } + case 1366: { + s += " toHour "; + break; + } + case 1367: { + s += " toInt128 "; + break; + } + case 1368: { + s += " toInt128OrNull "; + break; + } + case 1369: { + s += " toInt128OrZero "; + break; + } + case 1370: { + s += " toInt16 "; + break; + } + case 1371: { + s += " toInt16OrNull "; + break; + } + case 1372: { + s += " toInt16OrZero "; + break; + } + case 1373: { + s += " toInt256 "; + break; + } + case 1374: { + s += " toInt256OrNull "; + break; + } + case 1375: 
{ + s += " toInt256OrZero "; + break; + } + case 1376: { + s += " toInt32 "; + break; + } + case 1377: { + s += " toInt32OrNull "; + break; + } + case 1378: { + s += " toInt32OrZero "; + break; + } + case 1379: { + s += " toInt64 "; + break; + } + case 1380: { + s += " toInt64OrNull "; + break; + } + case 1381: { + s += " toInt64OrZero "; + break; + } + case 1382: { + s += " toInt8 "; + break; + } + case 1383: { + s += " toInt8OrNull "; + break; + } + case 1384: { + s += " toInt8OrZero "; + break; + } + case 1385: { + s += " toIntervalDay "; + break; + } + case 1386: { + s += " toIntervalHour "; + break; + } + case 1387: { + s += " toIntervalMinute "; + break; + } + case 1388: { + s += " toIntervalMonth "; + break; + } + case 1389: { + s += " toIntervalQuarter "; + break; + } + case 1390: { + s += " toIntervalSecond "; + break; + } + case 1391: { + s += " toIntervalWeek "; + break; + } + case 1392: { + s += " toIntervalYear "; + break; + } + case 1393: { + s += " toIPv4 "; + break; + } + case 1394: { + s += " toIPv6 "; + break; + } + case 1395: { + s += " toISOWeek "; + break; + } + case 1396: { + s += " toISOYear "; + break; + } + case 1397: { + s += " toJSONString "; + break; + } + case 1398: { + s += " toLowCardinality "; + break; + } + case 1399: { + s += " toMinute "; + break; + } + case 1400: { + s += " toModifiedJulianDay "; + break; + } + case 1401: { + s += " toModifiedJulianDayOrNull "; + break; + } + case 1402: { + s += " toMonday "; + break; + } + case 1403: { + s += " toMonth "; + break; + } + case 1404: { + s += " toNullable "; + break; + } + case 1405: { + s += " TOP "; + break; + } + case 1406: { + s += " topK "; + break; + } + case 1407: { + s += " topKWeighted "; + break; + } + case 1408: { + s += " topLevelDomain "; + break; + } + case 1409: { + s += " toQuarter "; + break; + } + case 1410: { + s += " toRelativeDayNum "; + break; + } + case 1411: { + s += " toRelativeHourNum "; + break; + } + case 1412: { + s += " toRelativeMinuteNum "; + break; + } + case 1413: { + s += " toRelativeMonthNum "; + break; + } + case 1414: { + s += " toRelativeQuarterNum "; + break; + } + case 1415: { + s += " toRelativeSecondNum "; + break; + } + case 1416: { + s += " toRelativeWeekNum "; + break; + } + case 1417: { + s += " toRelativeYearNum "; + break; + } + case 1418: { + s += " toSecond "; + break; + } + case 1419: { + s += " toStartOfDay "; + break; + } + case 1420: { + s += " toStartOfFifteenMinutes "; + break; + } + case 1421: { + s += " toStartOfFiveMinutes "; + break; + } + case 1422: { + s += " toStartOfHour "; + break; + } + case 1423: { + s += " toStartOfInterval "; + break; + } + case 1424: { + s += " toStartOfISOYear "; + break; + } + case 1425: { + s += " toStartOfMinute "; + break; + } + case 1426: { + s += " toStartOfMonth "; + break; + } + case 1427: { + s += " toStartOfQuarter "; + break; + } + case 1428: { + s += " toStartOfSecond "; + break; + } + case 1429: { + s += " toStartOfTenMinutes "; + break; + } + case 1430: { + s += " toStartOfWeek "; + break; + } + case 1431: { + s += " toStartOfYear "; + break; + } + case 1432: { + s += " toString "; + break; + } + case 1433: { + s += " toStringCutToZero "; + break; + } + case 1434: { + s += " TO TABLE "; + break; + } + case 1435: { + s += " TOTALS "; + break; + } + case 1436: { + s += " toTime "; + break; + } + case 1437: { + s += " toTimezone "; + break; + } + case 1438: { + s += " toTimeZone "; + break; + } + case 1439: { + s += " toTypeName "; + break; + } + case 1440: { + s += " toUInt128 "; + break; + } + case 1441: { 
+ s += " toUInt128OrNull "; + break; + } + case 1442: { + s += " toUInt128OrZero "; + break; + } + case 1443: { + s += " toUInt16 "; + break; + } + case 1444: { + s += " toUInt16OrNull "; + break; + } + case 1445: { + s += " toUInt16OrZero "; + break; + } + case 1446: { + s += " toUInt256 "; + break; + } + case 1447: { + s += " toUInt256OrNull "; + break; + } + case 1448: { + s += " toUInt256OrZero "; + break; + } + case 1449: { + s += " toUInt32 "; + break; + } + case 1450: { + s += " toUInt32OrNull "; + break; + } + case 1451: { + s += " toUInt32OrZero "; + break; + } + case 1452: { + s += " toUInt64 "; + break; + } + case 1453: { + s += " toUInt64OrNull "; + break; + } + case 1454: { + s += " toUInt64OrZero "; + break; + } + case 1455: { + s += " toUInt8 "; + break; + } + case 1456: { + s += " toUInt8OrNull "; + break; + } + case 1457: { + s += " toUInt8OrZero "; + break; + } + case 1458: { + s += " toUnixTimestamp "; + break; + } + case 1459: { + s += " toUnixTimestamp64Micro "; + break; + } + case 1460: { + s += " toUnixTimestamp64Milli "; + break; + } + case 1461: { + s += " toUnixTimestamp64Nano "; + break; + } + case 1462: { + s += " toUUID "; + break; + } + case 1463: { + s += " toUUIDOrNull "; + break; + } + case 1464: { + s += " toUUIDOrZero "; + break; + } + case 1465: { + s += " toValidUTF8 "; + break; + } + case 1466: { + s += " TO VOLUME "; + break; + } + case 1467: { + s += " toWeek "; + break; + } + case 1468: { + s += " toYear "; + break; + } + case 1469: { + s += " toYearWeek "; + break; + } + case 1470: { + s += " toYYYYMM "; + break; + } + case 1471: { + s += " toYYYYMMDD "; + break; + } + case 1472: { + s += " toYYYYMMDDhhmmss "; + break; + } + case 1473: { + s += " TRAILING "; + break; + } + case 1474: { + s += " transform "; + break; + } + case 1475: { + s += " TRIM "; + break; + } + case 1476: { + s += " trimBoth "; + break; + } + case 1477: { + s += " trimLeft "; + break; + } + case 1478: { + s += " trimRight "; + break; + } + case 1479: { + s += " trunc "; + break; + } + case 1480: { + s += " truncate "; + break; + } + case 1481: { + s += " TRUNCATE "; + break; + } + case 1482: { + s += " tryBase64Decode "; + break; + } + case 1483: { + s += " TTL "; + break; + } + case 1484: { + s += " tuple "; + break; + } + case 1485: { + s += " Tuple "; + break; + } + case 1486: { + s += " tupleElement "; + break; + } + case 1487: { + s += " tupleHammingDistance "; + break; + } + case 1488: { + s += " tupleToNameValuePairs "; + break; + } + case 1489: { + s += " TYPE "; + break; + } + case 1490: { + s += " ucase "; + break; + } + case 1491: { + s += " UInt128 "; + break; + } + case 1492: { + s += " UInt16 "; + break; + } + case 1493: { + s += " UInt256 "; + break; + } + case 1494: { + s += " UInt32 "; + break; + } + case 1495: { + s += " UInt64 "; + break; + } + case 1496: { + s += " UInt8 "; + break; + } + case 1497: { + s += " unbin "; + break; + } + case 1498: { + s += " unhex "; + break; + } + case 1499: { + s += " UNION "; + break; + } + case 1500: { + s += " uniq "; + break; + } + case 1501: { + s += " uniqCombined "; + break; + } + case 1502: { + s += " uniqCombined64 "; + break; + } + case 1503: { + s += " uniqExact "; + break; + } + case 1504: { + s += " uniqHLL12 "; + break; + } + case 1505: { + s += " uniqTheta "; + break; + } + case 1506: { + s += " uniqUpTo "; + break; + } + case 1507: { + s += " UPDATE "; + break; + } + case 1508: { + s += " upper "; + break; + } + case 1509: { + s += " upperUTF8 "; + break; + } + case 1510: { + s += " uptime "; + break; + } + 
case 1511: { + s += " URLHash "; + break; + } + case 1512: { + s += " URLHierarchy "; + break; + } + case 1513: { + s += " URLPathHierarchy "; + break; + } + case 1514: { + s += " USE "; + break; + } + case 1515: { + s += " user "; + break; + } + case 1516: { + s += " USING "; + break; + } + case 1517: { + s += " UUID "; + break; + } + case 1518: { + s += " UUIDNumToString "; + break; + } + case 1519: { + s += " UUIDStringToNum "; + break; + } + case 1520: { + s += " validateNestedArraySizes "; + break; + } + case 1521: { + s += " VALUES "; + break; + } + case 1522: { + s += " VARCHAR "; + break; + } + case 1523: { + s += " VARCHAR2 "; + break; + } + case 1524: { + s += " varPop "; + break; + } + case 1525: { + s += " VAR_POP "; + break; + } + case 1526: { + s += " varPopStable "; + break; + } + case 1527: { + s += " varSamp "; + break; + } + case 1528: { + s += " VAR_SAMP "; + break; + } + case 1529: { + s += " varSampStable "; + break; + } + case 1530: { + s += " version "; + break; + } + case 1531: { + s += " VIEW "; + break; + } + case 1532: { + s += " visibleWidth "; + break; + } + case 1533: { + s += " visitParamExtractBool "; + break; + } + case 1534: { + s += " visitParamExtractFloat "; + break; + } + case 1535: { + s += " visitParamExtractInt "; + break; + } + case 1536: { + s += " visitParamExtractRaw "; + break; + } + case 1537: { + s += " visitParamExtractString "; + break; + } + case 1538: { + s += " visitParamExtractUInt "; + break; + } + case 1539: { + s += " visitParamHas "; + break; + } + case 1540: { + s += " VOLUME "; + break; + } + case 1541: { + s += " WATCH "; + break; + } + case 1542: { + s += " week "; + break; + } + case 1543: { + s += " WEEK "; + break; + } + case 1544: { + s += " welchTTest "; + break; + } + case 1545: { + s += " WHEN "; + break; + } + case 1546: { + s += " WHERE "; + break; + } + case 1547: { + s += " windowFunnel "; + break; + } + case 1548: { + s += " WITH "; + break; + } + case 1549: { + s += " WITH FILL "; + break; + } + case 1550: { + s += " WITH TIES "; + break; + } + case 1551: { + s += " WK "; + break; + } + case 1552: { + s += " wkt "; + break; + } + case 1553: { + s += " wordShingleMinHash "; + break; + } + case 1554: { + s += " wordShingleMinHashArg "; + break; + } + case 1555: { + s += " wordShingleMinHashArgCaseInsensitive "; + break; + } + case 1556: { + s += " wordShingleMinHashArgCaseInsensitiveUTF8 "; + break; + } + case 1557: { + s += " wordShingleMinHashArgUTF8 "; + break; + } + case 1558: { + s += " wordShingleMinHashCaseInsensitive "; + break; + } + case 1559: { + s += " wordShingleMinHashCaseInsensitiveUTF8 "; + break; + } + case 1560: { + s += " wordShingleMinHashUTF8 "; + break; + } + case 1561: { + s += " wordShingleSimHash "; + break; + } + case 1562: { + s += " wordShingleSimHashCaseInsensitive "; + break; + } + case 1563: { + s += " wordShingleSimHashCaseInsensitiveUTF8 "; + break; + } + case 1564: { + s += " wordShingleSimHashUTF8 "; + break; + } + case 1565: { + s += " WW "; + break; + } + case 1566: { + s += " xor "; + break; + } + case 1567: { + s += " xxHash32 "; + break; + } + case 1568: { + s += " xxHash64 "; + break; + } + case 1569: { + s += " kostikConsistentHash "; + break; + } + case 1570: { + s += " YEAR "; + break; + } + case 1571: { + s += " yearweek "; + break; + } + case 1572: { + s += " yesterday "; + break; + } + case 1573: { + s += " YY "; + break; + } + case 1574: { + s += " YYYY "; + break; + } + case 1575: { + s += " zookeeperSessionUptime "; + break; + } + default: break; + } +} diff --git 
a/src/Parsers/fuzzers/codegen_fuzzer/out.proto b/src/Parsers/fuzzers/codegen_fuzzer/out.proto new file mode 100644 index 00000000000..60992ca6a81 --- /dev/null +++ b/src/Parsers/fuzzers/codegen_fuzzer/out.proto @@ -0,0 +1,1587 @@ +syntax = "proto3"; + +message Word { + enum Value { + value_0 = 0; + value_1 = 1; + value_2 = 2; + value_3 = 3; + value_4 = 4; + value_5 = 5; + value_6 = 6; + value_7 = 7; + value_8 = 8; + value_9 = 9; + value_10 = 10; + value_11 = 11; + value_12 = 12; + value_13 = 13; + value_14 = 14; + value_15 = 15; + value_16 = 16; + value_17 = 17; + value_18 = 18; + value_19 = 19; + value_20 = 20; + value_21 = 21; + value_22 = 22; + value_23 = 23; + value_24 = 24; + value_25 = 25; + value_26 = 26; + value_27 = 27; + value_28 = 28; + value_29 = 29; + value_30 = 30; + value_31 = 31; + value_32 = 32; + value_33 = 33; + value_34 = 34; + value_35 = 35; + value_36 = 36; + value_37 = 37; + value_38 = 38; + value_39 = 39; + value_40 = 40; + value_41 = 41; + value_42 = 42; + value_43 = 43; + value_44 = 44; + value_45 = 45; + value_46 = 46; + value_47 = 47; + value_48 = 48; + value_49 = 49; + value_50 = 50; + value_51 = 51; + value_52 = 52; + value_53 = 53; + value_54 = 54; + value_55 = 55; + value_56 = 56; + value_57 = 57; + value_58 = 58; + value_59 = 59; + value_60 = 60; + value_61 = 61; + value_62 = 62; + value_63 = 63; + value_64 = 64; + value_65 = 65; + value_66 = 66; + value_67 = 67; + value_68 = 68; + value_69 = 69; + value_70 = 70; + value_71 = 71; + value_72 = 72; + value_73 = 73; + value_74 = 74; + value_75 = 75; + value_76 = 76; + value_77 = 77; + value_78 = 78; + value_79 = 79; + value_80 = 80; + value_81 = 81; + value_82 = 82; + value_83 = 83; + value_84 = 84; + value_85 = 85; + value_86 = 86; + value_87 = 87; + value_88 = 88; + value_89 = 89; + value_90 = 90; + value_91 = 91; + value_92 = 92; + value_93 = 93; + value_94 = 94; + value_95 = 95; + value_96 = 96; + value_97 = 97; + value_98 = 98; + value_99 = 99; + value_100 = 100; + value_101 = 101; + value_102 = 102; + value_103 = 103; + value_104 = 104; + value_105 = 105; + value_106 = 106; + value_107 = 107; + value_108 = 108; + value_109 = 109; + value_110 = 110; + value_111 = 111; + value_112 = 112; + value_113 = 113; + value_114 = 114; + value_115 = 115; + value_116 = 116; + value_117 = 117; + value_118 = 118; + value_119 = 119; + value_120 = 120; + value_121 = 121; + value_122 = 122; + value_123 = 123; + value_124 = 124; + value_125 = 125; + value_126 = 126; + value_127 = 127; + value_128 = 128; + value_129 = 129; + value_130 = 130; + value_131 = 131; + value_132 = 132; + value_133 = 133; + value_134 = 134; + value_135 = 135; + value_136 = 136; + value_137 = 137; + value_138 = 138; + value_139 = 139; + value_140 = 140; + value_141 = 141; + value_142 = 142; + value_143 = 143; + value_144 = 144; + value_145 = 145; + value_146 = 146; + value_147 = 147; + value_148 = 148; + value_149 = 149; + value_150 = 150; + value_151 = 151; + value_152 = 152; + value_153 = 153; + value_154 = 154; + value_155 = 155; + value_156 = 156; + value_157 = 157; + value_158 = 158; + value_159 = 159; + value_160 = 160; + value_161 = 161; + value_162 = 162; + value_163 = 163; + value_164 = 164; + value_165 = 165; + value_166 = 166; + value_167 = 167; + value_168 = 168; + value_169 = 169; + value_170 = 170; + value_171 = 171; + value_172 = 172; + value_173 = 173; + value_174 = 174; + value_175 = 175; + value_176 = 176; + value_177 = 177; + value_178 = 178; + value_179 = 179; + value_180 = 180; + value_181 = 181; + value_182 = 182; + value_183 = 
183; + value_184 = 184; + value_185 = 185; + value_186 = 186; + value_187 = 187; + value_188 = 188; + value_189 = 189; + value_190 = 190; + value_191 = 191; + value_192 = 192; + value_193 = 193; + value_194 = 194; + value_195 = 195; + value_196 = 196; + value_197 = 197; + value_198 = 198; + value_199 = 199; + value_200 = 200; + value_201 = 201; + value_202 = 202; + value_203 = 203; + value_204 = 204; + value_205 = 205; + value_206 = 206; + value_207 = 207; + value_208 = 208; + value_209 = 209; + value_210 = 210; + value_211 = 211; + value_212 = 212; + value_213 = 213; + value_214 = 214; + value_215 = 215; + value_216 = 216; + value_217 = 217; + value_218 = 218; + value_219 = 219; + value_220 = 220; + value_221 = 221; + value_222 = 222; + value_223 = 223; + value_224 = 224; + value_225 = 225; + value_226 = 226; + value_227 = 227; + value_228 = 228; + value_229 = 229; + value_230 = 230; + value_231 = 231; + value_232 = 232; + value_233 = 233; + value_234 = 234; + value_235 = 235; + value_236 = 236; + value_237 = 237; + value_238 = 238; + value_239 = 239; + value_240 = 240; + value_241 = 241; + value_242 = 242; + value_243 = 243; + value_244 = 244; + value_245 = 245; + value_246 = 246; + value_247 = 247; + value_248 = 248; + value_249 = 249; + value_250 = 250; + value_251 = 251; + value_252 = 252; + value_253 = 253; + value_254 = 254; + value_255 = 255; + value_256 = 256; + value_257 = 257; + value_258 = 258; + value_259 = 259; + value_260 = 260; + value_261 = 261; + value_262 = 262; + value_263 = 263; + value_264 = 264; + value_265 = 265; + value_266 = 266; + value_267 = 267; + value_268 = 268; + value_269 = 269; + value_270 = 270; + value_271 = 271; + value_272 = 272; + value_273 = 273; + value_274 = 274; + value_275 = 275; + value_276 = 276; + value_277 = 277; + value_278 = 278; + value_279 = 279; + value_280 = 280; + value_281 = 281; + value_282 = 282; + value_283 = 283; + value_284 = 284; + value_285 = 285; + value_286 = 286; + value_287 = 287; + value_288 = 288; + value_289 = 289; + value_290 = 290; + value_291 = 291; + value_292 = 292; + value_293 = 293; + value_294 = 294; + value_295 = 295; + value_296 = 296; + value_297 = 297; + value_298 = 298; + value_299 = 299; + value_300 = 300; + value_301 = 301; + value_302 = 302; + value_303 = 303; + value_304 = 304; + value_305 = 305; + value_306 = 306; + value_307 = 307; + value_308 = 308; + value_309 = 309; + value_310 = 310; + value_311 = 311; + value_312 = 312; + value_313 = 313; + value_314 = 314; + value_315 = 315; + value_316 = 316; + value_317 = 317; + value_318 = 318; + value_319 = 319; + value_320 = 320; + value_321 = 321; + value_322 = 322; + value_323 = 323; + value_324 = 324; + value_325 = 325; + value_326 = 326; + value_327 = 327; + value_328 = 328; + value_329 = 329; + value_330 = 330; + value_331 = 331; + value_332 = 332; + value_333 = 333; + value_334 = 334; + value_335 = 335; + value_336 = 336; + value_337 = 337; + value_338 = 338; + value_339 = 339; + value_340 = 340; + value_341 = 341; + value_342 = 342; + value_343 = 343; + value_344 = 344; + value_345 = 345; + value_346 = 346; + value_347 = 347; + value_348 = 348; + value_349 = 349; + value_350 = 350; + value_351 = 351; + value_352 = 352; + value_353 = 353; + value_354 = 354; + value_355 = 355; + value_356 = 356; + value_357 = 357; + value_358 = 358; + value_359 = 359; + value_360 = 360; + value_361 = 361; + value_362 = 362; + value_363 = 363; + value_364 = 364; + value_365 = 365; + value_366 = 366; + value_367 = 367; + value_368 = 368; + value_369 = 369; + value_370 = 
370; + value_371 = 371; + value_372 = 372; + value_373 = 373; + value_374 = 374; + value_375 = 375; + value_376 = 376; + value_377 = 377; + value_378 = 378; + value_379 = 379; + value_380 = 380; + value_381 = 381; + value_382 = 382; + value_383 = 383; + value_384 = 384; + value_385 = 385; + value_386 = 386; + value_387 = 387; + value_388 = 388; + value_389 = 389; + value_390 = 390; + value_391 = 391; + value_392 = 392; + value_393 = 393; + value_394 = 394; + value_395 = 395; + value_396 = 396; + value_397 = 397; + value_398 = 398; + value_399 = 399; + value_400 = 400; + value_401 = 401; + value_402 = 402; + value_403 = 403; + value_404 = 404; + value_405 = 405; + value_406 = 406; + value_407 = 407; + value_408 = 408; + value_409 = 409; + value_410 = 410; + value_411 = 411; + value_412 = 412; + value_413 = 413; + value_414 = 414; + value_415 = 415; + value_416 = 416; + value_417 = 417; + value_418 = 418; + value_419 = 419; + value_420 = 420; + value_421 = 421; + value_422 = 422; + value_423 = 423; + value_424 = 424; + value_425 = 425; + value_426 = 426; + value_427 = 427; + value_428 = 428; + value_429 = 429; + value_430 = 430; + value_431 = 431; + value_432 = 432; + value_433 = 433; + value_434 = 434; + value_435 = 435; + value_436 = 436; + value_437 = 437; + value_438 = 438; + value_439 = 439; + value_440 = 440; + value_441 = 441; + value_442 = 442; + value_443 = 443; + value_444 = 444; + value_445 = 445; + value_446 = 446; + value_447 = 447; + value_448 = 448; + value_449 = 449; + value_450 = 450; + value_451 = 451; + value_452 = 452; + value_453 = 453; + value_454 = 454; + value_455 = 455; + value_456 = 456; + value_457 = 457; + value_458 = 458; + value_459 = 459; + value_460 = 460; + value_461 = 461; + value_462 = 462; + value_463 = 463; + value_464 = 464; + value_465 = 465; + value_466 = 466; + value_467 = 467; + value_468 = 468; + value_469 = 469; + value_470 = 470; + value_471 = 471; + value_472 = 472; + value_473 = 473; + value_474 = 474; + value_475 = 475; + value_476 = 476; + value_477 = 477; + value_478 = 478; + value_479 = 479; + value_480 = 480; + value_481 = 481; + value_482 = 482; + value_483 = 483; + value_484 = 484; + value_485 = 485; + value_486 = 486; + value_487 = 487; + value_488 = 488; + value_489 = 489; + value_490 = 490; + value_491 = 491; + value_492 = 492; + value_493 = 493; + value_494 = 494; + value_495 = 495; + value_496 = 496; + value_497 = 497; + value_498 = 498; + value_499 = 499; + value_500 = 500; + value_501 = 501; + value_502 = 502; + value_503 = 503; + value_504 = 504; + value_505 = 505; + value_506 = 506; + value_507 = 507; + value_508 = 508; + value_509 = 509; + value_510 = 510; + value_511 = 511; + value_512 = 512; + value_513 = 513; + value_514 = 514; + value_515 = 515; + value_516 = 516; + value_517 = 517; + value_518 = 518; + value_519 = 519; + value_520 = 520; + value_521 = 521; + value_522 = 522; + value_523 = 523; + value_524 = 524; + value_525 = 525; + value_526 = 526; + value_527 = 527; + value_528 = 528; + value_529 = 529; + value_530 = 530; + value_531 = 531; + value_532 = 532; + value_533 = 533; + value_534 = 534; + value_535 = 535; + value_536 = 536; + value_537 = 537; + value_538 = 538; + value_539 = 539; + value_540 = 540; + value_541 = 541; + value_542 = 542; + value_543 = 543; + value_544 = 544; + value_545 = 545; + value_546 = 546; + value_547 = 547; + value_548 = 548; + value_549 = 549; + value_550 = 550; + value_551 = 551; + value_552 = 552; + value_553 = 553; + value_554 = 554; + value_555 = 555; + value_556 = 556; + value_557 = 
557; + value_558 = 558; + value_559 = 559; + value_560 = 560; + value_561 = 561; + value_562 = 562; + value_563 = 563; + value_564 = 564; + value_565 = 565; + value_566 = 566; + value_567 = 567; + value_568 = 568; + value_569 = 569; + value_570 = 570; + value_571 = 571; + value_572 = 572; + value_573 = 573; + value_574 = 574; + value_575 = 575; + value_576 = 576; + value_577 = 577; + value_578 = 578; + value_579 = 579; + value_580 = 580; + value_581 = 581; + value_582 = 582; + value_583 = 583; + value_584 = 584; + value_585 = 585; + value_586 = 586; + value_587 = 587; + value_588 = 588; + value_589 = 589; + value_590 = 590; + value_591 = 591; + value_592 = 592; + value_593 = 593; + value_594 = 594; + value_595 = 595; + value_596 = 596; + value_597 = 597; + value_598 = 598; + value_599 = 599; + value_600 = 600; + value_601 = 601; + value_602 = 602; + value_603 = 603; + value_604 = 604; + value_605 = 605; + value_606 = 606; + value_607 = 607; + value_608 = 608; + value_609 = 609; + value_610 = 610; + value_611 = 611; + value_612 = 612; + value_613 = 613; + value_614 = 614; + value_615 = 615; + value_616 = 616; + value_617 = 617; + value_618 = 618; + value_619 = 619; + value_620 = 620; + value_621 = 621; + value_622 = 622; + value_623 = 623; + value_624 = 624; + value_625 = 625; + value_626 = 626; + value_627 = 627; + value_628 = 628; + value_629 = 629; + value_630 = 630; + value_631 = 631; + value_632 = 632; + value_633 = 633; + value_634 = 634; + value_635 = 635; + value_636 = 636; + value_637 = 637; + value_638 = 638; + value_639 = 639; + value_640 = 640; + value_641 = 641; + value_642 = 642; + value_643 = 643; + value_644 = 644; + value_645 = 645; + value_646 = 646; + value_647 = 647; + value_648 = 648; + value_649 = 649; + value_650 = 650; + value_651 = 651; + value_652 = 652; + value_653 = 653; + value_654 = 654; + value_655 = 655; + value_656 = 656; + value_657 = 657; + value_658 = 658; + value_659 = 659; + value_660 = 660; + value_661 = 661; + value_662 = 662; + value_663 = 663; + value_664 = 664; + value_665 = 665; + value_666 = 666; + value_667 = 667; + value_668 = 668; + value_669 = 669; + value_670 = 670; + value_671 = 671; + value_672 = 672; + value_673 = 673; + value_674 = 674; + value_675 = 675; + value_676 = 676; + value_677 = 677; + value_678 = 678; + value_679 = 679; + value_680 = 680; + value_681 = 681; + value_682 = 682; + value_683 = 683; + value_684 = 684; + value_685 = 685; + value_686 = 686; + value_687 = 687; + value_688 = 688; + value_689 = 689; + value_690 = 690; + value_691 = 691; + value_692 = 692; + value_693 = 693; + value_694 = 694; + value_695 = 695; + value_696 = 696; + value_697 = 697; + value_698 = 698; + value_699 = 699; + value_700 = 700; + value_701 = 701; + value_702 = 702; + value_703 = 703; + value_704 = 704; + value_705 = 705; + value_706 = 706; + value_707 = 707; + value_708 = 708; + value_709 = 709; + value_710 = 710; + value_711 = 711; + value_712 = 712; + value_713 = 713; + value_714 = 714; + value_715 = 715; + value_716 = 716; + value_717 = 717; + value_718 = 718; + value_719 = 719; + value_720 = 720; + value_721 = 721; + value_722 = 722; + value_723 = 723; + value_724 = 724; + value_725 = 725; + value_726 = 726; + value_727 = 727; + value_728 = 728; + value_729 = 729; + value_730 = 730; + value_731 = 731; + value_732 = 732; + value_733 = 733; + value_734 = 734; + value_735 = 735; + value_736 = 736; + value_737 = 737; + value_738 = 738; + value_739 = 739; + value_740 = 740; + value_741 = 741; + value_742 = 742; + value_743 = 743; + value_744 = 
744; + value_745 = 745; + value_746 = 746; + value_747 = 747; + value_748 = 748; + value_749 = 749; + value_750 = 750; + value_751 = 751; + value_752 = 752; + value_753 = 753; + value_754 = 754; + value_755 = 755; + value_756 = 756; + value_757 = 757; + value_758 = 758; + value_759 = 759; + value_760 = 760; + value_761 = 761; + value_762 = 762; + value_763 = 763; + value_764 = 764; + value_765 = 765; + value_766 = 766; + value_767 = 767; + value_768 = 768; + value_769 = 769; + value_770 = 770; + value_771 = 771; + value_772 = 772; + value_773 = 773; + value_774 = 774; + value_775 = 775; + value_776 = 776; + value_777 = 777; + value_778 = 778; + value_779 = 779; + value_780 = 780; + value_781 = 781; + value_782 = 782; + value_783 = 783; + value_784 = 784; + value_785 = 785; + value_786 = 786; + value_787 = 787; + value_788 = 788; + value_789 = 789; + value_790 = 790; + value_791 = 791; + value_792 = 792; + value_793 = 793; + value_794 = 794; + value_795 = 795; + value_796 = 796; + value_797 = 797; + value_798 = 798; + value_799 = 799; + value_800 = 800; + value_801 = 801; + value_802 = 802; + value_803 = 803; + value_804 = 804; + value_805 = 805; + value_806 = 806; + value_807 = 807; + value_808 = 808; + value_809 = 809; + value_810 = 810; + value_811 = 811; + value_812 = 812; + value_813 = 813; + value_814 = 814; + value_815 = 815; + value_816 = 816; + value_817 = 817; + value_818 = 818; + value_819 = 819; + value_820 = 820; + value_821 = 821; + value_822 = 822; + value_823 = 823; + value_824 = 824; + value_825 = 825; + value_826 = 826; + value_827 = 827; + value_828 = 828; + value_829 = 829; + value_830 = 830; + value_831 = 831; + value_832 = 832; + value_833 = 833; + value_834 = 834; + value_835 = 835; + value_836 = 836; + value_837 = 837; + value_838 = 838; + value_839 = 839; + value_840 = 840; + value_841 = 841; + value_842 = 842; + value_843 = 843; + value_844 = 844; + value_845 = 845; + value_846 = 846; + value_847 = 847; + value_848 = 848; + value_849 = 849; + value_850 = 850; + value_851 = 851; + value_852 = 852; + value_853 = 853; + value_854 = 854; + value_855 = 855; + value_856 = 856; + value_857 = 857; + value_858 = 858; + value_859 = 859; + value_860 = 860; + value_861 = 861; + value_862 = 862; + value_863 = 863; + value_864 = 864; + value_865 = 865; + value_866 = 866; + value_867 = 867; + value_868 = 868; + value_869 = 869; + value_870 = 870; + value_871 = 871; + value_872 = 872; + value_873 = 873; + value_874 = 874; + value_875 = 875; + value_876 = 876; + value_877 = 877; + value_878 = 878; + value_879 = 879; + value_880 = 880; + value_881 = 881; + value_882 = 882; + value_883 = 883; + value_884 = 884; + value_885 = 885; + value_886 = 886; + value_887 = 887; + value_888 = 888; + value_889 = 889; + value_890 = 890; + value_891 = 891; + value_892 = 892; + value_893 = 893; + value_894 = 894; + value_895 = 895; + value_896 = 896; + value_897 = 897; + value_898 = 898; + value_899 = 899; + value_900 = 900; + value_901 = 901; + value_902 = 902; + value_903 = 903; + value_904 = 904; + value_905 = 905; + value_906 = 906; + value_907 = 907; + value_908 = 908; + value_909 = 909; + value_910 = 910; + value_911 = 911; + value_912 = 912; + value_913 = 913; + value_914 = 914; + value_915 = 915; + value_916 = 916; + value_917 = 917; + value_918 = 918; + value_919 = 919; + value_920 = 920; + value_921 = 921; + value_922 = 922; + value_923 = 923; + value_924 = 924; + value_925 = 925; + value_926 = 926; + value_927 = 927; + value_928 = 928; + value_929 = 929; + value_930 = 930; + value_931 = 
931; + value_932 = 932; + value_933 = 933; + value_934 = 934; + value_935 = 935; + value_936 = 936; + value_937 = 937; + value_938 = 938; + value_939 = 939; + value_940 = 940; + value_941 = 941; + value_942 = 942; + value_943 = 943; + value_944 = 944; + value_945 = 945; + value_946 = 946; + value_947 = 947; + value_948 = 948; + value_949 = 949; + value_950 = 950; + value_951 = 951; + value_952 = 952; + value_953 = 953; + value_954 = 954; + value_955 = 955; + value_956 = 956; + value_957 = 957; + value_958 = 958; + value_959 = 959; + value_960 = 960; + value_961 = 961; + value_962 = 962; + value_963 = 963; + value_964 = 964; + value_965 = 965; + value_966 = 966; + value_967 = 967; + value_968 = 968; + value_969 = 969; + value_970 = 970; + value_971 = 971; + value_972 = 972; + value_973 = 973; + value_974 = 974; + value_975 = 975; + value_976 = 976; + value_977 = 977; + value_978 = 978; + value_979 = 979; + value_980 = 980; + value_981 = 981; + value_982 = 982; + value_983 = 983; + value_984 = 984; + value_985 = 985; + value_986 = 986; + value_987 = 987; + value_988 = 988; + value_989 = 989; + value_990 = 990; + value_991 = 991; + value_992 = 992; + value_993 = 993; + value_994 = 994; + value_995 = 995; + value_996 = 996; + value_997 = 997; + value_998 = 998; + value_999 = 999; + value_1000 = 1000; + value_1001 = 1001; + value_1002 = 1002; + value_1003 = 1003; + value_1004 = 1004; + value_1005 = 1005; + value_1006 = 1006; + value_1007 = 1007; + value_1008 = 1008; + value_1009 = 1009; + value_1010 = 1010; + value_1011 = 1011; + value_1012 = 1012; + value_1013 = 1013; + value_1014 = 1014; + value_1015 = 1015; + value_1016 = 1016; + value_1017 = 1017; + value_1018 = 1018; + value_1019 = 1019; + value_1020 = 1020; + value_1021 = 1021; + value_1022 = 1022; + value_1023 = 1023; + value_1024 = 1024; + value_1025 = 1025; + value_1026 = 1026; + value_1027 = 1027; + value_1028 = 1028; + value_1029 = 1029; + value_1030 = 1030; + value_1031 = 1031; + value_1032 = 1032; + value_1033 = 1033; + value_1034 = 1034; + value_1035 = 1035; + value_1036 = 1036; + value_1037 = 1037; + value_1038 = 1038; + value_1039 = 1039; + value_1040 = 1040; + value_1041 = 1041; + value_1042 = 1042; + value_1043 = 1043; + value_1044 = 1044; + value_1045 = 1045; + value_1046 = 1046; + value_1047 = 1047; + value_1048 = 1048; + value_1049 = 1049; + value_1050 = 1050; + value_1051 = 1051; + value_1052 = 1052; + value_1053 = 1053; + value_1054 = 1054; + value_1055 = 1055; + value_1056 = 1056; + value_1057 = 1057; + value_1058 = 1058; + value_1059 = 1059; + value_1060 = 1060; + value_1061 = 1061; + value_1062 = 1062; + value_1063 = 1063; + value_1064 = 1064; + value_1065 = 1065; + value_1066 = 1066; + value_1067 = 1067; + value_1068 = 1068; + value_1069 = 1069; + value_1070 = 1070; + value_1071 = 1071; + value_1072 = 1072; + value_1073 = 1073; + value_1074 = 1074; + value_1075 = 1075; + value_1076 = 1076; + value_1077 = 1077; + value_1078 = 1078; + value_1079 = 1079; + value_1080 = 1080; + value_1081 = 1081; + value_1082 = 1082; + value_1083 = 1083; + value_1084 = 1084; + value_1085 = 1085; + value_1086 = 1086; + value_1087 = 1087; + value_1088 = 1088; + value_1089 = 1089; + value_1090 = 1090; + value_1091 = 1091; + value_1092 = 1092; + value_1093 = 1093; + value_1094 = 1094; + value_1095 = 1095; + value_1096 = 1096; + value_1097 = 1097; + value_1098 = 1098; + value_1099 = 1099; + value_1100 = 1100; + value_1101 = 1101; + value_1102 = 1102; + value_1103 = 1103; + value_1104 = 1104; + value_1105 = 1105; + value_1106 = 1106; + 
value_1107 = 1107; + value_1108 = 1108; + value_1109 = 1109; + value_1110 = 1110; + value_1111 = 1111; + value_1112 = 1112; + value_1113 = 1113; + value_1114 = 1114; + value_1115 = 1115; + value_1116 = 1116; + value_1117 = 1117; + value_1118 = 1118; + value_1119 = 1119; + value_1120 = 1120; + value_1121 = 1121; + value_1122 = 1122; + value_1123 = 1123; + value_1124 = 1124; + value_1125 = 1125; + value_1126 = 1126; + value_1127 = 1127; + value_1128 = 1128; + value_1129 = 1129; + value_1130 = 1130; + value_1131 = 1131; + value_1132 = 1132; + value_1133 = 1133; + value_1134 = 1134; + value_1135 = 1135; + value_1136 = 1136; + value_1137 = 1137; + value_1138 = 1138; + value_1139 = 1139; + value_1140 = 1140; + value_1141 = 1141; + value_1142 = 1142; + value_1143 = 1143; + value_1144 = 1144; + value_1145 = 1145; + value_1146 = 1146; + value_1147 = 1147; + value_1148 = 1148; + value_1149 = 1149; + value_1150 = 1150; + value_1151 = 1151; + value_1152 = 1152; + value_1153 = 1153; + value_1154 = 1154; + value_1155 = 1155; + value_1156 = 1156; + value_1157 = 1157; + value_1158 = 1158; + value_1159 = 1159; + value_1160 = 1160; + value_1161 = 1161; + value_1162 = 1162; + value_1163 = 1163; + value_1164 = 1164; + value_1165 = 1165; + value_1166 = 1166; + value_1167 = 1167; + value_1168 = 1168; + value_1169 = 1169; + value_1170 = 1170; + value_1171 = 1171; + value_1172 = 1172; + value_1173 = 1173; + value_1174 = 1174; + value_1175 = 1175; + value_1176 = 1176; + value_1177 = 1177; + value_1178 = 1178; + value_1179 = 1179; + value_1180 = 1180; + value_1181 = 1181; + value_1182 = 1182; + value_1183 = 1183; + value_1184 = 1184; + value_1185 = 1185; + value_1186 = 1186; + value_1187 = 1187; + value_1188 = 1188; + value_1189 = 1189; + value_1190 = 1190; + value_1191 = 1191; + value_1192 = 1192; + value_1193 = 1193; + value_1194 = 1194; + value_1195 = 1195; + value_1196 = 1196; + value_1197 = 1197; + value_1198 = 1198; + value_1199 = 1199; + value_1200 = 1200; + value_1201 = 1201; + value_1202 = 1202; + value_1203 = 1203; + value_1204 = 1204; + value_1205 = 1205; + value_1206 = 1206; + value_1207 = 1207; + value_1208 = 1208; + value_1209 = 1209; + value_1210 = 1210; + value_1211 = 1211; + value_1212 = 1212; + value_1213 = 1213; + value_1214 = 1214; + value_1215 = 1215; + value_1216 = 1216; + value_1217 = 1217; + value_1218 = 1218; + value_1219 = 1219; + value_1220 = 1220; + value_1221 = 1221; + value_1222 = 1222; + value_1223 = 1223; + value_1224 = 1224; + value_1225 = 1225; + value_1226 = 1226; + value_1227 = 1227; + value_1228 = 1228; + value_1229 = 1229; + value_1230 = 1230; + value_1231 = 1231; + value_1232 = 1232; + value_1233 = 1233; + value_1234 = 1234; + value_1235 = 1235; + value_1236 = 1236; + value_1237 = 1237; + value_1238 = 1238; + value_1239 = 1239; + value_1240 = 1240; + value_1241 = 1241; + value_1242 = 1242; + value_1243 = 1243; + value_1244 = 1244; + value_1245 = 1245; + value_1246 = 1246; + value_1247 = 1247; + value_1248 = 1248; + value_1249 = 1249; + value_1250 = 1250; + value_1251 = 1251; + value_1252 = 1252; + value_1253 = 1253; + value_1254 = 1254; + value_1255 = 1255; + value_1256 = 1256; + value_1257 = 1257; + value_1258 = 1258; + value_1259 = 1259; + value_1260 = 1260; + value_1261 = 1261; + value_1262 = 1262; + value_1263 = 1263; + value_1264 = 1264; + value_1265 = 1265; + value_1266 = 1266; + value_1267 = 1267; + value_1268 = 1268; + value_1269 = 1269; + value_1270 = 1270; + value_1271 = 1271; + value_1272 = 1272; + value_1273 = 1273; + value_1274 = 1274; + value_1275 = 1275; + 
value_1276 = 1276; + value_1277 = 1277; + value_1278 = 1278; + value_1279 = 1279; + value_1280 = 1280; + value_1281 = 1281; + value_1282 = 1282; + value_1283 = 1283; + value_1284 = 1284; + value_1285 = 1285; + value_1286 = 1286; + value_1287 = 1287; + value_1288 = 1288; + value_1289 = 1289; + value_1290 = 1290; + value_1291 = 1291; + value_1292 = 1292; + value_1293 = 1293; + value_1294 = 1294; + value_1295 = 1295; + value_1296 = 1296; + value_1297 = 1297; + value_1298 = 1298; + value_1299 = 1299; + value_1300 = 1300; + value_1301 = 1301; + value_1302 = 1302; + value_1303 = 1303; + value_1304 = 1304; + value_1305 = 1305; + value_1306 = 1306; + value_1307 = 1307; + value_1308 = 1308; + value_1309 = 1309; + value_1310 = 1310; + value_1311 = 1311; + value_1312 = 1312; + value_1313 = 1313; + value_1314 = 1314; + value_1315 = 1315; + value_1316 = 1316; + value_1317 = 1317; + value_1318 = 1318; + value_1319 = 1319; + value_1320 = 1320; + value_1321 = 1321; + value_1322 = 1322; + value_1323 = 1323; + value_1324 = 1324; + value_1325 = 1325; + value_1326 = 1326; + value_1327 = 1327; + value_1328 = 1328; + value_1329 = 1329; + value_1330 = 1330; + value_1331 = 1331; + value_1332 = 1332; + value_1333 = 1333; + value_1334 = 1334; + value_1335 = 1335; + value_1336 = 1336; + value_1337 = 1337; + value_1338 = 1338; + value_1339 = 1339; + value_1340 = 1340; + value_1341 = 1341; + value_1342 = 1342; + value_1343 = 1343; + value_1344 = 1344; + value_1345 = 1345; + value_1346 = 1346; + value_1347 = 1347; + value_1348 = 1348; + value_1349 = 1349; + value_1350 = 1350; + value_1351 = 1351; + value_1352 = 1352; + value_1353 = 1353; + value_1354 = 1354; + value_1355 = 1355; + value_1356 = 1356; + value_1357 = 1357; + value_1358 = 1358; + value_1359 = 1359; + value_1360 = 1360; + value_1361 = 1361; + value_1362 = 1362; + value_1363 = 1363; + value_1364 = 1364; + value_1365 = 1365; + value_1366 = 1366; + value_1367 = 1367; + value_1368 = 1368; + value_1369 = 1369; + value_1370 = 1370; + value_1371 = 1371; + value_1372 = 1372; + value_1373 = 1373; + value_1374 = 1374; + value_1375 = 1375; + value_1376 = 1376; + value_1377 = 1377; + value_1378 = 1378; + value_1379 = 1379; + value_1380 = 1380; + value_1381 = 1381; + value_1382 = 1382; + value_1383 = 1383; + value_1384 = 1384; + value_1385 = 1385; + value_1386 = 1386; + value_1387 = 1387; + value_1388 = 1388; + value_1389 = 1389; + value_1390 = 1390; + value_1391 = 1391; + value_1392 = 1392; + value_1393 = 1393; + value_1394 = 1394; + value_1395 = 1395; + value_1396 = 1396; + value_1397 = 1397; + value_1398 = 1398; + value_1399 = 1399; + value_1400 = 1400; + value_1401 = 1401; + value_1402 = 1402; + value_1403 = 1403; + value_1404 = 1404; + value_1405 = 1405; + value_1406 = 1406; + value_1407 = 1407; + value_1408 = 1408; + value_1409 = 1409; + value_1410 = 1410; + value_1411 = 1411; + value_1412 = 1412; + value_1413 = 1413; + value_1414 = 1414; + value_1415 = 1415; + value_1416 = 1416; + value_1417 = 1417; + value_1418 = 1418; + value_1419 = 1419; + value_1420 = 1420; + value_1421 = 1421; + value_1422 = 1422; + value_1423 = 1423; + value_1424 = 1424; + value_1425 = 1425; + value_1426 = 1426; + value_1427 = 1427; + value_1428 = 1428; + value_1429 = 1429; + value_1430 = 1430; + value_1431 = 1431; + value_1432 = 1432; + value_1433 = 1433; + value_1434 = 1434; + value_1435 = 1435; + value_1436 = 1436; + value_1437 = 1437; + value_1438 = 1438; + value_1439 = 1439; + value_1440 = 1440; + value_1441 = 1441; + value_1442 = 1442; + value_1443 = 1443; + value_1444 = 1444; + 
value_1445 = 1445; + value_1446 = 1446; + value_1447 = 1447; + value_1448 = 1448; + value_1449 = 1449; + value_1450 = 1450; + value_1451 = 1451; + value_1452 = 1452; + value_1453 = 1453; + value_1454 = 1454; + value_1455 = 1455; + value_1456 = 1456; + value_1457 = 1457; + value_1458 = 1458; + value_1459 = 1459; + value_1460 = 1460; + value_1461 = 1461; + value_1462 = 1462; + value_1463 = 1463; + value_1464 = 1464; + value_1465 = 1465; + value_1466 = 1466; + value_1467 = 1467; + value_1468 = 1468; + value_1469 = 1469; + value_1470 = 1470; + value_1471 = 1471; + value_1472 = 1472; + value_1473 = 1473; + value_1474 = 1474; + value_1475 = 1475; + value_1476 = 1476; + value_1477 = 1477; + value_1478 = 1478; + value_1479 = 1479; + value_1480 = 1480; + value_1481 = 1481; + value_1482 = 1482; + value_1483 = 1483; + value_1484 = 1484; + value_1485 = 1485; + value_1486 = 1486; + value_1487 = 1487; + value_1488 = 1488; + value_1489 = 1489; + value_1490 = 1490; + value_1491 = 1491; + value_1492 = 1492; + value_1493 = 1493; + value_1494 = 1494; + value_1495 = 1495; + value_1496 = 1496; + value_1497 = 1497; + value_1498 = 1498; + value_1499 = 1499; + value_1500 = 1500; + value_1501 = 1501; + value_1502 = 1502; + value_1503 = 1503; + value_1504 = 1504; + value_1505 = 1505; + value_1506 = 1506; + value_1507 = 1507; + value_1508 = 1508; + value_1509 = 1509; + value_1510 = 1510; + value_1511 = 1511; + value_1512 = 1512; + value_1513 = 1513; + value_1514 = 1514; + value_1515 = 1515; + value_1516 = 1516; + value_1517 = 1517; + value_1518 = 1518; + value_1519 = 1519; + value_1520 = 1520; + value_1521 = 1521; + value_1522 = 1522; + value_1523 = 1523; + value_1524 = 1524; + value_1525 = 1525; + value_1526 = 1526; + value_1527 = 1527; + value_1528 = 1528; + value_1529 = 1529; + value_1530 = 1530; + value_1531 = 1531; + value_1532 = 1532; + value_1533 = 1533; + value_1534 = 1534; + value_1535 = 1535; + value_1536 = 1536; + value_1537 = 1537; + value_1538 = 1538; + value_1539 = 1539; + value_1540 = 1540; + value_1541 = 1541; + value_1542 = 1542; + value_1543 = 1543; + value_1544 = 1544; + value_1545 = 1545; + value_1546 = 1546; + value_1547 = 1547; + value_1548 = 1548; + value_1549 = 1549; + value_1550 = 1550; + value_1551 = 1551; + value_1552 = 1552; + value_1553 = 1553; + value_1554 = 1554; + value_1555 = 1555; + value_1556 = 1556; + value_1557 = 1557; + value_1558 = 1558; + value_1559 = 1559; + value_1560 = 1560; + value_1561 = 1561; + value_1562 = 1562; + value_1563 = 1563; + value_1564 = 1564; + value_1565 = 1565; + value_1566 = 1566; + value_1567 = 1567; + value_1568 = 1568; + value_1569 = 1569; + value_1570 = 1570; + value_1571 = 1571; + value_1572 = 1572; + value_1573 = 1573; + value_1574 = 1574; + value_1575 = 1575; + } + Value value = 1; + Sentence inner = 2; +} +message Sentence { + repeated Word words = 1; +}

From 811ba86a20c67a6cebb70ae0574f9d124e3678ec Mon Sep 17 00:00:00 2001
From: Nikita Mikhaylov
Date: Tue, 4 Jul 2023 00:24:35 +0200
Subject: [PATCH 1143/1997] Remove garbage

---
 .../fuzzers/codegen_fuzzer/clickhouse.g      | 1592 ----
 src/Parsers/fuzzers/codegen_fuzzer/out.cpp   | 6461 -----------------
 src/Parsers/fuzzers/codegen_fuzzer/out.proto | 1587 ----
 3 files changed, 9640 deletions(-)
 delete mode 100644 src/Parsers/fuzzers/codegen_fuzzer/clickhouse.g
 delete mode 100644 src/Parsers/fuzzers/codegen_fuzzer/out.cpp
 delete mode 100644 src/Parsers/fuzzers/codegen_fuzzer/out.proto

diff --git a/src/Parsers/fuzzers/codegen_fuzzer/clickhouse.g b/src/Parsers/fuzzers/codegen_fuzzer/clickhouse.g
deleted file
mode 100644 index 0ae74055eda..00000000000 --- a/src/Parsers/fuzzers/codegen_fuzzer/clickhouse.g +++ /dev/null @@ -1,1592 +0,0 @@ -" "; -" "; -" "; -";"; - - -"(" $1 ")"; -"(" $1 ", " $2 ")"; -"(" $1 ", " $2 ", " $3 ")"; - -$1 ", " $2 ; -$1 ", " $2 ", " $3 ; -$1 ", " $2 ", " $3 ", " $4 ; -$1 ", " $2 ", " $3 ", " $4 ", " $5 ; - -"[" $1 ", " $2 "]"; -"[" $1 ", " $2 ", " $3 "]"; -"[" $1 ", " $2 ", " $3 ", " $4 "]"; -"[" $1 ", " $2 ", " $3 ", " $4 ", " $5 "]"; - -$0 "(" $1 ")"; -$0 "(" $1 ", " $2 ")"; -$0 "(" $1 ", " $2 ", " $3 ")"; - -$1 " as " $2 ; - - -// TODO: add more clickhouse specific stuff -"SELECT " $1 " FROM " $2 " WHERE " $3 ; -"SELECT " $1 " FROM " $2 " GROUP BY " $3 ; -"SELECT " $1 " FROM " $2 " SORT BY " $3 ; -"SELECT " $1 " FROM " $2 " LIMIT " $3 ; -"SELECT " $1 " FROM " $2 " JOIN " $3 ; -"SELECT " $1 " FROM " $2 " ARRAY JOIN " $3 ; -"SELECT " $1 " FROM " $2 " JOIN " $3 " ON " $4 ; -"SELECT " $1 " FROM " $2 " JOIN " $3 " USING " $5 ; -"SELECT " $1 " INTO OUTFILE " $2 ; - -"WITH " $1 " AS " $2 ; - -"{" $1 ":" $2 "}"; -"[" $1 "," $2 "]"; -"[]"; - - -" x "; -"x"; -" `x` "; -"`x`"; - -" \"value\" "; -"\"value\""; -" 0 "; -"0"; -"1"; -"2"; -"123123123123123123"; -"182374019873401982734091873420923123123123123123"; -"1e-1"; -"1.1"; -"\"\""; -" '../../../../../../../../../etc/passwd' "; - -"/"; -"="; -"=="; -"!="; -"<>"; -"<"; -"<="; -">"; -">="; -"<<"; -"|<<"; -"&"; -"|"; -"||"; -"<|"; -"|>"; -"+"; -"-"; -"~"; -"*"; -"/"; -"\\"; -"%"; -""; -"."; -","; -","; -","; -","; -","; -","; -"("; -")"; -"("; -")"; -"("; -")"; -"("; -")"; -"("; -")"; -"("; -")"; -"?"; -":"; -"@"; -"@@"; -"$"; -"\""; -"`"; -"{"; -"}"; -"^"; -"::"; -"->"; -"]"; -"["; - -" abs "; -" accurate_Cast "; -" accurateCast "; -" accurate_CastOrNull "; -" accurateCastOrNull "; -" acos "; -" acosh "; -" ADD "; -" ADD COLUMN "; -" ADD CONSTRAINT "; -" addDays "; -" addHours "; -" ADD INDEX "; -" addMinutes "; -" addMonths "; -" addQuarters "; -" addressToLine "; -" addressToSymbol "; -" addSeconds "; -" addWeeks "; -" addYears "; -" aes_decrypt_mysql "; -" aes_encrypt_mysql "; -" AFTER "; -" AggregateFunction "; -" aggThrow "; -" ALIAS "; -" ALL "; -" alphaTokens "; -" ALTER "; -" ALTER LIVE VIEW "; -" ALTER TABLE "; -" and "; -" AND "; -" ANTI "; -" any "; -" ANY "; -" anyHeavy "; -" anyLast "; -" appendTrailingCharIfAbsent "; -" argMax "; -" argMin "; -" array "; -" Array "; -" ARRAY "; -" arrayAll "; -" arrayAUC "; -" arrayAvg "; -" arrayCompact "; -" arrayConcat "; -" arrayCount "; -" arrayCumSum "; -" arrayCumSumNonNegative "; -" arrayDifference "; -" arrayDistinct "; -" arrayElement "; -" arrayEnumerate "; -" arrayEnumerateDense "; -" arrayEnumerateDenseRanked "; -" arrayEnumerateUniq "; -" arrayEnumerateUniqRanked "; -" arrayExists "; -" arrayFill "; -" arrayFilter "; -" arrayFirst "; -" arrayFirstIndex "; -" arrayFlatten "; -" arrayIntersect "; -" arrayJoin "; -" ARRAY JOIN "; -" arrayMap "; -" arrayMax "; -" arrayMin "; -" arrayPartialReverseSort "; -" arrayPartialShuffle "; -" arrayPartialSort "; -" arrayPopBack "; -" arrayPopFront "; -" arrayProduct "; -" arrayPushBack "; -" arrayPushFront "; -" arrayReduce "; -" arrayReduceInRanges "; -" arrayResize "; -" arrayReverse "; -" arrayReverseFill "; -" arrayReverseSort "; -" arrayReverseSplit "; -" arrayShuffle "; -" arraySlice "; -" arraySort "; -" arraySplit "; -" arrayStringConcat "; -" arraySum "; -" arrayUniq "; -" arrayWithConstant "; -" arrayZip "; -" AS "; -" ASC "; -" ASCENDING "; -" asin "; -" asinh "; -" ASOF "; -" assumeNotNull "; -" AST "; -" ASYNC "; -" 
atan "; -" atan2 "; -" atanh "; -" ATTACH "; -" ATTACH PART "; -" ATTACH PARTITION "; -" avg "; -" avgWeighted "; -" bar "; -" base64Decode "; -" base64Encode "; -" basename "; -" bayesAB "; -" BETWEEN "; -" BIGINT "; -" BIGINT SIGNED "; -" BIGINT UNSIGNED "; -" bin "; -" BINARY "; -" BINARY LARGE OBJECT "; -" BINARY VARYING "; -" bitAnd "; -" BIT_AND "; -" __bitBoolMaskAnd "; -" __bitBoolMaskOr "; -" bitCount "; -" bitHammingDistance "; -" bitmapAnd "; -" bitmapAndCardinality "; -" bitmapAndnot "; -" bitmapAndnotCardinality "; -" bitmapBuild "; -" bitmapCardinality "; -" bitmapContains "; -" bitmapHasAll "; -" bitmapHasAny "; -" bitmapMax "; -" bitmapMin "; -" bitmapOr "; -" bitmapOrCardinality "; -" bitmapSubsetInRange "; -" bitmapSubsetLimit "; -" bitmapToArray "; -" bitmapTransform "; -" bitmapXor "; -" bitmapXorCardinality "; -" bitmaskToArray "; -" bitmaskToList "; -" bitNot "; -" bitOr "; -" BIT_OR "; -" bitPositionsToArray "; -" bitRotateLeft "; -" bitRotateRight "; -" bitShiftLeft "; -" bitShiftRight "; -" __bitSwapLastTwo "; -" bitTest "; -" bitTestAll "; -" bitTestAny "; -" __bitWrapperFunc "; -" bitXor "; -" BIT_XOR "; -" BLOB "; -" blockNumber "; -" blockSerializedSize "; -" blockSize "; -" BOOL "; -" BOOLEAN "; -" BOTH "; -" boundingRatio "; -" buildId "; -" BY "; -" BYTE "; -" BYTEA "; -" byteSize "; -" CASE "; -" caseWithExpr "; -" caseWithExpression "; -" caseWithoutExpr "; -" caseWithoutExpression "; -" _CAST "; -" CAST "; -" categoricalInformationValue "; -" cbrt "; -" ceil "; -" ceiling "; -" char "; -" CHAR "; -" CHARACTER "; -" CHARACTER LARGE OBJECT "; -" CHARACTER_LENGTH "; -" CHARACTER VARYING "; -" CHAR LARGE OBJECT "; -" CHAR_LENGTH "; -" CHAR VARYING "; -" CHECK "; -" CHECK TABLE "; -" cityHash64 "; -" CLEAR "; -" CLEAR COLUMN "; -" CLEAR INDEX "; -" CLOB "; -" CLUSTER "; -" coalesce "; -" CODEC "; -" COLLATE "; -" COLUMN "; -" COLUMNS "; -" COMMENT "; -" COMMENT COLUMN "; -" concat "; -" concatAssumeInjective "; -" connection_id "; -" connectionid "; -" connectionId "; -" CONSTRAINT "; -" convertCharset "; -" corr "; -" corrStable "; -" cos "; -" cosh "; -" count "; -" countDigits "; -" countEqual "; -" countMatches "; -" countMatchesCaseInsensitive "; -" countSubstrings "; -" countSubstringsCaseInsensitive "; -" countSubstringsCaseInsensitiveUTF8 "; -" covarPop "; -" COVAR_POP "; -" covarPopStable "; -" covarSamp "; -" COVAR_SAMP "; -" covarSampStable "; -" CRC32 "; -" CRC32IEEE "; -" CRC64 "; -" CREATE "; -" CROSS "; -" CUBE "; -" currentDatabase "; -" currentProfiles "; -" currentRoles "; -" currentUser "; -" cutFragment "; -" cutIPv6 "; -" cutQueryString "; -" cutQueryStringAndFragment "; -" cutToFirstSignificantSubdomain "; -" cutToFirstSignificantSubdomainCustom "; -" cutToFirstSignificantSubdomainCustomWithWWW "; -" cutToFirstSignificantSubdomainWithWWW "; -" cutURLParameter "; -" cutWWW "; -" D "; -" DATABASE "; -" DATABASES "; -" Date "; -" DATE "; -" Date32 "; -" DATE_ADD "; -" DATEADD "; -" dateDiff "; -" DATE_DIFF "; -" DATEDIFF "; -" dateName "; -" DATE_SUB "; -" DATESUB "; -" DateTime "; -" DateTime32 "; -" DateTime64 "; -" dateTime64ToSnowflake "; -" dateTimeToSnowflake "; -" date_trunc "; -" dateTrunc "; -" DAY "; -" DAYOFMONTH "; -" DAYOFWEEK "; -" DAYOFYEAR "; -" DD "; -" DEC "; -" Decimal "; -" Decimal128 "; -" Decimal256 "; -" Decimal32 "; -" Decimal64 "; -" decodeURLComponent "; -" decodeXMLComponent "; -" decrypt "; -" DEDUPLICATE "; -" DEFAULT "; -" defaultProfiles "; -" defaultRoles "; -" defaultValueOfArgumentType "; -" 
defaultValueOfTypeName "; -" DELAY "; -" DELETE "; -" DELETE WHERE "; -" deltaSum "; -" deltaSumTimestamp "; -" demangle "; -" dense_rank "; -" DESC "; -" DESCENDING "; -" DESCRIBE "; -" DETACH "; -" DETACH PARTITION "; -" dictGet "; -" dictGetChildren "; -" dictGetDate "; -" dictGetDateOrDefault "; -" dictGetDateTime "; -" dictGetDateTimeOrDefault "; -" dictGetDescendants "; -" dictGetFloat32 "; -" dictGetFloat32OrDefault "; -" dictGetFloat64 "; -" dictGetFloat64OrDefault "; -" dictGetHierarchy "; -" dictGetInt16 "; -" dictGetInt16OrDefault "; -" dictGetInt32 "; -" dictGetInt32OrDefault "; -" dictGetInt64 "; -" dictGetInt64OrDefault "; -" dictGetInt8 "; -" dictGetInt8OrDefault "; -" dictGetOrDefault "; -" dictGetOrNull "; -" dictGetString "; -" dictGetStringOrDefault "; -" dictGetUInt16 "; -" dictGetUInt16OrDefault "; -" dictGetUInt32 "; -" dictGetUInt32OrDefault "; -" dictGetUInt64 "; -" dictGetUInt64OrDefault "; -" dictGetUInt8 "; -" dictGetUInt8OrDefault "; -" dictGetUUID "; -" dictGetUUIDOrDefault "; -" dictHas "; -" DICTIONARIES "; -" DICTIONARY "; -" dictIsIn "; -" DISK "; -" DISTINCT "; -" DISTRIBUTED "; -" divide "; -" domain "; -" domainWithoutWWW "; -" DOUBLE "; -" DOUBLE PRECISION "; -" DROP "; -" DROP COLUMN "; -" DROP CONSTRAINT "; -" DROP DETACHED PART "; -" DROP DETACHED PARTITION "; -" DROP INDEX "; -" DROP PARTITION "; -" dumpColumnStructure "; -" e "; -" ELSE "; -" empty "; -" emptyArrayDate "; -" emptyArrayDateTime "; -" emptyArrayFloat32 "; -" emptyArrayFloat64 "; -" emptyArrayInt16 "; -" emptyArrayInt32 "; -" emptyArrayInt64 "; -" emptyArrayInt8 "; -" emptyArrayString "; -" emptyArrayToSingle "; -" emptyArrayUInt16 "; -" emptyArrayUInt32 "; -" emptyArrayUInt64 "; -" emptyArrayUInt8 "; -" enabledProfiles "; -" enabledRoles "; -" encodeXMLComponent "; -" encrypt "; -" END "; -" endsWith "; -" ENGINE "; -" entropy "; -" Enum "; -" ENUM "; -" Enum16 "; -" Enum8 "; -" equals "; -" erf "; -" erfc "; -" errorCodeToName "; -" evalMLMethod "; -" EVENTS "; -" EXCHANGE TABLES "; -" EXISTS "; -" exp "; -" exp10 "; -" exp2 "; -" EXPLAIN "; -" exponentialMovingAverage "; -" EXPRESSION "; -" extract "; -" EXTRACT "; -" extractAll "; -" extractAllGroups "; -" extractAllGroupsHorizontal "; -" extractAllGroupsVertical "; -" extractGroups "; -" extractTextFromHTML "; -" extractURLParameter "; -" extractURLParameterNames "; -" extractURLParameters "; -" farmFingerprint64 "; -" farmHash64 "; -" FETCHES "; -" FETCH PART "; -" FETCH PARTITION "; -" file "; -" filesystemAvailable "; -" filesystemCapacity "; -" filesystemFree "; -" FINAL "; -" finalizeAggregation "; -" FIRST "; -" firstSignificantSubdomain "; -" firstSignificantSubdomainCustom "; -" first_value "; -" FIXED "; -" FixedString "; -" flatten "; -" FLOAT "; -" Float32 "; -" Float64 "; -" floor "; -" FLUSH "; -" FOR "; -" ForEach "; -" format "; -" FORMAT "; -" formatDateTime "; -" formatReadableQuantity "; -" formatReadableDecimalSize "; -" formatReadableSize "; -" formatReadableTimeDelta "; -" formatRow "; -" formatRowNoNewline "; -" FQDN "; -" fragment "; -" FREEZE "; -" FROM "; -" FROM_BASE64 "; -" fromModifiedJulianDay "; -" fromModifiedJulianDayOrNull "; -" FROM_UNIXTIME "; -" fromUnixTimestamp "; -" fromUnixTimestamp64Micro "; -" fromUnixTimestamp64Milli "; -" fromUnixTimestamp64Nano "; -" FULL "; -" fullHostName "; -" FUNCTION "; -" fuzzBits "; -" gccMurmurHash "; -" gcd "; -" generateUUIDv4 "; -" geoDistance "; -" geohashDecode "; -" geohashEncode "; -" geohashesInBox "; -" geoToH3 "; -" geoToS2 "; -" getMacro "; -" 
__getScalar "; -" getServerPort "; -" getSetting "; -" getSizeOfEnumType "; -" GLOBAL "; -" globalIn "; -" globalInIgnoreSet "; -" globalNotIn "; -" globalNotInIgnoreSet "; -" globalNotNullIn "; -" globalNotNullInIgnoreSet "; -" globalNullIn "; -" globalNullInIgnoreSet "; -" globalVariable "; -" GRANULARITY "; -" greatCircleAngle "; -" greatCircleDistance "; -" greater "; -" greaterOrEquals "; -" greatest "; -" GROUP "; -" groupArray "; -" groupArrayInsertAt "; -" groupArrayMovingAvg "; -" groupArrayMovingSum "; -" groupArraySample "; -" groupBitAnd "; -" groupBitmap "; -" groupBitmapAnd "; -" groupBitmapOr "; -" groupBitmapXor "; -" groupBitOr "; -" groupBitXor "; -" GROUP BY "; -" groupUniqArray "; -" h3EdgeAngle "; -" h3EdgeLengthM "; -" h3GetBaseCell "; -" h3GetFaces "; -" h3GetResolution "; -" h3HexAreaM2 "; -" h3IndexesAreNeighbors "; -" h3IsPentagon "; -" h3IsResClassIII "; -" h3IsValid "; -" h3kRing "; -" h3ToChildren "; -" h3ToGeo "; -" h3ToGeoBoundary "; -" h3ToParent "; -" h3ToString "; -" halfMD5 "; -" has "; -" hasAll "; -" hasAny "; -" hasColumnInTable "; -" hasSubstr "; -" hasThreadFuzzer "; -" hasToken "; -" hasTokenCaseInsensitive "; -" HAVING "; -" hex "; -" HH "; -" HIERARCHICAL "; -" histogram "; -" hiveHash "; -" hostname "; -" hostName "; -" HOUR "; -" hypot "; -" ID "; -" identity "; -" if "; -" IF "; -" IF EXISTS "; -" IF NOT EXISTS "; -" ifNotFinite "; -" ifNull "; -" ignore "; -" ilike "; -" ILIKE "; -" in "; -" IN "; -" INDEX "; -" indexHint "; -" indexOf "; -" INET4 "; -" INET6 "; -" INET6_ATON "; -" INET6_NTOA "; -" INET_ATON "; -" INET_NTOA "; -" INF "; -" inIgnoreSet "; -" initializeAggregation "; -" initial_query_id "; -" initialQueryID "; -" INJECTIVE "; -" INNER "; -" IN PARTITION "; -" INSERT "; -" INSERT INTO "; -" INT "; -" INT1 "; -" Int128 "; -" Int16 "; -" INT1 SIGNED "; -" INT1 UNSIGNED "; -" Int256 "; -" Int32 "; -" Int64 "; -" Int8 "; -" intDiv "; -" intDivOrZero "; -" INTEGER "; -" INTEGER SIGNED "; -" INTEGER UNSIGNED "; -" INTERVAL "; -" IntervalDay "; -" IntervalHour "; -" intervalLengthSum "; -" IntervalMinute "; -" IntervalMonth "; -" IntervalQuarter "; -" IntervalSecond "; -" IntervalWeek "; -" IntervalYear "; -" intExp10 "; -" intExp2 "; -" intHash32 "; -" intHash64 "; -" INTO "; -" INTO OUTFILE "; -" INT SIGNED "; -" INT UNSIGNED "; -" IPv4 "; -" IPv4CIDRToRange "; -" IPv4NumToString "; -" IPv4NumToStringClassC "; -" IPv4StringToNum "; -" IPv4ToIPv6 "; -" IPv6 "; -" IPv6CIDRToRange "; -" IPv6NumToString "; -" IPv6StringToNum "; -" IS "; -" isConstant "; -" isDecimalOverflow "; -" isFinite "; -" isInfinite "; -" isIPAddressInRange "; -" isIPv4String "; -" isIPv6String "; -" isNaN "; -" isNotNull "; -" isNull "; -" IS_OBJECT_ID "; -" isValidJSON "; -" isValidUTF8 "; -" isZeroOrNull "; -" javaHash "; -" javaHashUTF16LE "; -" JOIN "; -" joinGet "; -" joinGetOrNull "; -" JSON_EXISTS "; -" JSONExtract "; -" JSONExtractArrayRaw "; -" JSONExtractBool "; -" JSONExtractFloat "; -" JSONExtractInt "; -" JSONExtractKeysAndValues "; -" JSONExtractKeysAndValuesRaw "; -" JSONExtractKeys "; -" JSONExtractRaw "; -" JSONExtractString "; -" JSONExtractUInt "; -" JSONHas "; -" JSONKey "; -" JSONLength "; -" JSON_QUERY "; -" JSONType "; -" JSON_VALUE "; -" jumpConsistentHash "; -" KEY "; -" KILL "; -" kurtPop "; -" kurtSamp "; -" lagInFrame "; -" LAST "; -" last_value "; -" LAYOUT "; -" lcase "; -" lcm "; -" leadInFrame "; -" LEADING "; -" least "; -" LEFT "; -" LEFT ARRAY JOIN "; -" leftPad "; -" leftPadUTF8 "; -" lemmatize "; -" length "; -" lengthUTF8 "; -" 
less "; -" lessOrEquals "; -" lgamma "; -" LIFETIME "; -" like "; -" LIKE "; -" LIMIT "; -" LIVE "; -" ln "; -" LOCAL "; -" locate "; -" log "; -" log10 "; -" log1p "; -" log2 "; -" LOGS "; -" logTrace "; -" LONGBLOB "; -" LONGTEXT "; -" LowCardinality "; -" lowCardinalityIndices "; -" lowCardinalityKeys "; -" lower "; -" lowerUTF8 "; -" lpad "; -" LTRIM "; -" M "; -" MACNumToString "; -" MACStringToNum "; -" MACStringToOUI "; -" mannWhitneyUTest "; -" map "; -" Map "; -" mapAdd "; -" mapContains "; -" mapKeys "; -" mapPopulateSeries "; -" mapSubtract "; -" mapValues "; -" match "; -" materialize "; -" MATERIALIZE "; -" MATERIALIZED "; -" MATERIALIZE INDEX "; -" MATERIALIZE TTL "; -" max "; -" MAX "; -" maxIntersections "; -" maxIntersectionsPosition "; -" maxMap "; -" MD4 "; -" MD5 "; -" median "; -" medianBFloat16 "; -" medianBFloat16Weighted "; -" medianDeterministic "; -" medianExact "; -" medianExactHigh "; -" medianExactLow "; -" medianExactWeighted "; -" medianTDigest "; -" medianTDigestWeighted "; -" medianTiming "; -" medianTimingWeighted "; -" MEDIUMBLOB "; -" MEDIUMINT "; -" MEDIUMINT SIGNED "; -" MEDIUMINT UNSIGNED "; -" MEDIUMTEXT "; -" Merge "; -" MERGES "; -" metroHash64 "; -" MI "; -" mid "; -" min "; -" MIN "; -" minMap "; -" minus "; -" MINUTE "; -" MM "; -" mod "; -" MODIFY "; -" MODIFY COLUMN "; -" MODIFY ORDER BY "; -" MODIFY QUERY "; -" MODIFY SETTING "; -" MODIFY TTL "; -" modulo "; -" moduloLegacy "; -" moduloOrZero "; -" MONTH "; -" MOVE "; -" MOVE PART "; -" MOVE PARTITION "; -" movingXXX "; -" multiFuzzyMatchAllIndices "; -" multiFuzzyMatchAny "; -" multiFuzzyMatchAnyIndex "; -" multiIf "; -" multiMatchAllIndices "; -" multiMatchAny "; -" multiMatchAnyIndex "; -" multiply "; -" MultiPolygon "; -" multiSearchAllPositions "; -" multiSearchAllPositionsCaseInsensitive "; -" multiSearchAllPositionsCaseInsensitiveUTF8 "; -" multiSearchAllPositionsUTF8 "; -" multiSearchAny "; -" multiSearchAnyCaseInsensitive "; -" multiSearchAnyCaseInsensitiveUTF8 "; -" multiSearchAnyUTF8 "; -" multiSearchFirstIndex "; -" multiSearchFirstIndexCaseInsensitive "; -" multiSearchFirstIndexCaseInsensitiveUTF8 "; -" multiSearchFirstIndexUTF8 "; -" multiSearchFirstPosition "; -" multiSearchFirstPositionCaseInsensitive "; -" multiSearchFirstPositionCaseInsensitiveUTF8 "; -" multiSearchFirstPositionUTF8 "; -" murmurHash2_32 "; -" murmurHash2_64 "; -" murmurHash3_128 "; -" murmurHash3_32 "; -" murmurHash3_64 "; -" MUTATION "; -" N "; -" NAME "; -" NAN_SQL "; -" NATIONAL CHAR "; -" NATIONAL CHARACTER "; -" NATIONAL CHARACTER LARGE OBJECT "; -" NATIONAL CHARACTER VARYING "; -" NATIONAL CHAR VARYING "; -" NCHAR "; -" NCHAR LARGE OBJECT "; -" NCHAR VARYING "; -" negate "; -" neighbor "; -" Nested "; -" netloc "; -" ngramDistance "; -" ngramDistanceCaseInsensitive "; -" ngramDistanceCaseInsensitiveUTF8 "; -" ngramDistanceUTF8 "; -" ngramMinHash "; -" ngramMinHashArg "; -" ngramMinHashArgCaseInsensitive "; -" ngramMinHashArgCaseInsensitiveUTF8 "; -" ngramMinHashArgUTF8 "; -" ngramMinHashCaseInsensitive "; -" ngramMinHashCaseInsensitiveUTF8 "; -" ngramMinHashUTF8 "; -" ngramSearch "; -" ngramSearchCaseInsensitive "; -" ngramSearchCaseInsensitiveUTF8 "; -" ngramSearchUTF8 "; -" ngramSimHash "; -" ngramSimHashCaseInsensitive "; -" ngramSimHashCaseInsensitiveUTF8 "; -" ngramSimHashUTF8 "; -" NO "; -" NO DELAY "; -" NONE "; -" normalizedQueryHash "; -" normalizedQueryHashKeepNames "; -" normalizeQuery "; -" normalizeQueryKeepNames "; -" not "; -" NOT "; -" notEmpty "; -" notEquals "; -" nothing "; -" 
Nothing "; -" notILike "; -" notIn "; -" notInIgnoreSet "; -" notLike "; -" notNullIn "; -" notNullInIgnoreSet "; -" now "; -" now64 "; -" Null "; -" Nullable "; -" nullIf "; -" nullIn "; -" nullInIgnoreSet "; -" NULLS "; -" NULL_SQL "; -" NUMERIC "; -" NVARCHAR "; -" OFFSET "; -" ON "; -" ONLY "; -" OPTIMIZE "; -" OPTIMIZE TABLE "; -" or "; -" OR "; -" ORDER "; -" ORDER BY "; -" OR REPLACE "; -" OUTER "; -" OUTFILE "; -" parseDateTime32BestEffort "; -" parseDateTime32BestEffortOrNull "; -" parseDateTime32BestEffortOrZero "; -" parseDateTime64BestEffort "; -" parseDateTime64BestEffortOrNull "; -" parseDateTime64BestEffortOrZero "; -" parseDateTimeBestEffort "; -" parseDateTimeBestEffortOrNull "; -" parseDateTimeBestEffortOrZero "; -" parseDateTimeBestEffortUS "; -" parseDateTimeBestEffortUSOrNull "; -" parseDateTimeBestEffortUSOrZero "; -" parseTimeDelta "; -" PARTITION "; -" PARTITION BY "; -" partitionId "; -" path "; -" pathFull "; -" pi "; -" plus "; -" Point "; -" pointInEllipses "; -" pointInPolygon "; -" Polygon "; -" polygonAreaCartesian "; -" polygonAreaSpherical "; -" polygonConvexHullCartesian "; -" polygonPerimeterCartesian "; -" polygonPerimeterSpherical "; -" polygonsDistanceCartesian "; -" polygonsDistanceSpherical "; -" polygonsEqualsCartesian "; -" polygonsIntersectionCartesian "; -" polygonsIntersectionSpherical "; -" polygonsSymDifferenceCartesian "; -" polygonsSymDifferenceSpherical "; -" polygonsUnionCartesian "; -" polygonsUnionSpherical "; -" polygonsWithinCartesian "; -" polygonsWithinSpherical "; -" POPULATE "; -" port "; -" position "; -" positionCaseInsensitive "; -" positionCaseInsensitiveUTF8 "; -" positionUTF8 "; -" pow "; -" power "; -" PREWHERE "; -" PRIMARY "; -" PRIMARY KEY "; -" PROJECTION "; -" protocol "; -" Q "; -" QQ "; -" quantile "; -" quantileBFloat16 "; -" quantileBFloat16Weighted "; -" quantileDeterministic "; -" quantileExact "; -" quantileExactExclusive "; -" quantileExactHigh "; -" quantileExactInclusive "; -" quantileExactLow "; -" quantileExactWeighted "; -" quantiles "; -" quantilesBFloat16 "; -" quantilesBFloat16Weighted "; -" quantilesDeterministic "; -" quantilesExact "; -" quantilesExactExclusive "; -" quantilesExactHigh "; -" quantilesExactInclusive "; -" quantilesExactLow "; -" quantilesExactWeighted "; -" quantilesTDigest "; -" quantilesTDigestWeighted "; -" quantilesTiming "; -" quantilesTimingWeighted "; -" quantileTDigest "; -" quantileTDigestWeighted "; -" quantileTiming "; -" quantileTimingWeighted "; -" QUARTER "; -" query_id "; -" queryID "; -" queryString "; -" queryStringAndFragment "; -" rand "; -" rand32 "; -" rand64 "; -" randConstant "; -" randomFixedString "; -" randomPrintableASCII "; -" randomString "; -" randomStringUTF8 "; -" range "; -" RANGE "; -" rank "; -" rankCorr "; -" readWKTMultiPolygon "; -" readWKTPoint "; -" readWKTPolygon "; -" readWKTRing "; -" REAL "; -" REFRESH "; -" regexpQuoteMeta "; -" regionHierarchy "; -" regionIn "; -" regionToArea "; -" regionToCity "; -" regionToContinent "; -" regionToCountry "; -" regionToDistrict "; -" regionToName "; -" regionToPopulation "; -" regionToTopContinent "; -" reinterpret "; -" reinterpretAsDate "; -" reinterpretAsDateTime "; -" reinterpretAsFixedString "; -" reinterpretAsFloat32 "; -" reinterpretAsFloat64 "; -" reinterpretAsInt128 "; -" reinterpretAsInt16 "; -" reinterpretAsInt256 "; -" reinterpretAsInt32 "; -" reinterpretAsInt64 "; -" reinterpretAsInt8 "; -" reinterpretAsString "; -" reinterpretAsUInt128 "; -" reinterpretAsUInt16 "; -" reinterpretAsUInt256 "; 
-" reinterpretAsUInt32 "; -" reinterpretAsUInt64 "; -" reinterpretAsUInt8 "; -" reinterpretAsUUID "; -" RELOAD "; -" REMOVE "; -" RENAME "; -" RENAME COLUMN "; -" RENAME TABLE "; -" repeat "; -" replace "; -" REPLACE "; -" replaceAll "; -" replaceOne "; -" REPLACE PARTITION "; -" replaceRegexpAll "; -" replaceRegexpOne "; -" REPLICA "; -" replicate "; -" REPLICATED "; -" Resample "; -" RESUME "; -" retention "; -" reverse "; -" reverseUTF8 "; -" RIGHT "; -" rightPad "; -" rightPadUTF8 "; -" Ring "; -" ROLLUP "; -" round "; -" roundAge "; -" roundBankers "; -" roundDown "; -" roundDuration "; -" roundToExp2 "; -" row_number "; -" rowNumberInAllBlocks "; -" rowNumberInBlock "; -" rpad "; -" RTRIM "; -" runningAccumulate "; -" runningConcurrency "; -" runningDifference "; -" runningDifferenceStartingWithFirstValue "; -" S "; -" s2CapContains "; -" s2CapUnion "; -" s2CellsIntersect "; -" s2GetNeighbors "; -" s2RectAdd "; -" s2RectContains "; -" s2RectIntersection "; -" s2RectUnion "; -" s2ToGeo "; -" SAMPLE "; -" SAMPLE BY "; -" SECOND "; -" SELECT "; -" SEMI "; -" SENDS "; -" sequenceCount "; -" sequenceMatch "; -" sequenceNextNode "; -" serverUUID "; -" SET "; -" SETTINGS "; -" SHA1 "; -" SHA224 "; -" SHA256 "; -" SHA384 "; -" SHA512 "; -" shardCount "; -" shardNum "; -" SHOW "; -" SHOW PROCESSLIST "; -" sigmoid "; -" sign "; -" SimpleAggregateFunction "; -" simpleJSONExtractBool "; -" simpleJSONExtractFloat "; -" simpleJSONExtractInt "; -" simpleJSONExtractRaw "; -" simpleJSONExtractString "; -" simpleJSONExtractUInt "; -" simpleJSONHas "; -" simpleLinearRegression "; -" sin "; -" SINGLE "; -" singleValueOrNull "; -" sinh "; -" sipHash128 "; -" sipHash64 "; -" skewPop "; -" skewSamp "; -" sleep "; -" sleepEachRow "; -" SMALLINT "; -" SMALLINT SIGNED "; -" SMALLINT UNSIGNED "; -" snowflakeToDateTime "; -" snowflakeToDateTime64 "; -" SOURCE "; -" sparkbar "; -" splitByChar "; -" splitByNonAlpha "; -" splitByRegexp "; -" splitByString "; -" splitByWhitespace "; -" SQL_TSI_DAY "; -" SQL_TSI_HOUR "; -" SQL_TSI_MINUTE "; -" SQL_TSI_MONTH "; -" SQL_TSI_QUARTER "; -" SQL_TSI_SECOND "; -" SQL_TSI_WEEK "; -" SQL_TSI_YEAR "; -" sqrt "; -" SS "; -" START "; -" startsWith "; -" State "; -" stddevPop "; -" STDDEV_POP "; -" stddevPopStable "; -" stddevSamp "; -" STDDEV_SAMP "; -" stddevSampStable "; -" stem "; -" STEP "; -" stochasticLinearRegression "; -" stochasticLogisticRegression "; -" STOP "; -" String "; -" stringToH3 "; -" studentTTest "; -" subBitmap "; -" substr "; -" substring "; -" SUBSTRING "; -" substringUTF8 "; -" subtractDays "; -" subtractHours "; -" subtractMinutes "; -" subtractMonths "; -" subtractQuarters "; -" subtractSeconds "; -" subtractWeeks "; -" subtractYears "; -" sum "; -" sumCount "; -" sumKahan "; -" sumMap "; -" sumMapFiltered "; -" sumMapFilteredWithOverflow "; -" sumMapWithOverflow "; -" sumWithOverflow "; -" SUSPEND "; -" svg "; -" SVG "; -" SYNC "; -" synonyms "; -" SYNTAX "; -" SYSTEM "; -" TABLE "; -" TABLES "; -" tan "; -" tanh "; -" tcpPort "; -" TEMPORARY "; -" TEST "; -" TEXT "; -" tgamma "; -" THEN "; -" throwIf "; -" tid "; -" TIES "; -" TIMEOUT "; -" timeSlot "; -" timeSlots "; -" TIMESTAMP "; -" TIMESTAMP_ADD "; -" TIMESTAMPADD "; -" TIMESTAMP_DIFF "; -" TIMESTAMPDIFF "; -" TIMESTAMP_SUB "; -" TIMESTAMPSUB "; -" timezone "; -" timeZone "; -" timezoneOf "; -" timeZoneOf "; -" timezoneOffset "; -" timeZoneOffset "; -" TINYBLOB "; -" TINYINT "; -" TINYINT SIGNED "; -" TINYINT UNSIGNED "; -" TINYTEXT "; -" TO "; -" TO_BASE64 "; -" toColumnTypeName "; -" toDate "; 
-" toDate32 "; -" toDate32OrNull "; -" toDate32OrZero "; -" toDateOrNull "; -" toDateOrZero "; -" toDateTime "; -" toDateTime32 "; -" toDateTime64 "; -" toDateTime64OrNull "; -" toDateTime64OrZero "; -" toDateTimeOrNull "; -" toDateTimeOrZero "; -" today "; -" toDayOfMonth "; -" toDayOfWeek "; -" toDayOfYear "; -" toDecimal128 "; -" toDecimal128OrNull "; -" toDecimal128OrZero "; -" toDecimal256 "; -" toDecimal256OrNull "; -" toDecimal256OrZero "; -" toDecimal32 "; -" toDecimal32OrNull "; -" toDecimal32OrZero "; -" toDecimal64 "; -" toDecimal64OrNull "; -" toDecimal64OrZero "; -" TO DISK "; -" toFixedString "; -" toFloat32 "; -" toFloat32OrNull "; -" toFloat32OrZero "; -" toFloat64 "; -" toFloat64OrNull "; -" toFloat64OrZero "; -" toHour "; -" toInt128 "; -" toInt128OrNull "; -" toInt128OrZero "; -" toInt16 "; -" toInt16OrNull "; -" toInt16OrZero "; -" toInt256 "; -" toInt256OrNull "; -" toInt256OrZero "; -" toInt32 "; -" toInt32OrNull "; -" toInt32OrZero "; -" toInt64 "; -" toInt64OrNull "; -" toInt64OrZero "; -" toInt8 "; -" toInt8OrNull "; -" toInt8OrZero "; -" toIntervalDay "; -" toIntervalHour "; -" toIntervalMinute "; -" toIntervalMonth "; -" toIntervalQuarter "; -" toIntervalSecond "; -" toIntervalWeek "; -" toIntervalYear "; -" toIPv4 "; -" toIPv6 "; -" toISOWeek "; -" toISOYear "; -" toJSONString "; -" toLowCardinality "; -" toMinute "; -" toModifiedJulianDay "; -" toModifiedJulianDayOrNull "; -" toMonday "; -" toMonth "; -" toNullable "; -" TOP "; -" topK "; -" topKWeighted "; -" topLevelDomain "; -" toQuarter "; -" toRelativeDayNum "; -" toRelativeHourNum "; -" toRelativeMinuteNum "; -" toRelativeMonthNum "; -" toRelativeQuarterNum "; -" toRelativeSecondNum "; -" toRelativeWeekNum "; -" toRelativeYearNum "; -" toSecond "; -" toStartOfDay "; -" toStartOfFifteenMinutes "; -" toStartOfFiveMinutes "; -" toStartOfHour "; -" toStartOfInterval "; -" toStartOfISOYear "; -" toStartOfMinute "; -" toStartOfMonth "; -" toStartOfQuarter "; -" toStartOfSecond "; -" toStartOfTenMinutes "; -" toStartOfWeek "; -" toStartOfYear "; -" toString "; -" toStringCutToZero "; -" TO TABLE "; -" TOTALS "; -" toTime "; -" toTimezone "; -" toTimeZone "; -" toTypeName "; -" toUInt128 "; -" toUInt128OrNull "; -" toUInt128OrZero "; -" toUInt16 "; -" toUInt16OrNull "; -" toUInt16OrZero "; -" toUInt256 "; -" toUInt256OrNull "; -" toUInt256OrZero "; -" toUInt32 "; -" toUInt32OrNull "; -" toUInt32OrZero "; -" toUInt64 "; -" toUInt64OrNull "; -" toUInt64OrZero "; -" toUInt8 "; -" toUInt8OrNull "; -" toUInt8OrZero "; -" toUnixTimestamp "; -" toUnixTimestamp64Micro "; -" toUnixTimestamp64Milli "; -" toUnixTimestamp64Nano "; -" toUUID "; -" toUUIDOrNull "; -" toUUIDOrZero "; -" toValidUTF8 "; -" TO VOLUME "; -" toWeek "; -" toYear "; -" toYearWeek "; -" toYYYYMM "; -" toYYYYMMDD "; -" toYYYYMMDDhhmmss "; -" TRAILING "; -" transform "; -" TRIM "; -" trimBoth "; -" trimLeft "; -" trimRight "; -" trunc "; -" truncate "; -" TRUNCATE "; -" tryBase64Decode "; -" TTL "; -" tuple "; -" Tuple "; -" tupleElement "; -" tupleHammingDistance "; -" tupleToNameValuePairs "; -" TYPE "; -" ucase "; -" UInt128 "; -" UInt16 "; -" UInt256 "; -" UInt32 "; -" UInt64 "; -" UInt8 "; -" unbin "; -" unhex "; -" UNION "; -" uniq "; -" uniqCombined "; -" uniqCombined64 "; -" uniqExact "; -" uniqHLL12 "; -" uniqTheta "; -" uniqUpTo "; -" UPDATE "; -" upper "; -" upperUTF8 "; -" uptime "; -" URLHash "; -" URLHierarchy "; -" URLPathHierarchy "; -" USE "; -" user "; -" USING "; -" UUID "; -" UUIDNumToString "; -" UUIDStringToNum "; -" 
validateNestedArraySizes "; -" VALUES "; -" VARCHAR "; -" VARCHAR2 "; -" varPop "; -" VAR_POP "; -" varPopStable "; -" varSamp "; -" VAR_SAMP "; -" varSampStable "; -" version "; -" VIEW "; -" visibleWidth "; -" visitParamExtractBool "; -" visitParamExtractFloat "; -" visitParamExtractInt "; -" visitParamExtractRaw "; -" visitParamExtractString "; -" visitParamExtractUInt "; -" visitParamHas "; -" VOLUME "; -" WATCH "; -" week "; -" WEEK "; -" welchTTest "; -" WHEN "; -" WHERE "; -" windowFunnel "; -" WITH "; -" WITH FILL "; -" WITH TIES "; -" WK "; -" wkt "; -" wordShingleMinHash "; -" wordShingleMinHashArg "; -" wordShingleMinHashArgCaseInsensitive "; -" wordShingleMinHashArgCaseInsensitiveUTF8 "; -" wordShingleMinHashArgUTF8 "; -" wordShingleMinHashCaseInsensitive "; -" wordShingleMinHashCaseInsensitiveUTF8 "; -" wordShingleMinHashUTF8 "; -" wordShingleSimHash "; -" wordShingleSimHashCaseInsensitive "; -" wordShingleSimHashCaseInsensitiveUTF8 "; -" wordShingleSimHashUTF8 "; -" WW "; -" xor "; -" xxHash32 "; -" xxHash64 "; -" kostikConsistentHash "; -" YEAR "; -" yearweek "; -" yesterday "; -" YY "; -" YYYY "; -" zookeeperSessionUptime "; diff --git a/src/Parsers/fuzzers/codegen_fuzzer/out.cpp b/src/Parsers/fuzzers/codegen_fuzzer/out.cpp deleted file mode 100644 index 29168751d71..00000000000 --- a/src/Parsers/fuzzers/codegen_fuzzer/out.cpp +++ /dev/null @@ -1,6461 +0,0 @@ -#include -#include -#include - -#include - -#include "out.pb.h" - -void GenerateWord(const Word&, std::string&, int); - -void GenerateSentence(const Sentence& stc, std::string &s, int depth) { - for (int i = 0; i < stc.words_size(); i++ ) { - GenerateWord(stc.words(i), s, ++depth); - } -} -void GenerateWord(const Word& word, std::string &s, int depth) { - if (depth > 5) return; - - switch (word.value()) { - case 0: { - s += " "; - break; - } - case 1: { - s += " "; - break; - } - case 2: { - s += " "; - break; - } - case 3: { - s += ";"; - break; - } - case 4: { - s += "("; - if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); - s += ")"; - break; - } - case 5: { - s += "("; - if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); - s += ", "; - if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); - s += ")"; - break; - } - case 6: { - s += "("; - if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); - s += ", "; - if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); - s += ", "; - if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); - s += ")"; - break; - } - case 7: { - if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); - s += ", "; - if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); - break; - } - case 8: { - if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); - s += ", "; - if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); - s += ", "; - if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); - break; - } - case 9: { - if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); - s += ", "; - if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); - s += ", "; - if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); - s += ", "; - if (word.inner().words_size() > 3) 
GenerateWord(word.inner().words(3), s, ++depth);
-        break;
-    }
-    case 10: {
-        if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth);
-        s += ", ";
-        if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth);
-        s += ", ";
-        if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth);
-        s += ", ";
-        if (word.inner().words_size() > 3) GenerateWord(word.inner().words(3), s, ++depth);
-        s += ", ";
-        if (word.inner().words_size() > 4) GenerateWord(word.inner().words(4), s, ++depth);
-        break;
-    }
-    case 11: {
-        s += "[";
-        if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth);
-        s += ", ";
-        if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth);
-        s += "]";
-        break;
-    }
-    case 12: {
-        s += "[";
-        if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth);
-        s += ", ";
-        if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth);
-        s += ", ";
-        if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth);
-        s += "]";
-        break;
-    }
-    case 13: {
-        s += "[";
-        if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth);
-        s += ", ";
-        if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth);
-        s += ", ";
-        if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth);
-        s += ", ";
-        if (word.inner().words_size() > 3) GenerateWord(word.inner().words(3), s, ++depth);
-        s += "]";
-        break;
-    }
-    case 14: {
-        s += "[";
-        if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth);
-        s += ", ";
-        if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth);
-        s += ", ";
-        if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth);
-        s += ", ";
-        if (word.inner().words_size() > 3) GenerateWord(word.inner().words(3), s, ++depth);
-        s += ", ";
-        if (word.inner().words_size() > 4) GenerateWord(word.inner().words(4), s, ++depth);
-        s += "]";
-        break;
-    }
-    case 15: {
-        if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth);
-        s += "(";
-        if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth);
-        s += ")";
-        break;
-    }
-    case 16: {
-        if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth);
-        s += "(";
-        if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth);
-        s += ", ";
-        if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth);
-        s += ")";
-        break;
-    }
-    case 17: {
-        if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth);
-        s += "(";
-        if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth);
-        s += ", ";
-        if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth);
-        s += ", ";
-        if (word.inner().words_size() > 3) GenerateWord(word.inner().words(3), s, ++depth);
-        s += ")";
-        break;
-    }
-    case 18: {
-        if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth);
-        s += " as ";
-        if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth);
-        break;
-    }
-    case 19: {
-        s += "SELECT ";
-        if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth);
-        s += " FROM ";
-        if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth);
-        s += " WHERE ";
-        if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth);
-        break;
-    }
-    case 20: {
-        s += "SELECT ";
-        if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth);
-        s += " FROM ";
-        if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth);
-        s += " GROUP BY ";
-        if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth);
-        break;
-    }
-    case 21: {
-        s += "SELECT ";
-        if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth);
-        s += " FROM ";
-        if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth);
-        s += " SORT BY ";
-        if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth);
-        break;
-    }
-    case 22: {
-        s += "SELECT ";
-        if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth);
-        s += " FROM ";
-        if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth);
-        s += " LIMIT ";
-        if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth);
-        break;
-    }
-    case 23: {
-        s += "SELECT ";
-        if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth);
-        s += " FROM ";
-        if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth);
-        s += " JOIN ";
-        if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth);
-        break;
-    }
-    case 24: {
-        s += "SELECT ";
-        if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth);
-        s += " FROM ";
-        if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth);
-        s += " ARRAY JOIN ";
-        if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth);
-        break;
-    }
-    case 25: {
-        s += "SELECT ";
-        if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth);
-        s += " FROM ";
-        if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth);
-        s += " JOIN ";
-        if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth);
-        s += " ON ";
-        if (word.inner().words_size() > 3) GenerateWord(word.inner().words(3), s, ++depth);
-        break;
-    }
-    case 26: {
-        s += "SELECT ";
-        if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth);
-        s += " FROM ";
-        if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth);
-        s += " JOIN ";
-        if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth);
-        s += " USING ";
-        if (word.inner().words_size() > 3) GenerateWord(word.inner().words(3), s, ++depth);
-        break;
-    }
-    case 27: {
-        s += "SELECT ";
-        if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth);
-        s += " INTO OUTFILE ";
-        if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth);
-        break;
-    }
-    case 28: {
-        s += "WITH ";
-        if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth);
-        s += " AS ";
-        if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth);
-        break;
-    }
-    case 29: {
-        s += "{";
-        if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth);
-        s += ":";
-        if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth);
-        s += "}";
-        break;
-    }
-    case 30: {
-        s += "[";
-        if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth);
-        s += ",";
-        if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth);
-        s += "]";
-        break;
-    }
-    case 31: {
-        s += "[]";
-        break;
-    }
-    case 32: {
-        s += " x ";
-        break;
-    }
-    case 33: {
-        s += "x";
-        break;
-    }
-    case 34: {
-        s += " `x` ";
-        break;
-    }
-    case 35: {
-        s += "`x`";
-        break;
-    }
-    case 36: {
-        s += " \"value\" ";
-        break;
-    }
-    case 37: {
-        s += "\"value\"";
-        break;
-    }
-    case 38: {
-        s += " 0 ";
-        break;
-    }
-    case 39: {
-        s += "0";
-        break;
-    }
-    case 40: {
-        s += "1";
-        break;
-    }
-    case 41: {
-        s += "2";
-        break;
-    }
-    case 42: {
-        s += "123123123123123123";
-        break;
-    }
-    case 43: {
-        s += "182374019873401982734091873420923123123123123123";
-        break;
-    }
-    case 44: {
-        s += "1e-1";
-        break;
-    }
-    case 45: {
-        s += "1.1";
-        break;
-    }
-    case 46: {
-        s += "\"\"";
-        break;
-    }
-    case 47: {
-        s += " '../../../../../../../../../etc/passwd' ";
-        break;
-    }
-    case 48: {
-        s += "/";
-        break;
-    }
-    case 49: {
-        s += "=";
-        break;
-    }
-    case 50: {
-        s += "==";
-        break;
-    }
-    case 51: {
-        s += "!=";
-        break;
-    }
-    case 52: {
-        s += "<>";
-        break;
-    }
-    case 53: {
-        s += "<";
-        break;
-    }
-    case 54: {
-        s += "<=";
-        break;
-    }
-    case 55: {
-        s += ">";
-        break;
-    }
-    case 56: {
-        s += ">=";
-        break;
-    }
-    case 57: {
-        s += "<<";
-        break;
-    }
-    case 58: {
-        s += "|<<";
-        break;
-    }
-    case 59: {
-        s += "&";
-        break;
-    }
-    case 60: {
-        s += "|";
-        break;
-    }
-    case 61: {
-        s += "||";
-        break;
-    }
-    case 62: {
-        s += "<|";
-        break;
-    }
-    case 63: {
-        s += "|>";
-        break;
-    }
-    case 64: {
-        s += "+";
-        break;
-    }
-    case 65: {
-        s += "-";
-        break;
-    }
-    case 66: {
-        s += "~";
-        break;
-    }
-    case 67: {
-        s += "*";
-        break;
-    }
-    case 68: {
-        s += "/";
-        break;
-    }
-    case 69: {
-        s += "\\";
-        break;
-    }
-    case 70: {
-        s += "%";
-        break;
-    }
-    case 71: {
-        s += "";
-        break;
-    }
-    case 72: {
-        s += ".";
-        break;
-    }
-    case 73: {
-        s += ",";
-        break;
-    }
-    case 74: {
-        s += ",";
-        break;
-    }
-    case 75: {
-        s += ",";
-        break;
-    }
-    case 76: {
-        s += ",";
-        break;
-    }
-    case 77: {
-        s += ",";
-        break;
-    }
-    case 78: {
-        s += ",";
-        break;
-    }
-    case 79: {
-        s += "(";
-        break;
-    }
-    case 80: {
-        s += ")";
-        break;
-    }
-    case 81: {
-        s += "(";
-        break;
-    }
-    case 82: {
-        s += ")";
-        break;
-    }
-    case 83: {
-        s += "(";
-        break;
-    }
-    case 84: {
-        s += ")";
-        break;
-    }
-    case 85: {
-        s += "(";
-        break;
-    }
-    case 86: {
-        s += ")";
-        break;
-    }
-    case 87: {
-        s += "(";
-        break;
-    }
-    case 88: {
-        s += ")";
-        break;
-    }
-    case 89: {
-        s += "(";
-        break;
-    }
-    case 90: {
-        s += ")";
-        break;
-    }
-    case 91: {
-        s += "?";
-        break;
-    }
-    case 92: {
-        s += ":";
-        break;
-    }
-    case 93: {
-        s += "@";
-        break;
-    }
-    case 94: {
-        s += "@@";
-        break;
-    }
-    case 95: {
-        s += "$";
-        break;
-    }
-    case 96: {
-        s += "\"";
-        break;
-    }
-    case 97: {
-        s += "`";
-        break;
-    }
-    case 98: {
-        s += "{";
-        break;
-    }
-    case 99: {
-        s += "}";
-        break;
-    }
-    case 100: {
-        s += "^";
-        break;
-    }
-    case 101: {
-        s += "::";
-        break;
-    }
-    case 102: {
-        s += "->";
-        break;
-    }
-    case 103: {
-        s += "]";
-        break;
-    }
-    case 104: {
-        s += "[";
-        break;
-    }
-    case 105: {
-        s += " abs ";
-        break;
-    }
-    case 106: {
-        s += " accurate_Cast ";
-        break;
-    }
-    case 107: {
-        s += " accurateCast ";
-        break;
-    }
-    case 108: {
-        s += " accurate_CastOrNull ";
-        break;
-    }
-    case 109: {
-        s += " accurateCastOrNull ";
-        break;
-    }
-    case 110: {
-        s += " acos ";
-        break;
-    }
-    case 111: {
-        s += " acosh ";
-        break;
-    }
-    case 112: {
-        s += " ADD ";
-        break;
-    }
-    case 113: {
-        s += " ADD COLUMN ";
-        break;
-    }
-    case 114: {
-        s += " ADD CONSTRAINT ";
-        break;
-    }
-    case 115: {
-        s += " addDays ";
-        break;
-    }
-    case 116: {
s += " addHours "; - break; - } - case 117: { - s += " ADD INDEX "; - break; - } - case 118: { - s += " addMinutes "; - break; - } - case 119: { - s += " addMonths "; - break; - } - case 120: { - s += " addQuarters "; - break; - } - case 121: { - s += " addressToLine "; - break; - } - case 122: { - s += " addressToSymbol "; - break; - } - case 123: { - s += " addSeconds "; - break; - } - case 124: { - s += " addWeeks "; - break; - } - case 125: { - s += " addYears "; - break; - } - case 126: { - s += " aes_decrypt_mysql "; - break; - } - case 127: { - s += " aes_encrypt_mysql "; - break; - } - case 128: { - s += " AFTER "; - break; - } - case 129: { - s += " AggregateFunction "; - break; - } - case 130: { - s += " aggThrow "; - break; - } - case 131: { - s += " ALIAS "; - break; - } - case 132: { - s += " ALL "; - break; - } - case 133: { - s += " alphaTokens "; - break; - } - case 134: { - s += " ALTER "; - break; - } - case 135: { - s += " ALTER LIVE VIEW "; - break; - } - case 136: { - s += " ALTER TABLE "; - break; - } - case 137: { - s += " and "; - break; - } - case 138: { - s += " AND "; - break; - } - case 139: { - s += " ANTI "; - break; - } - case 140: { - s += " any "; - break; - } - case 141: { - s += " ANY "; - break; - } - case 142: { - s += " anyHeavy "; - break; - } - case 143: { - s += " anyLast "; - break; - } - case 144: { - s += " appendTrailingCharIfAbsent "; - break; - } - case 145: { - s += " argMax "; - break; - } - case 146: { - s += " argMin "; - break; - } - case 147: { - s += " array "; - break; - } - case 148: { - s += " Array "; - break; - } - case 149: { - s += " ARRAY "; - break; - } - case 150: { - s += " arrayAll "; - break; - } - case 151: { - s += " arrayAUC "; - break; - } - case 152: { - s += " arrayAvg "; - break; - } - case 153: { - s += " arrayCompact "; - break; - } - case 154: { - s += " arrayConcat "; - break; - } - case 155: { - s += " arrayCount "; - break; - } - case 156: { - s += " arrayCumSum "; - break; - } - case 157: { - s += " arrayCumSumNonNegative "; - break; - } - case 158: { - s += " arrayDifference "; - break; - } - case 159: { - s += " arrayDistinct "; - break; - } - case 160: { - s += " arrayElement "; - break; - } - case 161: { - s += " arrayEnumerate "; - break; - } - case 162: { - s += " arrayEnumerateDense "; - break; - } - case 163: { - s += " arrayEnumerateDenseRanked "; - break; - } - case 164: { - s += " arrayEnumerateUniq "; - break; - } - case 165: { - s += " arrayEnumerateUniqRanked "; - break; - } - case 166: { - s += " arrayExists "; - break; - } - case 167: { - s += " arrayFill "; - break; - } - case 168: { - s += " arrayFilter "; - break; - } - case 169: { - s += " arrayFirst "; - break; - } - case 170: { - s += " arrayFirstIndex "; - break; - } - case 171: { - s += " arrayFlatten "; - break; - } - case 172: { - s += " arrayIntersect "; - break; - } - case 173: { - s += " arrayJoin "; - break; - } - case 174: { - s += " ARRAY JOIN "; - break; - } - case 175: { - s += " arrayMap "; - break; - } - case 176: { - s += " arrayMax "; - break; - } - case 177: { - s += " arrayMin "; - break; - } - case 178: { - s += " arrayPartialReverseSort "; - break; - } - case 179: { - s += " arrayPartialShuffle "; - break; - } - case 180: { - s += " arrayPartialSort "; - break; - } - case 181: { - s += " arrayPopBack "; - break; - } - case 182: { - s += " arrayPopFront "; - break; - } - case 183: { - s += " arrayProduct "; - break; - } - case 184: { - s += " arrayPushBack "; - break; - } - case 185: { - s += " arrayPushFront "; - 
break; - } - case 186: { - s += " arrayReduce "; - break; - } - case 187: { - s += " arrayReduceInRanges "; - break; - } - case 188: { - s += " arrayResize "; - break; - } - case 189: { - s += " arrayReverse "; - break; - } - case 190: { - s += " arrayReverseFill "; - break; - } - case 191: { - s += " arrayReverseSort "; - break; - } - case 192: { - s += " arrayReverseSplit "; - break; - } - case 193: { - s += " arrayShuffle "; - break; - } - case 194: { - s += " arraySlice "; - break; - } - case 195: { - s += " arraySort "; - break; - } - case 196: { - s += " arraySplit "; - break; - } - case 197: { - s += " arrayStringConcat "; - break; - } - case 198: { - s += " arraySum "; - break; - } - case 199: { - s += " arrayUniq "; - break; - } - case 200: { - s += " arrayWithConstant "; - break; - } - case 201: { - s += " arrayZip "; - break; - } - case 202: { - s += " AS "; - break; - } - case 203: { - s += " ASC "; - break; - } - case 204: { - s += " ASCENDING "; - break; - } - case 205: { - s += " asin "; - break; - } - case 206: { - s += " asinh "; - break; - } - case 207: { - s += " ASOF "; - break; - } - case 208: { - s += " assumeNotNull "; - break; - } - case 209: { - s += " AST "; - break; - } - case 210: { - s += " ASYNC "; - break; - } - case 211: { - s += " atan "; - break; - } - case 212: { - s += " atan2 "; - break; - } - case 213: { - s += " atanh "; - break; - } - case 214: { - s += " ATTACH "; - break; - } - case 215: { - s += " ATTACH PART "; - break; - } - case 216: { - s += " ATTACH PARTITION "; - break; - } - case 217: { - s += " avg "; - break; - } - case 218: { - s += " avgWeighted "; - break; - } - case 219: { - s += " bar "; - break; - } - case 220: { - s += " base64Decode "; - break; - } - case 221: { - s += " base64Encode "; - break; - } - case 222: { - s += " basename "; - break; - } - case 223: { - s += " bayesAB "; - break; - } - case 224: { - s += " BETWEEN "; - break; - } - case 225: { - s += " BIGINT "; - break; - } - case 226: { - s += " BIGINT SIGNED "; - break; - } - case 227: { - s += " BIGINT UNSIGNED "; - break; - } - case 228: { - s += " bin "; - break; - } - case 229: { - s += " BINARY "; - break; - } - case 230: { - s += " BINARY LARGE OBJECT "; - break; - } - case 231: { - s += " BINARY VARYING "; - break; - } - case 232: { - s += " bitAnd "; - break; - } - case 233: { - s += " BIT_AND "; - break; - } - case 234: { - s += " __bitBoolMaskAnd "; - break; - } - case 235: { - s += " __bitBoolMaskOr "; - break; - } - case 236: { - s += " bitCount "; - break; - } - case 237: { - s += " bitHammingDistance "; - break; - } - case 238: { - s += " bitmapAnd "; - break; - } - case 239: { - s += " bitmapAndCardinality "; - break; - } - case 240: { - s += " bitmapAndnot "; - break; - } - case 241: { - s += " bitmapAndnotCardinality "; - break; - } - case 242: { - s += " bitmapBuild "; - break; - } - case 243: { - s += " bitmapCardinality "; - break; - } - case 244: { - s += " bitmapContains "; - break; - } - case 245: { - s += " bitmapHasAll "; - break; - } - case 246: { - s += " bitmapHasAny "; - break; - } - case 247: { - s += " bitmapMax "; - break; - } - case 248: { - s += " bitmapMin "; - break; - } - case 249: { - s += " bitmapOr "; - break; - } - case 250: { - s += " bitmapOrCardinality "; - break; - } - case 251: { - s += " bitmapSubsetInRange "; - break; - } - case 252: { - s += " bitmapSubsetLimit "; - break; - } - case 253: { - s += " bitmapToArray "; - break; - } - case 254: { - s += " bitmapTransform "; - break; - } - case 255: { - s += " bitmapXor "; - 
break; - } - case 256: { - s += " bitmapXorCardinality "; - break; - } - case 257: { - s += " bitmaskToArray "; - break; - } - case 258: { - s += " bitmaskToList "; - break; - } - case 259: { - s += " bitNot "; - break; - } - case 260: { - s += " bitOr "; - break; - } - case 261: { - s += " BIT_OR "; - break; - } - case 262: { - s += " bitPositionsToArray "; - break; - } - case 263: { - s += " bitRotateLeft "; - break; - } - case 264: { - s += " bitRotateRight "; - break; - } - case 265: { - s += " bitShiftLeft "; - break; - } - case 266: { - s += " bitShiftRight "; - break; - } - case 267: { - s += " __bitSwapLastTwo "; - break; - } - case 268: { - s += " bitTest "; - break; - } - case 269: { - s += " bitTestAll "; - break; - } - case 270: { - s += " bitTestAny "; - break; - } - case 271: { - s += " __bitWrapperFunc "; - break; - } - case 272: { - s += " bitXor "; - break; - } - case 273: { - s += " BIT_XOR "; - break; - } - case 274: { - s += " BLOB "; - break; - } - case 275: { - s += " blockNumber "; - break; - } - case 276: { - s += " blockSerializedSize "; - break; - } - case 277: { - s += " blockSize "; - break; - } - case 278: { - s += " BOOL "; - break; - } - case 279: { - s += " BOOLEAN "; - break; - } - case 280: { - s += " BOTH "; - break; - } - case 281: { - s += " boundingRatio "; - break; - } - case 282: { - s += " buildId "; - break; - } - case 283: { - s += " BY "; - break; - } - case 284: { - s += " BYTE "; - break; - } - case 285: { - s += " BYTEA "; - break; - } - case 286: { - s += " byteSize "; - break; - } - case 287: { - s += " CASE "; - break; - } - case 288: { - s += " caseWithExpr "; - break; - } - case 289: { - s += " caseWithExpression "; - break; - } - case 290: { - s += " caseWithoutExpr "; - break; - } - case 291: { - s += " caseWithoutExpression "; - break; - } - case 292: { - s += " _CAST "; - break; - } - case 293: { - s += " CAST "; - break; - } - case 294: { - s += " categoricalInformationValue "; - break; - } - case 295: { - s += " cbrt "; - break; - } - case 296: { - s += " ceil "; - break; - } - case 297: { - s += " ceiling "; - break; - } - case 298: { - s += " char "; - break; - } - case 299: { - s += " CHAR "; - break; - } - case 300: { - s += " CHARACTER "; - break; - } - case 301: { - s += " CHARACTER LARGE OBJECT "; - break; - } - case 302: { - s += " CHARACTER_LENGTH "; - break; - } - case 303: { - s += " CHARACTER VARYING "; - break; - } - case 304: { - s += " CHAR LARGE OBJECT "; - break; - } - case 305: { - s += " CHAR_LENGTH "; - break; - } - case 306: { - s += " CHAR VARYING "; - break; - } - case 307: { - s += " CHECK "; - break; - } - case 308: { - s += " CHECK TABLE "; - break; - } - case 309: { - s += " cityHash64 "; - break; - } - case 310: { - s += " CLEAR "; - break; - } - case 311: { - s += " CLEAR COLUMN "; - break; - } - case 312: { - s += " CLEAR INDEX "; - break; - } - case 313: { - s += " CLOB "; - break; - } - case 314: { - s += " CLUSTER "; - break; - } - case 315: { - s += " coalesce "; - break; - } - case 316: { - s += " CODEC "; - break; - } - case 317: { - s += " COLLATE "; - break; - } - case 318: { - s += " COLUMN "; - break; - } - case 319: { - s += " COLUMNS "; - break; - } - case 320: { - s += " COMMENT "; - break; - } - case 321: { - s += " COMMENT COLUMN "; - break; - } - case 322: { - s += " concat "; - break; - } - case 323: { - s += " concatAssumeInjective "; - break; - } - case 324: { - s += " connection_id "; - break; - } - case 325: { - s += " connectionid "; - break; - } - case 326: { - s += " connectionId 
"; - break; - } - case 327: { - s += " CONSTRAINT "; - break; - } - case 328: { - s += " convertCharset "; - break; - } - case 329: { - s += " corr "; - break; - } - case 330: { - s += " corrStable "; - break; - } - case 331: { - s += " cos "; - break; - } - case 332: { - s += " cosh "; - break; - } - case 333: { - s += " count "; - break; - } - case 334: { - s += " countDigits "; - break; - } - case 335: { - s += " countEqual "; - break; - } - case 336: { - s += " countMatches "; - break; - } - case 337: { - s += " countMatchesCaseInsensitive "; - break; - } - case 338: { - s += " countSubstrings "; - break; - } - case 339: { - s += " countSubstringsCaseInsensitive "; - break; - } - case 340: { - s += " countSubstringsCaseInsensitiveUTF8 "; - break; - } - case 341: { - s += " covarPop "; - break; - } - case 342: { - s += " COVAR_POP "; - break; - } - case 343: { - s += " covarPopStable "; - break; - } - case 344: { - s += " covarSamp "; - break; - } - case 345: { - s += " COVAR_SAMP "; - break; - } - case 346: { - s += " covarSampStable "; - break; - } - case 347: { - s += " CRC32 "; - break; - } - case 348: { - s += " CRC32IEEE "; - break; - } - case 349: { - s += " CRC64 "; - break; - } - case 350: { - s += " CREATE "; - break; - } - case 351: { - s += " CROSS "; - break; - } - case 352: { - s += " CUBE "; - break; - } - case 353: { - s += " currentDatabase "; - break; - } - case 354: { - s += " currentProfiles "; - break; - } - case 355: { - s += " currentRoles "; - break; - } - case 356: { - s += " currentUser "; - break; - } - case 357: { - s += " cutFragment "; - break; - } - case 358: { - s += " cutIPv6 "; - break; - } - case 359: { - s += " cutQueryString "; - break; - } - case 360: { - s += " cutQueryStringAndFragment "; - break; - } - case 361: { - s += " cutToFirstSignificantSubdomain "; - break; - } - case 362: { - s += " cutToFirstSignificantSubdomainCustom "; - break; - } - case 363: { - s += " cutToFirstSignificantSubdomainCustomWithWWW "; - break; - } - case 364: { - s += " cutToFirstSignificantSubdomainWithWWW "; - break; - } - case 365: { - s += " cutURLParameter "; - break; - } - case 366: { - s += " cutWWW "; - break; - } - case 367: { - s += " D "; - break; - } - case 368: { - s += " DATABASE "; - break; - } - case 369: { - s += " DATABASES "; - break; - } - case 370: { - s += " Date "; - break; - } - case 371: { - s += " DATE "; - break; - } - case 372: { - s += " Date32 "; - break; - } - case 373: { - s += " DATE_ADD "; - break; - } - case 374: { - s += " DATEADD "; - break; - } - case 375: { - s += " dateDiff "; - break; - } - case 376: { - s += " DATE_DIFF "; - break; - } - case 377: { - s += " DATEDIFF "; - break; - } - case 378: { - s += " dateName "; - break; - } - case 379: { - s += " DATE_SUB "; - break; - } - case 380: { - s += " DATESUB "; - break; - } - case 381: { - s += " DateTime "; - break; - } - case 382: { - s += " DateTime32 "; - break; - } - case 383: { - s += " DateTime64 "; - break; - } - case 384: { - s += " dateTime64ToSnowflake "; - break; - } - case 385: { - s += " dateTimeToSnowflake "; - break; - } - case 386: { - s += " date_trunc "; - break; - } - case 387: { - s += " dateTrunc "; - break; - } - case 388: { - s += " DAY "; - break; - } - case 389: { - s += " DAYOFMONTH "; - break; - } - case 390: { - s += " DAYOFWEEK "; - break; - } - case 391: { - s += " DAYOFYEAR "; - break; - } - case 392: { - s += " DD "; - break; - } - case 393: { - s += " DEC "; - break; - } - case 394: { - s += " Decimal "; - break; - } - case 395: { - s += " 
Decimal128 "; - break; - } - case 396: { - s += " Decimal256 "; - break; - } - case 397: { - s += " Decimal32 "; - break; - } - case 398: { - s += " Decimal64 "; - break; - } - case 399: { - s += " decodeURLComponent "; - break; - } - case 400: { - s += " decodeXMLComponent "; - break; - } - case 401: { - s += " decrypt "; - break; - } - case 402: { - s += " DEDUPLICATE "; - break; - } - case 403: { - s += " DEFAULT "; - break; - } - case 404: { - s += " defaultProfiles "; - break; - } - case 405: { - s += " defaultRoles "; - break; - } - case 406: { - s += " defaultValueOfArgumentType "; - break; - } - case 407: { - s += " defaultValueOfTypeName "; - break; - } - case 408: { - s += " DELAY "; - break; - } - case 409: { - s += " DELETE "; - break; - } - case 410: { - s += " DELETE WHERE "; - break; - } - case 411: { - s += " deltaSum "; - break; - } - case 412: { - s += " deltaSumTimestamp "; - break; - } - case 413: { - s += " demangle "; - break; - } - case 414: { - s += " dense_rank "; - break; - } - case 415: { - s += " DESC "; - break; - } - case 416: { - s += " DESCENDING "; - break; - } - case 417: { - s += " DESCRIBE "; - break; - } - case 418: { - s += " DETACH "; - break; - } - case 419: { - s += " DETACH PARTITION "; - break; - } - case 420: { - s += " dictGet "; - break; - } - case 421: { - s += " dictGetChildren "; - break; - } - case 422: { - s += " dictGetDate "; - break; - } - case 423: { - s += " dictGetDateOrDefault "; - break; - } - case 424: { - s += " dictGetDateTime "; - break; - } - case 425: { - s += " dictGetDateTimeOrDefault "; - break; - } - case 426: { - s += " dictGetDescendants "; - break; - } - case 427: { - s += " dictGetFloat32 "; - break; - } - case 428: { - s += " dictGetFloat32OrDefault "; - break; - } - case 429: { - s += " dictGetFloat64 "; - break; - } - case 430: { - s += " dictGetFloat64OrDefault "; - break; - } - case 431: { - s += " dictGetHierarchy "; - break; - } - case 432: { - s += " dictGetInt16 "; - break; - } - case 433: { - s += " dictGetInt16OrDefault "; - break; - } - case 434: { - s += " dictGetInt32 "; - break; - } - case 435: { - s += " dictGetInt32OrDefault "; - break; - } - case 436: { - s += " dictGetInt64 "; - break; - } - case 437: { - s += " dictGetInt64OrDefault "; - break; - } - case 438: { - s += " dictGetInt8 "; - break; - } - case 439: { - s += " dictGetInt8OrDefault "; - break; - } - case 440: { - s += " dictGetOrDefault "; - break; - } - case 441: { - s += " dictGetOrNull "; - break; - } - case 442: { - s += " dictGetString "; - break; - } - case 443: { - s += " dictGetStringOrDefault "; - break; - } - case 444: { - s += " dictGetUInt16 "; - break; - } - case 445: { - s += " dictGetUInt16OrDefault "; - break; - } - case 446: { - s += " dictGetUInt32 "; - break; - } - case 447: { - s += " dictGetUInt32OrDefault "; - break; - } - case 448: { - s += " dictGetUInt64 "; - break; - } - case 449: { - s += " dictGetUInt64OrDefault "; - break; - } - case 450: { - s += " dictGetUInt8 "; - break; - } - case 451: { - s += " dictGetUInt8OrDefault "; - break; - } - case 452: { - s += " dictGetUUID "; - break; - } - case 453: { - s += " dictGetUUIDOrDefault "; - break; - } - case 454: { - s += " dictHas "; - break; - } - case 455: { - s += " DICTIONARIES "; - break; - } - case 456: { - s += " DICTIONARY "; - break; - } - case 457: { - s += " dictIsIn "; - break; - } - case 458: { - s += " DISK "; - break; - } - case 459: { - s += " DISTINCT "; - break; - } - case 460: { - s += " DISTRIBUTED "; - break; - } - case 461: { - s += " divide 
"; - break; - } - case 462: { - s += " domain "; - break; - } - case 463: { - s += " domainWithoutWWW "; - break; - } - case 464: { - s += " DOUBLE "; - break; - } - case 465: { - s += " DOUBLE PRECISION "; - break; - } - case 466: { - s += " DROP "; - break; - } - case 467: { - s += " DROP COLUMN "; - break; - } - case 468: { - s += " DROP CONSTRAINT "; - break; - } - case 469: { - s += " DROP DETACHED PART "; - break; - } - case 470: { - s += " DROP DETACHED PARTITION "; - break; - } - case 471: { - s += " DROP INDEX "; - break; - } - case 472: { - s += " DROP PARTITION "; - break; - } - case 473: { - s += " dumpColumnStructure "; - break; - } - case 474: { - s += " e "; - break; - } - case 475: { - s += " ELSE "; - break; - } - case 476: { - s += " empty "; - break; - } - case 477: { - s += " emptyArrayDate "; - break; - } - case 478: { - s += " emptyArrayDateTime "; - break; - } - case 479: { - s += " emptyArrayFloat32 "; - break; - } - case 480: { - s += " emptyArrayFloat64 "; - break; - } - case 481: { - s += " emptyArrayInt16 "; - break; - } - case 482: { - s += " emptyArrayInt32 "; - break; - } - case 483: { - s += " emptyArrayInt64 "; - break; - } - case 484: { - s += " emptyArrayInt8 "; - break; - } - case 485: { - s += " emptyArrayString "; - break; - } - case 486: { - s += " emptyArrayToSingle "; - break; - } - case 487: { - s += " emptyArrayUInt16 "; - break; - } - case 488: { - s += " emptyArrayUInt32 "; - break; - } - case 489: { - s += " emptyArrayUInt64 "; - break; - } - case 490: { - s += " emptyArrayUInt8 "; - break; - } - case 491: { - s += " enabledProfiles "; - break; - } - case 492: { - s += " enabledRoles "; - break; - } - case 493: { - s += " encodeXMLComponent "; - break; - } - case 494: { - s += " encrypt "; - break; - } - case 495: { - s += " END "; - break; - } - case 496: { - s += " endsWith "; - break; - } - case 497: { - s += " ENGINE "; - break; - } - case 498: { - s += " entropy "; - break; - } - case 499: { - s += " Enum "; - break; - } - case 500: { - s += " ENUM "; - break; - } - case 501: { - s += " Enum16 "; - break; - } - case 502: { - s += " Enum8 "; - break; - } - case 503: { - s += " equals "; - break; - } - case 504: { - s += " erf "; - break; - } - case 505: { - s += " erfc "; - break; - } - case 506: { - s += " errorCodeToName "; - break; - } - case 507: { - s += " evalMLMethod "; - break; - } - case 508: { - s += " EVENTS "; - break; - } - case 509: { - s += " EXCHANGE TABLES "; - break; - } - case 510: { - s += " EXISTS "; - break; - } - case 511: { - s += " exp "; - break; - } - case 512: { - s += " exp10 "; - break; - } - case 513: { - s += " exp2 "; - break; - } - case 514: { - s += " EXPLAIN "; - break; - } - case 515: { - s += " exponentialMovingAverage "; - break; - } - case 516: { - s += " EXPRESSION "; - break; - } - case 517: { - s += " extract "; - break; - } - case 518: { - s += " EXTRACT "; - break; - } - case 519: { - s += " extractAll "; - break; - } - case 520: { - s += " extractAllGroups "; - break; - } - case 521: { - s += " extractAllGroupsHorizontal "; - break; - } - case 522: { - s += " extractAllGroupsVertical "; - break; - } - case 523: { - s += " extractGroups "; - break; - } - case 524: { - s += " extractTextFromHTML "; - break; - } - case 525: { - s += " extractURLParameter "; - break; - } - case 526: { - s += " extractURLParameterNames "; - break; - } - case 527: { - s += " extractURLParameters "; - break; - } - case 528: { - s += " farmFingerprint64 "; - break; - } - case 529: { - s += " farmHash64 "; - break; - } - 
case 530: { - s += " FETCHES "; - break; - } - case 531: { - s += " FETCH PART "; - break; - } - case 532: { - s += " FETCH PARTITION "; - break; - } - case 533: { - s += " file "; - break; - } - case 534: { - s += " filesystemAvailable "; - break; - } - case 535: { - s += " filesystemCapacity "; - break; - } - case 536: { - s += " filesystemFree "; - break; - } - case 537: { - s += " FINAL "; - break; - } - case 538: { - s += " finalizeAggregation "; - break; - } - case 539: { - s += " FIRST "; - break; - } - case 540: { - s += " firstSignificantSubdomain "; - break; - } - case 541: { - s += " firstSignificantSubdomainCustom "; - break; - } - case 542: { - s += " first_value "; - break; - } - case 543: { - s += " FIXED "; - break; - } - case 544: { - s += " FixedString "; - break; - } - case 545: { - s += " flatten "; - break; - } - case 546: { - s += " FLOAT "; - break; - } - case 547: { - s += " Float32 "; - break; - } - case 548: { - s += " Float64 "; - break; - } - case 549: { - s += " floor "; - break; - } - case 550: { - s += " FLUSH "; - break; - } - case 551: { - s += " FOR "; - break; - } - case 552: { - s += " ForEach "; - break; - } - case 553: { - s += " format "; - break; - } - case 554: { - s += " FORMAT "; - break; - } - case 555: { - s += " formatDateTime "; - break; - } - case 556: { - s += " formatReadableQuantity "; - break; - } - case 557: { - s += " formatReadableDecimalSize "; - break; - } - case 558: { - s += " formatReadableSize "; - break; - } - case 559: { - s += " formatReadableTimeDelta "; - break; - } - case 560: { - s += " formatRow "; - break; - } - case 561: { - s += " formatRowNoNewline "; - break; - } - case 562: { - s += " FQDN "; - break; - } - case 563: { - s += " fragment "; - break; - } - case 564: { - s += " FREEZE "; - break; - } - case 565: { - s += " FROM "; - break; - } - case 566: { - s += " FROM_BASE64 "; - break; - } - case 567: { - s += " fromModifiedJulianDay "; - break; - } - case 568: { - s += " fromModifiedJulianDayOrNull "; - break; - } - case 569: { - s += " FROM_UNIXTIME "; - break; - } - case 570: { - s += " fromUnixTimestamp "; - break; - } - case 571: { - s += " fromUnixTimestamp64Micro "; - break; - } - case 572: { - s += " fromUnixTimestamp64Milli "; - break; - } - case 573: { - s += " fromUnixTimestamp64Nano "; - break; - } - case 574: { - s += " FULL "; - break; - } - case 575: { - s += " fullHostName "; - break; - } - case 576: { - s += " FUNCTION "; - break; - } - case 577: { - s += " fuzzBits "; - break; - } - case 578: { - s += " gccMurmurHash "; - break; - } - case 579: { - s += " gcd "; - break; - } - case 580: { - s += " generateUUIDv4 "; - break; - } - case 581: { - s += " geoDistance "; - break; - } - case 582: { - s += " geohashDecode "; - break; - } - case 583: { - s += " geohashEncode "; - break; - } - case 584: { - s += " geohashesInBox "; - break; - } - case 585: { - s += " geoToH3 "; - break; - } - case 586: { - s += " geoToS2 "; - break; - } - case 587: { - s += " getMacro "; - break; - } - case 588: { - s += " __getScalar "; - break; - } - case 589: { - s += " getServerPort "; - break; - } - case 590: { - s += " getSetting "; - break; - } - case 591: { - s += " getSizeOfEnumType "; - break; - } - case 592: { - s += " GLOBAL "; - break; - } - case 593: { - s += " globalIn "; - break; - } - case 594: { - s += " globalInIgnoreSet "; - break; - } - case 595: { - s += " globalNotIn "; - break; - } - case 596: { - s += " globalNotInIgnoreSet "; - break; - } - case 597: { - s += " globalNotNullIn "; - break; - } - 
case 598: { - s += " globalNotNullInIgnoreSet "; - break; - } - case 599: { - s += " globalNullIn "; - break; - } - case 600: { - s += " globalNullInIgnoreSet "; - break; - } - case 601: { - s += " globalVariable "; - break; - } - case 602: { - s += " GRANULARITY "; - break; - } - case 603: { - s += " greatCircleAngle "; - break; - } - case 604: { - s += " greatCircleDistance "; - break; - } - case 605: { - s += " greater "; - break; - } - case 606: { - s += " greaterOrEquals "; - break; - } - case 607: { - s += " greatest "; - break; - } - case 608: { - s += " GROUP "; - break; - } - case 609: { - s += " groupArray "; - break; - } - case 610: { - s += " groupArrayInsertAt "; - break; - } - case 611: { - s += " groupArrayMovingAvg "; - break; - } - case 612: { - s += " groupArrayMovingSum "; - break; - } - case 613: { - s += " groupArraySample "; - break; - } - case 614: { - s += " groupBitAnd "; - break; - } - case 615: { - s += " groupBitmap "; - break; - } - case 616: { - s += " groupBitmapAnd "; - break; - } - case 617: { - s += " groupBitmapOr "; - break; - } - case 618: { - s += " groupBitmapXor "; - break; - } - case 619: { - s += " groupBitOr "; - break; - } - case 620: { - s += " groupBitXor "; - break; - } - case 621: { - s += " GROUP BY "; - break; - } - case 622: { - s += " groupUniqArray "; - break; - } - case 623: { - s += " h3EdgeAngle "; - break; - } - case 624: { - s += " h3EdgeLengthM "; - break; - } - case 625: { - s += " h3GetBaseCell "; - break; - } - case 626: { - s += " h3GetFaces "; - break; - } - case 627: { - s += " h3GetResolution "; - break; - } - case 628: { - s += " h3HexAreaM2 "; - break; - } - case 629: { - s += " h3IndexesAreNeighbors "; - break; - } - case 630: { - s += " h3IsPentagon "; - break; - } - case 631: { - s += " h3IsResClassIII "; - break; - } - case 632: { - s += " h3IsValid "; - break; - } - case 633: { - s += " h3kRing "; - break; - } - case 634: { - s += " h3ToChildren "; - break; - } - case 635: { - s += " h3ToGeo "; - break; - } - case 636: { - s += " h3ToGeoBoundary "; - break; - } - case 637: { - s += " h3ToParent "; - break; - } - case 638: { - s += " h3ToString "; - break; - } - case 639: { - s += " halfMD5 "; - break; - } - case 640: { - s += " has "; - break; - } - case 641: { - s += " hasAll "; - break; - } - case 642: { - s += " hasAny "; - break; - } - case 643: { - s += " hasColumnInTable "; - break; - } - case 644: { - s += " hasSubstr "; - break; - } - case 645: { - s += " hasThreadFuzzer "; - break; - } - case 646: { - s += " hasToken "; - break; - } - case 647: { - s += " hasTokenCaseInsensitive "; - break; - } - case 648: { - s += " HAVING "; - break; - } - case 649: { - s += " hex "; - break; - } - case 650: { - s += " HH "; - break; - } - case 651: { - s += " HIERARCHICAL "; - break; - } - case 652: { - s += " histogram "; - break; - } - case 653: { - s += " hiveHash "; - break; - } - case 654: { - s += " hostname "; - break; - } - case 655: { - s += " hostName "; - break; - } - case 656: { - s += " HOUR "; - break; - } - case 657: { - s += " hypot "; - break; - } - case 658: { - s += " ID "; - break; - } - case 659: { - s += " identity "; - break; - } - case 660: { - s += " if "; - break; - } - case 661: { - s += " IF "; - break; - } - case 662: { - s += " IF EXISTS "; - break; - } - case 663: { - s += " IF NOT EXISTS "; - break; - } - case 664: { - s += " ifNotFinite "; - break; - } - case 665: { - s += " ifNull "; - break; - } - case 666: { - s += " ignore "; - break; - } - case 667: { - s += " ilike "; - break; - } - 
case 668: { - s += " ILIKE "; - break; - } - case 669: { - s += " in "; - break; - } - case 670: { - s += " IN "; - break; - } - case 671: { - s += " INDEX "; - break; - } - case 672: { - s += " indexHint "; - break; - } - case 673: { - s += " indexOf "; - break; - } - case 674: { - s += " INET4 "; - break; - } - case 675: { - s += " INET6 "; - break; - } - case 676: { - s += " INET6_ATON "; - break; - } - case 677: { - s += " INET6_NTOA "; - break; - } - case 678: { - s += " INET_ATON "; - break; - } - case 679: { - s += " INET_NTOA "; - break; - } - case 680: { - s += " INF "; - break; - } - case 681: { - s += " inIgnoreSet "; - break; - } - case 682: { - s += " initializeAggregation "; - break; - } - case 683: { - s += " initial_query_id "; - break; - } - case 684: { - s += " initialQueryID "; - break; - } - case 685: { - s += " INJECTIVE "; - break; - } - case 686: { - s += " INNER "; - break; - } - case 687: { - s += " IN PARTITION "; - break; - } - case 688: { - s += " INSERT "; - break; - } - case 689: { - s += " INSERT INTO "; - break; - } - case 690: { - s += " INT "; - break; - } - case 691: { - s += " INT1 "; - break; - } - case 692: { - s += " Int128 "; - break; - } - case 693: { - s += " Int16 "; - break; - } - case 694: { - s += " INT1 SIGNED "; - break; - } - case 695: { - s += " INT1 UNSIGNED "; - break; - } - case 696: { - s += " Int256 "; - break; - } - case 697: { - s += " Int32 "; - break; - } - case 698: { - s += " Int64 "; - break; - } - case 699: { - s += " Int8 "; - break; - } - case 700: { - s += " intDiv "; - break; - } - case 701: { - s += " intDivOrZero "; - break; - } - case 702: { - s += " INTEGER "; - break; - } - case 703: { - s += " INTEGER SIGNED "; - break; - } - case 704: { - s += " INTEGER UNSIGNED "; - break; - } - case 705: { - s += " INTERVAL "; - break; - } - case 706: { - s += " IntervalDay "; - break; - } - case 707: { - s += " IntervalHour "; - break; - } - case 708: { - s += " intervalLengthSum "; - break; - } - case 709: { - s += " IntervalMinute "; - break; - } - case 710: { - s += " IntervalMonth "; - break; - } - case 711: { - s += " IntervalQuarter "; - break; - } - case 712: { - s += " IntervalSecond "; - break; - } - case 713: { - s += " IntervalWeek "; - break; - } - case 714: { - s += " IntervalYear "; - break; - } - case 715: { - s += " intExp10 "; - break; - } - case 716: { - s += " intExp2 "; - break; - } - case 717: { - s += " intHash32 "; - break; - } - case 718: { - s += " intHash64 "; - break; - } - case 719: { - s += " INTO "; - break; - } - case 720: { - s += " INTO OUTFILE "; - break; - } - case 721: { - s += " INT SIGNED "; - break; - } - case 722: { - s += " INT UNSIGNED "; - break; - } - case 723: { - s += " IPv4 "; - break; - } - case 724: { - s += " IPv4CIDRToRange "; - break; - } - case 725: { - s += " IPv4NumToString "; - break; - } - case 726: { - s += " IPv4NumToStringClassC "; - break; - } - case 727: { - s += " IPv4StringToNum "; - break; - } - case 728: { - s += " IPv4ToIPv6 "; - break; - } - case 729: { - s += " IPv6 "; - break; - } - case 730: { - s += " IPv6CIDRToRange "; - break; - } - case 731: { - s += " IPv6NumToString "; - break; - } - case 732: { - s += " IPv6StringToNum "; - break; - } - case 733: { - s += " IS "; - break; - } - case 734: { - s += " isConstant "; - break; - } - case 735: { - s += " isDecimalOverflow "; - break; - } - case 736: { - s += " isFinite "; - break; - } - case 737: { - s += " isInfinite "; - break; - } - case 738: { - s += " isIPAddressInRange "; - break; - } - case 739: { - s += 
" isIPv4String "; - break; - } - case 740: { - s += " isIPv6String "; - break; - } - case 741: { - s += " isNaN "; - break; - } - case 742: { - s += " isNotNull "; - break; - } - case 743: { - s += " isNull "; - break; - } - case 744: { - s += " IS_OBJECT_ID "; - break; - } - case 745: { - s += " isValidJSON "; - break; - } - case 746: { - s += " isValidUTF8 "; - break; - } - case 747: { - s += " isZeroOrNull "; - break; - } - case 748: { - s += " javaHash "; - break; - } - case 749: { - s += " javaHashUTF16LE "; - break; - } - case 750: { - s += " JOIN "; - break; - } - case 751: { - s += " joinGet "; - break; - } - case 752: { - s += " joinGetOrNull "; - break; - } - case 753: { - s += " JSON_EXISTS "; - break; - } - case 754: { - s += " JSONExtract "; - break; - } - case 755: { - s += " JSONExtractArrayRaw "; - break; - } - case 756: { - s += " JSONExtractBool "; - break; - } - case 757: { - s += " JSONExtractFloat "; - break; - } - case 758: { - s += " JSONExtractInt "; - break; - } - case 759: { - s += " JSONExtractKeysAndValues "; - break; - } - case 760: { - s += " JSONExtractKeysAndValuesRaw "; - break; - } - case 761: { - s += " JSONExtractKeys "; - break; - } - case 762: { - s += " JSONExtractRaw "; - break; - } - case 763: { - s += " JSONExtractString "; - break; - } - case 764: { - s += " JSONExtractUInt "; - break; - } - case 765: { - s += " JSONHas "; - break; - } - case 766: { - s += " JSONKey "; - break; - } - case 767: { - s += " JSONLength "; - break; - } - case 768: { - s += " JSON_QUERY "; - break; - } - case 769: { - s += " JSONType "; - break; - } - case 770: { - s += " JSON_VALUE "; - break; - } - case 771: { - s += " jumpConsistentHash "; - break; - } - case 772: { - s += " KEY "; - break; - } - case 773: { - s += " KILL "; - break; - } - case 774: { - s += " kurtPop "; - break; - } - case 775: { - s += " kurtSamp "; - break; - } - case 776: { - s += " lagInFrame "; - break; - } - case 777: { - s += " LAST "; - break; - } - case 778: { - s += " last_value "; - break; - } - case 779: { - s += " LAYOUT "; - break; - } - case 780: { - s += " lcase "; - break; - } - case 781: { - s += " lcm "; - break; - } - case 782: { - s += " leadInFrame "; - break; - } - case 783: { - s += " LEADING "; - break; - } - case 784: { - s += " least "; - break; - } - case 785: { - s += " LEFT "; - break; - } - case 786: { - s += " LEFT ARRAY JOIN "; - break; - } - case 787: { - s += " leftPad "; - break; - } - case 788: { - s += " leftPadUTF8 "; - break; - } - case 789: { - s += " lemmatize "; - break; - } - case 790: { - s += " length "; - break; - } - case 791: { - s += " lengthUTF8 "; - break; - } - case 792: { - s += " less "; - break; - } - case 793: { - s += " lessOrEquals "; - break; - } - case 794: { - s += " lgamma "; - break; - } - case 795: { - s += " LIFETIME "; - break; - } - case 796: { - s += " like "; - break; - } - case 797: { - s += " LIKE "; - break; - } - case 798: { - s += " LIMIT "; - break; - } - case 799: { - s += " LIVE "; - break; - } - case 800: { - s += " ln "; - break; - } - case 801: { - s += " LOCAL "; - break; - } - case 802: { - s += " locate "; - break; - } - case 803: { - s += " log "; - break; - } - case 804: { - s += " log10 "; - break; - } - case 805: { - s += " log1p "; - break; - } - case 806: { - s += " log2 "; - break; - } - case 807: { - s += " LOGS "; - break; - } - case 808: { - s += " logTrace "; - break; - } - case 809: { - s += " LONGBLOB "; - break; - } - case 810: { - s += " LONGTEXT "; - break; - } - case 811: { - s += " LowCardinality "; 
- break; - } - case 812: { - s += " lowCardinalityIndices "; - break; - } - case 813: { - s += " lowCardinalityKeys "; - break; - } - case 814: { - s += " lower "; - break; - } - case 815: { - s += " lowerUTF8 "; - break; - } - case 816: { - s += " lpad "; - break; - } - case 817: { - s += " LTRIM "; - break; - } - case 818: { - s += " M "; - break; - } - case 819: { - s += " MACNumToString "; - break; - } - case 820: { - s += " MACStringToNum "; - break; - } - case 821: { - s += " MACStringToOUI "; - break; - } - case 822: { - s += " mannWhitneyUTest "; - break; - } - case 823: { - s += " map "; - break; - } - case 824: { - s += " Map "; - break; - } - case 825: { - s += " mapAdd "; - break; - } - case 826: { - s += " mapContains "; - break; - } - case 827: { - s += " mapKeys "; - break; - } - case 828: { - s += " mapPopulateSeries "; - break; - } - case 829: { - s += " mapSubtract "; - break; - } - case 830: { - s += " mapValues "; - break; - } - case 831: { - s += " match "; - break; - } - case 832: { - s += " materialize "; - break; - } - case 833: { - s += " MATERIALIZE "; - break; - } - case 834: { - s += " MATERIALIZED "; - break; - } - case 835: { - s += " MATERIALIZE INDEX "; - break; - } - case 836: { - s += " MATERIALIZE TTL "; - break; - } - case 837: { - s += " max "; - break; - } - case 838: { - s += " MAX "; - break; - } - case 839: { - s += " maxIntersections "; - break; - } - case 840: { - s += " maxIntersectionsPosition "; - break; - } - case 841: { - s += " maxMap "; - break; - } - case 842: { - s += " MD4 "; - break; - } - case 843: { - s += " MD5 "; - break; - } - case 844: { - s += " median "; - break; - } - case 845: { - s += " medianBFloat16 "; - break; - } - case 846: { - s += " medianBFloat16Weighted "; - break; - } - case 847: { - s += " medianDeterministic "; - break; - } - case 848: { - s += " medianExact "; - break; - } - case 849: { - s += " medianExactHigh "; - break; - } - case 850: { - s += " medianExactLow "; - break; - } - case 851: { - s += " medianExactWeighted "; - break; - } - case 852: { - s += " medianTDigest "; - break; - } - case 853: { - s += " medianTDigestWeighted "; - break; - } - case 854: { - s += " medianTiming "; - break; - } - case 855: { - s += " medianTimingWeighted "; - break; - } - case 856: { - s += " MEDIUMBLOB "; - break; - } - case 857: { - s += " MEDIUMINT "; - break; - } - case 858: { - s += " MEDIUMINT SIGNED "; - break; - } - case 859: { - s += " MEDIUMINT UNSIGNED "; - break; - } - case 860: { - s += " MEDIUMTEXT "; - break; - } - case 861: { - s += " Merge "; - break; - } - case 862: { - s += " MERGES "; - break; - } - case 863: { - s += " metroHash64 "; - break; - } - case 864: { - s += " MI "; - break; - } - case 865: { - s += " mid "; - break; - } - case 866: { - s += " min "; - break; - } - case 867: { - s += " MIN "; - break; - } - case 868: { - s += " minMap "; - break; - } - case 869: { - s += " minus "; - break; - } - case 870: { - s += " MINUTE "; - break; - } - case 871: { - s += " MM "; - break; - } - case 872: { - s += " mod "; - break; - } - case 873: { - s += " MODIFY "; - break; - } - case 874: { - s += " MODIFY COLUMN "; - break; - } - case 875: { - s += " MODIFY ORDER BY "; - break; - } - case 876: { - s += " MODIFY QUERY "; - break; - } - case 877: { - s += " MODIFY SETTING "; - break; - } - case 878: { - s += " MODIFY TTL "; - break; - } - case 879: { - s += " modulo "; - break; - } - case 880: { - s += " moduloLegacy "; - break; - } - case 881: { - s += " moduloOrZero "; - break; - } - case 882: { - s += 
" MONTH "; - break; - } - case 883: { - s += " MOVE "; - break; - } - case 884: { - s += " MOVE PART "; - break; - } - case 885: { - s += " MOVE PARTITION "; - break; - } - case 886: { - s += " movingXXX "; - break; - } - case 887: { - s += " multiFuzzyMatchAllIndices "; - break; - } - case 888: { - s += " multiFuzzyMatchAny "; - break; - } - case 889: { - s += " multiFuzzyMatchAnyIndex "; - break; - } - case 890: { - s += " multiIf "; - break; - } - case 891: { - s += " multiMatchAllIndices "; - break; - } - case 892: { - s += " multiMatchAny "; - break; - } - case 893: { - s += " multiMatchAnyIndex "; - break; - } - case 894: { - s += " multiply "; - break; - } - case 895: { - s += " MultiPolygon "; - break; - } - case 896: { - s += " multiSearchAllPositions "; - break; - } - case 897: { - s += " multiSearchAllPositionsCaseInsensitive "; - break; - } - case 898: { - s += " multiSearchAllPositionsCaseInsensitiveUTF8 "; - break; - } - case 899: { - s += " multiSearchAllPositionsUTF8 "; - break; - } - case 900: { - s += " multiSearchAny "; - break; - } - case 901: { - s += " multiSearchAnyCaseInsensitive "; - break; - } - case 902: { - s += " multiSearchAnyCaseInsensitiveUTF8 "; - break; - } - case 903: { - s += " multiSearchAnyUTF8 "; - break; - } - case 904: { - s += " multiSearchFirstIndex "; - break; - } - case 905: { - s += " multiSearchFirstIndexCaseInsensitive "; - break; - } - case 906: { - s += " multiSearchFirstIndexCaseInsensitiveUTF8 "; - break; - } - case 907: { - s += " multiSearchFirstIndexUTF8 "; - break; - } - case 908: { - s += " multiSearchFirstPosition "; - break; - } - case 909: { - s += " multiSearchFirstPositionCaseInsensitive "; - break; - } - case 910: { - s += " multiSearchFirstPositionCaseInsensitiveUTF8 "; - break; - } - case 911: { - s += " multiSearchFirstPositionUTF8 "; - break; - } - case 912: { - s += " murmurHash2_32 "; - break; - } - case 913: { - s += " murmurHash2_64 "; - break; - } - case 914: { - s += " murmurHash3_128 "; - break; - } - case 915: { - s += " murmurHash3_32 "; - break; - } - case 916: { - s += " murmurHash3_64 "; - break; - } - case 917: { - s += " MUTATION "; - break; - } - case 918: { - s += " N "; - break; - } - case 919: { - s += " NAME "; - break; - } - case 920: { - s += " NAN_SQL "; - break; - } - case 921: { - s += " NATIONAL CHAR "; - break; - } - case 922: { - s += " NATIONAL CHARACTER "; - break; - } - case 923: { - s += " NATIONAL CHARACTER LARGE OBJECT "; - break; - } - case 924: { - s += " NATIONAL CHARACTER VARYING "; - break; - } - case 925: { - s += " NATIONAL CHAR VARYING "; - break; - } - case 926: { - s += " NCHAR "; - break; - } - case 927: { - s += " NCHAR LARGE OBJECT "; - break; - } - case 928: { - s += " NCHAR VARYING "; - break; - } - case 929: { - s += " negate "; - break; - } - case 930: { - s += " neighbor "; - break; - } - case 931: { - s += " Nested "; - break; - } - case 932: { - s += " netloc "; - break; - } - case 933: { - s += " ngramDistance "; - break; - } - case 934: { - s += " ngramDistanceCaseInsensitive "; - break; - } - case 935: { - s += " ngramDistanceCaseInsensitiveUTF8 "; - break; - } - case 936: { - s += " ngramDistanceUTF8 "; - break; - } - case 937: { - s += " ngramMinHash "; - break; - } - case 938: { - s += " ngramMinHashArg "; - break; - } - case 939: { - s += " ngramMinHashArgCaseInsensitive "; - break; - } - case 940: { - s += " ngramMinHashArgCaseInsensitiveUTF8 "; - break; - } - case 941: { - s += " ngramMinHashArgUTF8 "; - break; - } - case 942: { - s += " 
ngramMinHashCaseInsensitive "; - break; - } - case 943: { - s += " ngramMinHashCaseInsensitiveUTF8 "; - break; - } - case 944: { - s += " ngramMinHashUTF8 "; - break; - } - case 945: { - s += " ngramSearch "; - break; - } - case 946: { - s += " ngramSearchCaseInsensitive "; - break; - } - case 947: { - s += " ngramSearchCaseInsensitiveUTF8 "; - break; - } - case 948: { - s += " ngramSearchUTF8 "; - break; - } - case 949: { - s += " ngramSimHash "; - break; - } - case 950: { - s += " ngramSimHashCaseInsensitive "; - break; - } - case 951: { - s += " ngramSimHashCaseInsensitiveUTF8 "; - break; - } - case 952: { - s += " ngramSimHashUTF8 "; - break; - } - case 953: { - s += " NO "; - break; - } - case 954: { - s += " NO DELAY "; - break; - } - case 955: { - s += " NONE "; - break; - } - case 956: { - s += " normalizedQueryHash "; - break; - } - case 957: { - s += " normalizedQueryHashKeepNames "; - break; - } - case 958: { - s += " normalizeQuery "; - break; - } - case 959: { - s += " normalizeQueryKeepNames "; - break; - } - case 960: { - s += " not "; - break; - } - case 961: { - s += " NOT "; - break; - } - case 962: { - s += " notEmpty "; - break; - } - case 963: { - s += " notEquals "; - break; - } - case 964: { - s += " nothing "; - break; - } - case 965: { - s += " Nothing "; - break; - } - case 966: { - s += " notILike "; - break; - } - case 967: { - s += " notIn "; - break; - } - case 968: { - s += " notInIgnoreSet "; - break; - } - case 969: { - s += " notLike "; - break; - } - case 970: { - s += " notNullIn "; - break; - } - case 971: { - s += " notNullInIgnoreSet "; - break; - } - case 972: { - s += " now "; - break; - } - case 973: { - s += " now64 "; - break; - } - case 974: { - s += " Null "; - break; - } - case 975: { - s += " Nullable "; - break; - } - case 976: { - s += " nullIf "; - break; - } - case 977: { - s += " nullIn "; - break; - } - case 978: { - s += " nullInIgnoreSet "; - break; - } - case 979: { - s += " NULLS "; - break; - } - case 980: { - s += " NULL_SQL "; - break; - } - case 981: { - s += " NUMERIC "; - break; - } - case 982: { - s += " NVARCHAR "; - break; - } - case 983: { - s += " OFFSET "; - break; - } - case 984: { - s += " ON "; - break; - } - case 985: { - s += " ONLY "; - break; - } - case 986: { - s += " OPTIMIZE "; - break; - } - case 987: { - s += " OPTIMIZE TABLE "; - break; - } - case 988: { - s += " or "; - break; - } - case 989: { - s += " OR "; - break; - } - case 990: { - s += " ORDER "; - break; - } - case 991: { - s += " ORDER BY "; - break; - } - case 992: { - s += " OR REPLACE "; - break; - } - case 993: { - s += " OUTER "; - break; - } - case 994: { - s += " OUTFILE "; - break; - } - case 995: { - s += " parseDateTime32BestEffort "; - break; - } - case 996: { - s += " parseDateTime32BestEffortOrNull "; - break; - } - case 997: { - s += " parseDateTime32BestEffortOrZero "; - break; - } - case 998: { - s += " parseDateTime64BestEffort "; - break; - } - case 999: { - s += " parseDateTime64BestEffortOrNull "; - break; - } - case 1000: { - s += " parseDateTime64BestEffortOrZero "; - break; - } - case 1001: { - s += " parseDateTimeBestEffort "; - break; - } - case 1002: { - s += " parseDateTimeBestEffortOrNull "; - break; - } - case 1003: { - s += " parseDateTimeBestEffortOrZero "; - break; - } - case 1004: { - s += " parseDateTimeBestEffortUS "; - break; - } - case 1005: { - s += " parseDateTimeBestEffortUSOrNull "; - break; - } - case 1006: { - s += " parseDateTimeBestEffortUSOrZero "; - break; - } - case 1007: { - s += " parseTimeDelta "; 
- break; - } - case 1008: { - s += " PARTITION "; - break; - } - case 1009: { - s += " PARTITION BY "; - break; - } - case 1010: { - s += " partitionId "; - break; - } - case 1011: { - s += " path "; - break; - } - case 1012: { - s += " pathFull "; - break; - } - case 1013: { - s += " pi "; - break; - } - case 1014: { - s += " plus "; - break; - } - case 1015: { - s += " Point "; - break; - } - case 1016: { - s += " pointInEllipses "; - break; - } - case 1017: { - s += " pointInPolygon "; - break; - } - case 1018: { - s += " Polygon "; - break; - } - case 1019: { - s += " polygonAreaCartesian "; - break; - } - case 1020: { - s += " polygonAreaSpherical "; - break; - } - case 1021: { - s += " polygonConvexHullCartesian "; - break; - } - case 1022: { - s += " polygonPerimeterCartesian "; - break; - } - case 1023: { - s += " polygonPerimeterSpherical "; - break; - } - case 1024: { - s += " polygonsDistanceCartesian "; - break; - } - case 1025: { - s += " polygonsDistanceSpherical "; - break; - } - case 1026: { - s += " polygonsEqualsCartesian "; - break; - } - case 1027: { - s += " polygonsIntersectionCartesian "; - break; - } - case 1028: { - s += " polygonsIntersectionSpherical "; - break; - } - case 1029: { - s += " polygonsSymDifferenceCartesian "; - break; - } - case 1030: { - s += " polygonsSymDifferenceSpherical "; - break; - } - case 1031: { - s += " polygonsUnionCartesian "; - break; - } - case 1032: { - s += " polygonsUnionSpherical "; - break; - } - case 1033: { - s += " polygonsWithinCartesian "; - break; - } - case 1034: { - s += " polygonsWithinSpherical "; - break; - } - case 1035: { - s += " POPULATE "; - break; - } - case 1036: { - s += " port "; - break; - } - case 1037: { - s += " position "; - break; - } - case 1038: { - s += " positionCaseInsensitive "; - break; - } - case 1039: { - s += " positionCaseInsensitiveUTF8 "; - break; - } - case 1040: { - s += " positionUTF8 "; - break; - } - case 1041: { - s += " pow "; - break; - } - case 1042: { - s += " power "; - break; - } - case 1043: { - s += " PREWHERE "; - break; - } - case 1044: { - s += " PRIMARY "; - break; - } - case 1045: { - s += " PRIMARY KEY "; - break; - } - case 1046: { - s += " PROJECTION "; - break; - } - case 1047: { - s += " protocol "; - break; - } - case 1048: { - s += " Q "; - break; - } - case 1049: { - s += " QQ "; - break; - } - case 1050: { - s += " quantile "; - break; - } - case 1051: { - s += " quantileBFloat16 "; - break; - } - case 1052: { - s += " quantileBFloat16Weighted "; - break; - } - case 1053: { - s += " quantileDeterministic "; - break; - } - case 1054: { - s += " quantileExact "; - break; - } - case 1055: { - s += " quantileExactExclusive "; - break; - } - case 1056: { - s += " quantileExactHigh "; - break; - } - case 1057: { - s += " quantileExactInclusive "; - break; - } - case 1058: { - s += " quantileExactLow "; - break; - } - case 1059: { - s += " quantileExactWeighted "; - break; - } - case 1060: { - s += " quantiles "; - break; - } - case 1061: { - s += " quantilesBFloat16 "; - break; - } - case 1062: { - s += " quantilesBFloat16Weighted "; - break; - } - case 1063: { - s += " quantilesDeterministic "; - break; - } - case 1064: { - s += " quantilesExact "; - break; - } - case 1065: { - s += " quantilesExactExclusive "; - break; - } - case 1066: { - s += " quantilesExactHigh "; - break; - } - case 1067: { - s += " quantilesExactInclusive "; - break; - } - case 1068: { - s += " quantilesExactLow "; - break; - } - case 1069: { - s += " quantilesExactWeighted "; - break; - } - 
case 1070: { - s += " quantilesTDigest "; - break; - } - case 1071: { - s += " quantilesTDigestWeighted "; - break; - } - case 1072: { - s += " quantilesTiming "; - break; - } - case 1073: { - s += " quantilesTimingWeighted "; - break; - } - case 1074: { - s += " quantileTDigest "; - break; - } - case 1075: { - s += " quantileTDigestWeighted "; - break; - } - case 1076: { - s += " quantileTiming "; - break; - } - case 1077: { - s += " quantileTimingWeighted "; - break; - } - case 1078: { - s += " QUARTER "; - break; - } - case 1079: { - s += " query_id "; - break; - } - case 1080: { - s += " queryID "; - break; - } - case 1081: { - s += " queryString "; - break; - } - case 1082: { - s += " queryStringAndFragment "; - break; - } - case 1083: { - s += " rand "; - break; - } - case 1084: { - s += " rand32 "; - break; - } - case 1085: { - s += " rand64 "; - break; - } - case 1086: { - s += " randConstant "; - break; - } - case 1087: { - s += " randomFixedString "; - break; - } - case 1088: { - s += " randomPrintableASCII "; - break; - } - case 1089: { - s += " randomString "; - break; - } - case 1090: { - s += " randomStringUTF8 "; - break; - } - case 1091: { - s += " range "; - break; - } - case 1092: { - s += " RANGE "; - break; - } - case 1093: { - s += " rank "; - break; - } - case 1094: { - s += " rankCorr "; - break; - } - case 1095: { - s += " readWKTMultiPolygon "; - break; - } - case 1096: { - s += " readWKTPoint "; - break; - } - case 1097: { - s += " readWKTPolygon "; - break; - } - case 1098: { - s += " readWKTRing "; - break; - } - case 1099: { - s += " REAL "; - break; - } - case 1100: { - s += " REFRESH "; - break; - } - case 1101: { - s += " regexpQuoteMeta "; - break; - } - case 1102: { - s += " regionHierarchy "; - break; - } - case 1103: { - s += " regionIn "; - break; - } - case 1104: { - s += " regionToArea "; - break; - } - case 1105: { - s += " regionToCity "; - break; - } - case 1106: { - s += " regionToContinent "; - break; - } - case 1107: { - s += " regionToCountry "; - break; - } - case 1108: { - s += " regionToDistrict "; - break; - } - case 1109: { - s += " regionToName "; - break; - } - case 1110: { - s += " regionToPopulation "; - break; - } - case 1111: { - s += " regionToTopContinent "; - break; - } - case 1112: { - s += " reinterpret "; - break; - } - case 1113: { - s += " reinterpretAsDate "; - break; - } - case 1114: { - s += " reinterpretAsDateTime "; - break; - } - case 1115: { - s += " reinterpretAsFixedString "; - break; - } - case 1116: { - s += " reinterpretAsFloat32 "; - break; - } - case 1117: { - s += " reinterpretAsFloat64 "; - break; - } - case 1118: { - s += " reinterpretAsInt128 "; - break; - } - case 1119: { - s += " reinterpretAsInt16 "; - break; - } - case 1120: { - s += " reinterpretAsInt256 "; - break; - } - case 1121: { - s += " reinterpretAsInt32 "; - break; - } - case 1122: { - s += " reinterpretAsInt64 "; - break; - } - case 1123: { - s += " reinterpretAsInt8 "; - break; - } - case 1124: { - s += " reinterpretAsString "; - break; - } - case 1125: { - s += " reinterpretAsUInt128 "; - break; - } - case 1126: { - s += " reinterpretAsUInt16 "; - break; - } - case 1127: { - s += " reinterpretAsUInt256 "; - break; - } - case 1128: { - s += " reinterpretAsUInt32 "; - break; - } - case 1129: { - s += " reinterpretAsUInt64 "; - break; - } - case 1130: { - s += " reinterpretAsUInt8 "; - break; - } - case 1131: { - s += " reinterpretAsUUID "; - break; - } - case 1132: { - s += " RELOAD "; - break; - } - case 1133: { - s += " REMOVE "; - break; - 
} - case 1134: { - s += " RENAME "; - break; - } - case 1135: { - s += " RENAME COLUMN "; - break; - } - case 1136: { - s += " RENAME TABLE "; - break; - } - case 1137: { - s += " repeat "; - break; - } - case 1138: { - s += " replace "; - break; - } - case 1139: { - s += " REPLACE "; - break; - } - case 1140: { - s += " replaceAll "; - break; - } - case 1141: { - s += " replaceOne "; - break; - } - case 1142: { - s += " REPLACE PARTITION "; - break; - } - case 1143: { - s += " replaceRegexpAll "; - break; - } - case 1144: { - s += " replaceRegexpOne "; - break; - } - case 1145: { - s += " REPLICA "; - break; - } - case 1146: { - s += " replicate "; - break; - } - case 1147: { - s += " REPLICATED "; - break; - } - case 1148: { - s += " Resample "; - break; - } - case 1149: { - s += " RESUME "; - break; - } - case 1150: { - s += " retention "; - break; - } - case 1151: { - s += " reverse "; - break; - } - case 1152: { - s += " reverseUTF8 "; - break; - } - case 1153: { - s += " RIGHT "; - break; - } - case 1154: { - s += " rightPad "; - break; - } - case 1155: { - s += " rightPadUTF8 "; - break; - } - case 1156: { - s += " Ring "; - break; - } - case 1157: { - s += " ROLLUP "; - break; - } - case 1158: { - s += " round "; - break; - } - case 1159: { - s += " roundAge "; - break; - } - case 1160: { - s += " roundBankers "; - break; - } - case 1161: { - s += " roundDown "; - break; - } - case 1162: { - s += " roundDuration "; - break; - } - case 1163: { - s += " roundToExp2 "; - break; - } - case 1164: { - s += " row_number "; - break; - } - case 1165: { - s += " rowNumberInAllBlocks "; - break; - } - case 1166: { - s += " rowNumberInBlock "; - break; - } - case 1167: { - s += " rpad "; - break; - } - case 1168: { - s += " RTRIM "; - break; - } - case 1169: { - s += " runningAccumulate "; - break; - } - case 1170: { - s += " runningConcurrency "; - break; - } - case 1171: { - s += " runningDifference "; - break; - } - case 1172: { - s += " runningDifferenceStartingWithFirstValue "; - break; - } - case 1173: { - s += " S "; - break; - } - case 1174: { - s += " s2CapContains "; - break; - } - case 1175: { - s += " s2CapUnion "; - break; - } - case 1176: { - s += " s2CellsIntersect "; - break; - } - case 1177: { - s += " s2GetNeighbors "; - break; - } - case 1178: { - s += " s2RectAdd "; - break; - } - case 1179: { - s += " s2RectContains "; - break; - } - case 1180: { - s += " s2RectIntersection "; - break; - } - case 1181: { - s += " s2RectUnion "; - break; - } - case 1182: { - s += " s2ToGeo "; - break; - } - case 1183: { - s += " SAMPLE "; - break; - } - case 1184: { - s += " SAMPLE BY "; - break; - } - case 1185: { - s += " SECOND "; - break; - } - case 1186: { - s += " SELECT "; - break; - } - case 1187: { - s += " SEMI "; - break; - } - case 1188: { - s += " SENDS "; - break; - } - case 1189: { - s += " sequenceCount "; - break; - } - case 1190: { - s += " sequenceMatch "; - break; - } - case 1191: { - s += " sequenceNextNode "; - break; - } - case 1192: { - s += " serverUUID "; - break; - } - case 1193: { - s += " SET "; - break; - } - case 1194: { - s += " SETTINGS "; - break; - } - case 1195: { - s += " SHA1 "; - break; - } - case 1196: { - s += " SHA224 "; - break; - } - case 1197: { - s += " SHA256 "; - break; - } - case 1198: { - s += " SHA384 "; - break; - } - case 1199: { - s += " SHA512 "; - break; - } - case 1200: { - s += " shardCount "; - break; - } - case 1201: { - s += " shardNum "; - break; - } - case 1202: { - s += " SHOW "; - break; - } - case 1203: { - s += " SHOW 
PROCESSLIST "; - break; - } - case 1204: { - s += " sigmoid "; - break; - } - case 1205: { - s += " sign "; - break; - } - case 1206: { - s += " SimpleAggregateFunction "; - break; - } - case 1207: { - s += " simpleJSONExtractBool "; - break; - } - case 1208: { - s += " simpleJSONExtractFloat "; - break; - } - case 1209: { - s += " simpleJSONExtractInt "; - break; - } - case 1210: { - s += " simpleJSONExtractRaw "; - break; - } - case 1211: { - s += " simpleJSONExtractString "; - break; - } - case 1212: { - s += " simpleJSONExtractUInt "; - break; - } - case 1213: { - s += " simpleJSONHas "; - break; - } - case 1214: { - s += " simpleLinearRegression "; - break; - } - case 1215: { - s += " sin "; - break; - } - case 1216: { - s += " SINGLE "; - break; - } - case 1217: { - s += " singleValueOrNull "; - break; - } - case 1218: { - s += " sinh "; - break; - } - case 1219: { - s += " sipHash128 "; - break; - } - case 1220: { - s += " sipHash64 "; - break; - } - case 1221: { - s += " skewPop "; - break; - } - case 1222: { - s += " skewSamp "; - break; - } - case 1223: { - s += " sleep "; - break; - } - case 1224: { - s += " sleepEachRow "; - break; - } - case 1225: { - s += " SMALLINT "; - break; - } - case 1226: { - s += " SMALLINT SIGNED "; - break; - } - case 1227: { - s += " SMALLINT UNSIGNED "; - break; - } - case 1228: { - s += " snowflakeToDateTime "; - break; - } - case 1229: { - s += " snowflakeToDateTime64 "; - break; - } - case 1230: { - s += " SOURCE "; - break; - } - case 1231: { - s += " sparkbar "; - break; - } - case 1232: { - s += " splitByChar "; - break; - } - case 1233: { - s += " splitByNonAlpha "; - break; - } - case 1234: { - s += " splitByRegexp "; - break; - } - case 1235: { - s += " splitByString "; - break; - } - case 1236: { - s += " splitByWhitespace "; - break; - } - case 1237: { - s += " SQL_TSI_DAY "; - break; - } - case 1238: { - s += " SQL_TSI_HOUR "; - break; - } - case 1239: { - s += " SQL_TSI_MINUTE "; - break; - } - case 1240: { - s += " SQL_TSI_MONTH "; - break; - } - case 1241: { - s += " SQL_TSI_QUARTER "; - break; - } - case 1242: { - s += " SQL_TSI_SECOND "; - break; - } - case 1243: { - s += " SQL_TSI_WEEK "; - break; - } - case 1244: { - s += " SQL_TSI_YEAR "; - break; - } - case 1245: { - s += " sqrt "; - break; - } - case 1246: { - s += " SS "; - break; - } - case 1247: { - s += " START "; - break; - } - case 1248: { - s += " startsWith "; - break; - } - case 1249: { - s += " State "; - break; - } - case 1250: { - s += " stddevPop "; - break; - } - case 1251: { - s += " STDDEV_POP "; - break; - } - case 1252: { - s += " stddevPopStable "; - break; - } - case 1253: { - s += " stddevSamp "; - break; - } - case 1254: { - s += " STDDEV_SAMP "; - break; - } - case 1255: { - s += " stddevSampStable "; - break; - } - case 1256: { - s += " stem "; - break; - } - case 1257: { - s += " STEP "; - break; - } - case 1258: { - s += " stochasticLinearRegression "; - break; - } - case 1259: { - s += " stochasticLogisticRegression "; - break; - } - case 1260: { - s += " STOP "; - break; - } - case 1261: { - s += " String "; - break; - } - case 1262: { - s += " stringToH3 "; - break; - } - case 1263: { - s += " studentTTest "; - break; - } - case 1264: { - s += " subBitmap "; - break; - } - case 1265: { - s += " substr "; - break; - } - case 1266: { - s += " substring "; - break; - } - case 1267: { - s += " SUBSTRING "; - break; - } - case 1268: { - s += " substringUTF8 "; - break; - } - case 1269: { - s += " subtractDays "; - break; - } - case 1270: { - s += " 
subtractHours "; - break; - } - case 1271: { - s += " subtractMinutes "; - break; - } - case 1272: { - s += " subtractMonths "; - break; - } - case 1273: { - s += " subtractQuarters "; - break; - } - case 1274: { - s += " subtractSeconds "; - break; - } - case 1275: { - s += " subtractWeeks "; - break; - } - case 1276: { - s += " subtractYears "; - break; - } - case 1277: { - s += " sum "; - break; - } - case 1278: { - s += " sumCount "; - break; - } - case 1279: { - s += " sumKahan "; - break; - } - case 1280: { - s += " sumMap "; - break; - } - case 1281: { - s += " sumMapFiltered "; - break; - } - case 1282: { - s += " sumMapFilteredWithOverflow "; - break; - } - case 1283: { - s += " sumMapWithOverflow "; - break; - } - case 1284: { - s += " sumWithOverflow "; - break; - } - case 1285: { - s += " SUSPEND "; - break; - } - case 1286: { - s += " svg "; - break; - } - case 1287: { - s += " SVG "; - break; - } - case 1288: { - s += " SYNC "; - break; - } - case 1289: { - s += " synonyms "; - break; - } - case 1290: { - s += " SYNTAX "; - break; - } - case 1291: { - s += " SYSTEM "; - break; - } - case 1292: { - s += " TABLE "; - break; - } - case 1293: { - s += " TABLES "; - break; - } - case 1294: { - s += " tan "; - break; - } - case 1295: { - s += " tanh "; - break; - } - case 1296: { - s += " tcpPort "; - break; - } - case 1297: { - s += " TEMPORARY "; - break; - } - case 1298: { - s += " TEST "; - break; - } - case 1299: { - s += " TEXT "; - break; - } - case 1300: { - s += " tgamma "; - break; - } - case 1301: { - s += " THEN "; - break; - } - case 1302: { - s += " throwIf "; - break; - } - case 1303: { - s += " tid "; - break; - } - case 1304: { - s += " TIES "; - break; - } - case 1305: { - s += " TIMEOUT "; - break; - } - case 1306: { - s += " timeSlot "; - break; - } - case 1307: { - s += " timeSlots "; - break; - } - case 1308: { - s += " TIMESTAMP "; - break; - } - case 1309: { - s += " TIMESTAMP_ADD "; - break; - } - case 1310: { - s += " TIMESTAMPADD "; - break; - } - case 1311: { - s += " TIMESTAMP_DIFF "; - break; - } - case 1312: { - s += " TIMESTAMPDIFF "; - break; - } - case 1313: { - s += " TIMESTAMP_SUB "; - break; - } - case 1314: { - s += " TIMESTAMPSUB "; - break; - } - case 1315: { - s += " timezone "; - break; - } - case 1316: { - s += " timeZone "; - break; - } - case 1317: { - s += " timezoneOf "; - break; - } - case 1318: { - s += " timeZoneOf "; - break; - } - case 1319: { - s += " timezoneOffset "; - break; - } - case 1320: { - s += " timeZoneOffset "; - break; - } - case 1321: { - s += " TINYBLOB "; - break; - } - case 1322: { - s += " TINYINT "; - break; - } - case 1323: { - s += " TINYINT SIGNED "; - break; - } - case 1324: { - s += " TINYINT UNSIGNED "; - break; - } - case 1325: { - s += " TINYTEXT "; - break; - } - case 1326: { - s += " TO "; - break; - } - case 1327: { - s += " TO_BASE64 "; - break; - } - case 1328: { - s += " toColumnTypeName "; - break; - } - case 1329: { - s += " toDate "; - break; - } - case 1330: { - s += " toDate32 "; - break; - } - case 1331: { - s += " toDate32OrNull "; - break; - } - case 1332: { - s += " toDate32OrZero "; - break; - } - case 1333: { - s += " toDateOrNull "; - break; - } - case 1334: { - s += " toDateOrZero "; - break; - } - case 1335: { - s += " toDateTime "; - break; - } - case 1336: { - s += " toDateTime32 "; - break; - } - case 1337: { - s += " toDateTime64 "; - break; - } - case 1338: { - s += " toDateTime64OrNull "; - break; - } - case 1339: { - s += " toDateTime64OrZero "; - break; - } - case 1340: { - s 
+= " toDateTimeOrNull "; - break; - } - case 1341: { - s += " toDateTimeOrZero "; - break; - } - case 1342: { - s += " today "; - break; - } - case 1343: { - s += " toDayOfMonth "; - break; - } - case 1344: { - s += " toDayOfWeek "; - break; - } - case 1345: { - s += " toDayOfYear "; - break; - } - case 1346: { - s += " toDecimal128 "; - break; - } - case 1347: { - s += " toDecimal128OrNull "; - break; - } - case 1348: { - s += " toDecimal128OrZero "; - break; - } - case 1349: { - s += " toDecimal256 "; - break; - } - case 1350: { - s += " toDecimal256OrNull "; - break; - } - case 1351: { - s += " toDecimal256OrZero "; - break; - } - case 1352: { - s += " toDecimal32 "; - break; - } - case 1353: { - s += " toDecimal32OrNull "; - break; - } - case 1354: { - s += " toDecimal32OrZero "; - break; - } - case 1355: { - s += " toDecimal64 "; - break; - } - case 1356: { - s += " toDecimal64OrNull "; - break; - } - case 1357: { - s += " toDecimal64OrZero "; - break; - } - case 1358: { - s += " TO DISK "; - break; - } - case 1359: { - s += " toFixedString "; - break; - } - case 1360: { - s += " toFloat32 "; - break; - } - case 1361: { - s += " toFloat32OrNull "; - break; - } - case 1362: { - s += " toFloat32OrZero "; - break; - } - case 1363: { - s += " toFloat64 "; - break; - } - case 1364: { - s += " toFloat64OrNull "; - break; - } - case 1365: { - s += " toFloat64OrZero "; - break; - } - case 1366: { - s += " toHour "; - break; - } - case 1367: { - s += " toInt128 "; - break; - } - case 1368: { - s += " toInt128OrNull "; - break; - } - case 1369: { - s += " toInt128OrZero "; - break; - } - case 1370: { - s += " toInt16 "; - break; - } - case 1371: { - s += " toInt16OrNull "; - break; - } - case 1372: { - s += " toInt16OrZero "; - break; - } - case 1373: { - s += " toInt256 "; - break; - } - case 1374: { - s += " toInt256OrNull "; - break; - } - case 1375: { - s += " toInt256OrZero "; - break; - } - case 1376: { - s += " toInt32 "; - break; - } - case 1377: { - s += " toInt32OrNull "; - break; - } - case 1378: { - s += " toInt32OrZero "; - break; - } - case 1379: { - s += " toInt64 "; - break; - } - case 1380: { - s += " toInt64OrNull "; - break; - } - case 1381: { - s += " toInt64OrZero "; - break; - } - case 1382: { - s += " toInt8 "; - break; - } - case 1383: { - s += " toInt8OrNull "; - break; - } - case 1384: { - s += " toInt8OrZero "; - break; - } - case 1385: { - s += " toIntervalDay "; - break; - } - case 1386: { - s += " toIntervalHour "; - break; - } - case 1387: { - s += " toIntervalMinute "; - break; - } - case 1388: { - s += " toIntervalMonth "; - break; - } - case 1389: { - s += " toIntervalQuarter "; - break; - } - case 1390: { - s += " toIntervalSecond "; - break; - } - case 1391: { - s += " toIntervalWeek "; - break; - } - case 1392: { - s += " toIntervalYear "; - break; - } - case 1393: { - s += " toIPv4 "; - break; - } - case 1394: { - s += " toIPv6 "; - break; - } - case 1395: { - s += " toISOWeek "; - break; - } - case 1396: { - s += " toISOYear "; - break; - } - case 1397: { - s += " toJSONString "; - break; - } - case 1398: { - s += " toLowCardinality "; - break; - } - case 1399: { - s += " toMinute "; - break; - } - case 1400: { - s += " toModifiedJulianDay "; - break; - } - case 1401: { - s += " toModifiedJulianDayOrNull "; - break; - } - case 1402: { - s += " toMonday "; - break; - } - case 1403: { - s += " toMonth "; - break; - } - case 1404: { - s += " toNullable "; - break; - } - case 1405: { - s += " TOP "; - break; - } - case 1406: { - s += " topK "; - break; - } - 
case 1407: { - s += " topKWeighted "; - break; - } - case 1408: { - s += " topLevelDomain "; - break; - } - case 1409: { - s += " toQuarter "; - break; - } - case 1410: { - s += " toRelativeDayNum "; - break; - } - case 1411: { - s += " toRelativeHourNum "; - break; - } - case 1412: { - s += " toRelativeMinuteNum "; - break; - } - case 1413: { - s += " toRelativeMonthNum "; - break; - } - case 1414: { - s += " toRelativeQuarterNum "; - break; - } - case 1415: { - s += " toRelativeSecondNum "; - break; - } - case 1416: { - s += " toRelativeWeekNum "; - break; - } - case 1417: { - s += " toRelativeYearNum "; - break; - } - case 1418: { - s += " toSecond "; - break; - } - case 1419: { - s += " toStartOfDay "; - break; - } - case 1420: { - s += " toStartOfFifteenMinutes "; - break; - } - case 1421: { - s += " toStartOfFiveMinutes "; - break; - } - case 1422: { - s += " toStartOfHour "; - break; - } - case 1423: { - s += " toStartOfInterval "; - break; - } - case 1424: { - s += " toStartOfISOYear "; - break; - } - case 1425: { - s += " toStartOfMinute "; - break; - } - case 1426: { - s += " toStartOfMonth "; - break; - } - case 1427: { - s += " toStartOfQuarter "; - break; - } - case 1428: { - s += " toStartOfSecond "; - break; - } - case 1429: { - s += " toStartOfTenMinutes "; - break; - } - case 1430: { - s += " toStartOfWeek "; - break; - } - case 1431: { - s += " toStartOfYear "; - break; - } - case 1432: { - s += " toString "; - break; - } - case 1433: { - s += " toStringCutToZero "; - break; - } - case 1434: { - s += " TO TABLE "; - break; - } - case 1435: { - s += " TOTALS "; - break; - } - case 1436: { - s += " toTime "; - break; - } - case 1437: { - s += " toTimezone "; - break; - } - case 1438: { - s += " toTimeZone "; - break; - } - case 1439: { - s += " toTypeName "; - break; - } - case 1440: { - s += " toUInt128 "; - break; - } - case 1441: { - s += " toUInt128OrNull "; - break; - } - case 1442: { - s += " toUInt128OrZero "; - break; - } - case 1443: { - s += " toUInt16 "; - break; - } - case 1444: { - s += " toUInt16OrNull "; - break; - } - case 1445: { - s += " toUInt16OrZero "; - break; - } - case 1446: { - s += " toUInt256 "; - break; - } - case 1447: { - s += " toUInt256OrNull "; - break; - } - case 1448: { - s += " toUInt256OrZero "; - break; - } - case 1449: { - s += " toUInt32 "; - break; - } - case 1450: { - s += " toUInt32OrNull "; - break; - } - case 1451: { - s += " toUInt32OrZero "; - break; - } - case 1452: { - s += " toUInt64 "; - break; - } - case 1453: { - s += " toUInt64OrNull "; - break; - } - case 1454: { - s += " toUInt64OrZero "; - break; - } - case 1455: { - s += " toUInt8 "; - break; - } - case 1456: { - s += " toUInt8OrNull "; - break; - } - case 1457: { - s += " toUInt8OrZero "; - break; - } - case 1458: { - s += " toUnixTimestamp "; - break; - } - case 1459: { - s += " toUnixTimestamp64Micro "; - break; - } - case 1460: { - s += " toUnixTimestamp64Milli "; - break; - } - case 1461: { - s += " toUnixTimestamp64Nano "; - break; - } - case 1462: { - s += " toUUID "; - break; - } - case 1463: { - s += " toUUIDOrNull "; - break; - } - case 1464: { - s += " toUUIDOrZero "; - break; - } - case 1465: { - s += " toValidUTF8 "; - break; - } - case 1466: { - s += " TO VOLUME "; - break; - } - case 1467: { - s += " toWeek "; - break; - } - case 1468: { - s += " toYear "; - break; - } - case 1469: { - s += " toYearWeek "; - break; - } - case 1470: { - s += " toYYYYMM "; - break; - } - case 1471: { - s += " toYYYYMMDD "; - break; - } - case 1472: { - s += " 
toYYYYMMDDhhmmss "; - break; - } - case 1473: { - s += " TRAILING "; - break; - } - case 1474: { - s += " transform "; - break; - } - case 1475: { - s += " TRIM "; - break; - } - case 1476: { - s += " trimBoth "; - break; - } - case 1477: { - s += " trimLeft "; - break; - } - case 1478: { - s += " trimRight "; - break; - } - case 1479: { - s += " trunc "; - break; - } - case 1480: { - s += " truncate "; - break; - } - case 1481: { - s += " TRUNCATE "; - break; - } - case 1482: { - s += " tryBase64Decode "; - break; - } - case 1483: { - s += " TTL "; - break; - } - case 1484: { - s += " tuple "; - break; - } - case 1485: { - s += " Tuple "; - break; - } - case 1486: { - s += " tupleElement "; - break; - } - case 1487: { - s += " tupleHammingDistance "; - break; - } - case 1488: { - s += " tupleToNameValuePairs "; - break; - } - case 1489: { - s += " TYPE "; - break; - } - case 1490: { - s += " ucase "; - break; - } - case 1491: { - s += " UInt128 "; - break; - } - case 1492: { - s += " UInt16 "; - break; - } - case 1493: { - s += " UInt256 "; - break; - } - case 1494: { - s += " UInt32 "; - break; - } - case 1495: { - s += " UInt64 "; - break; - } - case 1496: { - s += " UInt8 "; - break; - } - case 1497: { - s += " unbin "; - break; - } - case 1498: { - s += " unhex "; - break; - } - case 1499: { - s += " UNION "; - break; - } - case 1500: { - s += " uniq "; - break; - } - case 1501: { - s += " uniqCombined "; - break; - } - case 1502: { - s += " uniqCombined64 "; - break; - } - case 1503: { - s += " uniqExact "; - break; - } - case 1504: { - s += " uniqHLL12 "; - break; - } - case 1505: { - s += " uniqTheta "; - break; - } - case 1506: { - s += " uniqUpTo "; - break; - } - case 1507: { - s += " UPDATE "; - break; - } - case 1508: { - s += " upper "; - break; - } - case 1509: { - s += " upperUTF8 "; - break; - } - case 1510: { - s += " uptime "; - break; - } - case 1511: { - s += " URLHash "; - break; - } - case 1512: { - s += " URLHierarchy "; - break; - } - case 1513: { - s += " URLPathHierarchy "; - break; - } - case 1514: { - s += " USE "; - break; - } - case 1515: { - s += " user "; - break; - } - case 1516: { - s += " USING "; - break; - } - case 1517: { - s += " UUID "; - break; - } - case 1518: { - s += " UUIDNumToString "; - break; - } - case 1519: { - s += " UUIDStringToNum "; - break; - } - case 1520: { - s += " validateNestedArraySizes "; - break; - } - case 1521: { - s += " VALUES "; - break; - } - case 1522: { - s += " VARCHAR "; - break; - } - case 1523: { - s += " VARCHAR2 "; - break; - } - case 1524: { - s += " varPop "; - break; - } - case 1525: { - s += " VAR_POP "; - break; - } - case 1526: { - s += " varPopStable "; - break; - } - case 1527: { - s += " varSamp "; - break; - } - case 1528: { - s += " VAR_SAMP "; - break; - } - case 1529: { - s += " varSampStable "; - break; - } - case 1530: { - s += " version "; - break; - } - case 1531: { - s += " VIEW "; - break; - } - case 1532: { - s += " visibleWidth "; - break; - } - case 1533: { - s += " visitParamExtractBool "; - break; - } - case 1534: { - s += " visitParamExtractFloat "; - break; - } - case 1535: { - s += " visitParamExtractInt "; - break; - } - case 1536: { - s += " visitParamExtractRaw "; - break; - } - case 1537: { - s += " visitParamExtractString "; - break; - } - case 1538: { - s += " visitParamExtractUInt "; - break; - } - case 1539: { - s += " visitParamHas "; - break; - } - case 1540: { - s += " VOLUME "; - break; - } - case 1541: { - s += " WATCH "; - break; - } - case 1542: { - s += " week "; - 
break; - } - case 1543: { - s += " WEEK "; - break; - } - case 1544: { - s += " welchTTest "; - break; - } - case 1545: { - s += " WHEN "; - break; - } - case 1546: { - s += " WHERE "; - break; - } - case 1547: { - s += " windowFunnel "; - break; - } - case 1548: { - s += " WITH "; - break; - } - case 1549: { - s += " WITH FILL "; - break; - } - case 1550: { - s += " WITH TIES "; - break; - } - case 1551: { - s += " WK "; - break; - } - case 1552: { - s += " wkt "; - break; - } - case 1553: { - s += " wordShingleMinHash "; - break; - } - case 1554: { - s += " wordShingleMinHashArg "; - break; - } - case 1555: { - s += " wordShingleMinHashArgCaseInsensitive "; - break; - } - case 1556: { - s += " wordShingleMinHashArgCaseInsensitiveUTF8 "; - break; - } - case 1557: { - s += " wordShingleMinHashArgUTF8 "; - break; - } - case 1558: { - s += " wordShingleMinHashCaseInsensitive "; - break; - } - case 1559: { - s += " wordShingleMinHashCaseInsensitiveUTF8 "; - break; - } - case 1560: { - s += " wordShingleMinHashUTF8 "; - break; - } - case 1561: { - s += " wordShingleSimHash "; - break; - } - case 1562: { - s += " wordShingleSimHashCaseInsensitive "; - break; - } - case 1563: { - s += " wordShingleSimHashCaseInsensitiveUTF8 "; - break; - } - case 1564: { - s += " wordShingleSimHashUTF8 "; - break; - } - case 1565: { - s += " WW "; - break; - } - case 1566: { - s += " xor "; - break; - } - case 1567: { - s += " xxHash32 "; - break; - } - case 1568: { - s += " xxHash64 "; - break; - } - case 1569: { - s += " kostikConsistentHash "; - break; - } - case 1570: { - s += " YEAR "; - break; - } - case 1571: { - s += " yearweek "; - break; - } - case 1572: { - s += " yesterday "; - break; - } - case 1573: { - s += " YY "; - break; - } - case 1574: { - s += " YYYY "; - break; - } - case 1575: { - s += " zookeeperSessionUptime "; - break; - } - default: break; - } -} diff --git a/src/Parsers/fuzzers/codegen_fuzzer/out.proto b/src/Parsers/fuzzers/codegen_fuzzer/out.proto deleted file mode 100644 index 60992ca6a81..00000000000 --- a/src/Parsers/fuzzers/codegen_fuzzer/out.proto +++ /dev/null @@ -1,1587 +0,0 @@ -syntax = "proto3"; - -message Word { - enum Value { - value_0 = 0; - value_1 = 1; - value_2 = 2; - value_3 = 3; - value_4 = 4; - value_5 = 5; - value_6 = 6; - value_7 = 7; - value_8 = 8; - value_9 = 9; - value_10 = 10; - value_11 = 11; - value_12 = 12; - value_13 = 13; - value_14 = 14; - value_15 = 15; - value_16 = 16; - value_17 = 17; - value_18 = 18; - value_19 = 19; - value_20 = 20; - value_21 = 21; - value_22 = 22; - value_23 = 23; - value_24 = 24; - value_25 = 25; - value_26 = 26; - value_27 = 27; - value_28 = 28; - value_29 = 29; - value_30 = 30; - value_31 = 31; - value_32 = 32; - value_33 = 33; - value_34 = 34; - value_35 = 35; - value_36 = 36; - value_37 = 37; - value_38 = 38; - value_39 = 39; - value_40 = 40; - value_41 = 41; - value_42 = 42; - value_43 = 43; - value_44 = 44; - value_45 = 45; - value_46 = 46; - value_47 = 47; - value_48 = 48; - value_49 = 49; - value_50 = 50; - value_51 = 51; - value_52 = 52; - value_53 = 53; - value_54 = 54; - value_55 = 55; - value_56 = 56; - value_57 = 57; - value_58 = 58; - value_59 = 59; - value_60 = 60; - value_61 = 61; - value_62 = 62; - value_63 = 63; - value_64 = 64; - value_65 = 65; - value_66 = 66; - value_67 = 67; - value_68 = 68; - value_69 = 69; - value_70 = 70; - value_71 = 71; - value_72 = 72; - value_73 = 73; - value_74 = 74; - value_75 = 75; - value_76 = 76; - value_77 = 77; - value_78 = 78; - value_79 = 79; - value_80 = 80; - value_81 = 81; 
- value_82 = 82; - value_83 = 83; - value_84 = 84; - value_85 = 85; - value_86 = 86; - value_87 = 87; - value_88 = 88; - value_89 = 89; - value_90 = 90; - value_91 = 91; - value_92 = 92; - value_93 = 93; - value_94 = 94; - value_95 = 95; - value_96 = 96; - value_97 = 97; - value_98 = 98; - value_99 = 99; - value_100 = 100; - value_101 = 101; - value_102 = 102; - value_103 = 103; - value_104 = 104; - value_105 = 105; - value_106 = 106; - value_107 = 107; - value_108 = 108; - value_109 = 109; - value_110 = 110; - value_111 = 111; - value_112 = 112; - value_113 = 113; - value_114 = 114; - value_115 = 115; - value_116 = 116; - value_117 = 117; - value_118 = 118; - value_119 = 119; - value_120 = 120; - value_121 = 121; - value_122 = 122; - value_123 = 123; - value_124 = 124; - value_125 = 125; - value_126 = 126; - value_127 = 127; - value_128 = 128; - value_129 = 129; - value_130 = 130; - value_131 = 131; - value_132 = 132; - value_133 = 133; - value_134 = 134; - value_135 = 135; - value_136 = 136; - value_137 = 137; - value_138 = 138; - value_139 = 139; - value_140 = 140; - value_141 = 141; - value_142 = 142; - value_143 = 143; - value_144 = 144; - value_145 = 145; - value_146 = 146; - value_147 = 147; - value_148 = 148; - value_149 = 149; - value_150 = 150; - value_151 = 151; - value_152 = 152; - value_153 = 153; - value_154 = 154; - value_155 = 155; - value_156 = 156; - value_157 = 157; - value_158 = 158; - value_159 = 159; - value_160 = 160; - value_161 = 161; - value_162 = 162; - value_163 = 163; - value_164 = 164; - value_165 = 165; - value_166 = 166; - value_167 = 167; - value_168 = 168; - value_169 = 169; - value_170 = 170; - value_171 = 171; - value_172 = 172; - value_173 = 173; - value_174 = 174; - value_175 = 175; - value_176 = 176; - value_177 = 177; - value_178 = 178; - value_179 = 179; - value_180 = 180; - value_181 = 181; - value_182 = 182; - value_183 = 183; - value_184 = 184; - value_185 = 185; - value_186 = 186; - value_187 = 187; - value_188 = 188; - value_189 = 189; - value_190 = 190; - value_191 = 191; - value_192 = 192; - value_193 = 193; - value_194 = 194; - value_195 = 195; - value_196 = 196; - value_197 = 197; - value_198 = 198; - value_199 = 199; - value_200 = 200; - value_201 = 201; - value_202 = 202; - value_203 = 203; - value_204 = 204; - value_205 = 205; - value_206 = 206; - value_207 = 207; - value_208 = 208; - value_209 = 209; - value_210 = 210; - value_211 = 211; - value_212 = 212; - value_213 = 213; - value_214 = 214; - value_215 = 215; - value_216 = 216; - value_217 = 217; - value_218 = 218; - value_219 = 219; - value_220 = 220; - value_221 = 221; - value_222 = 222; - value_223 = 223; - value_224 = 224; - value_225 = 225; - value_226 = 226; - value_227 = 227; - value_228 = 228; - value_229 = 229; - value_230 = 230; - value_231 = 231; - value_232 = 232; - value_233 = 233; - value_234 = 234; - value_235 = 235; - value_236 = 236; - value_237 = 237; - value_238 = 238; - value_239 = 239; - value_240 = 240; - value_241 = 241; - value_242 = 242; - value_243 = 243; - value_244 = 244; - value_245 = 245; - value_246 = 246; - value_247 = 247; - value_248 = 248; - value_249 = 249; - value_250 = 250; - value_251 = 251; - value_252 = 252; - value_253 = 253; - value_254 = 254; - value_255 = 255; - value_256 = 256; - value_257 = 257; - value_258 = 258; - value_259 = 259; - value_260 = 260; - value_261 = 261; - value_262 = 262; - value_263 = 263; - value_264 = 264; - value_265 = 265; - value_266 = 266; - value_267 = 267; - value_268 = 268; - value_269 = 269; - value_270 = 270; 
- value_271 = 271; - value_272 = 272; - value_273 = 273; - value_274 = 274; - value_275 = 275; - value_276 = 276; - value_277 = 277; - value_278 = 278; - value_279 = 279; - value_280 = 280; - value_281 = 281; - value_282 = 282; - value_283 = 283; - value_284 = 284; - value_285 = 285; - value_286 = 286; - value_287 = 287; - value_288 = 288; - value_289 = 289; - value_290 = 290; - value_291 = 291; - value_292 = 292; - value_293 = 293; - value_294 = 294; - value_295 = 295; - value_296 = 296; - value_297 = 297; - value_298 = 298; - value_299 = 299; - value_300 = 300; - value_301 = 301; - value_302 = 302; - value_303 = 303; - value_304 = 304; - value_305 = 305; - value_306 = 306; - value_307 = 307; - value_308 = 308; - value_309 = 309; - value_310 = 310; - value_311 = 311; - value_312 = 312; - value_313 = 313; - value_314 = 314; - value_315 = 315; - value_316 = 316; - value_317 = 317; - value_318 = 318; - value_319 = 319; - value_320 = 320; - value_321 = 321; - value_322 = 322; - value_323 = 323; - value_324 = 324; - value_325 = 325; - value_326 = 326; - value_327 = 327; - value_328 = 328; - value_329 = 329; - value_330 = 330; - value_331 = 331; - value_332 = 332; - value_333 = 333; - value_334 = 334; - value_335 = 335; - value_336 = 336; - value_337 = 337; - value_338 = 338; - value_339 = 339; - value_340 = 340; - value_341 = 341; - value_342 = 342; - value_343 = 343; - value_344 = 344; - value_345 = 345; - value_346 = 346; - value_347 = 347; - value_348 = 348; - value_349 = 349; - value_350 = 350; - value_351 = 351; - value_352 = 352; - value_353 = 353; - value_354 = 354; - value_355 = 355; - value_356 = 356; - value_357 = 357; - value_358 = 358; - value_359 = 359; - value_360 = 360; - value_361 = 361; - value_362 = 362; - value_363 = 363; - value_364 = 364; - value_365 = 365; - value_366 = 366; - value_367 = 367; - value_368 = 368; - value_369 = 369; - value_370 = 370; - value_371 = 371; - value_372 = 372; - value_373 = 373; - value_374 = 374; - value_375 = 375; - value_376 = 376; - value_377 = 377; - value_378 = 378; - value_379 = 379; - value_380 = 380; - value_381 = 381; - value_382 = 382; - value_383 = 383; - value_384 = 384; - value_385 = 385; - value_386 = 386; - value_387 = 387; - value_388 = 388; - value_389 = 389; - value_390 = 390; - value_391 = 391; - value_392 = 392; - value_393 = 393; - value_394 = 394; - value_395 = 395; - value_396 = 396; - value_397 = 397; - value_398 = 398; - value_399 = 399; - value_400 = 400; - value_401 = 401; - value_402 = 402; - value_403 = 403; - value_404 = 404; - value_405 = 405; - value_406 = 406; - value_407 = 407; - value_408 = 408; - value_409 = 409; - value_410 = 410; - value_411 = 411; - value_412 = 412; - value_413 = 413; - value_414 = 414; - value_415 = 415; - value_416 = 416; - value_417 = 417; - value_418 = 418; - value_419 = 419; - value_420 = 420; - value_421 = 421; - value_422 = 422; - value_423 = 423; - value_424 = 424; - value_425 = 425; - value_426 = 426; - value_427 = 427; - value_428 = 428; - value_429 = 429; - value_430 = 430; - value_431 = 431; - value_432 = 432; - value_433 = 433; - value_434 = 434; - value_435 = 435; - value_436 = 436; - value_437 = 437; - value_438 = 438; - value_439 = 439; - value_440 = 440; - value_441 = 441; - value_442 = 442; - value_443 = 443; - value_444 = 444; - value_445 = 445; - value_446 = 446; - value_447 = 447; - value_448 = 448; - value_449 = 449; - value_450 = 450; - value_451 = 451; - value_452 = 452; - value_453 = 453; - value_454 = 454; - value_455 = 455; - value_456 = 456; - value_457 = 457; - 
value_458 = 458; - value_459 = 459; - value_460 = 460; - value_461 = 461; - value_462 = 462; - value_463 = 463; - value_464 = 464; - value_465 = 465; - value_466 = 466; - value_467 = 467; - value_468 = 468; - value_469 = 469; - value_470 = 470; - value_471 = 471; - value_472 = 472; - value_473 = 473; - value_474 = 474; - value_475 = 475; - value_476 = 476; - value_477 = 477; - value_478 = 478; - value_479 = 479; - value_480 = 480; - value_481 = 481; - value_482 = 482; - value_483 = 483; - value_484 = 484; - value_485 = 485; - value_486 = 486; - value_487 = 487; - value_488 = 488; - value_489 = 489; - value_490 = 490; - value_491 = 491; - value_492 = 492; - value_493 = 493; - value_494 = 494; - value_495 = 495; - value_496 = 496; - value_497 = 497; - value_498 = 498; - value_499 = 499; - value_500 = 500; - value_501 = 501; - value_502 = 502; - value_503 = 503; - value_504 = 504; - value_505 = 505; - value_506 = 506; - value_507 = 507; - value_508 = 508; - value_509 = 509; - value_510 = 510; - value_511 = 511; - value_512 = 512; - value_513 = 513; - value_514 = 514; - value_515 = 515; - value_516 = 516; - value_517 = 517; - value_518 = 518; - value_519 = 519; - value_520 = 520; - value_521 = 521; - value_522 = 522; - value_523 = 523; - value_524 = 524; - value_525 = 525; - value_526 = 526; - value_527 = 527; - value_528 = 528; - value_529 = 529; - value_530 = 530; - value_531 = 531; - value_532 = 532; - value_533 = 533; - value_534 = 534; - value_535 = 535; - value_536 = 536; - value_537 = 537; - value_538 = 538; - value_539 = 539; - value_540 = 540; - value_541 = 541; - value_542 = 542; - value_543 = 543; - value_544 = 544; - value_545 = 545; - value_546 = 546; - value_547 = 547; - value_548 = 548; - value_549 = 549; - value_550 = 550; - value_551 = 551; - value_552 = 552; - value_553 = 553; - value_554 = 554; - value_555 = 555; - value_556 = 556; - value_557 = 557; - value_558 = 558; - value_559 = 559; - value_560 = 560; - value_561 = 561; - value_562 = 562; - value_563 = 563; - value_564 = 564; - value_565 = 565; - value_566 = 566; - value_567 = 567; - value_568 = 568; - value_569 = 569; - value_570 = 570; - value_571 = 571; - value_572 = 572; - value_573 = 573; - value_574 = 574; - value_575 = 575; - value_576 = 576; - value_577 = 577; - value_578 = 578; - value_579 = 579; - value_580 = 580; - value_581 = 581; - value_582 = 582; - value_583 = 583; - value_584 = 584; - value_585 = 585; - value_586 = 586; - value_587 = 587; - value_588 = 588; - value_589 = 589; - value_590 = 590; - value_591 = 591; - value_592 = 592; - value_593 = 593; - value_594 = 594; - value_595 = 595; - value_596 = 596; - value_597 = 597; - value_598 = 598; - value_599 = 599; - value_600 = 600; - value_601 = 601; - value_602 = 602; - value_603 = 603; - value_604 = 604; - value_605 = 605; - value_606 = 606; - value_607 = 607; - value_608 = 608; - value_609 = 609; - value_610 = 610; - value_611 = 611; - value_612 = 612; - value_613 = 613; - value_614 = 614; - value_615 = 615; - value_616 = 616; - value_617 = 617; - value_618 = 618; - value_619 = 619; - value_620 = 620; - value_621 = 621; - value_622 = 622; - value_623 = 623; - value_624 = 624; - value_625 = 625; - value_626 = 626; - value_627 = 627; - value_628 = 628; - value_629 = 629; - value_630 = 630; - value_631 = 631; - value_632 = 632; - value_633 = 633; - value_634 = 634; - value_635 = 635; - value_636 = 636; - value_637 = 637; - value_638 = 638; - value_639 = 639; - value_640 = 640; - value_641 = 641; - value_642 = 642; - value_643 = 643; - value_644 = 644; - 
value_645 = 645; - value_646 = 646; - value_647 = 647; - value_648 = 648; - value_649 = 649; - value_650 = 650; - value_651 = 651; - value_652 = 652; - value_653 = 653; - value_654 = 654; - value_655 = 655; - value_656 = 656; - value_657 = 657; - value_658 = 658; - value_659 = 659; - value_660 = 660; - value_661 = 661; - value_662 = 662; - value_663 = 663; - value_664 = 664; - value_665 = 665; - value_666 = 666; - value_667 = 667; - value_668 = 668; - value_669 = 669; - value_670 = 670; - value_671 = 671; - value_672 = 672; - value_673 = 673; - value_674 = 674; - value_675 = 675; - value_676 = 676; - value_677 = 677; - value_678 = 678; - value_679 = 679; - value_680 = 680; - value_681 = 681; - value_682 = 682; - value_683 = 683; - value_684 = 684; - value_685 = 685; - value_686 = 686; - value_687 = 687; - value_688 = 688; - value_689 = 689; - value_690 = 690; - value_691 = 691; - value_692 = 692; - value_693 = 693; - value_694 = 694; - value_695 = 695; - value_696 = 696; - value_697 = 697; - value_698 = 698; - value_699 = 699; - value_700 = 700; - value_701 = 701; - value_702 = 702; - value_703 = 703; - value_704 = 704; - value_705 = 705; - value_706 = 706; - value_707 = 707; - value_708 = 708; - value_709 = 709; - value_710 = 710; - value_711 = 711; - value_712 = 712; - value_713 = 713; - value_714 = 714; - value_715 = 715; - value_716 = 716; - value_717 = 717; - value_718 = 718; - value_719 = 719; - value_720 = 720; - value_721 = 721; - value_722 = 722; - value_723 = 723; - value_724 = 724; - value_725 = 725; - value_726 = 726; - value_727 = 727; - value_728 = 728; - value_729 = 729; - value_730 = 730; - value_731 = 731; - value_732 = 732; - value_733 = 733; - value_734 = 734; - value_735 = 735; - value_736 = 736; - value_737 = 737; - value_738 = 738; - value_739 = 739; - value_740 = 740; - value_741 = 741; - value_742 = 742; - value_743 = 743; - value_744 = 744; - value_745 = 745; - value_746 = 746; - value_747 = 747; - value_748 = 748; - value_749 = 749; - value_750 = 750; - value_751 = 751; - value_752 = 752; - value_753 = 753; - value_754 = 754; - value_755 = 755; - value_756 = 756; - value_757 = 757; - value_758 = 758; - value_759 = 759; - value_760 = 760; - value_761 = 761; - value_762 = 762; - value_763 = 763; - value_764 = 764; - value_765 = 765; - value_766 = 766; - value_767 = 767; - value_768 = 768; - value_769 = 769; - value_770 = 770; - value_771 = 771; - value_772 = 772; - value_773 = 773; - value_774 = 774; - value_775 = 775; - value_776 = 776; - value_777 = 777; - value_778 = 778; - value_779 = 779; - value_780 = 780; - value_781 = 781; - value_782 = 782; - value_783 = 783; - value_784 = 784; - value_785 = 785; - value_786 = 786; - value_787 = 787; - value_788 = 788; - value_789 = 789; - value_790 = 790; - value_791 = 791; - value_792 = 792; - value_793 = 793; - value_794 = 794; - value_795 = 795; - value_796 = 796; - value_797 = 797; - value_798 = 798; - value_799 = 799; - value_800 = 800; - value_801 = 801; - value_802 = 802; - value_803 = 803; - value_804 = 804; - value_805 = 805; - value_806 = 806; - value_807 = 807; - value_808 = 808; - value_809 = 809; - value_810 = 810; - value_811 = 811; - value_812 = 812; - value_813 = 813; - value_814 = 814; - value_815 = 815; - value_816 = 816; - value_817 = 817; - value_818 = 818; - value_819 = 819; - value_820 = 820; - value_821 = 821; - value_822 = 822; - value_823 = 823; - value_824 = 824; - value_825 = 825; - value_826 = 826; - value_827 = 827; - value_828 = 828; - value_829 = 829; - value_830 = 830; - value_831 = 831; - 
value_832 = 832; - value_833 = 833; - value_834 = 834; - value_835 = 835; - value_836 = 836; - value_837 = 837; - value_838 = 838; - value_839 = 839; - value_840 = 840; - value_841 = 841; - value_842 = 842; - value_843 = 843; - value_844 = 844; - value_845 = 845; - value_846 = 846; - value_847 = 847; - value_848 = 848; - value_849 = 849; - value_850 = 850; - value_851 = 851; - value_852 = 852; - value_853 = 853; - value_854 = 854; - value_855 = 855; - value_856 = 856; - value_857 = 857; - value_858 = 858; - value_859 = 859; - value_860 = 860; - value_861 = 861; - value_862 = 862; - value_863 = 863; - value_864 = 864; - value_865 = 865; - value_866 = 866; - value_867 = 867; - value_868 = 868; - value_869 = 869; - value_870 = 870; - value_871 = 871; - value_872 = 872; - value_873 = 873; - value_874 = 874; - value_875 = 875; - value_876 = 876; - value_877 = 877; - value_878 = 878; - value_879 = 879; - value_880 = 880; - value_881 = 881; - value_882 = 882; - value_883 = 883; - value_884 = 884; - value_885 = 885; - value_886 = 886; - value_887 = 887; - value_888 = 888; - value_889 = 889; - value_890 = 890; - value_891 = 891; - value_892 = 892; - value_893 = 893; - value_894 = 894; - value_895 = 895; - value_896 = 896; - value_897 = 897; - value_898 = 898; - value_899 = 899; - value_900 = 900; - value_901 = 901; - value_902 = 902; - value_903 = 903; - value_904 = 904; - value_905 = 905; - value_906 = 906; - value_907 = 907; - value_908 = 908; - value_909 = 909; - value_910 = 910; - value_911 = 911; - value_912 = 912; - value_913 = 913; - value_914 = 914; - value_915 = 915; - value_916 = 916; - value_917 = 917; - value_918 = 918; - value_919 = 919; - value_920 = 920; - value_921 = 921; - value_922 = 922; - value_923 = 923; - value_924 = 924; - value_925 = 925; - value_926 = 926; - value_927 = 927; - value_928 = 928; - value_929 = 929; - value_930 = 930; - value_931 = 931; - value_932 = 932; - value_933 = 933; - value_934 = 934; - value_935 = 935; - value_936 = 936; - value_937 = 937; - value_938 = 938; - value_939 = 939; - value_940 = 940; - value_941 = 941; - value_942 = 942; - value_943 = 943; - value_944 = 944; - value_945 = 945; - value_946 = 946; - value_947 = 947; - value_948 = 948; - value_949 = 949; - value_950 = 950; - value_951 = 951; - value_952 = 952; - value_953 = 953; - value_954 = 954; - value_955 = 955; - value_956 = 956; - value_957 = 957; - value_958 = 958; - value_959 = 959; - value_960 = 960; - value_961 = 961; - value_962 = 962; - value_963 = 963; - value_964 = 964; - value_965 = 965; - value_966 = 966; - value_967 = 967; - value_968 = 968; - value_969 = 969; - value_970 = 970; - value_971 = 971; - value_972 = 972; - value_973 = 973; - value_974 = 974; - value_975 = 975; - value_976 = 976; - value_977 = 977; - value_978 = 978; - value_979 = 979; - value_980 = 980; - value_981 = 981; - value_982 = 982; - value_983 = 983; - value_984 = 984; - value_985 = 985; - value_986 = 986; - value_987 = 987; - value_988 = 988; - value_989 = 989; - value_990 = 990; - value_991 = 991; - value_992 = 992; - value_993 = 993; - value_994 = 994; - value_995 = 995; - value_996 = 996; - value_997 = 997; - value_998 = 998; - value_999 = 999; - value_1000 = 1000; - value_1001 = 1001; - value_1002 = 1002; - value_1003 = 1003; - value_1004 = 1004; - value_1005 = 1005; - value_1006 = 1006; - value_1007 = 1007; - value_1008 = 1008; - value_1009 = 1009; - value_1010 = 1010; - value_1011 = 1011; - value_1012 = 1012; - value_1013 = 1013; - value_1014 = 1014; - value_1015 = 1015; - value_1016 = 1016; - 
value_1017 = 1017; - value_1018 = 1018; - value_1019 = 1019; - value_1020 = 1020; - value_1021 = 1021; - value_1022 = 1022; - value_1023 = 1023; - value_1024 = 1024; - value_1025 = 1025; - value_1026 = 1026; - value_1027 = 1027; - value_1028 = 1028; - value_1029 = 1029; - value_1030 = 1030; - value_1031 = 1031; - value_1032 = 1032; - value_1033 = 1033; - value_1034 = 1034; - value_1035 = 1035; - value_1036 = 1036; - value_1037 = 1037; - value_1038 = 1038; - value_1039 = 1039; - value_1040 = 1040; - value_1041 = 1041; - value_1042 = 1042; - value_1043 = 1043; - value_1044 = 1044; - value_1045 = 1045; - value_1046 = 1046; - value_1047 = 1047; - value_1048 = 1048; - value_1049 = 1049; - value_1050 = 1050; - value_1051 = 1051; - value_1052 = 1052; - value_1053 = 1053; - value_1054 = 1054; - value_1055 = 1055; - value_1056 = 1056; - value_1057 = 1057; - value_1058 = 1058; - value_1059 = 1059; - value_1060 = 1060; - value_1061 = 1061; - value_1062 = 1062; - value_1063 = 1063; - value_1064 = 1064; - value_1065 = 1065; - value_1066 = 1066; - value_1067 = 1067; - value_1068 = 1068; - value_1069 = 1069; - value_1070 = 1070; - value_1071 = 1071; - value_1072 = 1072; - value_1073 = 1073; - value_1074 = 1074; - value_1075 = 1075; - value_1076 = 1076; - value_1077 = 1077; - value_1078 = 1078; - value_1079 = 1079; - value_1080 = 1080; - value_1081 = 1081; - value_1082 = 1082; - value_1083 = 1083; - value_1084 = 1084; - value_1085 = 1085; - value_1086 = 1086; - value_1087 = 1087; - value_1088 = 1088; - value_1089 = 1089; - value_1090 = 1090; - value_1091 = 1091; - value_1092 = 1092; - value_1093 = 1093; - value_1094 = 1094; - value_1095 = 1095; - value_1096 = 1096; - value_1097 = 1097; - value_1098 = 1098; - value_1099 = 1099; - value_1100 = 1100; - value_1101 = 1101; - value_1102 = 1102; - value_1103 = 1103; - value_1104 = 1104; - value_1105 = 1105; - value_1106 = 1106; - value_1107 = 1107; - value_1108 = 1108; - value_1109 = 1109; - value_1110 = 1110; - value_1111 = 1111; - value_1112 = 1112; - value_1113 = 1113; - value_1114 = 1114; - value_1115 = 1115; - value_1116 = 1116; - value_1117 = 1117; - value_1118 = 1118; - value_1119 = 1119; - value_1120 = 1120; - value_1121 = 1121; - value_1122 = 1122; - value_1123 = 1123; - value_1124 = 1124; - value_1125 = 1125; - value_1126 = 1126; - value_1127 = 1127; - value_1128 = 1128; - value_1129 = 1129; - value_1130 = 1130; - value_1131 = 1131; - value_1132 = 1132; - value_1133 = 1133; - value_1134 = 1134; - value_1135 = 1135; - value_1136 = 1136; - value_1137 = 1137; - value_1138 = 1138; - value_1139 = 1139; - value_1140 = 1140; - value_1141 = 1141; - value_1142 = 1142; - value_1143 = 1143; - value_1144 = 1144; - value_1145 = 1145; - value_1146 = 1146; - value_1147 = 1147; - value_1148 = 1148; - value_1149 = 1149; - value_1150 = 1150; - value_1151 = 1151; - value_1152 = 1152; - value_1153 = 1153; - value_1154 = 1154; - value_1155 = 1155; - value_1156 = 1156; - value_1157 = 1157; - value_1158 = 1158; - value_1159 = 1159; - value_1160 = 1160; - value_1161 = 1161; - value_1162 = 1162; - value_1163 = 1163; - value_1164 = 1164; - value_1165 = 1165; - value_1166 = 1166; - value_1167 = 1167; - value_1168 = 1168; - value_1169 = 1169; - value_1170 = 1170; - value_1171 = 1171; - value_1172 = 1172; - value_1173 = 1173; - value_1174 = 1174; - value_1175 = 1175; - value_1176 = 1176; - value_1177 = 1177; - value_1178 = 1178; - value_1179 = 1179; - value_1180 = 1180; - value_1181 = 1181; - value_1182 = 1182; - value_1183 = 1183; - value_1184 = 1184; - value_1185 = 1185; - 
value_1186 = 1186; - value_1187 = 1187; - value_1188 = 1188; - value_1189 = 1189; - value_1190 = 1190; - value_1191 = 1191; - value_1192 = 1192; - value_1193 = 1193; - value_1194 = 1194; - value_1195 = 1195; - value_1196 = 1196; - value_1197 = 1197; - value_1198 = 1198; - value_1199 = 1199; - value_1200 = 1200; - value_1201 = 1201; - value_1202 = 1202; - value_1203 = 1203; - value_1204 = 1204; - value_1205 = 1205; - value_1206 = 1206; - value_1207 = 1207; - value_1208 = 1208; - value_1209 = 1209; - value_1210 = 1210; - value_1211 = 1211; - value_1212 = 1212; - value_1213 = 1213; - value_1214 = 1214; - value_1215 = 1215; - value_1216 = 1216; - value_1217 = 1217; - value_1218 = 1218; - value_1219 = 1219; - value_1220 = 1220; - value_1221 = 1221; - value_1222 = 1222; - value_1223 = 1223; - value_1224 = 1224; - value_1225 = 1225; - value_1226 = 1226; - value_1227 = 1227; - value_1228 = 1228; - value_1229 = 1229; - value_1230 = 1230; - value_1231 = 1231; - value_1232 = 1232; - value_1233 = 1233; - value_1234 = 1234; - value_1235 = 1235; - value_1236 = 1236; - value_1237 = 1237; - value_1238 = 1238; - value_1239 = 1239; - value_1240 = 1240; - value_1241 = 1241; - value_1242 = 1242; - value_1243 = 1243; - value_1244 = 1244; - value_1245 = 1245; - value_1246 = 1246; - value_1247 = 1247; - value_1248 = 1248; - value_1249 = 1249; - value_1250 = 1250; - value_1251 = 1251; - value_1252 = 1252; - value_1253 = 1253; - value_1254 = 1254; - value_1255 = 1255; - value_1256 = 1256; - value_1257 = 1257; - value_1258 = 1258; - value_1259 = 1259; - value_1260 = 1260; - value_1261 = 1261; - value_1262 = 1262; - value_1263 = 1263; - value_1264 = 1264; - value_1265 = 1265; - value_1266 = 1266; - value_1267 = 1267; - value_1268 = 1268; - value_1269 = 1269; - value_1270 = 1270; - value_1271 = 1271; - value_1272 = 1272; - value_1273 = 1273; - value_1274 = 1274; - value_1275 = 1275; - value_1276 = 1276; - value_1277 = 1277; - value_1278 = 1278; - value_1279 = 1279; - value_1280 = 1280; - value_1281 = 1281; - value_1282 = 1282; - value_1283 = 1283; - value_1284 = 1284; - value_1285 = 1285; - value_1286 = 1286; - value_1287 = 1287; - value_1288 = 1288; - value_1289 = 1289; - value_1290 = 1290; - value_1291 = 1291; - value_1292 = 1292; - value_1293 = 1293; - value_1294 = 1294; - value_1295 = 1295; - value_1296 = 1296; - value_1297 = 1297; - value_1298 = 1298; - value_1299 = 1299; - value_1300 = 1300; - value_1301 = 1301; - value_1302 = 1302; - value_1303 = 1303; - value_1304 = 1304; - value_1305 = 1305; - value_1306 = 1306; - value_1307 = 1307; - value_1308 = 1308; - value_1309 = 1309; - value_1310 = 1310; - value_1311 = 1311; - value_1312 = 1312; - value_1313 = 1313; - value_1314 = 1314; - value_1315 = 1315; - value_1316 = 1316; - value_1317 = 1317; - value_1318 = 1318; - value_1319 = 1319; - value_1320 = 1320; - value_1321 = 1321; - value_1322 = 1322; - value_1323 = 1323; - value_1324 = 1324; - value_1325 = 1325; - value_1326 = 1326; - value_1327 = 1327; - value_1328 = 1328; - value_1329 = 1329; - value_1330 = 1330; - value_1331 = 1331; - value_1332 = 1332; - value_1333 = 1333; - value_1334 = 1334; - value_1335 = 1335; - value_1336 = 1336; - value_1337 = 1337; - value_1338 = 1338; - value_1339 = 1339; - value_1340 = 1340; - value_1341 = 1341; - value_1342 = 1342; - value_1343 = 1343; - value_1344 = 1344; - value_1345 = 1345; - value_1346 = 1346; - value_1347 = 1347; - value_1348 = 1348; - value_1349 = 1349; - value_1350 = 1350; - value_1351 = 1351; - value_1352 = 1352; - value_1353 = 1353; - value_1354 = 1354; - 
value_1355 = 1355; - value_1356 = 1356; - value_1357 = 1357; - value_1358 = 1358; - value_1359 = 1359; - value_1360 = 1360; - value_1361 = 1361; - value_1362 = 1362; - value_1363 = 1363; - value_1364 = 1364; - value_1365 = 1365; - value_1366 = 1366; - value_1367 = 1367; - value_1368 = 1368; - value_1369 = 1369; - value_1370 = 1370; - value_1371 = 1371; - value_1372 = 1372; - value_1373 = 1373; - value_1374 = 1374; - value_1375 = 1375; - value_1376 = 1376; - value_1377 = 1377; - value_1378 = 1378; - value_1379 = 1379; - value_1380 = 1380; - value_1381 = 1381; - value_1382 = 1382; - value_1383 = 1383; - value_1384 = 1384; - value_1385 = 1385; - value_1386 = 1386; - value_1387 = 1387; - value_1388 = 1388; - value_1389 = 1389; - value_1390 = 1390; - value_1391 = 1391; - value_1392 = 1392; - value_1393 = 1393; - value_1394 = 1394; - value_1395 = 1395; - value_1396 = 1396; - value_1397 = 1397; - value_1398 = 1398; - value_1399 = 1399; - value_1400 = 1400; - value_1401 = 1401; - value_1402 = 1402; - value_1403 = 1403; - value_1404 = 1404; - value_1405 = 1405; - value_1406 = 1406; - value_1407 = 1407; - value_1408 = 1408; - value_1409 = 1409; - value_1410 = 1410; - value_1411 = 1411; - value_1412 = 1412; - value_1413 = 1413; - value_1414 = 1414; - value_1415 = 1415; - value_1416 = 1416; - value_1417 = 1417; - value_1418 = 1418; - value_1419 = 1419; - value_1420 = 1420; - value_1421 = 1421; - value_1422 = 1422; - value_1423 = 1423; - value_1424 = 1424; - value_1425 = 1425; - value_1426 = 1426; - value_1427 = 1427; - value_1428 = 1428; - value_1429 = 1429; - value_1430 = 1430; - value_1431 = 1431; - value_1432 = 1432; - value_1433 = 1433; - value_1434 = 1434; - value_1435 = 1435; - value_1436 = 1436; - value_1437 = 1437; - value_1438 = 1438; - value_1439 = 1439; - value_1440 = 1440; - value_1441 = 1441; - value_1442 = 1442; - value_1443 = 1443; - value_1444 = 1444; - value_1445 = 1445; - value_1446 = 1446; - value_1447 = 1447; - value_1448 = 1448; - value_1449 = 1449; - value_1450 = 1450; - value_1451 = 1451; - value_1452 = 1452; - value_1453 = 1453; - value_1454 = 1454; - value_1455 = 1455; - value_1456 = 1456; - value_1457 = 1457; - value_1458 = 1458; - value_1459 = 1459; - value_1460 = 1460; - value_1461 = 1461; - value_1462 = 1462; - value_1463 = 1463; - value_1464 = 1464; - value_1465 = 1465; - value_1466 = 1466; - value_1467 = 1467; - value_1468 = 1468; - value_1469 = 1469; - value_1470 = 1470; - value_1471 = 1471; - value_1472 = 1472; - value_1473 = 1473; - value_1474 = 1474; - value_1475 = 1475; - value_1476 = 1476; - value_1477 = 1477; - value_1478 = 1478; - value_1479 = 1479; - value_1480 = 1480; - value_1481 = 1481; - value_1482 = 1482; - value_1483 = 1483; - value_1484 = 1484; - value_1485 = 1485; - value_1486 = 1486; - value_1487 = 1487; - value_1488 = 1488; - value_1489 = 1489; - value_1490 = 1490; - value_1491 = 1491; - value_1492 = 1492; - value_1493 = 1493; - value_1494 = 1494; - value_1495 = 1495; - value_1496 = 1496; - value_1497 = 1497; - value_1498 = 1498; - value_1499 = 1499; - value_1500 = 1500; - value_1501 = 1501; - value_1502 = 1502; - value_1503 = 1503; - value_1504 = 1504; - value_1505 = 1505; - value_1506 = 1506; - value_1507 = 1507; - value_1508 = 1508; - value_1509 = 1509; - value_1510 = 1510; - value_1511 = 1511; - value_1512 = 1512; - value_1513 = 1513; - value_1514 = 1514; - value_1515 = 1515; - value_1516 = 1516; - value_1517 = 1517; - value_1518 = 1518; - value_1519 = 1519; - value_1520 = 1520; - value_1521 = 1521; - value_1522 = 1522; - value_1523 = 1523; - 
value_1524 = 1524;
-    value_1525 = 1525;
-    value_1526 = 1526;
-    value_1527 = 1527;
-    value_1528 = 1528;
-    value_1529 = 1529;
-    value_1530 = 1530;
-    value_1531 = 1531;
-    value_1532 = 1532;
-    value_1533 = 1533;
-    value_1534 = 1534;
-    value_1535 = 1535;
-    value_1536 = 1536;
-    value_1537 = 1537;
-    value_1538 = 1538;
-    value_1539 = 1539;
-    value_1540 = 1540;
-    value_1541 = 1541;
-    value_1542 = 1542;
-    value_1543 = 1543;
-    value_1544 = 1544;
-    value_1545 = 1545;
-    value_1546 = 1546;
-    value_1547 = 1547;
-    value_1548 = 1548;
-    value_1549 = 1549;
-    value_1550 = 1550;
-    value_1551 = 1551;
-    value_1552 = 1552;
-    value_1553 = 1553;
-    value_1554 = 1554;
-    value_1555 = 1555;
-    value_1556 = 1556;
-    value_1557 = 1557;
-    value_1558 = 1558;
-    value_1559 = 1559;
-    value_1560 = 1560;
-    value_1561 = 1561;
-    value_1562 = 1562;
-    value_1563 = 1563;
-    value_1564 = 1564;
-    value_1565 = 1565;
-    value_1566 = 1566;
-    value_1567 = 1567;
-    value_1568 = 1568;
-    value_1569 = 1569;
-    value_1570 = 1570;
-    value_1571 = 1571;
-    value_1572 = 1572;
-    value_1573 = 1573;
-    value_1574 = 1574;
-    value_1575 = 1575;
-    }
-    Value value = 1;
-    Sentence inner = 2;
-}
-message Sentence {
-    repeated Word words = 1;
-}

From 6c68269458a7d01a6fef4c6757e1e4c9419fec08 Mon Sep 17 00:00:00 2001
From: Nikita Mikhaylov
Date: Tue, 4 Jul 2023 00:25:51 +0200
Subject: [PATCH 1144/1997] Update Field.h

---
 src/Core/Field.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Core/Field.h b/src/Core/Field.h
index 97a32ab5bb1..686bfafc397 100644
--- a/src/Core/Field.h
+++ b/src/Core/Field.h
@@ -739,7 +739,7 @@ private:
     using StorageType = NearestFieldType;
     /// Incrementing the depth since we create a new Field.
-    auto depth = calculateAndCheckFieldDepth(x) + 1;
     new (&storage) StorageType(std::forward(x));
     if constexpr (std::is_same_v

From 87a2c44778db4dce5a425c7f618009e5652a6bef Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Tue, 4 Jul 2023 00:43:25 +0200
Subject: [PATCH 1145/1997] Fix test 02789_object_type_invalid_num_of_rows

---
 .../02789_object_type_invalid_num_of_rows.reference       | 1 -
 .../0_stateless/02789_object_type_invalid_num_of_rows.sql | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference b/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference
index 7dec35f7acb..e69de29bb2d 100644
--- a/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference
+++ b/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference
@@ -1 +0,0 @@
-0.02
diff --git a/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.sql b/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.sql
index a9c8a844aa0..d0fc6905593 100644
--- a/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.sql
+++ b/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.sql
@@ -1,2 +1,2 @@
 set allow_experimental_object_type=1;
-SELECT '0.02' GROUP BY GROUPING SETS (('6553.6'), (CAST('{"x" : 1}', 'Object(\'json\')'))) settings max_threads=1; -- { serverError NOT_IMPLEMENTED }
+SELECT '0.02' GROUP BY GROUPING SETS (('6553.6'), (CAST('{"x" : 1}', 'Object(\'json\')'))) FORMAT Null; -- { serverError NOT_IMPLEMENTED }
From be1353fe334cf90d5534036e306dc424cbf26773 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Tue, 4 Jul 2023 00:47:31 +0200
Subject: [PATCH 1146/1997] Revert "Merge pull request #51750 from
 ClickHouse/revert-51296-object_column_invalid_num_of_rows"

This reverts commit 73d956249eaf5076691af80edac689d466a1f0fd, reversing
changes made to 4d76b538c7838c55105746bfdd99db1e01e1fd91.

---
 .../0_stateless/02789_object_type_invalid_num_of_rows.reference | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference b/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference
index e69de29bb2d..7dec35f7acb 100644
--- a/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference
+++ b/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference
@@ -0,0 +1 @@
+0.02

From c65ee49a37773a2034c4e9a439ba6ebaf1820955 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Tue, 4 Jul 2023 00:49:39 +0200
Subject: [PATCH 1147/1997] Revert "Merge pull request #51750 from
 ClickHouse/revert-51296-object_column_invalid_num_of_rows"

This reverts commit 73d956249eaf5076691af80edac689d466a1f0fd, reversing
changes made to 4d76b538c7838c55105746bfdd99db1e01e1fd91.

---
 src/Processors/QueryPlan/AggregatingStep.cpp                    | 2 ++
 .../0_stateless/02789_object_type_invalid_num_of_rows.reference | 1 +
 2 files changed, 3 insertions(+)
 create mode 100644 tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference

diff --git a/src/Processors/QueryPlan/AggregatingStep.cpp b/src/Processors/QueryPlan/AggregatingStep.cpp
index 4ac972e2a79..eebbfc04304 100644
--- a/src/Processors/QueryPlan/AggregatingStep.cpp
+++ b/src/Processors/QueryPlan/AggregatingStep.cpp
@@ -319,6 +319,8 @@ void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const B
         {
             auto column_with_default = col.column->cloneEmpty();
             col.type->insertDefaultInto(*column_with_default);
+            column_with_default->finalize();
+
             auto column = ColumnConst::create(std::move(column_with_default), 0);
             const auto * node = &dag->addColumn({ColumnPtr(std::move(column)), col.type, col.name});
             node = &dag->materializeNode(*node);
diff --git a/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference b/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference
new file mode 100644
index 00000000000..7dec35f7acb
--- /dev/null
+++ b/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference
@@ -0,0 +1 @@
+0.02
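The revert above restores the `column_with_default->finalize();` call in `AggregatingStep`: a column is created empty, one default value is inserted, and the result is wrapped into a constant column, so a column type that defers part of its state must be finalized before it is handed on. Below is a minimal self-contained sketch of that bug class; `LazyColumn`, its members, and the `main` scenario are illustrative assumptions, not the real `IColumn` interface.

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

// Toy column that buffers freshly inserted values and only merges them
// into the main data on finalize() -- the same shape of problem as the
// not-yet-finalized column in the hunk above.
struct LazyColumn
{
    std::vector<int> data;     // finalized values
    std::vector<int> pending;  // inserted but not yet merged

    void insertDefault() { pending.push_back(0); }

    void finalize()
    {
        data.insert(data.end(), pending.begin(), pending.end());
        pending.clear();
    }

    // Deliberately looks only at finalized state, like consumers that
    // assume a fully built column.
    std::size_t size() const { return data.size(); }
};

int main()
{
    LazyColumn column_with_default;
    column_with_default.insertDefault();

    // Without finalize() the inserted default is invisible to size(), so
    // wrapping the column (e.g. into a constant column) would capture an
    // inconsistent, partially built object.
    assert(column_with_default.size() == 0);

    column_with_default.finalize();  // the one-line fix restored above
    assert(column_with_default.size() == 1);
    return 0;
}
```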
#if defined(SANITIZER) || !defined(NDEBUG) #define DBMS_MAX_NESTED_FIELD_DEPTH 64 #else - #define DBMS_MAX_NESTED_FIELD_DEPTH 256 + #define DBMS_MAX_NESTED_FIELD_DEPTH 255 #endif /** Discriminated union of several types. @@ -683,9 +684,9 @@ private: /// StorageType and Original are the same for Array, Tuple, Map, Object template - size_t calculateAndCheckFieldDepth(Original && x) + uint8_t calculateAndCheckFieldDepth(Original && x) { - size_t result = 0; + uint8_t result = 0; if constexpr (std::is_same_v || std::is_same_v @@ -694,29 +695,29 @@ private: { result = x.nested_field_depth; - auto calculate_max = [](const Field & elem, size_t result) + auto get_depth = [](const Field & elem) { switch (elem.which) { case Types::Array: - return std::max(result, elem.template get().nested_field_depth); + return elem.template get().nested_field_depth; case Types::Tuple: - return std::max(result, elem.template get().nested_field_depth); + return elem.template get().nested_field_depth; case Types::Map: - return std::max(result, elem.template get().nested_field_depth); + return elem.template get().nested_field_depth; case Types::Object: - return std::max(result, elem.template get().nested_field_depth); + return elem.template get().nested_field_depth; default: - return result; + return static_cast(0); } }; if constexpr (std::is_same_v) for (auto & [_, value] : x) - result = calculate_max(value, result); + result = std::max(get_depth(value), result); else for (auto & value : x) - result = calculate_max(value, result); + result = std::max(get_depth(value), result); } if (result >= DBMS_MAX_NESTED_FIELD_DEPTH) From c25da7cbf4432a8b49155902dd8e5f23929fa844 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 4 Jul 2023 01:03:49 +0200 Subject: [PATCH 1149/1997] Fix (benign) data race in `transform` --- src/Columns/ColumnVector.h | 4 ++-- src/Functions/transform.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h index bf790423d1d..b8ebff2a5d5 100644 --- a/src/Columns/ColumnVector.h +++ b/src/Columns/ColumnVector.h @@ -107,8 +107,8 @@ struct FloatCompareHelper } }; -template struct CompareHelper : public FloatCompareHelper {}; -template struct CompareHelper : public FloatCompareHelper {}; +template struct CompareHelper : public FloatCompareHelper {}; +template struct CompareHelper : public FloatCompareHelper {}; /** A template for columns that use a simple array to store.
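The transform.cpp hunk of this patch, shown below, is the actual race fix: the lazily filled cache (`cache.is_empty`, `cache.from_column`, ...) was being inspected and written before `cache.mutex` was acquired, so the commit moves the `std::lock_guard` ahead of every access. A minimal sketch of the corrected pattern, using placeholder types rather than the real ClickHouse ones:

```cpp
#include <mutex>
#include <optional>

struct Cache
{
    std::mutex mutex;
    bool is_empty = false;
    std::optional<int> from_column; // stand-in for the real cached lookup state
};

void initialize(Cache & cache, bool only_null)
{
    // Take the lock before *any* read or write of the cached fields; in the buggy
    // version the early checks and the `is_empty = true` write ran unlocked.
    std::lock_guard lock(cache.mutex);

    if (only_null)
    {
        cache.is_empty = true;
        return;
    }

    if (!cache.from_column)
        cache.from_column = 42; // expensive one-time initialization happens here
}
```

The commit subject calls the race benign, presumably because concurrent initializations would compute the same values, but taking the lock before the first read makes the code well-defined and keeps the thread sanitizer quiet.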
diff --git a/src/Functions/transform.cpp b/src/Functions/transform.cpp index 8d6e53c491e..1fc0e3adf96 100644 --- a/src/Functions/transform.cpp +++ b/src/Functions/transform.cpp @@ -698,6 +698,8 @@ namespace const DataTypePtr & from_type = arguments[0].type; + std::lock_guard lock(cache.mutex); + if (from_type->onlyNull()) { cache.is_empty = true; @@ -711,8 +713,6 @@ namespace throw Exception( ErrorCodes::ILLEGAL_COLUMN, "Second and third arguments of function {} must be constant arrays.", getName()); - std::lock_guard lock(cache.mutex); - const ColumnPtr & from_column_uncasted = array_from->getDataPtr(); cache.from_column = castColumn( From 49907818e10030e4129bcd65c52a865b0415abf0 Mon Sep 17 00:00:00 2001 From: Alexey Gerasimchuck Date: Tue, 4 Jul 2023 02:32:08 +0000 Subject: [PATCH 1150/1997] Added waiting before crash log store finish --- src/Common/SystemLogBase.cpp | 49 ++++++++++------- src/Common/SystemLogBase.h | 9 +++- src/Daemon/BaseDaemon.cpp | 3 ++ src/Interpreters/CrashLog.cpp | 3 ++ tests/integration/test_crash_log/__init__.py | 0 tests/integration/test_crash_log/test.py | 57 ++++++++++++++++++++ 6 files changed, 101 insertions(+), 20 deletions(-) create mode 100644 tests/integration/test_crash_log/__init__.py create mode 100644 tests/integration/test_crash_log/test.py diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp index 86adcbbd31b..cb1d2c3b3e7 100644 --- a/src/Common/SystemLogBase.cpp +++ b/src/Common/SystemLogBase.cpp @@ -137,25 +137,9 @@ void SystemLogBase::add(const LogElement & element) template void SystemLogBase::flush(bool force) { - uint64_t this_thread_requested_offset; - - { - std::lock_guard lock(mutex); - - if (is_shutdown) - return; - - this_thread_requested_offset = queue_front_index + queue.size(); - - // Publish our flush request, taking care not to overwrite the requests - // made by other threads. - is_force_prepare_tables |= force; - requested_flush_up_to = std::max(requested_flush_up_to, this_thread_requested_offset); - - flush_event.notify_all(); - } - - LOG_DEBUG(log, "Requested flush up to offset {}", this_thread_requested_offset); + uint64_t this_thread_requested_offset = notifyFlushImpl(force); + if (this_thread_requested_offset == uint64_t(-1)) + return; // Use an arbitrary timeout to avoid endless waiting. 60s proved to be // too fast for our parallel functional tests, probably because they @@ -174,6 +158,33 @@ void SystemLogBase::flush(bool force) } } +template +void SystemLogBase::notifyFlush(bool force) { notifyFlushImpl(force ); } + +template +uint64_t SystemLogBase::notifyFlushImpl(bool force) +{ + uint64_t this_thread_requested_offset; + + { + std::lock_guard lock(mutex); + if (is_shutdown) + return uint64_t(-1); + + this_thread_requested_offset = queue_front_index + queue.size(); + + // Publish our flush request, taking care not to overwrite the requests + // made by other threads. 
+ is_force_prepare_tables |= force; + requested_flush_up_to = std::max(requested_flush_up_to, this_thread_requested_offset); + + flush_event.notify_all(); + } + + LOG_DEBUG(log, "Requested flush up to offset {}", this_thread_requested_offset); + return this_thread_requested_offset; +} + #define INSTANTIATE_SYSTEM_LOG_BASE(ELEMENT) template class SystemLogBase; SYSTEM_LOG_ELEMENTS(INSTANTIATE_SYSTEM_LOG_BASE) diff --git a/src/Common/SystemLogBase.h b/src/Common/SystemLogBase.h index f8febd8b159..92409028c22 100644 --- a/src/Common/SystemLogBase.h +++ b/src/Common/SystemLogBase.h @@ -87,9 +87,12 @@ public: */ void add(const LogElement & element); - /// Flush data in the buffer to disk + /// Flush data in the buffer to disk. Block the thread until the data is stored on disk. void flush(bool force) override; + /// Non-blocking flush of data in the buffer to disk. + void notifyFlush(bool force); + String getName() const override { return LogElement::name(); } static const char * getDefaultOrderBy() { return "event_date, event_time"; } @@ -112,6 +115,10 @@ protected: uint64_t flushed_up_to = 0; // Logged overflow message at this queue front index uint64_t logged_queue_full_at_index = -1; + +private: + uint64_t notifyFlushImpl(bool force); + }; } diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index 6d29523a354..2c4c740af30 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -173,6 +173,9 @@ static void signalHandler(int sig, siginfo_t * info, void * context) /// This coarse method of synchronization is perfectly ok for fatal signals. sleepForSeconds(1); } + + /// Wait for all log flush operations + sleepForSeconds(3); call_default_signal_handler(sig); } diff --git a/src/Interpreters/CrashLog.cpp b/src/Interpreters/CrashLog.cpp index f1f0ffb6f60..3f5476535a6 100644 --- a/src/Interpreters/CrashLog.cpp +++ b/src/Interpreters/CrashLog.cpp @@ -84,5 +84,8 @@ void collectCrashLog(Int32 signal, UInt64 thread_id, const String & query_id, co CrashLogElement element{static_cast(time / 1000000000), time, signal, thread_id, query_id, trace, trace_full}; crash_log_owned->add(element); + /// Notify savingThreadFunction to start flushing crash log + /// Crash log is stored in parallel with the signal processing thread.
+ crash_log_owned->notifyFlush(true); } } diff --git a/tests/integration/test_crash_log/__init__.py b/tests/integration/test_crash_log/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_crash_log/test.py b/tests/integration/test_crash_log/test.py new file mode 100644 index 00000000000..9f6eca794b1 --- /dev/null +++ b/tests/integration/test_crash_log/test.py @@ -0,0 +1,57 @@ +import os +import time +import pytest + +import helpers.cluster +import helpers.test_tools + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) + + +@pytest.fixture(scope="module") +def started_node(): + cluster = helpers.cluster.ClickHouseCluster(__file__) + try: + node = cluster.add_instance("node", stay_alive=True) + + cluster.start() + yield node + finally: + cluster.shutdown() + + +def send_signal(started_node, signal): + started_node.exec_in_container( + ["bash", "-c", f"pkill -{signal} clickhouse"], user="root" + ) + + +def wait_for_clickhouse_stop(started_node): + result = None + for attempt in range(60): + time.sleep(1) + pid = started_node.get_process_pid("clickhouse") + if pid is None: + result = "OK" + break + assert result == "OK", "ClickHouse process is still running" + + +def test_pkill(started_node): + if ( + started_node.is_built_with_thread_sanitizer() + or started_node.is_built_with_address_sanitizer() + or started_node.is_built_with_memory_sanitizer() + ): + pytest.skip("doesn't fit in timeouts for stacktrace generation") + + crashes_count = 0 + for signal in ["SEGV", "4"]: + send_signal(started_node, signal) + wait_for_clickhouse_stop(started_node) + started_node.restart_clickhouse() + crashes_count += 1 + assert ( + started_node.query("SELECT COUNT(*) FROM system.crash_log") + == f"{crashes_count}\n" + ) From 50d555a3987d5e653da64107254af3bcfd33af81 Mon Sep 17 00:00:00 2001 From: Alexey Gerasimchuck Date: Tue, 4 Jul 2023 02:45:15 +0000 Subject: [PATCH 1151/1997] style correction --- src/Common/SystemLogBase.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp index cb1d2c3b3e7..5e9ee9a1e04 100644 --- a/src/Common/SystemLogBase.cpp +++ b/src/Common/SystemLogBase.cpp @@ -159,7 +159,7 @@ void SystemLogBase::flush(bool force) } template -void SystemLogBase::notifyFlush(bool force) { notifyFlushImpl(force ); } +void SystemLogBase::notifyFlush(bool force) { notifyFlushImpl(force); } template uint64_t SystemLogBase::notifyFlushImpl(bool force) From f86c5edfc465717a5344a8b71e140f0ceaa9ba47 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 4 Jul 2023 07:54:13 +0000 Subject: [PATCH 1152/1997] Remove debug tracing --- src/Interpreters/GraceHashJoin.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Interpreters/GraceHashJoin.cpp b/src/Interpreters/GraceHashJoin.cpp index f5b2386fd1e..8acdb4e90dd 100644 --- a/src/Interpreters/GraceHashJoin.cpp +++ b/src/Interpreters/GraceHashJoin.cpp @@ -623,8 +623,6 @@ Block GraceHashJoin::prepareRightBlock(const Block & block) void GraceHashJoin::addJoinedBlockImpl(Block block) { - LOG_ERROR(&Poco::Logger::get(__PRETTY_FUNCTION__), ""); - block = prepareRightBlock(block); Buckets buckets_snapshot = getCurrentBuckets(); size_t bucket_index = current_bucket->idx; From 1b71bbf1b119c937a176ff63a4ffaeb660d96038 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 4 Jul 2023 08:27:48 +0000 Subject: [PATCH 1153/1997] Add test for ZK disconnect --- tests/integration/test_keeper_map/test.py | 18 ++++++++++++++---- 1 file changed, 14 
insertions(+), 4 deletions(-) diff --git a/tests/integration/test_keeper_map/test.py b/tests/integration/test_keeper_map/test.py index c6ec7103056..fbae875d2e6 100644 --- a/tests/integration/test_keeper_map/test.py +++ b/tests/integration/test_keeper_map/test.py @@ -1,4 +1,5 @@ import pytest +import time from helpers.cluster import ClickHouseCluster from helpers.network import PartitionManager @@ -39,9 +40,18 @@ def remove_children(client, path): def test_keeper_map_without_zk(started_cluster): + def wait_disconnect_from_zk(): + for _ in range(20): + if len(node.query_and_get_answer_with_error("SELECT * FROM system.zookeeper WHERE path='/'")[1]) != 0: + break + time.sleep(1) + else: + assert False, "ClickHouse didn't disconnect from ZK after DROP rule was added" + def assert_keeper_exception_after_partition(query): with PartitionManager() as pm: pm.drop_instance_zk_connections(node) + wait_disconnect_from_zk() error = node.query_and_get_error(query) assert "Coordination::Exception" in error @@ -49,17 +59,17 @@ def test_keeper_map_without_zk(started_cluster): "CREATE TABLE test_keeper_map_without_zk (key UInt64, value UInt64) ENGINE = KeeperMap('/test_without_zk') PRIMARY KEY(key);" ) - node.query( + node.query_with_retry( "CREATE TABLE test_keeper_map_without_zk (key UInt64, value UInt64) ENGINE = KeeperMap('/test_without_zk') PRIMARY KEY(key);" ) assert_keeper_exception_after_partition( "INSERT INTO test_keeper_map_without_zk VALUES (1, 11)" ) - node.query("INSERT INTO test_keeper_map_without_zk VALUES (1, 11)") + node.query_with_retry("INSERT INTO test_keeper_map_without_zk VALUES (1, 11)") assert_keeper_exception_after_partition("SELECT * FROM test_keeper_map_without_zk") - node.query("SELECT * FROM test_keeper_map_without_zk") + node.query_with_retry("SELECT * FROM test_keeper_map_without_zk") with PartitionManager() as pm: pm.drop_instance_zk_connections(node) @@ -67,7 +77,7 @@ def test_keeper_map_without_zk(started_cluster): error = node.query_and_get_error("SELECT * FROM test_keeper_map_without_zk") assert "Failed to activate table because of connection issues" in error - node.query("SELECT * FROM test_keeper_map_without_zk") + node.query_with_retry("SELECT * FROM test_keeper_map_without_zk") client = get_genuine_zk() remove_children(client, "/test_keeper_map/test_without_zk") From 30be0ab4a8af7247a12c94076cd17834f712b9d9 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 4 Jul 2023 09:00:53 +0000 Subject: [PATCH 1154/1997] Fix: unexpected number of buckets Number of buckets could become inconsistent if exception was thrown during new buckets creation --- src/Interpreters/GraceHashJoin.cpp | 63 ++++++++++++++++++------------ src/Interpreters/GraceHashJoin.h | 5 ++- 2 files changed, 40 insertions(+), 28 deletions(-) diff --git a/src/Interpreters/GraceHashJoin.cpp b/src/Interpreters/GraceHashJoin.cpp index 8acdb4e90dd..aa7091548d7 100644 --- a/src/Interpreters/GraceHashJoin.cpp +++ b/src/Interpreters/GraceHashJoin.cpp @@ -288,10 +288,7 @@ void GraceHashJoin::initBuckets() size_t initial_num_buckets = roundUpToPowerOfTwoOrZero(std::clamp(settings.grace_hash_join_initial_buckets, 1, settings.grace_hash_join_max_buckets)); - for (size_t i = 0; i < initial_num_buckets; ++i) - { - addBucket(buckets); - } + addBuckets(initial_num_buckets); if (buckets.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "No buckets created"); @@ -368,40 +365,54 @@ GraceHashJoin::Buckets GraceHashJoin::rehashBuckets() if (to_size > max_num_buckets) { - throw Exception(ErrorCodes::LIMIT_EXCEEDED, + throw 
Exception( + ErrorCodes::LIMIT_EXCEEDED, "Too many grace hash join buckets ({} > {}), " "consider increasing grace_hash_join_max_buckets or max_rows_in_join/max_bytes_in_join", - to_size, max_num_buckets); + to_size, + max_num_buckets); } LOG_TRACE(log, "Rehashing from {} to {}", current_size, to_size); - buckets.reserve(to_size); - for (size_t i = current_size; i < to_size; ++i) - addBucket(buckets); + addBuckets(to_size - current_size); return buckets; } -void GraceHashJoin::addBucket(Buckets & destination) +void GraceHashJoin::addBuckets(const size_t bucket_count) { - // There could be exceptions from createStream, In ci tests - // there is a certain probability of failure in allocating memory, see memory_tracker_fault_probability. - // It may terminate this thread and leave a broken hash_join, and another thread cores when it tries to - // use the broken hash_join. So we print an exception message here to help debug. - try - { - auto & left_file = tmp_data->createStream(left_sample_block); - auto & right_file = tmp_data->createStream(prepareRightBlock(right_sample_block)); + // An exception can be thrown in a number of cases: + // - during creation of temporary files for buckets + // - in CI tests, there is a certain probability of failure in allocating memory, see memory_tracker_fault_probability + // Therefore, new buckets are added only after all of them are created successfully, + // otherwise we can end up having an unexpected number of buckets - BucketPtr new_bucket = std::make_shared(destination.size(), left_file, right_file, log); - destination.emplace_back(std::move(new_bucket)); - } - catch (...) - { - LOG_ERROR(&Poco::Logger::get("GraceHashJoin"), "Can't create bucket. current buckets size: {}", destination.size()); - throw; - } + const size_t current_size = buckets.size(); + Buckets tmp_buckets; + tmp_buckets.reserve(bucket_count); + for (size_t i = 0; i < bucket_count; ++i) + try + { + auto & left_file = tmp_data->createStream(left_sample_block); + auto & right_file = tmp_data->createStream(prepareRightBlock(right_sample_block)); + + BucketPtr new_bucket = std::make_shared(current_size + i, left_file, right_file, log); + tmp_buckets.emplace_back(std::move(new_bucket)); + } + catch (...) + { + LOG_ERROR( + &Poco::Logger::get("GraceHashJoin"), + "Can't create bucket {} due to error: {}", + current_size + i, + getCurrentExceptionMessage(false)); + throw; + } + + buckets.reserve(buckets.size() + bucket_count); + for (auto & bucket : tmp_buckets) + buckets.emplace_back(std::move(bucket)); } void GraceHashJoin::checkTypesOfKeys(const Block & block) const diff --git a/src/Interpreters/GraceHashJoin.h b/src/Interpreters/GraceHashJoin.h index fd3397ba15e..78ba70bc764 100644 --- a/src/Interpreters/GraceHashJoin.h +++ b/src/Interpreters/GraceHashJoin.h @@ -101,8 +101,9 @@ private: bool hasMemoryOverflow(const InMemoryJoinPtr & hash_join_) const; bool hasMemoryOverflow(const BlocksList & blocks) const; - /// Create new bucket at the end of @destination. - void addBucket(Buckets & destination); + /// Add bucket_count new buckets + /// Throws if a bucket creation fails + void addBuckets(size_t bucket_count); /// Increase number of buckets to match desired_size. /// Called when HashJoin in-memory table for one bucket exceeds the limits.
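The `addBuckets` rewrite above is a commit-or-discard pattern: the new buckets are first created into a local temporary vector and are moved into the shared `buckets` list only once every creation has succeeded, so an exception thrown part-way through leaves the join with its previous, consistent bucket count. A standalone sketch of the idea follows; `Bucket` and `createBucket` are placeholders, not the actual ClickHouse types:

```cpp
#include <cstddef>
#include <memory>
#include <vector>

struct Bucket { size_t idx; };
using BucketPtr = std::shared_ptr<Bucket>;

// Placeholder for the real bucket factory, which can throw (e.g. on
// temporary-file creation failure or an injected memory-tracker fault).
BucketPtr createBucket(size_t idx)
{
    return std::make_shared<Bucket>(Bucket{idx});
}

void addBuckets(std::vector<BucketPtr> & buckets, size_t bucket_count)
{
    // Stage the new buckets locally first: if any creation throws,
    // `buckets` has not been touched and keeps its previous, consistent size.
    std::vector<BucketPtr> staged;
    staged.reserve(bucket_count);
    const size_t current_size = buckets.size();
    for (size_t i = 0; i < bucket_count; ++i)
        staged.emplace_back(createBucket(current_size + i));

    // Commit only after every creation succeeded.
    buckets.reserve(buckets.size() + bucket_count);
    for (auto & bucket : staged)
        buckets.emplace_back(std::move(bucket));
}
```

This gives the function the strong exception guarantee with respect to the bucket list: a caller that catches the exception observes the same bucket count as before the call, with no repair step needed.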
From 57ada39ccf1615910afb48152a30ba7410152a36 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 4 Jul 2023 09:31:15 +0000 Subject: [PATCH 1155/1997] Add retries and iptables rules dump --- tests/integration/helpers/cluster.py | 7 +-- tests/integration/helpers/network.py | 11 +++- tests/integration/test_keeper_map/test.py | 66 ++++++++++++++--------- 3 files changed, 54 insertions(+), 30 deletions(-) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 21398790be3..3db0ad12295 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -3416,13 +3416,14 @@ class ClickHouseInstance: database=database, ) time.sleep(sleep_time) + + if result is not None: + return result except QueryRuntimeException as ex: logging.debug("Retry {} got exception {}".format(i + 1, ex)) time.sleep(sleep_time) - if result is not None: - return result - raise Exception("Query {sql} did not fail".format(sql)) + raise Exception("Query {} did not fail".format(sql)) # The same as query_and_get_error but ignores successful query. def query_and_get_answer_with_error( diff --git a/tests/integration/helpers/network.py b/tests/integration/helpers/network.py index 2df560708e0..60b46926589 100644 --- a/tests/integration/helpers/network.py +++ b/tests/integration/helpers/network.py @@ -32,6 +32,9 @@ class PartitionManager: {"destination": instance.ip_address, "source_port": 2181, "action": action} ) + def dump_rules(self): + return _NetworkManager.get().dump_rules() + def restore_instance_zk_connections(self, instance, action="DROP"): self._check_instance(instance) @@ -157,6 +160,10 @@ class _NetworkManager: cmd.extend(self._iptables_cmd_suffix(**kwargs)) self._exec_run(cmd, privileged=True) + def dump_rules(self): + cmd = ["iptables", "-L", "DOCKER-USER"] + return self._exec_run(cmd, privileged=True) + @staticmethod def clean_all_user_iptables_rules(): for i in range(1000): @@ -212,8 +219,8 @@ class _NetworkManager: def __init__( self, - container_expire_timeout=50, - container_exit_timeout=60, + container_expire_timeout=120, + container_exit_timeout=120, docker_api_version=os.environ.get("DOCKER_API_VERSION"), ): self.container_expire_timeout = container_expire_timeout diff --git a/tests/integration/test_keeper_map/test.py b/tests/integration/test_keeper_map/test.py index fbae875d2e6..d7b4230d872 100644 --- a/tests/integration/test_keeper_map/test.py +++ b/tests/integration/test_keeper_map/test.py @@ -1,8 +1,7 @@ import pytest -import time from helpers.cluster import ClickHouseCluster -from helpers.network import PartitionManager +from helpers.network import PartitionManager, _NetworkManager test_recover_staled_replica_run = 1 @@ -39,50 +38,67 @@ def remove_children(client, path): client.delete(child_path) -def test_keeper_map_without_zk(started_cluster): - def wait_disconnect_from_zk(): - for _ in range(20): - if len(node.query_and_get_answer_with_error("SELECT * FROM system.zookeeper WHERE path='/'")[1]) != 0: - break - time.sleep(1) - else: - assert False, "ClickHouse didn't disconnect from ZK after DROP rule was added" +def print_iptables_rules(): + print(f"iptables rules: {_NetworkManager.get().dump_rules()}") - def assert_keeper_exception_after_partition(query): - with PartitionManager() as pm: - pm.drop_instance_zk_connections(node) - wait_disconnect_from_zk() - error = node.query_and_get_error(query) + +def assert_keeper_exception_after_partition(query): + with PartitionManager() as pm: + pm.drop_instance_zk_connections(node) + try: + 
error = node.query_and_get_error_with_retry(query, sleep_time=1) assert "Coordination::Exception" in error + except: + print_iptables_rules() + raise + +def run_query(query): + try: + result = node.query_with_retry(query, sleep_time=1) + return result + except: + print_iptables_rules() + raise + + +def test_keeper_map_without_zk(started_cluster): assert_keeper_exception_after_partition( - "CREATE TABLE test_keeper_map_without_zk (key UInt64, value UInt64) ENGINE = KeeperMap('/test_without_zk') PRIMARY KEY(key);" + "CREATE TABLE test_keeper_map_without_zk (key UInt64, value UInt64) ENGINE = KeeperMap('/test_keeper_map_without_zk') PRIMARY KEY(key);" ) - node.query_with_retry( - "CREATE TABLE test_keeper_map_without_zk (key UInt64, value UInt64) ENGINE = KeeperMap('/test_without_zk') PRIMARY KEY(key);" + run_query( + "CREATE TABLE test_keeper_map_without_zk (key UInt64, value UInt64) ENGINE = KeeperMap('/test_keeper_map_without_zk') PRIMARY KEY(key);" ) assert_keeper_exception_after_partition( "INSERT INTO test_keeper_map_without_zk VALUES (1, 11)" ) - node.query_with_retry("INSERT INTO test_keeper_map_without_zk VALUES (1, 11)") + run_query("INSERT INTO test_keeper_map_without_zk VALUES (1, 11)") assert_keeper_exception_after_partition("SELECT * FROM test_keeper_map_without_zk") - node.query_with_retry("SELECT * FROM test_keeper_map_without_zk") + assert run_query("SELECT * FROM test_keeper_map_without_zk") == "1\t11\n" with PartitionManager() as pm: pm.drop_instance_zk_connections(node) node.restart_clickhouse(60) - error = node.query_and_get_error("SELECT * FROM test_keeper_map_without_zk") - assert "Failed to activate table because of connection issues" in error + try: + error = node.query_and_get_error_with_retry( + "SELECT * FROM test_keeper_map_without_zk", sleep_time=1 + ) + assert "Failed to activate table because of connection issues" in error + except: + print_iptables_rules() + raise - node.query_with_retry("SELECT * FROM test_keeper_map_without_zk") + run_query("SELECT * FROM test_keeper_map_without_zk") client = get_genuine_zk() - remove_children(client, "/test_keeper_map/test_without_zk") + remove_children(client, "/test_keeper_map/test_keeper_map_without_zk") node.restart_clickhouse(60) - error = node.query_and_get_error("SELECT * FROM test_keeper_map_without_zk") + error = node.query_and_get_error_with_retry( + "SELECT * FROM test_keeper_map_without_zk" + ) assert "Failed to activate table because of invalid metadata in ZooKeeper" in error node.query("DETACH TABLE test_keeper_map_without_zk") From 60c6cc17fca0cf98ace414f6c81e780859439a1c Mon Sep 17 00:00:00 2001 From: yariks5s <114298166+yariks5s@users.noreply.github.com> Date: Tue, 4 Jul 2023 11:39:39 +0200 Subject: [PATCH 1156/1997] Update include brackets Co-authored-by: Sergei Trifonov --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 87c3567f7d5..1cf86f1ae4d 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1,6 +1,6 @@ #include "IMergeTreeDataPart.h" -#include "Storages/MergeTree/IDataPartStorage.h" -#include "base/types.h" +#include +#include #include #include From 0094919c436dacc4f52c97efedf2b6a166cf198b Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Tue, 4 Jul 2023 12:53:03 +0300 Subject: [PATCH 1157/1997] Add line --- tests/queries/0_stateless/02810_convert_uuid_to_uint128.sql | 
2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02810_convert_uuid_to_uint128.sql b/tests/queries/0_stateless/02810_convert_uuid_to_uint128.sql index 5350ef99ed3..155596dd1d5 100644 --- a/tests/queries/0_stateless/02810_convert_uuid_to_uint128.sql +++ b/tests/queries/0_stateless/02810_convert_uuid_to_uint128.sql @@ -5,4 +5,4 @@ SELECT toUInt64(toUUID('00000000-0000-0000-0000-000000000000')); -- { serverErro SELECT toInt128(toUUID('00000000-0000-0000-0000-000000000000')); -- { serverError NOT_IMPLEMENTED } SELECT cast(toUUID('f82aef31-279e-431f-8b00-2899ad387aea'), 'UInt128'); select accurateCast(toUUID('f82aef31-279e-431f-8b00-2899ad387aea'), 'UInt128'); -select toUUID('f82aef31-279e-431f-8b00-2899ad387aea')::UInt128; \ No newline at end of file +select toUUID('f82aef31-279e-431f-8b00-2899ad387aea')::UInt128; From 0e4cd16582c5427cebb52fece006d16ec5e337e2 Mon Sep 17 00:00:00 2001 From: yariks5s <114298166+yariks5s@users.noreply.github.com> Date: Tue, 4 Jul 2023 11:53:23 +0200 Subject: [PATCH 1158/1997] Update include brackets --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 1cf86f1ae4d..021f624e783 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1,4 +1,4 @@ -#include "IMergeTreeDataPart.h" +#include #include #include From 0d0e59abc8672348f9a3d8386fa20454271ff618 Mon Sep 17 00:00:00 2001 From: yariks5s <114298166+yariks5s@users.noreply.github.com> Date: Tue, 4 Jul 2023 12:08:58 +0200 Subject: [PATCH 1159/1997] Update IMergeTreeDataPart.cpp --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 021f624e783..1cf86f1ae4d 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1,4 +1,4 @@ -#include +#include "IMergeTreeDataPart.h" #include #include From 9c8cb9f77ee3f087e3799b2a14a4d0db6f75f367 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Tue, 4 Jul 2023 13:20:48 +0300 Subject: [PATCH 1160/1997] Add comment to docs --- docs/en/sql-reference/functions/array-functions.md | 3 ++- docs/ru/sql-reference/functions/array-functions.md | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 7f2b8f3c605..763a61d43f2 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -142,6 +142,7 @@ range([start, ] end [, step]) - All arguments `start`, `end`, `step` must be below data types: `UInt8`, `UInt16`, `UInt32`, `UInt64`,`Int8`, `Int16`, `Int32`, `Int64`, as well as elements of the returned array, which's type is a super type of all arguments. - An exception is thrown if query results in arrays with a total length of more than number of elements specified by the [function_range_max_elements_in_block](../../operations/settings/settings.md#settings-function_range_max_elements_in_block) setting. +- Returns Null if any argument has Nullable(Nothing) type. An exception is thrown if any argument has Null value (Nullable(T) type). **Examples** @@ -878,7 +879,7 @@ A special function. 
See the section [“ArrayJoin function”](../../sql-referen ## arrayDifference -Calculates an array of differences between adjacent array elements. The first element of the result array will be 0, the second `a[1] - a[0]`, the third `a[2] - a[1]`, etc. The type of elements in the result array is determined by the type inference rules for subtraction (e.g. `UInt8` - `UInt8` = `Int16`). +Calculates an array of differences between adjacent array elements. The first element of the result array will be 0, the second `a[1] - a[0]`, the third `a[2] - a[1]`, etc. The type of elements in the result array is determined by the type inference rules for subtraction (e.g. `UInt8` - `UInt8` = `Int16`). **Syntax** diff --git a/docs/ru/sql-reference/functions/array-functions.md b/docs/ru/sql-reference/functions/array-functions.md index c43323d68fd..439eddfd752 100644 --- a/docs/ru/sql-reference/functions/array-functions.md +++ b/docs/ru/sql-reference/functions/array-functions.md @@ -145,6 +145,8 @@ range([start, ] end [, step]) - Если в результате запроса создаются массивы суммарной длиной больше, чем количество элементов, указанное настройкой [function_range_max_elements_in_block](../../operations/settings/settings.md#settings-function_range_max_elements_in_block), то генерируется исключение. +- Возвращает Null если любой аргумент Nullable(Nothing) типа. Генерируется исключение если любой аргумент Null (Nullable(T) тип). + **Примеры** Запрос: From e99e0ebddb712ab40e22be4ab74ebd2ae3786ba0 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 4 Jul 2023 13:26:55 +0300 Subject: [PATCH 1161/1997] Update 01320_create_sync_race_condition_zookeeper.sh (#51742) --- .../01320_create_sync_race_condition_zookeeper.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01320_create_sync_race_condition_zookeeper.sh b/tests/queries/0_stateless/01320_create_sync_race_condition_zookeeper.sh index ef45e8e63bc..aee69e64b1b 100755 --- a/tests/queries/0_stateless/01320_create_sync_race_condition_zookeeper.sh +++ b/tests/queries/0_stateless/01320_create_sync_race_condition_zookeeper.sh @@ -12,7 +12,10 @@ $CLICKHOUSE_CLIENT --allow_deprecated_database_ordinary=1 --query "CREATE DATABA function thread1() { - while true; do $CLICKHOUSE_CLIENT -n --query "CREATE TABLE test_01320.r (x UInt64) ENGINE = ReplicatedMergeTree('/test/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/table', 'r') ORDER BY x; DROP TABLE test_01320.r;"; done + while true; do + $CLICKHOUSE_CLIENT -n --query "CREATE TABLE test_01320.r (x UInt64) ENGINE = ReplicatedMergeTree('/test/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/table', 'r') ORDER BY x; + DROP TABLE test_01320.r;" 2>&1 | grep -F "Code:" | grep -v "UNKNOWN_DATABASE" + done } function thread2() From 6bca452924bf4ceecbd4106acbfc99bc49276012 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 4 Jul 2023 12:42:27 +0200 Subject: [PATCH 1162/1997] Fix tests --- src/Access/tests/gtest_access_rights_ops.cpp | 2 +- .../test_s3_table_functions/configs/users.d/users.xml | 9 +++++++++ tests/integration/test_s3_table_functions/test.py | 9 ++++++--- 3 files changed, 16 insertions(+), 4 deletions(-) create mode 100644 tests/integration/test_s3_table_functions/configs/users.d/users.xml diff --git a/src/Access/tests/gtest_access_rights_ops.cpp b/src/Access/tests/gtest_access_rights_ops.cpp index 5f1f13ca5a2..c2e9501f58c 100644 --- a/src/Access/tests/gtest_access_rights_ops.cpp +++ b/src/Access/tests/gtest_access_rights_ops.cpp @@ -53,7 +53,7 @@ TEST(AccessRights, Union) "SHOW ROW POLICIES, 
SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, " "SYSTEM MOVES, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, " "SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, " - "SYSTEM RESTORE REPLICA, SYSTEM WAIT LOADING PARTS, SYSTEM SYNC DATABASE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*, GRANT NAMED COLLECTION CONTROL ON db1"); + "SYSTEM RESTORE REPLICA, SYSTEM WAIT LOADING PARTS, SYSTEM SYNC DATABASE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*, GRANT NAMED COLLECTION ADMIN ON db1"); } diff --git a/tests/integration/test_s3_table_functions/configs/users.d/users.xml b/tests/integration/test_s3_table_functions/configs/users.d/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_s3_table_functions/configs/users.d/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_s3_table_functions/test.py b/tests/integration/test_s3_table_functions/test.py index 516d6582990..a6def175136 100644 --- a/tests/integration/test_s3_table_functions/test.py +++ b/tests/integration/test_s3_table_functions/test.py @@ -11,6 +11,9 @@ node = cluster.add_instance( main_configs=[ "configs/config.d/minio.xml", ], + user_configs=[ + "configs/users.d/users.xml", + ], with_minio=True, ) @@ -44,7 +47,7 @@ def test_s3_table_functions(started_cluster): """ INSERT INTO FUNCTION s3 ( - nc_s3, + nc_s3, filename = 'test_file.tsv.gz', format = 'TSV', structure = 'number UInt64', @@ -60,7 +63,7 @@ def test_s3_table_functions(started_cluster): """ SELECT count(*) FROM s3 ( - nc_s3, + nc_s3, filename = 'test_file.tsv.gz', format = 'TSV', structure = 'number UInt64', @@ -85,7 +88,7 @@ def test_s3_table_functions_timeouts(started_cluster): """ INSERT INTO FUNCTION s3 ( - nc_s3, + nc_s3, filename = 'test_file.tsv.gz', format = 'TSV', structure = 'number UInt64', From 35c13219520b8b41c633f41423af91b9fda898e1 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Tue, 4 Jul 2023 12:35:16 +0200 Subject: [PATCH 1163/1997] Version mypy=1.4.1 falsly reports unused ignore comment --- docker/test/style/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile index 746cc7bb2d5..2aa0b1a62d6 100644 --- a/docker/test/style/Dockerfile +++ b/docker/test/style/Dockerfile @@ -18,7 +18,7 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \ python3-pip \ shellcheck \ yamllint \ - && pip3 install black==23.1.0 boto3 codespell==2.2.1 dohq-artifactory mypy PyGithub unidiff pylint==2.6.2 \ + && pip3 install black==23.1.0 boto3 codespell==2.2.1 mypy==1.3.0 PyGithub unidiff pylint==2.6.2 \ && apt-get clean \ && rm -rf /root/.cache/pip From c9a26d43c5b053c3e5a0898e382c0405a4ecf4a6 Mon Sep 17 00:00:00 2001 From: Rory Crispin Date: Tue, 4 Jul 2023 12:06:15 +0100 Subject: [PATCH 1164/1997] (docs) Remove async_metric_log event_time_microseconds event_time_microseconds was removed from the system.asynchronous_metric_log in https://github.com/ClickHouse/ClickHouse/pull/36360 --- .../system-tables/asynchronous_metric_log.md | 25 +++++++++---------- .../system-tables/asynchronous_metric_log.md | 1 - .../system-tables/asynchronous_metric_log.md | 25 +++++++++---------- 3 files changed, 24 insertions(+), 27 deletions(-) diff --git a/docs/en/operations/system-tables/asynchronous_metric_log.md b/docs/en/operations/system-tables/asynchronous_metric_log.md index 4290799b6bc..efe57a202d8 100644 --- a/docs/en/operations/system-tables/asynchronous_metric_log.md +++ b/docs/en/operations/system-tables/asynchronous_metric_log.md @@ -9,7 +9,6 @@ Columns: - `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date. - `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time. -- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Event time with microseconds resolution. - `name` ([String](../../sql-reference/data-types/string.md)) — Metric name. - `value` ([Float64](../../sql-reference/data-types/float.md)) — Metric value. 
@@ -20,18 +19,18 @@ SELECT * FROM system.asynchronous_metric_log LIMIT 10 ``` ``` text -┌─event_date─┬──────────event_time─┬────event_time_microseconds─┬─name─────────────────────────────────────┬─────value─┐ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ CPUFrequencyMHz_0 │ 2120.9 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.arenas.all.pmuzzy │ 743 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.arenas.all.pdirty │ 26288 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.background_thread.run_intervals │ 0 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.background_thread.num_runs │ 0 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.retained │ 60694528 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.mapped │ 303161344 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.resident │ 260931584 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.metadata │ 12079488 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.allocated │ 133756128 │ -└────────────┴─────────────────────┴────────────────────────────┴──────────────────────────────────────────┴───────────┘ +┌─event_date─┬──────────event_time─┬─name─────────────────────────────────────┬─────value─┐ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ CPUFrequencyMHz_0 │ 2120.9 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.arenas.all.pmuzzy │ 743 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.arenas.all.pdirty │ 26288 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.background_thread.run_intervals │ 0 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.background_thread.num_runs │ 0 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.retained │ 60694528 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.mapped │ 303161344 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.resident │ 260931584 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.metadata │ 12079488 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.allocated │ 133756128 │ +└────────────┴─────────────────────┴──────────────────────────────────────────┴───────────┘ ``` **See Also** diff --git a/docs/ru/operations/system-tables/asynchronous_metric_log.md b/docs/ru/operations/system-tables/asynchronous_metric_log.md index 886fbb6cab0..5145889c95f 100644 --- a/docs/ru/operations/system-tables/asynchronous_metric_log.md +++ b/docs/ru/operations/system-tables/asynchronous_metric_log.md @@ -8,7 +8,6 @@ slug: /ru/operations/system-tables/asynchronous_metric_log Столбцы: - `event_date` ([Date](../../sql-reference/data-types/date.md)) — дата события. - `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — время события. -- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — время события в микросекундах. - `name` ([String](../../sql-reference/data-types/string.md)) — название метрики. - `value` ([Float64](../../sql-reference/data-types/float.md)) — значение метрики. 
diff --git a/docs/zh/operations/system-tables/asynchronous_metric_log.md b/docs/zh/operations/system-tables/asynchronous_metric_log.md index 419ad2a7ed6..9fa399f1aed 100644 --- a/docs/zh/operations/system-tables/asynchronous_metric_log.md +++ b/docs/zh/operations/system-tables/asynchronous_metric_log.md @@ -8,7 +8,6 @@ slug: /zh/operations/system-tables/asynchronous_metric_log 列: - `event_date` ([Date](../../sql-reference/data-types/date.md)) — 事件日期。 - `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — 事件时间。 -- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — 事件时间(微秒)。 - `name` ([String](../../sql-reference/data-types/string.md)) — 指标名。 - `value` ([Float64](../../sql-reference/data-types/float.md)) — 指标值。 @@ -17,18 +16,18 @@ slug: /zh/operations/system-tables/asynchronous_metric_log SELECT * FROM system.asynchronous_metric_log LIMIT 10 ``` ``` text -┌─event_date─┬──────────event_time─┬────event_time_microseconds─┬─name─────────────────────────────────────┬─────value─┐ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ CPUFrequencyMHz_0 │ 2120.9 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.arenas.all.pmuzzy │ 743 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.arenas.all.pdirty │ 26288 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.background_thread.run_intervals │ 0 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.background_thread.num_runs │ 0 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.retained │ 60694528 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.mapped │ 303161344 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.resident │ 260931584 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.metadata │ 12079488 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.allocated │ 133756128 │ -└────────────┴─────────────────────┴────────────────────────────┴──────────────────────────────────────────┴───────────┘ +┌─event_date─┬──────────event_time─┬─name─────────────────────────────────────┬─────value─┐ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ CPUFrequencyMHz_0 │ 2120.9 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.arenas.all.pmuzzy │ 743 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.arenas.all.pdirty │ 26288 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.background_thread.run_intervals │ 0 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.background_thread.num_runs │ 0 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.retained │ 60694528 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.mapped │ 303161344 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.resident │ 260931584 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.metadata │ 12079488 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.allocated │ 133756128 │ +└────────────┴─────────────────────┴──────────────────────────────────────────┴───────────┘ ``` **另请参阅** From 463c2ba5ce13a6839dfaf12223826b9e90f143c8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 4 Jul 2023 14:07:10 +0300 Subject: [PATCH 1165/1997] Revert "Add documentation for building in docker" --- docker/packager/README.md | 2 +- docker/packager/packager | 16 ++++++++-------- docs/en/development/build.md | 14 -------------- tests/ci/build_check.py | 6 +++--- tests/ci/build_report_check.py | 4 ++-- tests/ci/ci_config.py | 34 
+++++++++++++++++----------------- tests/ci/report.py | 6 +++--- 7 files changed, 34 insertions(+), 48 deletions(-) diff --git a/docker/packager/README.md b/docker/packager/README.md index 3a91f9a63f0..a78feb8d7fc 100644 --- a/docker/packager/README.md +++ b/docker/packager/README.md @@ -6,7 +6,7 @@ Usage: Build deb package with `clang-14` in `debug` mode: ``` $ mkdir deb/test_output -$ ./packager --output-dir deb/test_output/ --package-type deb --compiler=clang-14 --debug-build +$ ./packager --output-dir deb/test_output/ --package-type deb --compiler=clang-14 --build-type=debug $ ls -l deb/test_output -rw-r--r-- 1 root root 3730 clickhouse-client_22.2.2+debug_all.deb -rw-r--r-- 1 root root 84221888 clickhouse-common-static_22.2.2+debug_amd64.deb diff --git a/docker/packager/packager b/docker/packager/packager index 3c3304165b3..1b3df858cd2 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -112,12 +112,12 @@ def run_docker_image_with_env( subprocess.check_call(cmd, shell=True) -def is_release_build(debug_build: bool, package_type: str, sanitizer: str) -> bool: - return not debug_build and package_type == "deb" and sanitizer == "" +def is_release_build(build_type: str, package_type: str, sanitizer: str) -> bool: + return build_type == "" and package_type == "deb" and sanitizer == "" def parse_env_variables( - debug_build: bool, + build_type: str, compiler: str, sanitizer: str, package_type: str, @@ -233,7 +233,7 @@ def parse_env_variables( build_target = ( f"{build_target} clickhouse-odbc-bridge clickhouse-library-bridge" ) - if is_release_build(debug_build, package_type, sanitizer): + if is_release_build(build_type, package_type, sanitizer): cmake_flags.append("-DSPLIT_DEBUG_SYMBOLS=ON") result.append("WITH_PERFORMANCE=1") if is_cross_arm: @@ -253,8 +253,8 @@ def parse_env_variables( if sanitizer: result.append(f"SANITIZER={sanitizer}") - if debug_build: - result.append("BUILD_TYPE=DEBUG") + if build_type: + result.append(f"BUILD_TYPE={build_type.capitalize()}") else: result.append("BUILD_TYPE=None") @@ -359,7 +359,7 @@ def parse_args() -> argparse.Namespace: help="ClickHouse git repository", ) parser.add_argument("--output-dir", type=dir_name, required=True) - parser.add_argument("--debug-build", action="store_true") + parser.add_argument("--build-type", choices=("debug", ""), default="") parser.add_argument( "--compiler", @@ -464,7 +464,7 @@ def main(): build_image(image_with_version, dockerfile) env_prepared = parse_env_variables( - args.debug_build, + args.build_type, args.compiler, args.sanitizer, args.package_type, diff --git a/docs/en/development/build.md b/docs/en/development/build.md index ab47ee8aac5..83a4550df88 100644 --- a/docs/en/development/build.md +++ b/docs/en/development/build.md @@ -13,20 +13,6 @@ Supported platforms: - AArch64 - Power9 (experimental) -## Building in docker -We use the docker image `clickhouse/binary-builder` for our CI builds. It contains everything necessary to build the binary and packages. 
There is a script `docker/packager/packager` to ease the image usage: - -```bash -# define a directory for the output artifacts -output_dir="build_results" -# a simplest build -./docker/packager/packager --package-type=binary --output-dir "$output_dir" -# build debian packages -./docker/packager/packager --package-type=deb --output-dir "$output_dir" -# by default, debian packages use thin LTO, so we can override it to speed up the build -CMAKE_FLAGS='-DENABLE_THINLTO=' ./docker/packager/packager --package-type=deb --output-dir "$output_dir" -``` - ## Building on Ubuntu The following tutorial is based on Ubuntu Linux. diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py index 2a636faf967..35b98a7c3bb 100644 --- a/tests/ci/build_check.py +++ b/tests/ci/build_check.py @@ -45,7 +45,7 @@ def _can_export_binaries(build_config: BuildConfig) -> bool: return False if build_config["sanitizer"] != "": return True - if build_config["debug_build"]: + if build_config["build_type"] != "": return True return False @@ -66,8 +66,8 @@ def get_packager_cmd( f"--package-type={package_type} --compiler={comp}" ) - if build_config["debug_build"]: - cmd += " --debug-build" + if build_config["build_type"]: + cmd += f" --build-type={build_config['build_type']}" if build_config["sanitizer"]: cmd += f" --sanitizer={build_config['sanitizer']}" if build_config["tidy"] == "enable": diff --git a/tests/ci/build_report_check.py b/tests/ci/build_report_check.py index 295b6cf9740..1362f3c8934 100644 --- a/tests/ci/build_report_check.py +++ b/tests/ci/build_report_check.py @@ -70,7 +70,7 @@ def get_failed_report( message = f"{job_name} failed" build_result = BuildResult( compiler="unknown", - debug_build=False, + build_type="unknown", sanitizer="unknown", status=message, elapsed_seconds=0, @@ -85,7 +85,7 @@ def process_report( build_config = build_report["build_config"] build_result = BuildResult( compiler=build_config["compiler"], - debug_build=build_config["debug_build"], + build_type=build_config["build_type"], sanitizer=build_config["sanitizer"], status="success" if build_report["status"] else "failure", elapsed_seconds=build_report["elapsed_seconds"], diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 875c5a3c8bd..c680b5810fc 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -10,7 +10,7 @@ CI_CONFIG = { "build_config": { "package_release": { "compiler": "clang-16", - "debug_build": False, + "build_type": "", "sanitizer": "", "package_type": "deb", "static_binary_name": "amd64", @@ -21,7 +21,7 @@ CI_CONFIG = { }, "coverity": { "compiler": "clang-16", - "debug_build": False, + "build_type": "", "sanitizer": "", "package_type": "coverity", "tidy": "disable", @@ -31,7 +31,7 @@ CI_CONFIG = { }, "package_aarch64": { "compiler": "clang-16-aarch64", - "debug_build": False, + "build_type": "", "sanitizer": "", "package_type": "deb", "static_binary_name": "aarch64", @@ -42,7 +42,7 @@ CI_CONFIG = { }, "package_asan": { "compiler": "clang-16", - "debug_build": False, + "build_type": "", "sanitizer": "address", "package_type": "deb", "tidy": "disable", @@ -51,7 +51,7 @@ CI_CONFIG = { }, "package_ubsan": { "compiler": "clang-16", - "debug_build": False, + "build_type": "", "sanitizer": "undefined", "package_type": "deb", "tidy": "disable", @@ -60,7 +60,7 @@ CI_CONFIG = { }, "package_tsan": { "compiler": "clang-16", - "debug_build": False, + "build_type": "", "sanitizer": "thread", "package_type": "deb", "tidy": "disable", @@ -69,7 +69,7 @@ CI_CONFIG = { }, "package_msan": { "compiler": 
"clang-16", - "debug_build": False, + "build_type": "", "sanitizer": "memory", "package_type": "deb", "tidy": "disable", @@ -78,7 +78,7 @@ CI_CONFIG = { }, "package_debug": { "compiler": "clang-16", - "debug_build": True, + "build_type": "debug", "sanitizer": "", "package_type": "deb", "tidy": "disable", @@ -87,7 +87,7 @@ CI_CONFIG = { }, "binary_release": { "compiler": "clang-16", - "debug_build": False, + "build_type": "", "sanitizer": "", "package_type": "binary", "tidy": "disable", @@ -96,7 +96,7 @@ CI_CONFIG = { }, "binary_tidy": { "compiler": "clang-16", - "debug_build": True, + "build_type": "debug", "sanitizer": "", "package_type": "binary", "static_binary_name": "debug-amd64", @@ -106,7 +106,7 @@ CI_CONFIG = { }, "binary_darwin": { "compiler": "clang-16-darwin", - "debug_build": False, + "build_type": "", "sanitizer": "", "package_type": "binary", "static_binary_name": "macos", @@ -116,7 +116,7 @@ CI_CONFIG = { }, "binary_aarch64": { "compiler": "clang-16-aarch64", - "debug_build": False, + "build_type": "", "sanitizer": "", "package_type": "binary", "tidy": "disable", @@ -125,7 +125,7 @@ CI_CONFIG = { }, "binary_aarch64_v80compat": { "compiler": "clang-16-aarch64-v80compat", - "debug_build": False, + "build_type": "", "sanitizer": "", "package_type": "binary", "static_binary_name": "aarch64v80compat", @@ -135,7 +135,7 @@ CI_CONFIG = { }, "binary_freebsd": { "compiler": "clang-16-freebsd", - "debug_build": False, + "build_type": "", "sanitizer": "", "package_type": "binary", "static_binary_name": "freebsd", @@ -145,7 +145,7 @@ CI_CONFIG = { }, "binary_darwin_aarch64": { "compiler": "clang-16-darwin-aarch64", - "debug_build": False, + "build_type": "", "sanitizer": "", "package_type": "binary", "static_binary_name": "macos-aarch64", @@ -155,7 +155,7 @@ CI_CONFIG = { }, "binary_ppc64le": { "compiler": "clang-16-ppc64le", - "debug_build": False, + "build_type": "", "sanitizer": "", "package_type": "binary", "static_binary_name": "powerpc64le", @@ -165,7 +165,7 @@ CI_CONFIG = { }, "binary_amd64_compat": { "compiler": "clang-16-amd64-compat", - "debug_build": False, + "build_type": "", "sanitizer": "", "package_type": "binary", "static_binary_name": "amd64compat", diff --git a/tests/ci/report.py b/tests/ci/report.py index 0f84fbcaeb2..a9014acec12 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -239,7 +239,7 @@ def read_test_results(results_path: Path, with_raw_logs: bool = True) -> TestRes @dataclass class BuildResult: compiler: str - debug_build: bool + build_type: str sanitizer: str status: str elapsed_seconds: int @@ -484,8 +484,8 @@ def create_build_html_report( ): row = "" row += f"{build_result.compiler}" - if build_result.debug_build: - row += "debug" + if build_result.build_type: + row += f"{build_result.build_type}" else: row += "relwithdebuginfo" if build_result.sanitizer: From fd6115f0e3ef53fb6b0a7c411de13148e8cdc10e Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 4 Jul 2023 11:42:27 +0000 Subject: [PATCH 1166/1997] Fix flaky tests with timeout --- ...20_create_sync_race_condition_zookeeper.sh | 17 ++++++++----- .../0_stateless/01632_tinylog_read_write.sh | 24 ++++++++++-------- .../02481_async_insert_race_long.sh | 25 +++++++++++-------- 3 files changed, 39 insertions(+), 27 deletions(-) diff --git a/tests/queries/0_stateless/01320_create_sync_race_condition_zookeeper.sh b/tests/queries/0_stateless/01320_create_sync_race_condition_zookeeper.sh index aee69e64b1b..57409d782ae 100755 --- 
a/tests/queries/0_stateless/01320_create_sync_race_condition_zookeeper.sh +++ b/tests/queries/0_stateless/01320_create_sync_race_condition_zookeeper.sh @@ -12,22 +12,27 @@ $CLICKHOUSE_CLIENT --allow_deprecated_database_ordinary=1 --query "CREATE DATABA function thread1() { - while true; do - $CLICKHOUSE_CLIENT -n --query "CREATE TABLE test_01320.r (x UInt64) ENGINE = ReplicatedMergeTree('/test/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/table', 'r') ORDER BY x; - DROP TABLE test_01320.r;" 2>&1 | grep -F "Code:" | grep -v "UNKNOWN_DATABASE" + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do + $CLICKHOUSE_CLIENT -n --query "CREATE TABLE test_01320.r (x UInt64) ENGINE = ReplicatedMergeTree('/test/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/table', 'r') ORDER BY x; DROP TABLE test_01320.r;" done } function thread2() { - while true; do $CLICKHOUSE_CLIENT --query "SYSTEM SYNC REPLICA test_01320.r" 2>/dev/null; done + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do + $CLICKHOUSE_CLIENT --query "SYSTEM SYNC REPLICA test_01320.r" 2>/dev/null; + done } export -f thread1 export -f thread2 -timeout 10 bash -c thread1 & -timeout 10 bash -c thread2 & +TIMEOUT=10 + +thread1 $TIMEOUT & +thread2 $TIMEOUT & wait diff --git a/tests/queries/0_stateless/01632_tinylog_read_write.sh b/tests/queries/0_stateless/01632_tinylog_read_write.sh index 69f985a9d0d..10625ec5d27 100755 --- a/tests/queries/0_stateless/01632_tinylog_read_write.sh +++ b/tests/queries/0_stateless/01632_tinylog_read_write.sh @@ -11,14 +11,16 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --multiquery --query "DROP TABLE IF EXISTS test; CREATE TABLE IF NOT EXISTS test (x UInt64, s Array(Nullable(String))) ENGINE = TinyLog;" function thread_select { - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do $CLICKHOUSE_CLIENT --local_filesystem_read_method pread --query "SELECT * FROM test FORMAT Null" sleep 0.0$RANDOM done } function thread_insert { - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do $CLICKHOUSE_CLIENT --query "INSERT INTO test VALUES (1, ['Hello'])" sleep 0.0$RANDOM done @@ -30,15 +32,17 @@ export -f thread_insert # Do randomized queries and expect nothing extraordinary happens.
-timeout 10 bash -c 'thread_select' & -timeout 10 bash -c 'thread_select' & -timeout 10 bash -c 'thread_select' & -timeout 10 bash -c 'thread_select' & +TIMEOUT=10 -timeout 10 bash -c 'thread_insert' & -timeout 10 bash -c 'thread_insert' & -timeout 10 bash -c 'thread_insert' & -timeout 10 bash -c 'thread_insert' & +thread_select $TIMEOUT & +thread_select $TIMEOUT & +thread_select $TIMEOUT & +thread_select $TIMEOUT & + +thread_insert $TIMEOUT & +thread_insert $TIMEOUT & +thread_insert $TIMEOUT & +thread_insert $TIMEOUT & wait echo "Done" diff --git a/tests/queries/0_stateless/02481_async_insert_race_long.sh b/tests/queries/0_stateless/02481_async_insert_race_long.sh index cec9278c127..c4b026c6aba 100755 --- a/tests/queries/0_stateless/02481_async_insert_race_long.sh +++ b/tests/queries/0_stateless/02481_async_insert_race_long.sh @@ -11,21 +11,24 @@ export MY_CLICKHOUSE_CLIENT="$CLICKHOUSE_CLIENT --async_insert_busy_timeout_ms 1 function insert1() { - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do ${MY_CLICKHOUSE_CLIENT} --wait_for_async_insert 0 -q 'INSERT INTO async_inserts_race FORMAT CSV 1,"a"' done } function insert2() { - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do ${MY_CLICKHOUSE_CLIENT} --wait_for_async_insert 0 -q 'INSERT INTO async_inserts_race FORMAT JSONEachRow {"id": 5, "s": "e"} {"id": 6, "s": "f"}' done } function insert3() { - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do ${MY_CLICKHOUSE_CLIENT} --wait_for_async_insert 1 -q "INSERT INTO async_inserts_race VALUES (7, 'g') (8, 'h')" & sleep 0.05 done @@ -33,29 +36,29 @@ function insert3() function select1() { - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do ${MY_CLICKHOUSE_CLIENT} -q "SELECT * FROM async_inserts_race FORMAT Null" done - } ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS async_inserts_race" ${CLICKHOUSE_CLIENT} -q "CREATE TABLE async_inserts_race (id UInt32, s String) ENGINE = MergeTree ORDER BY id" -TIMEOUT=10 - export -f insert1 export -f insert2 export -f insert3 export -f select1 +TIMEOUT=10 + for _ in {1..3}; do - timeout $TIMEOUT bash -c insert1 & - timeout $TIMEOUT bash -c insert2 & - timeout $TIMEOUT bash -c insert3 & + insert1 $TIMEOUT & + insert2 $TIMEOUT & + insert3 $TIMEOUT & done -timeout $TIMEOUT bash -c select1 & +select1 $TIMEOUT & wait echo "OK" From 5230503651e091f4bf1944a1176aeac47c834907 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Tue, 4 Jul 2023 12:59:25 +0000 Subject: [PATCH 1167/1997] remove unused type: ignore --- tests/ci/commit_status_helper.py | 2 +- tests/ci/github_helper.py | 2 +- tests/ci/merge_pr.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py index 4437ddc52c6..efe149b0aa4 100644 --- a/tests/ci/commit_status_helper.py +++ b/tests/ci/commit_status_helper.py @@ -7,7 +7,7 @@ from typing import Dict, List, Literal, Optional, Union import logging from github import Github -from github.GithubObject import _NotSetType, NotSet as NotSet # type: ignore +from github.GithubObject import _NotSetType, NotSet as NotSet from github.Commit import Commit from github.CommitStatus import CommitStatus from github.IssueComment import IssueComment diff --git a/tests/ci/github_helper.py b/tests/ci/github_helper.py index 834c8247cb8..f7256e523b1 100644 --- a/tests/ci/github_helper.py +++ 
b/tests/ci/github_helper.py @@ -111,7 +111,7 @@ class GitHub(github.Github): # See https://github.com/PyGithub/PyGithub/issues/2202, # obj._rawData doesn't spend additional API requests # pylint: disable=protected-access - repo_url = issue._rawData["repository_url"] # type: ignore + repo_url = issue._rawData["repository_url"] if repo_url not in repos: repos[repo_url] = issue.repository prs.append( diff --git a/tests/ci/merge_pr.py b/tests/ci/merge_pr.py index 2d6d81a152a..14844ed9b25 100644 --- a/tests/ci/merge_pr.py +++ b/tests/ci/merge_pr.py @@ -154,7 +154,7 @@ def get_workflows_for_head(repo: Repository, head_sha: str) -> List[WorkflowRun] return list( PaginatedList( WorkflowRun, - repo._requester, # type:ignore # pylint:disable=protected-access + repo._requester, # pylint:disable=protected-access f"{repo.url}/actions/runs", {"head_sha": head_sha}, list_item="workflow_runs", From c979113ea5072366f61a678b5542558ee9583291 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 4 Jul 2023 16:03:09 +0200 Subject: [PATCH 1168/1997] Fix --- src/Interpreters/Cache/Metadata.cpp | 41 +++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 11 deletions(-) diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index decc69bb81f..bfaa00eac2c 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -333,11 +333,11 @@ class DownloadQueue { friend struct CacheMetadata; public: - void add(std::weak_ptr file_segment) + void add(FileSegmentPtr file_segment) { { std::lock_guard lock(mutex); - queue.push(file_segment); + queue.emplace(file_segment->key(), file_segment->offset(), file_segment); } CurrentMetrics::add(CurrentMetrics::FilesystemCacheDownloadQueueElements); @@ -356,8 +356,19 @@ private: std::mutex mutex; std::condition_variable cv; - std::queue> queue; bool cancelled = false; + + struct DownloadInfo + { + CacheMetadata::Key key; + size_t offset; + /// We keep a weak pointer to the file segment + /// instead of just getting it from file_segment_metadata, + /// because the file segment at key:offset could be removed and added back to the metadata + /// before we actually start the background download.
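+ /// The consumer (downloadThreadFunc below) therefore re-resolves (key, offset)
+ /// in the metadata first and only trusts the weak pointer if it still refers
+ /// to the file segment currently registered at that offset.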
+ std::weak_ptr file_segment; + }; + std::queue queue; }; void CacheMetadata::downloadThreadFunc() @@ -365,6 +376,8 @@ void CacheMetadata::downloadThreadFunc() std::optional> memory; while (true) { + Key key; + size_t offset; std::weak_ptr file_segment_weak; { @@ -379,7 +392,11 @@ void CacheMetadata::downloadThreadFunc() continue; } - file_segment_weak = download_queue->queue.front(); + auto entry = download_queue->queue.front(); + key = entry.key; + offset = entry.offset; + file_segment_weak = entry.file_segment; + download_queue->queue.pop(); } @@ -389,19 +406,21 @@ void CacheMetadata::downloadThreadFunc() try { { - auto file_segment = file_segment_weak.lock(); - if (!file_segment - || file_segment->state() != FileSegment::State::PARTIALLY_DOWNLOADED) - continue; - - auto locked_key = lockKeyMetadata(file_segment->key(), KeyNotFoundPolicy::RETURN_NULL); + auto locked_key = lockKeyMetadata(key, KeyNotFoundPolicy::RETURN_NULL); if (!locked_key) continue; - auto file_segment_metadata = locked_key->tryGetByOffset(file_segment->offset()); + auto file_segment_metadata = locked_key->tryGetByOffset(offset); if (!file_segment_metadata || file_segment_metadata->evicting()) continue; + auto file_segment = file_segment_weak.lock(); + + if (!file_segment + || file_segment != file_segment_metadata->file_segment + || file_segment->state() != FileSegment::State::PARTIALLY_DOWNLOADED) + continue; + holder = std::make_unique(FileSegments{file_segment}); } From ca6930eb110903709fc4c2e1cbec19a95e55ee18 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Tue, 4 Jul 2023 17:38:53 +0200 Subject: [PATCH 1169/1997] Revert "Revert "Merge pull request #50951 from ZhiguoZh/20230607-toyear-fix"" --- src/Functions/DateTimeTransforms.h | 72 +++++++ .../FunctionDateOrDateTimeToSomething.h | 13 ++ src/Functions/IFunction.h | 29 ++- src/Functions/IFunctionAdaptors.h | 7 + ...OrDateTimeConverterWithPreimageVisitor.cpp | 199 ++++++++++++++++++ ...teOrDateTimeConverterWithPreimageVisitor.h | 37 ++++ src/Interpreters/TreeOptimizer.cpp | 19 ++ ...783_date_predicate_optimizations.reference | 52 +++++ .../02783_date_predicate_optimizations.sql | 76 +++++++ ...dicate_optimizations_ast_rewrite.reference | 87 ++++++++ ...te_predicate_optimizations_ast_rewrite.sql | 47 +++++ 11 files changed, 632 insertions(+), 6 deletions(-) create mode 100644 src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp create mode 100644 src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.h create mode 100644 tests/queries/0_stateless/02785_date_predicate_optimizations_ast_rewrite.reference create mode 100644 tests/queries/0_stateless/02785_date_predicate_optimizations_ast_rewrite.sql diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index 019e0c42cde..84c71c89b11 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -322,6 +322,7 @@ struct ToTimeImpl { throwDateTimeIsNotSupported(name); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ToDateImpl; }; @@ -393,6 +394,7 @@ struct ToStartOfSecondImpl { throwDateTimeIsNotSupported(name); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -440,6 +442,7 @@ struct ToStartOfMillisecondImpl { throwDateTimeIsNotSupported(name); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -483,6 +486,7 @@ struct ToStartOfMicrosecondImpl { 
throwDateTimeIsNotSupported(name); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -520,6 +524,7 @@ struct ToStartOfNanosecondImpl { throwDateTimeIsNotSupported(name); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -718,6 +723,28 @@ struct ToYearImpl return time_zone.toYear(DayNum(d)); } + static inline constexpr bool hasPreimage() { return true; } + + static inline RangeOrNull getPreimage(const IDataType & type, const Field & point) + { + if (point.getType() != Field::Types::UInt64) return std::nullopt; + + auto year = point.get(); + if (year < DATE_LUT_MIN_YEAR || year >= DATE_LUT_MAX_YEAR) return std::nullopt; + + const DateLUTImpl & date_lut = DateLUT::instance(); + + auto start_time = date_lut.makeDateTime(year, 1, 1, 0, 0, 0); + auto end_time = date_lut.addYears(start_time, 1); + + if (isDateOrDate32(type) || isDateTime(type) || isDateTime64(type)) + return {std::make_pair(Field(start_time), Field(end_time))}; + else + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument of function {}. Should be Date, Date32, DateTime or DateTime64", + type.getName(), name); + } + using FactorTransform = ZeroTransform; }; @@ -791,6 +818,7 @@ struct ToQuarterImpl { return time_zone.toQuarter(DayNum(d)); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ToStartOfYearImpl; }; @@ -815,6 +843,7 @@ struct ToMonthImpl { return time_zone.toMonth(DayNum(d)); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ToStartOfYearImpl; }; @@ -840,6 +869,7 @@ struct ToDayOfMonthImpl return time_zone.toDayOfMonth(DayNum(d)); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ToStartOfMonthImpl; }; @@ -887,6 +917,7 @@ struct ToDayOfYearImpl { return time_zone.toDayOfYear(DayNum(d)); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ToStartOfYearImpl; }; @@ -911,6 +942,7 @@ struct ToHourImpl { throwDateTimeIsNotSupported(name); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ToDateImpl; }; @@ -939,6 +971,7 @@ struct TimezoneOffsetImpl throwDateTimeIsNotSupported(name); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ToTimeImpl; }; @@ -962,6 +995,7 @@ struct ToMinuteImpl { throwDateTimeIsNotSupported(name); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ToStartOfHourImpl; }; @@ -986,6 +1020,7 @@ struct ToSecondImpl { throwDateTimeIsNotSupported(name); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ToStartOfMinuteImpl; }; @@ -1010,6 +1045,7 @@ struct ToISOYearImpl { return time_zone.toISOYear(DayNum(d)); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -1066,6 +1102,7 @@ struct ToISOWeekImpl { return time_zone.toISOWeek(DayNum(d)); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ToISOYearImpl; }; @@ -1108,6 +1145,7 @@ struct ToRelativeYearNumImpl { return time_zone.toYear(DayNum(d)); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -1139,6 +1177,7 @@ struct ToRelativeQuarterNumImpl { return time_zone.toRelativeQuarterNum(DayNum(d)); } + static inline constexpr bool hasPreimage() { return false; 
} using FactorTransform = ZeroTransform; }; @@ -1170,6 +1209,7 @@ struct ToRelativeMonthNumImpl { return time_zone.toRelativeMonthNum(DayNum(d)); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -1201,6 +1241,7 @@ struct ToRelativeWeekNumImpl { return time_zone.toRelativeWeekNum(DayNum(d)); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -1232,6 +1273,7 @@ struct ToRelativeDayNumImpl { return static_cast(d); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -1269,6 +1311,7 @@ struct ToRelativeHourNumImpl else return static_cast(time_zone.toRelativeHourNum(DayNum(d))); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -1300,6 +1343,7 @@ struct ToRelativeMinuteNumImpl { return static_cast(time_zone.toRelativeMinuteNum(DayNum(d))); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -1328,6 +1372,7 @@ struct ToRelativeSecondNumImpl { return static_cast(time_zone.fromDayNum(DayNum(d))); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -1352,6 +1397,31 @@ struct ToYYYYMMImpl { return time_zone.toNumYYYYMM(DayNum(d)); } + static inline constexpr bool hasPreimage() { return true; } + + static inline RangeOrNull getPreimage(const IDataType & type, const Field & point) + { + if (point.getType() != Field::Types::UInt64) return std::nullopt; + + auto year_month = point.get(); + auto year = year_month / 100; + auto month = year_month % 100; + + if (year < DATE_LUT_MIN_YEAR || year > DATE_LUT_MAX_YEAR || month < 1 || month > 12 || (year == DATE_LUT_MAX_YEAR && month == 12)) + return std::nullopt; + + const DateLUTImpl & date_lut = DateLUT::instance(); + + auto start_time = date_lut.makeDateTime(year, month, 1, 0, 0, 0); + auto end_time = date_lut.addMonths(start_time, 1); + + if (isDateOrDate32(type) || isDateTime(type) || isDateTime64(type)) + return {std::make_pair(Field(start_time), Field(end_time))}; + else + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument of function {}. 
Should be Date, Date32, DateTime or DateTime64", + type.getName(), name); + } using FactorTransform = ZeroTransform; }; @@ -1376,6 +1446,7 @@ struct ToYYYYMMDDImpl { return time_zone.toNumYYYYMMDD(DayNum(d)); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -1400,6 +1471,7 @@ struct ToYYYYMMDDhhmmssImpl { return time_zone.toNumYYYYMMDDhhmmss(time_zone.toDate(DayNum(d))); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; diff --git a/src/Functions/FunctionDateOrDateTimeToSomething.h b/src/Functions/FunctionDateOrDateTimeToSomething.h index 82818cc3d2b..d98b788c7d7 100644 --- a/src/Functions/FunctionDateOrDateTimeToSomething.h +++ b/src/Functions/FunctionDateOrDateTimeToSomething.h @@ -7,6 +7,7 @@ namespace DB namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NOT_IMPLEMENTED; } /// See DateTimeTransforms.h @@ -83,6 +84,18 @@ public: arguments[0].type->getName(), this->getName()); } + bool hasInformationAboutPreimage() const override { return Transform::hasPreimage(); } + + RangeOrNull getPreimage(const IDataType & type, const Field & point) const override + { + if constexpr (Transform::hasPreimage()) + return Transform::getPreimage(type, point); + else + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Function {} has no information about its preimage", + Transform::name); + } + }; } diff --git a/src/Functions/IFunction.h b/src/Functions/IFunction.h index c5b9a78015d..433cb61d04e 100644 --- a/src/Functions/IFunction.h +++ b/src/Functions/IFunction.h @@ -2,6 +2,8 @@ #include #include +#include +#include #include #include #include @@ -11,11 +13,6 @@ #include -#if USE_EMBEDDED_COMPILER -# include -#endif - - /// This file contains user interface for functions. namespace llvm @@ -35,7 +32,8 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; } -class Field; +/// A left-closed and right-open interval representing the preimage of a function. +using RangeOrNull = std::optional>; /// The simplest executable object. /// Motivation: @@ -233,6 +231,12 @@ public: */ virtual bool hasInformationAboutMonotonicity() const { return false; } + /** Lets you know whether the function has its own definition of a preimage. + * This is used to work with predicate optimizations, where the comparison between + * f(x) and a constant c can be converted into a comparison between x and f's preimage [b, e). + */ + virtual bool hasInformationAboutPreimage() const { return false; } + struct ShortCircuitSettings { /// Should we enable lazy execution for the first argument of short-circuit function? @@ -286,6 +290,14 @@ public: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Function {} has no information about its monotonicity", getName()); } + /** Get the preimage of a function in the form of a left-closed and right-open interval. Call only if hasInformationAboutPreimage. + * std::nullopt might be returned if the point (a single value) is invalid for this function.
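+ * For example, toYYYYMM has no valid preimage for the point 199313 (there is
+ * no month 13), so it returns std::nullopt and callers leave the predicate unchanged.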
+ */ + virtual RangeOrNull getPreimage(const IDataType & /*type*/, const Field & /*point*/) const + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Function {} has no information about its preimage", getName()); + } + }; using FunctionBasePtr = std::shared_ptr; @@ -475,12 +487,17 @@ public: virtual bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const = 0; virtual bool hasInformationAboutMonotonicity() const { return false; } + virtual bool hasInformationAboutPreimage() const { return false; } using Monotonicity = IFunctionBase::Monotonicity; virtual Monotonicity getMonotonicityForRange(const IDataType & /*type*/, const Field & /*left*/, const Field & /*right*/) const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Function {} has no information about its monotonicity", getName()); } + virtual RangeOrNull getPreimage(const IDataType & /*type*/, const Field & /*point*/) const + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Function {} has no information about its preimage", getName()); + } /// For non-variadic functions, return number of arguments; otherwise return zero (that should be ignored). virtual size_t getNumberOfArguments() const = 0; diff --git a/src/Functions/IFunctionAdaptors.h b/src/Functions/IFunctionAdaptors.h index 23725b1a8b1..123fdbc2f50 100644 --- a/src/Functions/IFunctionAdaptors.h +++ b/src/Functions/IFunctionAdaptors.h @@ -90,10 +90,17 @@ public: bool hasInformationAboutMonotonicity() const override { return function->hasInformationAboutMonotonicity(); } + bool hasInformationAboutPreimage() const override { return function->hasInformationAboutPreimage(); } + Monotonicity getMonotonicityForRange(const IDataType & type, const Field & left, const Field & right) const override { return function->getMonotonicityForRange(type, left, right); } + + RangeOrNull getPreimage(const IDataType & type, const Field & point) const override + { + return function->getPreimage(type, point); + } private: std::shared_ptr function; DataTypes arguments; diff --git a/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp b/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp new file mode 100644 index 00000000000..a377bb4bba6 --- /dev/null +++ b/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp @@ -0,0 +1,199 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +/** Given a monotonic non-decreasing function f(x) that satisfies f(x) = c for any value x within [b, e), + * we can convert f(x) == c into its equivalent form, x >= b AND x < e, which is free of the function invocation, + * and similar transformations apply to the other comparisons. The suggested transformations are: + * + * f(x) == c -> x >= b AND x < e + * f(x) != c -> x < b OR x >= e + * f(x) > c -> x >= e + * f(x) >= c -> x >= b + * f(x) < c -> x < b + * f(x) <= c -> x < e + * + * This function generates a new AST with the transformed relation.
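+ *
+ * A worked example from the accompanying tests: for a Date column date1, the point 1993
+ * under toYear has the preimage [1993-01-01, 1994-01-01), so toYear(date1) = 1993 is
+ * rewritten as (date1 >= '1993-01-01') AND (date1 < '1994-01-01').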
+ */ +ASTPtr generateOptimizedDateFilterAST(const String & comparator, const NameAndTypePair & column, const std::pair& range) +{ + const DateLUTImpl & date_lut = DateLUT::instance(); + + const String & column_name = column.name; + String start_date_or_date_time; + String end_date_or_date_time; + + if (isDateOrDate32(column.type.get())) + { + start_date_or_date_time = date_lut.dateToString(range.first.get()); + end_date_or_date_time = date_lut.dateToString(range.second.get()); + } + else if (isDateTime(column.type.get()) || isDateTime64(column.type.get())) + { + start_date_or_date_time = date_lut.timeToString(range.first.get()); + end_date_or_date_time = date_lut.timeToString(range.second.get()); + } + else [[unlikely]] return {}; + + if (comparator == "equals") + { + return makeASTFunction("and", + makeASTFunction("greaterOrEquals", + std::make_shared(column_name), + std::make_shared(start_date_or_date_time) + ), + makeASTFunction("less", + std::make_shared(column_name), + std::make_shared(end_date_or_date_time) + ) + ); + } + else if (comparator == "notEquals") + { + return makeASTFunction("or", + makeASTFunction("less", + std::make_shared(column_name), + std::make_shared(start_date_or_date_time) + ), + makeASTFunction("greaterOrEquals", + std::make_shared(column_name), + std::make_shared(end_date_or_date_time) + ) + ); + } + else if (comparator == "greater") + { + return makeASTFunction("greaterOrEquals", + std::make_shared(column_name), + std::make_shared(end_date_or_date_time) + ); + } + else if (comparator == "lessOrEquals") + { + return makeASTFunction("less", + std::make_shared(column_name), + std::make_shared(end_date_or_date_time) + ); + } + else if (comparator == "less" || comparator == "greaterOrEquals") + { + return makeASTFunction(comparator, + std::make_shared(column_name), + std::make_shared(start_date_or_date_time) + ); + } + else [[unlikely]] + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Expected equals, notEquals, less, lessOrEquals, greater, greaterOrEquals. Actual {}", + comparator); + } +} + +void OptimizeDateOrDateTimeConverterWithPreimageMatcher::visit(const ASTFunction & function, ASTPtr & ast, const Data & data) +{ + const static std::unordered_map swap_relations = { + {"equals", "equals"}, + {"notEquals", "notEquals"}, + {"less", "greater"}, + {"greater", "less"}, + {"lessOrEquals", "greaterOrEquals"}, + {"greaterOrEquals", "lessOrEquals"}, + }; + + if (!swap_relations.contains(function.name)) return; + + if (!function.arguments || function.arguments->children.size() != 2) return; + + size_t func_id = function.arguments->children.size(); + + for (size_t i = 0; i < function.arguments->children.size(); i++) + { + if (const auto * func = function.arguments->children[i]->as()) + { + func_id = i; + } + } + + if (func_id == function.arguments->children.size()) return; + + size_t literal_id = 1 - func_id; + const auto * literal = function.arguments->children[literal_id]->as(); + + if (!literal || literal->value.getType() != Field::Types::UInt64) return; + + String comparator = literal_id > func_id ? function.name : swap_relations.at(function.name); + + const auto * ast_func = function.arguments->children[func_id]->as(); + /// Currently we only handle single-argument functions. 
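+ /// That is, converters of the form f(column): getPreimage() receives a single
+ /// column type and a single constant point, so calls with extra arguments
+ /// cannot be expressed as a range over one column and are skipped.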
+ if (!ast_func || !ast_func->arguments || ast_func->arguments->children.size() != 1) return; + + const auto * column_id = ast_func->arguments->children.at(0)->as(); + if (!column_id) return; + + auto pos = IdentifierSemantic::getMembership(*column_id); + if (!pos) + pos = IdentifierSemantic::chooseTableColumnMatch(*column_id, data.tables, true); + if (!pos) + return; + + if (*pos >= data.tables.size()) + return; + + auto data_type_and_name = data.tables[*pos].columns.tryGetByName(column_id->shortName()); + if (!data_type_and_name) return; + + const auto & converter = FunctionFactory::instance().tryGet(ast_func->name, data.context); + if (!converter) return; + + ColumnsWithTypeAndName args; + args.emplace_back(data_type_and_name->type, "tmp"); + auto converter_base = converter->build(args); + if (!converter_base || !converter_base->hasInformationAboutPreimage()) return; + + auto preimage_range = converter_base->getPreimage(*(data_type_and_name->type), literal->value); + if (!preimage_range) return; + + const auto new_ast = generateOptimizedDateFilterAST(comparator, *data_type_and_name, *preimage_range); + if (!new_ast) return; + + ast = new_ast; +} + +bool OptimizeDateOrDateTimeConverterWithPreimageMatcher::needChildVisit(ASTPtr & ast, ASTPtr & /*child*/) +{ + const static std::unordered_set relations = { + "equals", + "notEquals", + "less", + "greater", + "lessOrEquals", + "greaterOrEquals", + }; + + if (const auto * ast_function = ast->as()) + { + return !relations.contains(ast_function->name); + } + + return true; +} + +} diff --git a/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.h b/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.h new file mode 100644 index 00000000000..778fa462364 --- /dev/null +++ b/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.h @@ -0,0 +1,37 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ASTFunction; + +/** Replace predicates that contain Date/DateTime converters with their preimages to improve performance. + * Given a Date column c, toYear(c) = 2023 -> c >= '2023-01-01' AND c < '2024-01-01' + * Or if c is a DateTime column, toYear(c) = 2023 -> c >= '2023-01-01 00:00:00' AND c < '2024-01-01 00:00:00'. + * A similar optimization also applies to the other converters. + */ +class OptimizeDateOrDateTimeConverterWithPreimageMatcher +{ +public: + struct Data + { + const TablesWithColumns & tables; + ContextPtr context; + }; + + static void visit(ASTPtr & ast, Data & data) + { + if (const auto * ast_function = ast->as()) + visit(*ast_function, ast, data); + } + + static void visit(const ASTFunction & function, ASTPtr & ast, const Data & data); + + static bool needChildVisit(ASTPtr & ast, ASTPtr & child); +}; + +using OptimizeDateOrDateTimeConverterWithPreimageVisitor = InDepthNodeVisitor; +} diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index c38b3c79026..fd4d2c9d846 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -677,6 +678,21 @@ void optimizeInjectiveFunctionsInsideUniq(ASTPtr & query, ContextPtr context) RemoveInjectiveFunctionsVisitor(data).visit(query); } +void optimizeDateFilters(ASTSelectQuery * select_query, const std::vector & tables_with_columns, ContextPtr context) +{ + /// Predicates in the HAVING clause have already been moved to the WHERE clause.
+ if (select_query->where()) + { + OptimizeDateOrDateTimeConverterWithPreimageVisitor::Data data{tables_with_columns, context}; + OptimizeDateOrDateTimeConverterWithPreimageVisitor(data).visit(select_query->refWhere()); + } + if (select_query->prewhere()) + { + OptimizeDateOrDateTimeConverterWithPreimageVisitor::Data data{tables_with_columns, context}; + OptimizeDateOrDateTimeConverterWithPreimageVisitor(data).visit(select_query->refPrewhere()); + } +} + void transformIfStringsIntoEnum(ASTPtr & query) { std::unordered_set function_names = {"if", "transform"}; @@ -780,6 +796,9 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result, tables_with_columns, result.storage_snapshot->metadata, result.storage); } + /// Rewrite date filters to avoid the calls of converters such as toYear, toYYYYMM, etc. + optimizeDateFilters(select_query, tables_with_columns, context); + /// GROUP BY injective function elimination. optimizeGroupBy(select_query, context); diff --git a/tests/queries/0_stateless/02783_date_predicate_optimizations.reference b/tests/queries/0_stateless/02783_date_predicate_optimizations.reference index cd689b93034..872a5dd1d7d 100644 --- a/tests/queries/0_stateless/02783_date_predicate_optimizations.reference +++ b/tests/queries/0_stateless/02783_date_predicate_optimizations.reference @@ -1,2 +1,54 @@ 2021-12-31 23:00:00 0 2021-12-31 23:00:00 0 +Date +2 +3 +2 +4 +1 +3 +3 +2 +1 +4 +1 +4 +DateTime +2 +3 +2 +4 +1 +3 +3 +2 +1 +4 +1 +4 +Date32 +2 +3 +2 +4 +1 +3 +3 +2 +1 +4 +1 +4 +DateTime64 +2 +3 +2 +4 +1 +3 +3 +2 +1 +4 +1 +4 diff --git a/tests/queries/0_stateless/02783_date_predicate_optimizations.sql b/tests/queries/0_stateless/02783_date_predicate_optimizations.sql index abb13f1005e..0a2fa6cc93b 100644 --- a/tests/queries/0_stateless/02783_date_predicate_optimizations.sql +++ b/tests/queries/0_stateless/02783_date_predicate_optimizations.sql @@ -11,3 +11,79 @@ INSERT INTO source values ('2021-12-31 23:00:00', 0); SELECT * FROM source WHERE toYYYYMM(ts) = 202112; SELECT * FROM source WHERE toYear(ts) = 2021; + +DROP TABLE IF EXISTS source; +CREATE TABLE source +( + `dt` Date, + `ts` DateTime, + `dt_32` Date32, + `ts_64` DateTime64(3), + `n` Int32 +) +ENGINE = MergeTree +PARTITION BY toYYYYMM(ts) +ORDER BY tuple(); + +INSERT INTO source values ('2022-12-31', '2022-12-31 23:59:59', '2022-12-31', '2022-12-31 23:59:59.123', 0); +INSERT INTO source values ('2023-01-01', '2023-01-01 00:00:00', '2023-01-01', '2023-01-01 00:00:00.000', 1); +INSERT INTO source values ('2023-12-01', '2023-12-01 00:00:00', '2023-12-01', '2023-12-01 00:00:00.000', 2); +INSERT INTO source values ('2023-12-31', '2023-12-31 23:59:59', '2023-12-31', '2023-12-31 23:59:59.123', 3); +INSERT INTO source values ('2024-01-01', '2024-01-01 00:00:00', '2024-01-01', '2024-01-01 00:00:00.000', 4); + +SELECT 'Date'; +SELECT count(*) FROM source WHERE toYYYYMM(dt) = 202312; +SELECT count(*) FROM source WHERE toYYYYMM(dt) <> 202312; +SELECT count(*) FROM source WHERE toYYYYMM(dt) < 202312; +SELECT count(*) FROM source WHERE toYYYYMM(dt) <= 202312; +SELECT count(*) FROM source WHERE toYYYYMM(dt) > 202312; +SELECT count(*) FROM source WHERE toYYYYMM(dt) >= 202312; +SELECT count(*) FROM source WHERE toYear(dt) = 2023; +SELECT count(*) FROM source WHERE toYear(dt) <> 2023; +SELECT count(*) FROM source WHERE toYear(dt) < 2023; +SELECT count(*) FROM source WHERE toYear(dt) <= 2023; +SELECT count(*) FROM source WHERE toYear(dt) > 2023; +SELECT count(*) FROM source WHERE toYear(dt) >= 2023; + +SELECT 'DateTime'; +SELECT 
count(*) FROM source WHERE toYYYYMM(ts) = 202312; +SELECT count(*) FROM source WHERE toYYYYMM(ts) <> 202312; +SELECT count(*) FROM source WHERE toYYYYMM(ts) < 202312; +SELECT count(*) FROM source WHERE toYYYYMM(ts) <= 202312; +SELECT count(*) FROM source WHERE toYYYYMM(ts) > 202312; +SELECT count(*) FROM source WHERE toYYYYMM(ts) >= 202312; +SELECT count(*) FROM source WHERE toYear(ts) = 2023; +SELECT count(*) FROM source WHERE toYear(ts) <> 2023; +SELECT count(*) FROM source WHERE toYear(ts) < 2023; +SELECT count(*) FROM source WHERE toYear(ts) <= 2023; +SELECT count(*) FROM source WHERE toYear(ts) > 2023; +SELECT count(*) FROM source WHERE toYear(ts) >= 2023; + +SELECT 'Date32'; +SELECT count(*) FROM source WHERE toYYYYMM(dt_32) = 202312; +SELECT count(*) FROM source WHERE toYYYYMM(dt_32) <> 202312; +SELECT count(*) FROM source WHERE toYYYYMM(dt_32) < 202312; +SELECT count(*) FROM source WHERE toYYYYMM(dt_32) <= 202312; +SELECT count(*) FROM source WHERE toYYYYMM(dt_32) > 202312; +SELECT count(*) FROM source WHERE toYYYYMM(dt_32) >= 202312; +SELECT count(*) FROM source WHERE toYear(dt_32) = 2023; +SELECT count(*) FROM source WHERE toYear(dt_32) <> 2023; +SELECT count(*) FROM source WHERE toYear(dt_32) < 2023; +SELECT count(*) FROM source WHERE toYear(dt_32) <= 2023; +SELECT count(*) FROM source WHERE toYear(dt_32) > 2023; +SELECT count(*) FROM source WHERE toYear(dt_32) >= 2023; + +SELECT 'DateTime64'; +SELECT count(*) FROM source WHERE toYYYYMM(ts_64) = 202312; +SELECT count(*) FROM source WHERE toYYYYMM(ts_64) <> 202312; +SELECT count(*) FROM source WHERE toYYYYMM(ts_64) < 202312; +SELECT count(*) FROM source WHERE toYYYYMM(ts_64) <= 202312; +SELECT count(*) FROM source WHERE toYYYYMM(ts_64) > 202312; +SELECT count(*) FROM source WHERE toYYYYMM(ts_64) >= 202312; +SELECT count(*) FROM source WHERE toYear(ts_64) = 2023; +SELECT count(*) FROM source WHERE toYear(ts_64) <> 2023; +SELECT count(*) FROM source WHERE toYear(ts_64) < 2023; +SELECT count(*) FROM source WHERE toYear(ts_64) <= 2023; +SELECT count(*) FROM source WHERE toYear(ts_64) > 2023; +SELECT count(*) FROM source WHERE toYear(ts_64) >= 2023; +DROP TABLE source; diff --git a/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_rewrite.reference b/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_rewrite.reference new file mode 100644 index 00000000000..9235e7e106a --- /dev/null +++ b/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_rewrite.reference @@ -0,0 +1,87 @@ +SELECT value1 +FROM date_t +WHERE ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM date_t +WHERE ((date1 < \'1993-01-01\') OR (date1 >= \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM date_t +WHERE (date1 < \'1993-01-01\') AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM date_t +WHERE (date1 >= \'1994-01-01\') AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM date_t +WHERE (date1 < \'1994-01-01\') AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM date_t +WHERE (date1 >= \'1993-01-01\') AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM date_t +WHERE ((date1 >= \'1993-01-01\') AND (date1 < \'1998-01-01\')) AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM date_t +WHERE (((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) OR ((date1 >= \'1994-01-01\') AND (date1 < \'1995-01-01\'))) AND ((id >= 1) AND (id <= 3)) +SELECT + value1, + toYear(date1) AS year1 +FROM date_t +WHERE ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) AND ((id 
>= 1) AND (id <= 3)) +SELECT value1 +FROM date_t +WHERE (date1 < \'1993-01-01\') AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM date_t +PREWHERE (date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\') +WHERE ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM date_t +WHERE ((id >= 1) AND (id <= 3)) AND ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) +SELECT value1 +FROM date_t +WHERE (toYYYYMM(date1) = 199300) AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM date_t +WHERE (toYYYYMM(date1) = 199313) AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM date_t +WHERE ((date1 >= \'1993-12-01\') AND (date1 < \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM date_t +WHERE ((date1 >= \'1992-03-01\') AND (date1 < \'1992-04-01\')) AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM date_t +WHERE ((date1 < \'1992-03-01\') OR (date1 >= \'1992-04-01\')) AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM date_t +WHERE (date1 < \'1992-03-01\') AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM date_t +WHERE (date1 >= \'1992-04-01\') AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM date_t +WHERE (date1 < \'1992-04-01\') AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM date_t +WHERE (date1 >= \'1992-03-01\') AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM date_t +WHERE ((date1 >= \'1992-03-01\') OR ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\'))) AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM datetime_t +WHERE ((date1 >= \'1993-01-01 00:00:00\') AND (date1 < \'1994-01-01 00:00:00\')) AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM datetime_t +WHERE ((date1 >= \'1993-12-01 00:00:00\') AND (date1 < \'1994-01-01 00:00:00\')) AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM date32_t +WHERE ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM date32_t +WHERE ((date1 >= \'1993-12-01\') AND (date1 < \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM datetime64_t +WHERE ((date1 >= \'1993-01-01 00:00:00\') AND (date1 < \'1994-01-01 00:00:00\')) AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM datetime64_t +WHERE ((date1 >= \'1993-12-01 00:00:00\') AND (date1 < \'1994-01-01 00:00:00\')) AND ((id >= 1) AND (id <= 3)) diff --git a/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_rewrite.sql b/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_rewrite.sql new file mode 100644 index 00000000000..266be59b0a3 --- /dev/null +++ b/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_rewrite.sql @@ -0,0 +1,47 @@ +DROP TABLE IF EXISTS date_t; +CREATE TABLE date_t (id UInt32, value1 String, date1 Date) ENGINE ReplacingMergeTree() ORDER BY id; + +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) = 1993 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) <> 1993 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) < 1993 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) > 1993 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) <= 1993 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) >= 1993 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) BETWEEN 1993 AND 1997 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE (toYear(date1) = 1993 OR toYear(date1) = 1994) AND id 
BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1, toYear(date1) as year1 FROM date_t WHERE year1 = 1993 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE 1993 > toYear(date1) AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM date_t PREWHERE toYear(date1) = 1993 WHERE id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE id BETWEEN 1 AND 3 HAVING toYear(date1) = 1993; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) = 199300 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) = 199313 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) = 199312 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) = 199203 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) <> 199203 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) < 199203 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) > 199203 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) <= 199203 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) >= 199203 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE (toYYYYMM(date1) >= 199203 OR toYear(date1) = 1993) AND id BETWEEN 1 AND 3; +DROP TABLE date_t; + +DROP TABLE IF EXISTS datetime_t; +CREATE TABLE datetime_t (id UInt32, value1 String, date1 Datetime) ENGINE ReplacingMergeTree() ORDER BY id; + +EXPLAIN SYNTAX SELECT value1 FROM datetime_t WHERE toYear(date1) = 1993 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM datetime_t WHERE toYYYYMM(date1) = 199312 AND id BETWEEN 1 AND 3; +DROP TABLE datetime_t; + +DROP TABLE IF EXISTS date32_t; +CREATE TABLE date32_t (id UInt32, value1 String, date1 Date32) ENGINE ReplacingMergeTree() ORDER BY id; + +EXPLAIN SYNTAX SELECT value1 FROM date32_t WHERE toYear(date1) = 1993 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM date32_t WHERE toYYYYMM(date1) = 199312 AND id BETWEEN 1 AND 3; +DROP TABLE date32_t; + +DROP TABLE IF EXISTS datetime64_t; +CREATE TABLE datetime64_t (id UInt32, value1 String, date1 Datetime64) ENGINE ReplacingMergeTree() ORDER BY id; + +EXPLAIN SYNTAX SELECT value1 FROM datetime64_t WHERE toYear(date1) = 1993 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM datetime64_t WHERE toYYYYMM(date1) = 199312 AND id BETWEEN 1 AND 3; +DROP TABLE datetime64_t; From 1c2233b693077bbc5ce042c46a56aadaa49aab98 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 4 Jul 2023 15:46:40 +0000 Subject: [PATCH 1170/1997] Fix style check --- src/Interpreters/GraceHashJoin.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/GraceHashJoin.cpp b/src/Interpreters/GraceHashJoin.cpp index aa7091548d7..66dc1aa7bde 100644 --- a/src/Interpreters/GraceHashJoin.cpp +++ b/src/Interpreters/GraceHashJoin.cpp @@ -411,7 +411,7 @@ void GraceHashJoin::addBuckets(const size_t bucket_count) } buckets.reserve(buckets.size() + bucket_count); - for(auto & bucket : tmp_buckets) + for (auto & bucket : tmp_buckets) buckets.emplace_back(std::move(bucket)); } From 3cb459bd04d141ca0fffe7c1f6c389e4be434167 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 4 Jul 2023 15:50:56 +0000 Subject: [PATCH 1171/1997] Docs: Fix description of output field NON_UNIQUE for statement SHOW INDEXES --- docs/en/sql-reference/statements/show.md | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/statements/show.md b/docs/en/sql-reference/statements/show.md index f96eb55aa45..336b93db9d5 100644 --- a/docs/en/sql-reference/statements/show.md +++ b/docs/en/sql-reference/statements/show.md @@ -283,7 +283,7 @@ The optional keyword `EXTENDED` currently has no effect, it only exists for MySQ `SHOW INDEX` produces a result table with the following structure: - table - The name of the table (String) -- non_unique - 0 if the index can contain duplicates, 1 otherwise (UInt8) +- non_unique - 0 if the index cannot contain duplicates, 1 otherwise (UInt8) - key_name - The name of the index, `PRIMARY` if the index is a primary key index (String) - seq_in_index - Currently unused - column_name - Currently unused From 59928cb4856c5a82d3aeb402fef6936bfece3d85 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 4 Jul 2023 15:50:56 +0000 Subject: [PATCH 1172/1997] Docs: Fix description of output field NON_UNIQUE for statement SHOW INDEXES --- docs/en/sql-reference/statements/show.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/statements/show.md b/docs/en/sql-reference/statements/show.md index f96eb55aa45..336b93db9d5 100644 --- a/docs/en/sql-reference/statements/show.md +++ b/docs/en/sql-reference/statements/show.md @@ -283,7 +283,7 @@ The optional keyword `EXTENDED` currently has no effect, it only exists for MySQ `SHOW INDEX` produces a result table with the following structure: - table - The name of the table (String) -- non_unique - 0 if the index can contain duplicates, 1 otherwise (UInt8) +- non_unique - 0 if the index cannot contain duplicates, 1 otherwise (UInt8) - key_name - The name of the index, `PRIMARY` if the index is a primary key index (String) - seq_in_index - Currently unused - column_name - Currently unused From 047060f9a41589c3b0e19338ac03e0c89d076c87 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 4 Jul 2023 16:09:30 +0000 Subject: [PATCH 1173/1997] SHOW INDEX: Make fields COMMENT and INDEX_COMMENT more compatible with MySQL --- docs/en/sql-reference/statements/show.md | 18 ++--- .../InterpreterShowIndexesQuery.cpp | 8 +- .../0_stateless/02724_show_indexes.reference | 80 +++++++++---------- 3 files changed, 53 insertions(+), 53 deletions(-) diff --git a/docs/en/sql-reference/statements/show.md b/docs/en/sql-reference/statements/show.md index 336b93db9d5..38fa63b4e1c 100644 --- a/docs/en/sql-reference/statements/show.md +++ b/docs/en/sql-reference/statements/show.md @@ -205,7 +205,7 @@ The optional keyword `EXTENDED` currently has no effect, it only exists for MySQ The optional keyword `FULL` causes the output to include the collation, comment and privilege columns. -`SHOW COLUMNS` produces a result table with the following structure: +The statement produces a result table with the following structure: - field - The name of the column (String) - type - The column data type (String) - null - If the column data type is Nullable (UInt8) @@ -281,7 +281,7 @@ equivalent. If no database is specified, the query assumes the current database The optional keyword `EXTENDED` currently has no effect, it only exists for MySQL compatibility. 
-`SHOW INDEX` produces a result table with the following structure: +The statement produces a result table with the following structure: - table - The name of the table (String) - non_unique - 0 if the index cannot contain duplicates, 1 otherwise (UInt8) - key_name - The name of the index, `PRIMARY` if the index is a primary key index (String) @@ -293,8 +293,8 @@ The optional keyword `EXTENDED` currently has no effect, it only exists for MySQ - packed - Currently unused - null - Currently unused - index_type - The index type, e.g. `primary`, `minmax`, `bloom_filter` etc. (String) -- comment - Currently unused -- index_comment - Currently unused +- comment - `` additional information about the index, currently always `` (empty string) (String) +- index_comment - `` (empty string) because indexes in ClickHouse cannot have a `COMMENT` field like in MySQL (String) - visible - If the index is visible to the optimizer, always `YES` (String) - expression - The index expression (String) @@ -310,11 +310,11 @@ Result: ``` text ┌─table─┬─non_unique─┬─key_name─┬─seq_in_index─┬─column_name─┬─collation─┬─cardinality─┬─sub_part─┬─packed─┬─null─┬─index_type───┬─comment─┬─index_comment─┬─visible─┬─expression─┐ -│ tbl │ 0 │ blf_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ bloom_filter │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ YES │ d, b │ -│ tbl │ 0 │ mm1_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ minmax │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ YES │ a, c, d │ -│ tbl │ 0 │ mm2_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ minmax │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ YES │ c, d, e │ -│ tbl │ 0 │ PRIMARY │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ A │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ primary │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ YES │ c, a │ -│ tbl │ 0 │ set_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ set │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ YES │ e │ +│ tbl │ 0 │ blf_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ bloom_filter │ │ │ YES │ d, b │ +│ tbl │ 0 │ mm1_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ minmax │ │ │ YES │ a, c, d │ +│ tbl │ 0 │ mm2_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ minmax │ │ │ YES │ c, d, e │ +│ tbl │ 0 │ PRIMARY │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ A │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ primary │ │ │ YES │ c, a │ +│ tbl │ 0 │ set_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ set │ │ │ YES │ e │ └───────┴────────────┴──────────┴──────────────┴─────────────┴───────────┴─────────────┴──────────┴────────┴──────┴──────────────┴─────────┴───────────────┴─────────┴────────────┘ ``` diff --git a/src/Interpreters/InterpreterShowIndexesQuery.cpp b/src/Interpreters/InterpreterShowIndexesQuery.cpp index 51311c82eeb..66a1b2941a3 100644 --- a/src/Interpreters/InterpreterShowIndexesQuery.cpp +++ b/src/Interpreters/InterpreterShowIndexesQuery.cpp @@ -50,8 +50,8 @@ FROM ( NULL AS packed, NULL AS null, 'primary' AS index_type, - NULL AS comment, - NULL AS index_comment, + '' AS comment, + '' AS index_comment, 'YES' AS visible, primary_key AS expression FROM system.tables @@ -71,8 +71,8 @@ FROM ( NULL AS packed, NULL AS null, type AS index_type, - NULL AS comment, - NULL AS index_comment, + '' AS comment, + '' AS index_comment, 'YES' AS visible, expr AS expression FROM system.data_skipping_indices diff --git a/tests/queries/0_stateless/02724_show_indexes.reference b/tests/queries/0_stateless/02724_show_indexes.reference index 8365ade3231..8d3d37eab04 100644 --- a/tests/queries/0_stateless/02724_show_indexes.reference +++ b/tests/queries/0_stateless/02724_show_indexes.reference @@ -1,47 +1,47 @@ --- Aliases of SHOW INDEX -tbl 0 blf_idx \N \N \N \N \N \N \N bloom_filter \N \N YES d, b -tbl 0 
mm1_idx \N \N \N \N \N \N \N minmax \N \N YES a, c, d -tbl 0 mm2_idx \N \N \N \N \N \N \N minmax \N \N YES c, d, e -tbl 0 PRIMARY \N \N A \N \N \N \N primary \N \N YES c, a -tbl 0 set_idx \N \N \N \N \N \N \N set \N \N YES e -tbl 0 blf_idx \N \N \N \N \N \N \N bloom_filter \N \N YES d, b -tbl 0 mm1_idx \N \N \N \N \N \N \N minmax \N \N YES a, c, d -tbl 0 mm2_idx \N \N \N \N \N \N \N minmax \N \N YES c, d, e -tbl 0 PRIMARY \N \N A \N \N \N \N primary \N \N YES c, a -tbl 0 set_idx \N \N \N \N \N \N \N set \N \N YES e -tbl 0 blf_idx \N \N \N \N \N \N \N bloom_filter \N \N YES d, b -tbl 0 mm1_idx \N \N \N \N \N \N \N minmax \N \N YES a, c, d -tbl 0 mm2_idx \N \N \N \N \N \N \N minmax \N \N YES c, d, e -tbl 0 PRIMARY \N \N A \N \N \N \N primary \N \N YES c, a -tbl 0 set_idx \N \N \N \N \N \N \N set \N \N YES e -tbl 0 blf_idx \N \N \N \N \N \N \N bloom_filter \N \N YES d, b -tbl 0 mm1_idx \N \N \N \N \N \N \N minmax \N \N YES a, c, d -tbl 0 mm2_idx \N \N \N \N \N \N \N minmax \N \N YES c, d, e -tbl 0 PRIMARY \N \N A \N \N \N \N primary \N \N YES c, a -tbl 0 set_idx \N \N \N \N \N \N \N set \N \N YES e +tbl 0 blf_idx \N \N \N \N \N \N \N bloom_filter YES d, b +tbl 0 mm1_idx \N \N \N \N \N \N \N minmax YES a, c, d +tbl 0 mm2_idx \N \N \N \N \N \N \N minmax YES c, d, e +tbl 0 PRIMARY \N \N A \N \N \N \N primary YES c, a +tbl 0 set_idx \N \N \N \N \N \N \N set YES e +tbl 0 blf_idx \N \N \N \N \N \N \N bloom_filter YES d, b +tbl 0 mm1_idx \N \N \N \N \N \N \N minmax YES a, c, d +tbl 0 mm2_idx \N \N \N \N \N \N \N minmax YES c, d, e +tbl 0 PRIMARY \N \N A \N \N \N \N primary YES c, a +tbl 0 set_idx \N \N \N \N \N \N \N set YES e +tbl 0 blf_idx \N \N \N \N \N \N \N bloom_filter YES d, b +tbl 0 mm1_idx \N \N \N \N \N \N \N minmax YES a, c, d +tbl 0 mm2_idx \N \N \N \N \N \N \N minmax YES c, d, e +tbl 0 PRIMARY \N \N A \N \N \N \N primary YES c, a +tbl 0 set_idx \N \N \N \N \N \N \N set YES e +tbl 0 blf_idx \N \N \N \N \N \N \N bloom_filter YES d, b +tbl 0 mm1_idx \N \N \N \N \N \N \N minmax YES a, c, d +tbl 0 mm2_idx \N \N \N \N \N \N \N minmax YES c, d, e +tbl 0 PRIMARY \N \N A \N \N \N \N primary YES c, a +tbl 0 set_idx \N \N \N \N \N \N \N set YES e --- EXTENDED -tbl 0 blf_idx \N \N \N \N \N \N \N bloom_filter \N \N YES d, b -tbl 0 mm1_idx \N \N \N \N \N \N \N minmax \N \N YES a, c, d -tbl 0 mm2_idx \N \N \N \N \N \N \N minmax \N \N YES c, d, e -tbl 0 PRIMARY \N \N A \N \N \N \N primary \N \N YES c, a -tbl 0 set_idx \N \N \N \N \N \N \N set \N \N YES e +tbl 0 blf_idx \N \N \N \N \N \N \N bloom_filter YES d, b +tbl 0 mm1_idx \N \N \N \N \N \N \N minmax YES a, c, d +tbl 0 mm2_idx \N \N \N \N \N \N \N minmax YES c, d, e +tbl 0 PRIMARY \N \N A \N \N \N \N primary YES c, a +tbl 0 set_idx \N \N \N \N \N \N \N set YES e --- WHERE -tbl 0 mm1_idx \N \N \N \N \N \N \N minmax \N \N YES a, c, d -tbl 0 mm2_idx \N \N \N \N \N \N \N minmax \N \N YES c, d, e +tbl 0 mm1_idx \N \N \N \N \N \N \N minmax YES a, c, d +tbl 0 mm2_idx \N \N \N \N \N \N \N minmax YES c, d, e --- Check with weird table names -$4@^7 0 PRIMARY \N \N A \N \N \N \N primary \N \N YES c -NULL 0 PRIMARY \N \N A \N \N \N \N primary \N \N YES c -\' 0 PRIMARY \N \N A \N \N \N \N primary \N \N YES c -\' 0 PRIMARY \N \N A \N \N \N \N primary \N \N YES c +$4@^7 0 PRIMARY \N \N A \N \N \N \N primary YES c +NULL 0 PRIMARY \N \N A \N \N \N \N primary YES c +\' 0 PRIMARY \N \N A \N \N \N \N primary YES c +\' 0 PRIMARY \N \N A \N \N \N \N primary YES c --- Original table -tbl 0 blf_idx \N \N \N \N \N \N \N bloom_filter \N \N YES d, b -tbl 0 mm1_idx \N \N \N \N 
\N \N \N minmax \N \N YES a, c, d -tbl 0 mm2_idx \N \N \N \N \N \N \N minmax \N \N YES c, d, e -tbl 0 PRIMARY \N \N A \N \N \N \N primary \N \N YES c, a -tbl 0 set_idx \N \N \N \N \N \N \N set \N \N YES e +tbl 0 blf_idx \N \N \N \N \N \N \N bloom_filter YES d, b +tbl 0 mm1_idx \N \N \N \N \N \N \N minmax YES a, c, d +tbl 0 mm2_idx \N \N \N \N \N \N \N minmax YES c, d, e +tbl 0 PRIMARY \N \N A \N \N \N \N primary YES c, a +tbl 0 set_idx \N \N \N \N \N \N \N set YES e --- Equally named table in other database -tbl 0 mmi_idx \N \N \N \N \N \N \N minmax \N \N YES b -tbl 0 PRIMARY \N \N A \N \N \N \N primary \N \N YES a +tbl 0 mmi_idx \N \N \N \N \N \N \N minmax YES b +tbl 0 PRIMARY \N \N A \N \N \N \N primary YES a --- Short form -tbl 0 mmi_idx \N \N \N \N \N \N \N minmax \N \N YES b -tbl 0 PRIMARY \N \N A \N \N \N \N primary \N \N YES a +tbl 0 mmi_idx \N \N \N \N \N \N \N minmax YES b +tbl 0 PRIMARY \N \N A \N \N \N \N primary YES a From e9e3f87ed2ddc08f49a62ef3d880df203a3cd4e1 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 4 Jul 2023 16:19:33 +0000 Subject: [PATCH 1174/1997] SHOW INDEX: Make fields INDEX_TYPE more compatible with MySQL --- docs/en/sql-reference/statements/show.md | 12 +-- .../InterpreterShowIndexesQuery.cpp | 4 +- .../0_stateless/02724_show_indexes.reference | 78 +++++++++---------- 3 files changed, 46 insertions(+), 48 deletions(-) diff --git a/docs/en/sql-reference/statements/show.md b/docs/en/sql-reference/statements/show.md index 38fa63b4e1c..c73782efbbf 100644 --- a/docs/en/sql-reference/statements/show.md +++ b/docs/en/sql-reference/statements/show.md @@ -292,7 +292,7 @@ The statement produces a result table with the following structure: - sub_part - Currently unused - packed - Currently unused - null - Currently unused -- index_type - The index type, e.g. `primary`, `minmax`, `bloom_filter` etc. (String) +- index_type - The index type, e.g. `PRIMARY`, `MINMAX`, `BLOOM_FILTER` etc. 
(String) - comment - `` additional information about the index, currently always `` (empty string) (String) - index_comment - `` (empty string) because indexes in ClickHouse cannot have a `COMMENT` field like in MySQL (String) - visible - If the index is visible to the optimizer, always `YES` (String) @@ -310,11 +310,11 @@ Result: ``` text ┌─table─┬─non_unique─┬─key_name─┬─seq_in_index─┬─column_name─┬─collation─┬─cardinality─┬─sub_part─┬─packed─┬─null─┬─index_type───┬─comment─┬─index_comment─┬─visible─┬─expression─┐ -│ tbl │ 0 │ blf_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ bloom_filter │ │ │ YES │ d, b │ -│ tbl │ 0 │ mm1_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ minmax │ │ │ YES │ a, c, d │ -│ tbl │ 0 │ mm2_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ minmax │ │ │ YES │ c, d, e │ -│ tbl │ 0 │ PRIMARY │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ A │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ primary │ │ │ YES │ c, a │ -│ tbl │ 0 │ set_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ set │ │ │ YES │ e │ +│ tbl │ 0 │ blf_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ BLOOM_FILTER │ │ │ YES │ d, b │ +│ tbl │ 0 │ mm1_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ MINMAX │ │ │ YES │ a, c, d │ +│ tbl │ 0 │ mm2_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ MINMAX │ │ │ YES │ c, d, e │ +│ tbl │ 0 │ PRIMARY │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ A │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ PRIMARY │ │ │ YES │ c, a │ +│ tbl │ 0 │ set_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ SET │ │ │ YES │ e │ └───────┴────────────┴──────────┴──────────────┴─────────────┴───────────┴─────────────┴──────────┴────────┴──────┴──────────────┴─────────┴───────────────┴─────────┴────────────┘ ``` diff --git a/src/Interpreters/InterpreterShowIndexesQuery.cpp b/src/Interpreters/InterpreterShowIndexesQuery.cpp index 66a1b2941a3..fc31b6ef257 100644 --- a/src/Interpreters/InterpreterShowIndexesQuery.cpp +++ b/src/Interpreters/InterpreterShowIndexesQuery.cpp @@ -49,7 +49,7 @@ FROM ( NULL AS sub_part, NULL AS packed, NULL AS null, - 'primary' AS index_type, + 'PRIMARY' AS index_type, '' AS comment, '' AS index_comment, 'YES' AS visible, @@ -70,7 +70,7 @@ FROM ( NULL AS sub_part, NULL AS packed, NULL AS null, - type AS index_type, + upper(type) AS index_type, '' AS comment, '' AS index_comment, 'YES' AS visible, diff --git a/tests/queries/0_stateless/02724_show_indexes.reference b/tests/queries/0_stateless/02724_show_indexes.reference index 8d3d37eab04..8872d74bbf2 100644 --- a/tests/queries/0_stateless/02724_show_indexes.reference +++ b/tests/queries/0_stateless/02724_show_indexes.reference @@ -1,47 +1,45 @@ --- Aliases of SHOW INDEX -tbl 0 blf_idx \N \N \N \N \N \N \N bloom_filter YES d, b -tbl 0 mm1_idx \N \N \N \N \N \N \N minmax YES a, c, d -tbl 0 mm2_idx \N \N \N \N \N \N \N minmax YES c, d, e -tbl 0 PRIMARY \N \N A \N \N \N \N primary YES c, a -tbl 0 set_idx \N \N \N \N \N \N \N set YES e -tbl 0 blf_idx \N \N \N \N \N \N \N bloom_filter YES d, b -tbl 0 mm1_idx \N \N \N \N \N \N \N minmax YES a, c, d -tbl 0 mm2_idx \N \N \N \N \N \N \N minmax YES c, d, e -tbl 0 PRIMARY \N \N A \N \N \N \N primary YES c, a -tbl 0 set_idx \N \N \N \N \N \N \N set YES e -tbl 0 blf_idx \N \N \N \N \N \N \N bloom_filter YES d, b -tbl 0 mm1_idx \N \N \N \N \N \N \N minmax YES a, c, d -tbl 0 mm2_idx \N \N \N \N \N \N \N minmax YES c, d, e -tbl 0 PRIMARY \N \N A \N \N \N \N primary YES c, a -tbl 0 set_idx \N \N \N \N \N \N \N set YES e -tbl 0 blf_idx \N \N \N \N \N \N \N bloom_filter YES d, b -tbl 0 mm1_idx \N \N \N \N \N \N \N minmax YES a, c, d 
-tbl 0 mm2_idx \N \N \N \N \N \N \N minmax YES c, d, e -tbl 0 PRIMARY \N \N A \N \N \N \N primary YES c, a -tbl 0 set_idx \N \N \N \N \N \N \N set YES e +tbl 0 blf_idx \N \N \N \N \N \N \N BLOOM_FILTER YES d, b +tbl 0 mm1_idx \N \N \N \N \N \N \N MINMAX YES a, c, d +tbl 0 mm2_idx \N \N \N \N \N \N \N MINMAX YES c, d, e +tbl 0 PRIMARY \N \N A \N \N \N \N PRIMARY YES c, a +tbl 0 set_idx \N \N \N \N \N \N \N SET YES e +tbl 0 blf_idx \N \N \N \N \N \N \N BLOOM_FILTER YES d, b +tbl 0 mm1_idx \N \N \N \N \N \N \N MINMAX YES a, c, d +tbl 0 mm2_idx \N \N \N \N \N \N \N MINMAX YES c, d, e +tbl 0 PRIMARY \N \N A \N \N \N \N PRIMARY YES c, a +tbl 0 set_idx \N \N \N \N \N \N \N SET YES e +tbl 0 blf_idx \N \N \N \N \N \N \N BLOOM_FILTER YES d, b +tbl 0 mm1_idx \N \N \N \N \N \N \N MINMAX YES a, c, d +tbl 0 mm2_idx \N \N \N \N \N \N \N MINMAX YES c, d, e +tbl 0 PRIMARY \N \N A \N \N \N \N PRIMARY YES c, a +tbl 0 set_idx \N \N \N \N \N \N \N SET YES e +tbl 0 blf_idx \N \N \N \N \N \N \N BLOOM_FILTER YES d, b +tbl 0 mm1_idx \N \N \N \N \N \N \N MINMAX YES a, c, d +tbl 0 mm2_idx \N \N \N \N \N \N \N MINMAX YES c, d, e +tbl 0 PRIMARY \N \N A \N \N \N \N PRIMARY YES c, a +tbl 0 set_idx \N \N \N \N \N \N \N SET YES e --- EXTENDED -tbl 0 blf_idx \N \N \N \N \N \N \N bloom_filter YES d, b -tbl 0 mm1_idx \N \N \N \N \N \N \N minmax YES a, c, d -tbl 0 mm2_idx \N \N \N \N \N \N \N minmax YES c, d, e -tbl 0 PRIMARY \N \N A \N \N \N \N primary YES c, a -tbl 0 set_idx \N \N \N \N \N \N \N set YES e +tbl 0 blf_idx \N \N \N \N \N \N \N BLOOM_FILTER YES d, b +tbl 0 mm1_idx \N \N \N \N \N \N \N MINMAX YES a, c, d +tbl 0 mm2_idx \N \N \N \N \N \N \N MINMAX YES c, d, e +tbl 0 PRIMARY \N \N A \N \N \N \N PRIMARY YES c, a +tbl 0 set_idx \N \N \N \N \N \N \N SET YES e --- WHERE -tbl 0 mm1_idx \N \N \N \N \N \N \N minmax YES a, c, d -tbl 0 mm2_idx \N \N \N \N \N \N \N minmax YES c, d, e --- Check with weird table names -$4@^7 0 PRIMARY \N \N A \N \N \N \N primary YES c -NULL 0 PRIMARY \N \N A \N \N \N \N primary YES c -\' 0 PRIMARY \N \N A \N \N \N \N primary YES c -\' 0 PRIMARY \N \N A \N \N \N \N primary YES c +$4@^7 0 PRIMARY \N \N A \N \N \N \N PRIMARY YES c +NULL 0 PRIMARY \N \N A \N \N \N \N PRIMARY YES c +\' 0 PRIMARY \N \N A \N \N \N \N PRIMARY YES c +\' 0 PRIMARY \N \N A \N \N \N \N PRIMARY YES c --- Original table -tbl 0 blf_idx \N \N \N \N \N \N \N bloom_filter YES d, b -tbl 0 mm1_idx \N \N \N \N \N \N \N minmax YES a, c, d -tbl 0 mm2_idx \N \N \N \N \N \N \N minmax YES c, d, e -tbl 0 PRIMARY \N \N A \N \N \N \N primary YES c, a -tbl 0 set_idx \N \N \N \N \N \N \N set YES e +tbl 0 blf_idx \N \N \N \N \N \N \N BLOOM_FILTER YES d, b +tbl 0 mm1_idx \N \N \N \N \N \N \N MINMAX YES a, c, d +tbl 0 mm2_idx \N \N \N \N \N \N \N MINMAX YES c, d, e +tbl 0 PRIMARY \N \N A \N \N \N \N PRIMARY YES c, a +tbl 0 set_idx \N \N \N \N \N \N \N SET YES e --- Equally named table in other database -tbl 0 mmi_idx \N \N \N \N \N \N \N minmax YES b -tbl 0 PRIMARY \N \N A \N \N \N \N primary YES a +tbl 0 mmi_idx \N \N \N \N \N \N \N MINMAX YES b +tbl 0 PRIMARY \N \N A \N \N \N \N PRIMARY YES a --- Short form -tbl 0 mmi_idx \N \N \N \N \N \N \N minmax YES b -tbl 0 PRIMARY \N \N A \N \N \N \N primary YES a +tbl 0 mmi_idx \N \N \N \N \N \N \N MINMAX YES b +tbl 0 PRIMARY \N \N A \N \N \N \N PRIMARY YES a From a8511a0be5bcbf78230f9489064fa6512030347c Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 4 Jul 2023 16:30:18 +0000 Subject: [PATCH 1175/1997] Fix description of 'comment' field --- docs/en/sql-reference/statements/show.md | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/statements/show.md b/docs/en/sql-reference/statements/show.md index c73782efbbf..cd691a6ff27 100644 --- a/docs/en/sql-reference/statements/show.md +++ b/docs/en/sql-reference/statements/show.md @@ -293,7 +293,7 @@ The statement produces a result table with the following structure: - packed - Currently unused - null - Currently unused - index_type - The index type, e.g. `PRIMARY`, `MINMAX`, `BLOOM_FILTER` etc. (String) -- comment - `` additional information about the index, currently always `` (empty string) (String) +- comment - Additional information about the index, currently always `` (empty string) (String) - index_comment - `` (empty string) because indexes in ClickHouse cannot have a `COMMENT` field like in MySQL (String) - visible - If the index is visible to the optimizer, always `YES` (String) - expression - The index expression (String) From 6aab7577ff8af246d1d1f778dd41121b19a04fa8 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 4 Jul 2023 16:33:33 +0000 Subject: [PATCH 1176/1997] Document 'packed' field --- docs/en/sql-reference/statements/show.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/statements/show.md b/docs/en/sql-reference/statements/show.md index cd691a6ff27..e86746585a7 100644 --- a/docs/en/sql-reference/statements/show.md +++ b/docs/en/sql-reference/statements/show.md @@ -290,7 +290,7 @@ The statement produces a result table with the following structure: - collation - The sorting of the column in the index, `A` if ascending, `D` if descending, `NULL` if unsorted (Nullable(String)) - cardinality - Currently unused - sub_part - Currently unused -- packed - Currently unused +- packed - Always `NULL` because ClickHouse does not support packed (prefix-compressed) indexes like MySQL (Nullable(String)) - null - Currently unused - index_type - The index type, e.g. `PRIMARY`, `MINMAX`, `BLOOM_FILTER` etc. (String) - comment - Additional information about the index, currently always `` (empty string) (String) From eb86f274822154c49863dd6ad4a3952f74c3fdb2 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 4 Jul 2023 16:37:15 +0000 Subject: [PATCH 1177/1997] Document field 'sub_part' --- docs/en/sql-reference/statements/show.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/statements/show.md b/docs/en/sql-reference/statements/show.md index e86746585a7..61cca8b4565 100644 --- a/docs/en/sql-reference/statements/show.md +++ b/docs/en/sql-reference/statements/show.md @@ -289,7 +289,7 @@ The statement produces a result table with the following structure: - column_name - Currently unused - collation - The sorting of the column in the index, `A` if ascending, `D` if descending, `NULL` if unsorted (Nullable(String)) - cardinality - Currently unused -- sub_part - Currently unused +- sub_part - Always `NULL` because ClickHouse does not support index prefixes like MySQL (Nullable(String)) - packed - Always `NULL` because ClickHouse does not support packed (prefix-compressed) indexes like MySQL (Nullable(String)) - null - Currently unused - index_type - The index type, e.g. `PRIMARY`, `MINMAX`, `BLOOM_FILTER` etc. 
(String) From 5c463838b71ee0cca0493796a9742bde90b1fc42 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 4 Jul 2023 16:44:34 +0000 Subject: [PATCH 1178/1997] Improve compatibility of 'cardinality' field --- docs/en/sql-reference/statements/show.md | 12 +-- .../InterpreterShowIndexesQuery.cpp | 4 +- .../0_stateless/02724_show_indexes.reference | 76 +++++++++---------- 3 files changed, 46 insertions(+), 46 deletions(-) diff --git a/docs/en/sql-reference/statements/show.md b/docs/en/sql-reference/statements/show.md index 61cca8b4565..e13f152c0e6 100644 --- a/docs/en/sql-reference/statements/show.md +++ b/docs/en/sql-reference/statements/show.md @@ -288,7 +288,7 @@ The statement produces a result table with the following structure: - seq_in_index - Currently unused - column_name - Currently unused - collation - The sorting of the column in the index, `A` if ascending, `D` if descending, `NULL` if unsorted (Nullable(String)) -- cardinality - Currently unused +- cardinality - An estimation of the index cardinality (number of unique values in the index). Currently always 0. (UInt64) - sub_part - Always `NULL` because ClickHouse does not support index prefixes like MySQL (Nullable(String)) - packed - Always `NULL` because ClickHouse does not support packed (prefix-compressed) indexes like MySQL (Nullable(String)) - null - Currently unused @@ -310,11 +310,11 @@ Result: ``` text ┌─table─┬─non_unique─┬─key_name─┬─seq_in_index─┬─column_name─┬─collation─┬─cardinality─┬─sub_part─┬─packed─┬─null─┬─index_type───┬─comment─┬─index_comment─┬─visible─┬─expression─┐ -│ tbl │ 0 │ blf_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ BLOOM_FILTER │ │ │ YES │ d, b │ -│ tbl │ 0 │ mm1_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ MINMAX │ │ │ YES │ a, c, d │ -│ tbl │ 0 │ mm2_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ MINMAX │ │ │ YES │ c, d, e │ -│ tbl │ 0 │ PRIMARY │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ A │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ PRIMARY │ │ │ YES │ c, a │ -│ tbl │ 0 │ set_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ SET │ │ │ YES │ e │ +│ tbl │ 0 │ blf_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ BLOOM_FILTER │ │ │ YES │ d, b │ +│ tbl │ 0 │ mm1_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ MINMAX │ │ │ YES │ a, c, d │ +│ tbl │ 0 │ mm2_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ MINMAX │ │ │ YES │ c, d, e │ +│ tbl │ 0 │ PRIMARY │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ A │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ PRIMARY │ │ │ YES │ c, a │ +│ tbl │ 0 │ set_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ SET │ │ │ YES │ e │ └───────┴────────────┴──────────┴──────────────┴─────────────┴───────────┴─────────────┴──────────┴────────┴──────┴──────────────┴─────────┴───────────────┴─────────┴────────────┘ ``` diff --git a/src/Interpreters/InterpreterShowIndexesQuery.cpp b/src/Interpreters/InterpreterShowIndexesQuery.cpp index fc31b6ef257..d5b34e00791 100644 --- a/src/Interpreters/InterpreterShowIndexesQuery.cpp +++ b/src/Interpreters/InterpreterShowIndexesQuery.cpp @@ -45,7 +45,7 @@ FROM ( NULL AS seq_in_index, NULL AS column_name, 'A' AS collation, - NULL AS cardinality, + 0 AS cardinality, NULL AS sub_part, NULL AS packed, NULL AS null, @@ -66,7 +66,7 @@ FROM ( NULL AS seq_in_index, NULL AS column_name, NULL AS collation, - NULL AS cardinality, + 0 AS cardinality, NULL AS sub_part, NULL AS packed, NULL AS null, diff --git a/tests/queries/0_stateless/02724_show_indexes.reference b/tests/queries/0_stateless/02724_show_indexes.reference index 8872d74bbf2..f3019a031af 100644 --- 
a/tests/queries/0_stateless/02724_show_indexes.reference +++ b/tests/queries/0_stateless/02724_show_indexes.reference @@ -1,45 +1,45 @@ --- Aliases of SHOW INDEX -tbl 0 blf_idx \N \N \N \N \N \N \N BLOOM_FILTER YES d, b -tbl 0 mm1_idx \N \N \N \N \N \N \N MINMAX YES a, c, d -tbl 0 mm2_idx \N \N \N \N \N \N \N MINMAX YES c, d, e -tbl 0 PRIMARY \N \N A \N \N \N \N PRIMARY YES c, a -tbl 0 set_idx \N \N \N \N \N \N \N SET YES e -tbl 0 blf_idx \N \N \N \N \N \N \N BLOOM_FILTER YES d, b -tbl 0 mm1_idx \N \N \N \N \N \N \N MINMAX YES a, c, d -tbl 0 mm2_idx \N \N \N \N \N \N \N MINMAX YES c, d, e -tbl 0 PRIMARY \N \N A \N \N \N \N PRIMARY YES c, a -tbl 0 set_idx \N \N \N \N \N \N \N SET YES e -tbl 0 blf_idx \N \N \N \N \N \N \N BLOOM_FILTER YES d, b -tbl 0 mm1_idx \N \N \N \N \N \N \N MINMAX YES a, c, d -tbl 0 mm2_idx \N \N \N \N \N \N \N MINMAX YES c, d, e -tbl 0 PRIMARY \N \N A \N \N \N \N PRIMARY YES c, a -tbl 0 set_idx \N \N \N \N \N \N \N SET YES e -tbl 0 blf_idx \N \N \N \N \N \N \N BLOOM_FILTER YES d, b -tbl 0 mm1_idx \N \N \N \N \N \N \N MINMAX YES a, c, d -tbl 0 mm2_idx \N \N \N \N \N \N \N MINMAX YES c, d, e -tbl 0 PRIMARY \N \N A \N \N \N \N PRIMARY YES c, a -tbl 0 set_idx \N \N \N \N \N \N \N SET YES e +tbl 0 blf_idx \N \N \N 0 \N \N \N BLOOM_FILTER YES d, b +tbl 0 mm1_idx \N \N \N 0 \N \N \N MINMAX YES a, c, d +tbl 0 mm2_idx \N \N \N 0 \N \N \N MINMAX YES c, d, e +tbl 0 PRIMARY \N \N A 0 \N \N \N PRIMARY YES c, a +tbl 0 set_idx \N \N \N 0 \N \N \N SET YES e +tbl 0 blf_idx \N \N \N 0 \N \N \N BLOOM_FILTER YES d, b +tbl 0 mm1_idx \N \N \N 0 \N \N \N MINMAX YES a, c, d +tbl 0 mm2_idx \N \N \N 0 \N \N \N MINMAX YES c, d, e +tbl 0 PRIMARY \N \N A 0 \N \N \N PRIMARY YES c, a +tbl 0 set_idx \N \N \N 0 \N \N \N SET YES e +tbl 0 blf_idx \N \N \N 0 \N \N \N BLOOM_FILTER YES d, b +tbl 0 mm1_idx \N \N \N 0 \N \N \N MINMAX YES a, c, d +tbl 0 mm2_idx \N \N \N 0 \N \N \N MINMAX YES c, d, e +tbl 0 PRIMARY \N \N A 0 \N \N \N PRIMARY YES c, a +tbl 0 set_idx \N \N \N 0 \N \N \N SET YES e +tbl 0 blf_idx \N \N \N 0 \N \N \N BLOOM_FILTER YES d, b +tbl 0 mm1_idx \N \N \N 0 \N \N \N MINMAX YES a, c, d +tbl 0 mm2_idx \N \N \N 0 \N \N \N MINMAX YES c, d, e +tbl 0 PRIMARY \N \N A 0 \N \N \N PRIMARY YES c, a +tbl 0 set_idx \N \N \N 0 \N \N \N SET YES e --- EXTENDED -tbl 0 blf_idx \N \N \N \N \N \N \N BLOOM_FILTER YES d, b -tbl 0 mm1_idx \N \N \N \N \N \N \N MINMAX YES a, c, d -tbl 0 mm2_idx \N \N \N \N \N \N \N MINMAX YES c, d, e -tbl 0 PRIMARY \N \N A \N \N \N \N PRIMARY YES c, a -tbl 0 set_idx \N \N \N \N \N \N \N SET YES e +tbl 0 blf_idx \N \N \N 0 \N \N \N BLOOM_FILTER YES d, b +tbl 0 mm1_idx \N \N \N 0 \N \N \N MINMAX YES a, c, d +tbl 0 mm2_idx \N \N \N 0 \N \N \N MINMAX YES c, d, e +tbl 0 PRIMARY \N \N A 0 \N \N \N PRIMARY YES c, a +tbl 0 set_idx \N \N \N 0 \N \N \N SET YES e --- WHERE --- Check with weird table names -$4@^7 0 PRIMARY \N \N A \N \N \N \N PRIMARY YES c -NULL 0 PRIMARY \N \N A \N \N \N \N PRIMARY YES c -\' 0 PRIMARY \N \N A \N \N \N \N PRIMARY YES c -\' 0 PRIMARY \N \N A \N \N \N \N PRIMARY YES c +$4@^7 0 PRIMARY \N \N A 0 \N \N \N PRIMARY YES c +NULL 0 PRIMARY \N \N A 0 \N \N \N PRIMARY YES c +\' 0 PRIMARY \N \N A 0 \N \N \N PRIMARY YES c +\' 0 PRIMARY \N \N A 0 \N \N \N PRIMARY YES c --- Original table -tbl 0 blf_idx \N \N \N \N \N \N \N BLOOM_FILTER YES d, b -tbl 0 mm1_idx \N \N \N \N \N \N \N MINMAX YES a, c, d -tbl 0 mm2_idx \N \N \N \N \N \N \N MINMAX YES c, d, e -tbl 0 PRIMARY \N \N A \N \N \N \N PRIMARY YES c, a -tbl 0 set_idx \N \N \N \N \N \N \N SET YES e +tbl 0 blf_idx \N \N \N 0 
\N \N \N BLOOM_FILTER YES d, b +tbl 0 mm1_idx \N \N \N 0 \N \N \N MINMAX YES a, c, d +tbl 0 mm2_idx \N \N \N 0 \N \N \N MINMAX YES c, d, e +tbl 0 PRIMARY \N \N A 0 \N \N \N PRIMARY YES c, a +tbl 0 set_idx \N \N \N 0 \N \N \N SET YES e --- Equally named table in other database -tbl 0 mmi_idx \N \N \N \N \N \N \N MINMAX YES b -tbl 0 PRIMARY \N \N A \N \N \N \N PRIMARY YES a +tbl 0 mmi_idx \N \N \N 0 \N \N \N MINMAX YES b +tbl 0 PRIMARY \N \N A 0 \N \N \N PRIMARY YES a --- Short form -tbl 0 mmi_idx \N \N \N \N \N \N \N MINMAX YES b -tbl 0 PRIMARY \N \N A \N \N \N \N PRIMARY YES a +tbl 0 mmi_idx \N \N \N 0 \N \N \N MINMAX YES b +tbl 0 PRIMARY \N \N A 0 \N \N \N PRIMARY YES a From b71043b2c9ee4d494a069b2b4f746334348c98a1 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 4 Jul 2023 16:57:31 +0000 Subject: [PATCH 1179/1997] Fix field 'non_unique' --- docs/en/sql-reference/statements/show.md | 12 +-- .../InterpreterShowIndexesQuery.cpp | 4 +- .../0_stateless/02724_show_indexes.reference | 76 +++++++++---------- 3 files changed, 46 insertions(+), 46 deletions(-) diff --git a/docs/en/sql-reference/statements/show.md b/docs/en/sql-reference/statements/show.md index e13f152c0e6..2c3c56ba95a 100644 --- a/docs/en/sql-reference/statements/show.md +++ b/docs/en/sql-reference/statements/show.md @@ -283,7 +283,7 @@ The optional keyword `EXTENDED` currently has no effect, it only exists for MySQ The statement produces a result table with the following structure: - table - The name of the table (String) -- non_unique - 0 if the index cannot contain duplicates, 1 otherwise (UInt8) +- non_unique - Always `1` as ClickHouse does not support uniqueness constraints. (UInt8) - key_name - The name of the index, `PRIMARY` if the index is a primary key index (String) - seq_in_index - Currently unused - column_name - Currently unused @@ -310,11 +310,11 @@ Result: ``` text ┌─table─┬─non_unique─┬─key_name─┬─seq_in_index─┬─column_name─┬─collation─┬─cardinality─┬─sub_part─┬─packed─┬─null─┬─index_type───┬─comment─┬─index_comment─┬─visible─┬─expression─┐ -│ tbl │ 0 │ blf_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ BLOOM_FILTER │ │ │ YES │ d, b │ -│ tbl │ 0 │ mm1_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ MINMAX │ │ │ YES │ a, c, d │ -│ tbl │ 0 │ mm2_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ MINMAX │ │ │ YES │ c, d, e │ -│ tbl │ 0 │ PRIMARY │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ A │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ PRIMARY │ │ │ YES │ c, a │ -│ tbl │ 0 │ set_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ SET │ │ │ YES │ e │ +│ tbl │ 1 │ blf_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ BLOOM_FILTER │ │ │ YES │ d, b │ +│ tbl │ 1 │ mm1_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ MINMAX │ │ │ YES │ a, c, d │ +│ tbl │ 1 │ mm2_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ MINMAX │ │ │ YES │ c, d, e │ +│ tbl │ 1 │ PRIMARY │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ A │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ PRIMARY │ │ │ YES │ c, a │ +│ tbl │ 1 │ set_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ SET │ │ │ YES │ e │ └───────┴────────────┴──────────┴──────────────┴─────────────┴───────────┴─────────────┴──────────┴────────┴──────┴──────────────┴─────────┴───────────────┴─────────┴────────────┘ ``` diff --git a/src/Interpreters/InterpreterShowIndexesQuery.cpp b/src/Interpreters/InterpreterShowIndexesQuery.cpp index d5b34e00791..3c001329ae3 100644 --- a/src/Interpreters/InterpreterShowIndexesQuery.cpp +++ b/src/Interpreters/InterpreterShowIndexesQuery.cpp @@ -40,7 +40,7 @@ SELECT * FROM ( (SELECT name AS table, - 0 AS non_unique, + 1 AS 
non_unique, 'PRIMARY' AS key_name, NULL AS seq_in_index, NULL AS column_name, @@ -61,7 +61,7 @@ FROM ( UNION ALL ( SELECT table AS table, - 0 AS non_unique, + 1 AS non_unique, name AS key_name, NULL AS seq_in_index, NULL AS column_name, diff --git a/tests/queries/0_stateless/02724_show_indexes.reference b/tests/queries/0_stateless/02724_show_indexes.reference index f3019a031af..69cd405ec86 100644 --- a/tests/queries/0_stateless/02724_show_indexes.reference +++ b/tests/queries/0_stateless/02724_show_indexes.reference @@ -1,45 +1,45 @@ --- Aliases of SHOW INDEX -tbl 0 blf_idx \N \N \N 0 \N \N \N BLOOM_FILTER YES d, b -tbl 0 mm1_idx \N \N \N 0 \N \N \N MINMAX YES a, c, d -tbl 0 mm2_idx \N \N \N 0 \N \N \N MINMAX YES c, d, e -tbl 0 PRIMARY \N \N A 0 \N \N \N PRIMARY YES c, a -tbl 0 set_idx \N \N \N 0 \N \N \N SET YES e -tbl 0 blf_idx \N \N \N 0 \N \N \N BLOOM_FILTER YES d, b -tbl 0 mm1_idx \N \N \N 0 \N \N \N MINMAX YES a, c, d -tbl 0 mm2_idx \N \N \N 0 \N \N \N MINMAX YES c, d, e -tbl 0 PRIMARY \N \N A 0 \N \N \N PRIMARY YES c, a -tbl 0 set_idx \N \N \N 0 \N \N \N SET YES e -tbl 0 blf_idx \N \N \N 0 \N \N \N BLOOM_FILTER YES d, b -tbl 0 mm1_idx \N \N \N 0 \N \N \N MINMAX YES a, c, d -tbl 0 mm2_idx \N \N \N 0 \N \N \N MINMAX YES c, d, e -tbl 0 PRIMARY \N \N A 0 \N \N \N PRIMARY YES c, a -tbl 0 set_idx \N \N \N 0 \N \N \N SET YES e -tbl 0 blf_idx \N \N \N 0 \N \N \N BLOOM_FILTER YES d, b -tbl 0 mm1_idx \N \N \N 0 \N \N \N MINMAX YES a, c, d -tbl 0 mm2_idx \N \N \N 0 \N \N \N MINMAX YES c, d, e -tbl 0 PRIMARY \N \N A 0 \N \N \N PRIMARY YES c, a -tbl 0 set_idx \N \N \N 0 \N \N \N SET YES e +tbl 1 blf_idx \N \N \N 0 \N \N \N BLOOM_FILTER YES d, b +tbl 1 mm1_idx \N \N \N 0 \N \N \N MINMAX YES a, c, d +tbl 1 mm2_idx \N \N \N 0 \N \N \N MINMAX YES c, d, e +tbl 1 PRIMARY \N \N A 0 \N \N \N PRIMARY YES c, a +tbl 1 set_idx \N \N \N 0 \N \N \N SET YES e +tbl 1 blf_idx \N \N \N 0 \N \N \N BLOOM_FILTER YES d, b +tbl 1 mm1_idx \N \N \N 0 \N \N \N MINMAX YES a, c, d +tbl 1 mm2_idx \N \N \N 0 \N \N \N MINMAX YES c, d, e +tbl 1 PRIMARY \N \N A 0 \N \N \N PRIMARY YES c, a +tbl 1 set_idx \N \N \N 0 \N \N \N SET YES e +tbl 1 blf_idx \N \N \N 0 \N \N \N BLOOM_FILTER YES d, b +tbl 1 mm1_idx \N \N \N 0 \N \N \N MINMAX YES a, c, d +tbl 1 mm2_idx \N \N \N 0 \N \N \N MINMAX YES c, d, e +tbl 1 PRIMARY \N \N A 0 \N \N \N PRIMARY YES c, a +tbl 1 set_idx \N \N \N 0 \N \N \N SET YES e +tbl 1 blf_idx \N \N \N 0 \N \N \N BLOOM_FILTER YES d, b +tbl 1 mm1_idx \N \N \N 0 \N \N \N MINMAX YES a, c, d +tbl 1 mm2_idx \N \N \N 0 \N \N \N MINMAX YES c, d, e +tbl 1 PRIMARY \N \N A 0 \N \N \N PRIMARY YES c, a +tbl 1 set_idx \N \N \N 0 \N \N \N SET YES e --- EXTENDED -tbl 0 blf_idx \N \N \N 0 \N \N \N BLOOM_FILTER YES d, b -tbl 0 mm1_idx \N \N \N 0 \N \N \N MINMAX YES a, c, d -tbl 0 mm2_idx \N \N \N 0 \N \N \N MINMAX YES c, d, e -tbl 0 PRIMARY \N \N A 0 \N \N \N PRIMARY YES c, a -tbl 0 set_idx \N \N \N 0 \N \N \N SET YES e +tbl 1 blf_idx \N \N \N 0 \N \N \N BLOOM_FILTER YES d, b +tbl 1 mm1_idx \N \N \N 0 \N \N \N MINMAX YES a, c, d +tbl 1 mm2_idx \N \N \N 0 \N \N \N MINMAX YES c, d, e +tbl 1 PRIMARY \N \N A 0 \N \N \N PRIMARY YES c, a +tbl 1 set_idx \N \N \N 0 \N \N \N SET YES e --- WHERE --- Check with weird table names -$4@^7 0 PRIMARY \N \N A 0 \N \N \N PRIMARY YES c -NULL 0 PRIMARY \N \N A 0 \N \N \N PRIMARY YES c -\' 0 PRIMARY \N \N A 0 \N \N \N PRIMARY YES c -\' 0 PRIMARY \N \N A 0 \N \N \N PRIMARY YES c +$4@^7 1 PRIMARY \N \N A 0 \N \N \N PRIMARY YES c +NULL 1 PRIMARY \N \N A 0 \N \N \N PRIMARY YES c +\' 1 PRIMARY \N \N A 0 \N \N 
\N PRIMARY YES c +\' 1 PRIMARY \N \N A 0 \N \N \N PRIMARY YES c --- Original table -tbl 0 blf_idx \N \N \N 0 \N \N \N BLOOM_FILTER YES d, b -tbl 0 mm1_idx \N \N \N 0 \N \N \N MINMAX YES a, c, d -tbl 0 mm2_idx \N \N \N 0 \N \N \N MINMAX YES c, d, e -tbl 0 PRIMARY \N \N A 0 \N \N \N PRIMARY YES c, a -tbl 0 set_idx \N \N \N 0 \N \N \N SET YES e +tbl 1 blf_idx \N \N \N 0 \N \N \N BLOOM_FILTER YES d, b +tbl 1 mm1_idx \N \N \N 0 \N \N \N MINMAX YES a, c, d +tbl 1 mm2_idx \N \N \N 0 \N \N \N MINMAX YES c, d, e +tbl 1 PRIMARY \N \N A 0 \N \N \N PRIMARY YES c, a +tbl 1 set_idx \N \N \N 0 \N \N \N SET YES e --- Equally named table in other database -tbl 0 mmi_idx \N \N \N 0 \N \N \N MINMAX YES b -tbl 0 PRIMARY \N \N A 0 \N \N \N PRIMARY YES a +tbl 1 mmi_idx \N \N \N 0 \N \N \N MINMAX YES b +tbl 1 PRIMARY \N \N A 0 \N \N \N PRIMARY YES a --- Short form -tbl 0 mmi_idx \N \N \N 0 \N \N \N MINMAX YES b -tbl 0 PRIMARY \N \N A 0 \N \N \N PRIMARY YES a +tbl 1 mmi_idx \N \N \N 0 \N \N \N MINMAX YES b +tbl 1 PRIMARY \N \N A 0 \N \N \N PRIMARY YES a From 0f68c894f54900aa323e4345bbc8c55eefd2040f Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 4 Jul 2023 17:02:00 +0000 Subject: [PATCH 1180/1997] Point to existing system tables for alternatives --- docs/en/sql-reference/statements/show.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/en/sql-reference/statements/show.md b/docs/en/sql-reference/statements/show.md index 2c3c56ba95a..d30ded6f3dc 100644 --- a/docs/en/sql-reference/statements/show.md +++ b/docs/en/sql-reference/statements/show.md @@ -272,6 +272,10 @@ SHOW DICTIONARIES FROM db LIKE '%reg%' LIMIT 2 Displays a list of primary and data skipping indexes of a table. +This statement mostly exists for compatibility with MySQL. System tables [system.tables](../../operations/system-tables/tables.md) (for +primary keys) and [system.data_skipping_indices](../../operations/system-tables/data_skipping_indices.md) (for data skipping indices) +provide equivalent information but in a fashion more native to ClickHouse. 
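+
+For example, the data skipping indexes of a table can be inspected natively like this (an illustrative query; the output layout differs from `SHOW INDEX`):
+
+```sql
+SELECT table, name, type, expr, granularity
+FROM system.data_skipping_indices
+WHERE database = currentDatabase() AND table = 'tbl'
+```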
+ ```sql SHOW [EXTENDED] {INDEX | INDEXES | INDICES | KEYS } {FROM | IN} [{FROM | IN} ] [WHERE ] [INTO OUTFILE ] [FORMAT ] ``` From e84769cb23b1447dce57eb957480f7c5d7cdced8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 4 Jul 2023 20:19:17 +0300 Subject: [PATCH 1181/1997] Update 02789_object_type_invalid_num_of_rows.reference --- .../0_stateless/02789_object_type_invalid_num_of_rows.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference b/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference index 7dec35f7acb..8b137891791 100644 --- a/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference +++ b/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference @@ -1 +1 @@ -0.02 + From 32ee0e7d08d0f525c69cad6df1639c3d27888a7c Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 4 Jul 2023 17:35:26 +0000 Subject: [PATCH 1182/1997] Add assert in ThreadStatus destructor for correct current_thread --- src/Common/ThreadStatus.cpp | 9 ++++++--- src/Common/ThreadStatus.h | 4 +++- src/Processors/Transforms/buildPushingToViewsChain.cpp | 2 +- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index 9b0743d89c3..7a602afe7e7 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -67,8 +67,8 @@ ThreadGroup::ThreadGroup() : master_thread_id(CurrentThread::get().thread_id) {} -ThreadStatus::ThreadStatus() - : thread_id{getThreadId()} +ThreadStatus::ThreadStatus(bool check_current_thread_on_destruction_) + : thread_id{getThreadId()}, check_current_thread_on_destruction(check_current_thread_on_destruction_) { last_rusage = std::make_unique(); @@ -201,8 +201,11 @@ ThreadStatus::~ThreadStatus() /// Only change current_thread if it's currently being used by this ThreadStatus /// For example, PushingToViews chain creates and deletes ThreadStatus instances while running in the main query thread - if (current_thread == this) + if (check_current_thread_on_destruction) + { + assert(current_thread == this); current_thread = nullptr; + } } void ThreadStatus::updatePerformanceCounters() diff --git a/src/Common/ThreadStatus.h b/src/Common/ThreadStatus.h index 061959d9f1f..6e4f074a162 100644 --- a/src/Common/ThreadStatus.h +++ b/src/Common/ThreadStatus.h @@ -224,8 +224,10 @@ private: Poco::Logger * log = nullptr; + bool check_current_thread_on_destruction; + public: - ThreadStatus(); + explicit ThreadStatus(bool check_current_thread_on_destruction_ = true); ~ThreadStatus(); ThreadGroupPtr getThreadGroup() const; diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 43085690519..7f7f9058f1b 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -282,7 +282,7 @@ Chain buildPushingToViewsChain( auto * original_thread = current_thread; SCOPE_EXIT({ current_thread = original_thread; }); - std::unique_ptr view_thread_status_ptr = std::make_unique(); + std::unique_ptr view_thread_status_ptr = std::make_unique(/*check_current_thread_on_destruction=*/ false); /// Copy of a ThreadStatus should be internal. 
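    /// Rationale, as described in the ThreadStatus destructor above: the PushingToViews chain
    /// creates and deletes these ThreadStatus instances while running in the main query thread,
    /// so by the time they are destroyed current_thread no longer points at them, and the new
    /// assert(current_thread == this) is skipped for them via
    /// check_current_thread_on_destruction = false.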
view_thread_status_ptr->setInternalThread(); view_thread_status_ptr->attachToGroup(running_group); From 31ced70ced6cf8a82aac60b7cd7e9d2740aae2bf Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 4 Jul 2023 20:19:20 +0200 Subject: [PATCH 1183/1997] remove wrong commit, fix the exceptions in tests --- .../00429_long_http_bufferization.sh | 27 +++++++++---------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/tests/queries/0_stateless/00429_long_http_bufferization.sh b/tests/queries/0_stateless/00429_long_http_bufferization.sh index 55192422389..98dd300e6ab 100755 --- a/tests/queries/0_stateless/00429_long_http_bufferization.sh +++ b/tests/queries/0_stateless/00429_long_http_bufferization.sh @@ -15,7 +15,9 @@ function query { } function ch_url() { - ${CLICKHOUSE_CURL_COMMAND} -q -sS "${CLICKHOUSE_URL}&max_block_size=$max_block_size&$1" -d "$(query "$2")" + ${CLICKHOUSE_CURL_COMMAND} -q -sS \ + "${CLICKHOUSE_URL}${max_block_size:+"&max_block_size=$max_block_size"}&$1" \ + -d "$(query "$2")" } @@ -26,9 +28,9 @@ exception_pattern="DB::Exception:[[:print:]]*" function check_only_exception() { local res res=$(ch_url "$1" "$2") - #(echo "$res") - #(echo "$res" | wc -l) - #(echo "$res" | grep -c "$exception_pattern") + # echo "$res" + # echo "$res" | wc -l + # echo "$res" | grep -c "$exception_pattern" [[ $(echo "$res" | wc -l) -eq 1 ]] || echo FAIL 1 "$@" [[ $(echo "$res" | grep -c "$exception_pattern") -eq 1 ]] || echo FAIL 2 "$@" } @@ -36,27 +38,23 @@ function check_only_exception() { function check_last_line_exception() { local res res=$(ch_url "$1" "$2") - #echo "$res" > res - #echo "$res" | wc -c - #echo "$res" | tail -n -2 + # echo "$res" > res + # echo "$res" | wc -c + # echo "$res" | tail -n -2 [[ $(echo "$res" | tail -n -1 | grep -c "$exception_pattern") -eq 1 ]] || echo FAIL 3 "$@" [[ $(echo "$res" | head -n -1 | grep -c "$exception_pattern") -eq 0 ]] || echo FAIL 4 "$@" } function check_exception_handling() { - # it is impossible to override max_block_size, details here https://github.com/ClickHouse/ClickHouse/issues/51694 - # rebuild CLICKHOUSE_URL for one call in order to avoid using random parameters from CLICKHOUSE_URL_PARAMS - CLICKHOUSE_URL="${CLICKHOUSE_PORT_HTTP_PROTO}://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTP}/?wait_end_of_query=0" \ - max_block_size=30000 \ format=TSV \ check_last_line_exception \ - "max_result_rows=400000&buffer_size=1048577&wait_end_of_query=0" 111222333444 + "max_block_size=30000&max_result_rows=400000&buffer_size=1048577&wait_end_of_query=0" 111222333444 check_only_exception "max_result_bytes=1000" 1001 check_only_exception "max_result_bytes=1000&wait_end_of_query=1" 1001 - check_only_exception "max_result_bytes=1048576&buffer_size=1048576&wait_end_of_query=0" 1048577 - check_only_exception "max_result_bytes=1048576&buffer_size=1048576&wait_end_of_query=1" 1048577 + check_last_line_exception "max_result_bytes=1048576&buffer_size=1048576&wait_end_of_query=0" 1048577 + check_only_exception "max_result_bytes=1048576&buffer_size=1048576&wait_end_of_query=1" 1048577 check_only_exception "max_result_bytes=1500000&buffer_size=2500000&wait_end_of_query=0" 1500001 check_only_exception "max_result_bytes=1500000&buffer_size=1500000&wait_end_of_query=1" 1500001 @@ -70,7 +68,6 @@ check_exception_handling # Tune setting to speed up combinatorial test -# max_block_size has no effect here, that value has been set inside CLICKHOUSE_URL max_block_size=500000 corner_sizes="1048576 $(seq 500000 1000000 3500000)" From 
d987b94ed48594541bf91bb42fb4f5a8ced52e1f Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 4 Jul 2023 20:51:15 +0200 Subject: [PATCH 1184/1997] fix the way how broken parts are detached --- src/Storages/MergeTree/IMergeTreeDataPart.h | 8 +- src/Storages/MergeTree/MergeTreeData.cpp | 23 +- src/Storages/MergeTree/MergeTreeData.h | 10 +- .../ReplicatedMergeTreePartCheckThread.cpp | 402 ++++++++++-------- .../ReplicatedMergeTreePartCheckThread.h | 44 +- src/Storages/StorageReplicatedMergeTree.cpp | 68 ++- 6 files changed, 317 insertions(+), 238 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index fd73d802579..1fdcbd7309c 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -242,9 +242,11 @@ public: /// Frozen by ALTER TABLE ... FREEZE ... It is used for information purposes in system.parts table. mutable std::atomic is_frozen {false}; - /// Indicated that the part was marked Outdated because it's broken, not because it's actually outdated - /// See outdateBrokenPartAndCloneToDetached(...) - mutable bool outdated_because_broken = false; + /// Indicates that the part was marked Outdated by PartCheckThread because the part was not committed to ZooKeeper + mutable bool is_unexpected_local_part = false; + + /// Indicates that the part was detached and marked Outdated because it's broken + mutable std::atomic_bool was_removed_as_broken = false; /// Flag for keep S3 data when zero-copy replication over S3 turned on. mutable bool force_keep_shared_data = false; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index e9c3a7f66ae..e37d4273629 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -4023,22 +4023,15 @@ void MergeTreeData::restoreAndActivatePart(const DataPartPtr & part, DataPartsLo } -void MergeTreeData::outdateBrokenPartAndCloneToDetached(const DataPartPtr & part_to_detach, const String & prefix) +void MergeTreeData::outdateUnexpectedPartAndCloneToDetached(const DataPartPtr & part_to_detach) { - auto metadata_snapshot = getInMemoryMetadataPtr(); - if (prefix.empty()) - LOG_INFO(log, "Cloning part {} to {} and making it obsolete.", part_to_detach->getDataPartStorage().getPartDirectory(), part_to_detach->name); - else - LOG_INFO(log, "Cloning part {} to {}_{} and making it obsolete.", part_to_detach->getDataPartStorage().getPartDirectory(), prefix, part_to_detach->name); - - part_to_detach->makeCloneInDetached(prefix, metadata_snapshot); + LOG_INFO(log, "Cloning part {} to unexpected_{} and making it obsolete.", part_to_detach->getDataPartStorage().getPartDirectory(), part_to_detach->name); + part_to_detach->makeCloneInDetached("unexpected", getInMemoryMetadataPtr()); DataPartsLock lock = lockParts(); + part_to_detach->is_unexpected_local_part = true; if (part_to_detach->getState() == DataPartState::Active) - { - part_to_detach->outdated_because_broken = true; removePartsFromWorkingSet(NO_TRANSACTION_RAW, {part_to_detach}, true, &lock); - } } void MergeTreeData::forcefullyMovePartToDetachedAndRemoveFromMemory(const MergeTreeData::DataPartPtr & part_to_detach, const String & prefix, bool restore_covered) @@ -4677,24 +4670,24 @@ MergeTreeData::DataPartsVector MergeTreeData::getVisibleDataPartsVectorInPartiti return res; } -MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const MergeTreePartInfo & part_info, const MergeTreeData::DataPartStates & 
valid_states) +MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const MergeTreePartInfo & part_info, const MergeTreeData::DataPartStates & valid_states) const { auto lock = lockParts(); return getPartIfExistsUnlocked(part_info, valid_states, lock); } -MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const String & part_name, const MergeTreeData::DataPartStates & valid_states) +MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const String & part_name, const MergeTreeData::DataPartStates & valid_states) const { auto lock = lockParts(); return getPartIfExistsUnlocked(part_name, valid_states, lock); } -MergeTreeData::DataPartPtr MergeTreeData::getPartIfExistsUnlocked(const String & part_name, const DataPartStates & valid_states, DataPartsLock & acquired_lock) +MergeTreeData::DataPartPtr MergeTreeData::getPartIfExistsUnlocked(const String & part_name, const DataPartStates & valid_states, DataPartsLock & acquired_lock) const { return getPartIfExistsUnlocked(MergeTreePartInfo::fromPartName(part_name, format_version), valid_states, acquired_lock); } -MergeTreeData::DataPartPtr MergeTreeData::getPartIfExistsUnlocked(const MergeTreePartInfo & part_info, const DataPartStates & valid_states, DataPartsLock & /* acquired_lock */) +MergeTreeData::DataPartPtr MergeTreeData::getPartIfExistsUnlocked(const MergeTreePartInfo & part_info, const DataPartStates & valid_states, DataPartsLock & /* acquired_lock */) const { auto it = data_parts_by_info.find(part_info); if (it == data_parts_by_info.end()) diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index b27392b355b..d5991aaea71 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -521,10 +521,10 @@ public: DataPartsVector getDataPartsVectorInPartitionForInternalUsage(const DataPartStates & affordable_states, const String & partition_id, DataPartsLock * acquired_lock = nullptr) const; /// Returns the part with the given name and state or nullptr if no such part. - DataPartPtr getPartIfExistsUnlocked(const String & part_name, const DataPartStates & valid_states, DataPartsLock & acquired_lock); - DataPartPtr getPartIfExistsUnlocked(const MergeTreePartInfo & part_info, const DataPartStates & valid_states, DataPartsLock & acquired_lock); - DataPartPtr getPartIfExists(const String & part_name, const DataPartStates & valid_states); - DataPartPtr getPartIfExists(const MergeTreePartInfo & part_info, const DataPartStates & valid_states); + DataPartPtr getPartIfExistsUnlocked(const String & part_name, const DataPartStates & valid_states, DataPartsLock & acquired_lock) const; + DataPartPtr getPartIfExistsUnlocked(const MergeTreePartInfo & part_info, const DataPartStates & valid_states, DataPartsLock & acquired_lock) const; + DataPartPtr getPartIfExists(const String & part_name, const DataPartStates & valid_states) const; + DataPartPtr getPartIfExists(const MergeTreePartInfo & part_info, const DataPartStates & valid_states) const; /// Total size of active parts in bytes. size_t getTotalActiveSizeInBytes() const; @@ -654,7 +654,7 @@ public: virtual void forcefullyRemoveBrokenOutdatedPartFromZooKeeperBeforeDetaching(const String & /*part_name*/) {} /// Outdate broken part, set remove time to zero (remove as fast as possible) and make clone in detached directory. 
- void outdateBrokenPartAndCloneToDetached(const DataPartPtr & part, const String & prefix); + void outdateUnexpectedPartAndCloneToDetached(const DataPartPtr & part); /// If the part is Obsolete and not used by anybody else, immediately delete it from filesystem and remove from memory. void tryRemovePartImmediately(DataPartPtr && part); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp index c495fdaf5e2..d6f8dbac883 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp @@ -131,7 +131,7 @@ size_t ReplicatedMergeTreePartCheckThread::size() const } -ReplicatedMergeTreePartCheckThread::MissingPartSearchResult ReplicatedMergeTreePartCheckThread::searchForMissingPartOnOtherReplicas(const String & part_name) +bool ReplicatedMergeTreePartCheckThread::searchForMissingPartOnOtherReplicas(const String & part_name) const { auto zookeeper = storage.getZooKeeper(); @@ -198,13 +198,13 @@ ReplicatedMergeTreePartCheckThread::MissingPartSearchResult ReplicatedMergeTreeP continue; LOG_INFO(log, "Found the missing part {} at {} on {}", part_name, part_on_replica, replica); - return MissingPartSearchResult::FoundAndNeedFetch; + return true; } if (part_on_replica_info.contains(part_info)) { LOG_INFO(log, "Found part {} on {} that covers the missing part {}", part_on_replica, replica, part_name); - return MissingPartSearchResult::FoundAndDontNeedFetch; + return true; } if (part_info.contains(part_on_replica_info)) @@ -227,11 +227,10 @@ ReplicatedMergeTreePartCheckThread::MissingPartSearchResult ReplicatedMergeTreeP if (found_part_with_the_same_min_block && found_part_with_the_same_max_block) { - /// FIXME It may never appear LOG_INFO(log, "Found parts with the same min block and with the same max block as the missing part {} on replica {}. " "Hoping that it will eventually appear as a result of a merge. Parts: {}", part_name, replica, fmt::join(parts_found, ", ")); - return MissingPartSearchResult::FoundAndDontNeedFetch; + return true; } } } @@ -247,70 +246,9 @@ ReplicatedMergeTreePartCheckThread::MissingPartSearchResult ReplicatedMergeTreeP not_found_msg = "smaller parts with either the same min block or the same max block."; LOG_ERROR(log, "No replica has part covering {} and a merge is impossible: we didn't find {}", part_name, not_found_msg); - return MissingPartSearchResult::LostForever; + return false; } -void ReplicatedMergeTreePartCheckThread::searchForMissingPartAndFetchIfPossible(const String & part_name, bool exists_in_zookeeper) -{ - auto zookeeper = storage.getZooKeeper(); - auto missing_part_search_result = searchForMissingPartOnOtherReplicas(part_name); - - /// If the part is in ZooKeeper, remove it from there and add the task to download it to the queue. - if (exists_in_zookeeper) - { - if (missing_part_search_result == MissingPartSearchResult::FoundAndNeedFetch) - { - LOG_WARNING(log, "Part {} exists in ZooKeeper but not locally and found on other replica. Removing from ZooKeeper and queueing a fetch.", part_name); - } - else - { - LOG_WARNING(log, "Part {} exists in ZooKeeper but not locally and not found on other replica. Removing it from ZooKeeper.", part_name); - } - - /// We cannot simply remove part from ZooKeeper, because it may be removed from virtual_part, - /// so we have to create some entry in the queue. 
Maybe we will execute it (by fetching part or covering part from somewhere), - /// maybe will simply replace with empty part. - storage.removePartAndEnqueueFetch(part_name, /* storage_init = */false); - } - - ProfileEvents::increment(ProfileEvents::ReplicatedPartChecksFailed); - - if (missing_part_search_result == MissingPartSearchResult::LostForever) - { - auto lost_part_info = MergeTreePartInfo::fromPartName(part_name, storage.format_version); - if (lost_part_info.level != 0 || lost_part_info.mutation != 0) - { - Strings source_parts; - bool part_in_queue = storage.queue.checkPartInQueueAndGetSourceParts(part_name, source_parts); - - /// If it's MERGE/MUTATION etc. we shouldn't replace result part with empty part - /// because some source parts can be lost, but some of them can exist. - if (part_in_queue && !source_parts.empty()) - { - LOG_ERROR(log, "Part {} found in queue and some source parts for it was lost. Will check all source parts.", part_name); - for (const String & source_part_name : source_parts) - enqueuePart(source_part_name); - - return; - } - } - - ThreadFuzzer::maybeInjectSleep(); - - if (storage.createEmptyPartInsteadOfLost(zookeeper, part_name)) - { - /** This situation is possible if on all the replicas where the part was, it deteriorated. - * For example, a replica that has just written it has power turned off and the data has not been written from cache to disk. - */ - LOG_ERROR(log, "Part {} is lost forever.", part_name); - ProfileEvents::increment(ProfileEvents::ReplicatedDataLoss); - } - else - { - LOG_WARNING(log, "Cannot create empty part {} instead of lost. Will retry later", part_name); - } - } -} std::pair ReplicatedMergeTreePartCheckThread::findLocalPart(const String & part_name) { @@ -335,12 +273,12 @@ std::pair ReplicatedMergeTreePartCheckThread::findLo return std::make_pair(exists_in_zookeeper, part); } -CheckResult ReplicatedMergeTreePartCheckThread::checkPart(const String & part_name) +ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const String & part_name) { - LOG_INFO(log, "Checking part {}", part_name); - ProfileEvents::increment(ProfileEvents::ReplicatedPartChecks); - + ReplicatedCheckResult result; auto [exists_in_zookeeper, part] = findLocalPart(part_name); + result.exists_in_zookeeper = exists_in_zookeeper; + result.part = part; LOG_TRACE(log, "Part {} in zookeeper: {}, locally: {}", part_name, exists_in_zookeeper, part != nullptr); @@ -351,130 +289,236 @@ CheckResult ReplicatedMergeTreePartCheckThread::checkPart(const String & part_na { /// We cannot rely on exists_in_zookeeper, because the cleanup thread is probably going to remove it from ZooKeeper /// Also, it will avoid "Cannot commit empty part: Part ... (state Outdated) already exists, but it will be deleted soon" - LOG_WARNING(log, "Part {} is Outdated, will wait for cleanup thread to handle it and check again later", part_name); time_t lifetime = time(nullptr) - outdated->remove_time; time_t max_lifetime = storage.getSettings()->old_parts_lifetime.totalSeconds(); time_t delay = lifetime >= max_lifetime ? 
0 : max_lifetime - lifetime; - enqueuePart(part_name, delay + 30); - return {part_name, true, "Part is Outdated, will recheck later"}; + result.recheck_after = delay + 30; + + auto message = PreformattedMessage::create("Part {} is Outdated, will wait for cleanup thread to handle it " + "and check again after {}s", part_name, result.recheck_after); + LOG_WARNING(log, message); + result.status = {part_name, true, message.text}; + result.action = ReplicatedCheckResult::RecheckLater; + return result; } } /// We do not have this or a covering part. if (!part) { - searchForMissingPartAndFetchIfPossible(part_name, exists_in_zookeeper); - return {part_name, false, "Part is missing, will search for it"}; + result.status = {part_name, false, "Part is missing, will search for it"}; + result.action = ReplicatedCheckResult::TryFetchMissing; + return result; } /// We have this part, and it's active. We will check whether we need this part and whether it has the right data. - if (part->name == part_name) - { - auto zookeeper = storage.getZooKeeper(); - auto table_lock = storage.lockForShare(RWLockImpl::NO_QUERY, storage.getSettings()->lock_acquire_timeout_for_background_operations); - - auto local_part_header = ReplicatedMergeTreePartHeader::fromColumnsAndChecksums( - part->getColumns(), part->checksums); - - /// The double get scheme is needed to retain compatibility with very old parts that were created - /// before the ReplicatedMergeTreePartHeader was introduced. - - String part_path = storage.replica_path + "/parts/" + part_name; - String part_znode; - /// If the part is in ZooKeeper, check its data with its checksums, and them with ZooKeeper. - if (zookeeper->tryGet(part_path, part_znode)) - { - LOG_INFO(log, "Checking data of part {}.", part_name); - - try - { - ReplicatedMergeTreePartHeader zk_part_header; - if (!part_znode.empty()) - zk_part_header = ReplicatedMergeTreePartHeader::fromString(part_znode); - else - { - String columns_znode = zookeeper->get(part_path + "/columns"); - String checksums_znode = zookeeper->get(part_path + "/checksums"); - zk_part_header = ReplicatedMergeTreePartHeader::fromColumnsAndChecksumsZNodes( - columns_znode, checksums_znode); - } - - if (local_part_header.getColumnsHash() != zk_part_header.getColumnsHash()) - throw Exception(ErrorCodes::TABLE_DIFFERS_TOO_MUCH, "Columns of local part {} are different from ZooKeeper", part_name); - - zk_part_header.getChecksums().checkEqual(local_part_header.getChecksums(), true); - - checkDataPart( - part, - true, - [this] { return need_stop.load(); }); - - if (need_stop) - { - LOG_INFO(log, "Checking part was cancelled."); - return {part_name, false, "Checking part was cancelled"}; - } - - LOG_INFO(log, "Part {} looks good.", part_name); - } - catch (const Exception & e) - { - /// Don't count the part as broken if we got known retryable exception. - /// In fact, there can be other similar situations because not all - /// of the exceptions are classified as retryable/non-retryable. But it is OK, - /// because there is a safety guard against deleting too many parts. - if (isRetryableException(e)) - throw; - - tryLogCurrentException(log, __PRETTY_FUNCTION__); - constexpr auto fmt_string = "Part {} looks broken. Removing it and will try to fetch."; - String message = fmt::format(fmt_string, part_name); - LOG_ERROR(log, fmt_string, part_name); - - /// Delete part locally. 
- storage.outdateBrokenPartAndCloneToDetached(part, "broken"); - - ThreadFuzzer::maybeInjectMemoryLimitException(); - ThreadFuzzer::maybeInjectSleep(); - - /// Part is broken, let's try to find it and fetch. - searchForMissingPartAndFetchIfPossible(part_name, exists_in_zookeeper); - - return {part_name, false, message}; - } - } - else if (part->modification_time + MAX_AGE_OF_LOCAL_PART_THAT_WASNT_ADDED_TO_ZOOKEEPER < time(nullptr)) - { - /// If the part is not in ZooKeeper, delete it locally. - /// Probably, someone just wrote down the part, and has not yet added to ZK. - /// Therefore, delete only if the part is old (not very reliable). - ProfileEvents::increment(ProfileEvents::ReplicatedPartChecksFailed); - constexpr auto fmt_string = "Unexpected part {} in filesystem. Removing."; - String message = fmt::format(fmt_string, part_name); - LOG_ERROR(log, fmt_string, part_name); - storage.outdateBrokenPartAndCloneToDetached(part, "unexpected"); - ThreadFuzzer::maybeInjectSleep(); - return {part_name, false, message}; - } - else - { - /// TODO You need to make sure that the part is still checked after a while. - /// Otherwise, it's possible that the part was not added to ZK, - /// but remained in the filesystem and in a number of active parts. - /// And then for a long time (before restarting), the data on the replicas will be different. - - LOG_TRACE(log, "Young part {} with age {} seconds hasn't been added to ZooKeeper yet. It's ok.", part_name, (time(nullptr) - part->modification_time)); - } - } - else + if (part->name != part_name) { /// If we have a covering part, ignore all the problems with this part. /// In the worst case, errors will still appear `old_parts_lifetime` seconds in error log until the part is removed as the old one. - LOG_WARNING(log, "We have part {} covering part {}", part->name, part_name); + auto message = PreformattedMessage::create("We have part {} covering part {}, will not check", part->name, part_name); + LOG_WARNING(log, message); + result.status = {part_name, true, message.text}; + result.action = ReplicatedCheckResult::DoNothing; + return result; } - part->checkMetadata(); - return {part_name, true, ""}; + time_t current_time = time(nullptr); + auto zookeeper = storage.getZooKeeper(); + auto table_lock = storage.lockForShare(RWLockImpl::NO_QUERY, storage.getSettings()->lock_acquire_timeout_for_background_operations); + + auto local_part_header = ReplicatedMergeTreePartHeader::fromColumnsAndChecksums( + part->getColumns(), part->checksums); + + + /// If the part is in ZooKeeper, check its data with its checksums, and them with ZooKeeper. + if (exists_in_zookeeper) + { + LOG_INFO(log, "Checking data of part {}.", part_name); + + /// The double get scheme is needed to retain compatibility with very old parts that were created + /// before the ReplicatedMergeTreePartHeader was introduced. 
+ String part_path = storage.replica_path + "/parts/" + part_name; + String part_znode = zookeeper->get(part_path); + + try + { + ReplicatedMergeTreePartHeader zk_part_header; + if (!part_znode.empty()) + zk_part_header = ReplicatedMergeTreePartHeader::fromString(part_znode); + else + { + String columns_znode = zookeeper->get(part_path + "/columns"); + String checksums_znode = zookeeper->get(part_path + "/checksums"); + zk_part_header = ReplicatedMergeTreePartHeader::fromColumnsAndChecksumsZNodes( + columns_znode, checksums_znode); + } + + if (local_part_header.getColumnsHash() != zk_part_header.getColumnsHash()) + throw Exception(ErrorCodes::TABLE_DIFFERS_TOO_MUCH, "Columns of local part {} are different from ZooKeeper", part_name); + + zk_part_header.getChecksums().checkEqual(local_part_header.getChecksums(), true); + + checkDataPart( + part, + true, + [this] { return need_stop.load(); }); + + if (need_stop) + { + result.status = {part_name, false, "Checking part was cancelled"}; + result.action = ReplicatedCheckResult::Cancelled; + return result; + } + + part->checkMetadata(); + + LOG_INFO(log, "Part {} looks good.", part_name); + result.status = {part_name, true, ""}; + result.action = ReplicatedCheckResult::DoNothing; + return result; + } + catch (const Exception & e) + { + /// Don't count the part as broken if we got known retryable exception. + /// In fact, there can be other similar situations because not all + /// of the exceptions are classified as retryable/non-retryable. But it is OK, + /// because there is a safety guard against deleting too many parts. + if (isRetryableException(e)) + throw; + + tryLogCurrentException(log, __PRETTY_FUNCTION__); + + auto message = PreformattedMessage::create("Part {} looks broken. Removing it and will try to fetch.", part_name); + LOG_ERROR(log, message); + + /// Part is broken, let's try to find it and fetch. + result.status = {part_name, false, message}; + result.action = ReplicatedCheckResult::TryFetchMissing; + return result; + } + } + else if (part->modification_time + MAX_AGE_OF_LOCAL_PART_THAT_WASNT_ADDED_TO_ZOOKEEPER < current_time) + { + /// If the part is not in ZooKeeper, delete it locally. + /// Probably, someone just wrote down the part, and has not yet added to ZK. + /// Therefore, delete only if the part is old (not very reliable). + constexpr auto fmt_string = "Unexpected part {} in filesystem. Removing."; + String message = fmt::format(fmt_string, part_name); + LOG_ERROR(log, fmt_string, part_name); + result.status = {part_name, false, message}; + result.action = ReplicatedCheckResult::DetachUnexpected; + return result; + } + else + { + auto message = PreformattedMessage::create("Young part {} with age {} seconds hasn't been added to ZooKeeper yet. 
It's ok.", + part_name, (current_time - part->modification_time)); + LOG_INFO(log, message); + result.recheck_after = part->modification_time + MAX_AGE_OF_LOCAL_PART_THAT_WASNT_ADDED_TO_ZOOKEEPER - current_time; + result.status = {part_name, true, message}; + result.action = ReplicatedCheckResult::RecheckLater; + return result; + } +} + + +CheckResult ReplicatedMergeTreePartCheckThread::checkPartAndFix(const String & part_name) +{ + LOG_INFO(log, "Checking part {}", part_name); + ProfileEvents::increment(ProfileEvents::ReplicatedPartChecks); + + ReplicatedCheckResult result = checkPartImpl(part_name); + switch (result.action) + { + case ReplicatedCheckResult::None: UNREACHABLE(); + case ReplicatedCheckResult::DoNothing: break; + case ReplicatedCheckResult::Cancelled: + LOG_INFO(log, "Checking part was cancelled."); + break; + + case ReplicatedCheckResult::RecheckLater: + enqueuePart(part_name, result.recheck_after); + break; + + case ReplicatedCheckResult::DetachUnexpected: + chassert(!result.exists_in_zookeeper); + ProfileEvents::increment(ProfileEvents::ReplicatedPartChecksFailed); + + storage.outdateUnexpectedPartAndCloneToDetached(result.part); + break; + + case ReplicatedCheckResult::TryFetchMissing: + { + ProfileEvents::increment(ProfileEvents::ReplicatedPartChecksFailed); + + /// If the part is in ZooKeeper, remove it from there and add the task to download it to the queue (atomically). + if (result.exists_in_zookeeper) + { + /// We cannot simply remove part from ZooKeeper, because it may be removed from virtual_part, + /// so we have to create some entry in the queue. Maybe we will execute it (by fetching part or covering part from somewhere), + /// maybe will simply replace with empty part. + if (result.part) + LOG_WARNING(log, "Part {} exists in ZooKeeper and the local part was broken. Detaching it, removing from ZooKeeper and queueing a fetch.", part_name); + else + LOG_WARNING(log, "Part {} exists in ZooKeeper but not locally. Removing from ZooKeeper and queueing a fetch.", part_name); + + storage.removePartAndEnqueueFetch(part_name, /* storage_init = */ false); + break; + } + + chassert(!result.part); + + /// Part is not in ZooKeeper and not on disk (so there's nothing to detach or remove from ZooKeeper). + /// Probably we cannot execute some entry from the replication queue (so don't need to enqueue another one). + /// Either all replicas having the part are not active, or the part is lost forever. + bool is_lost = searchForMissingPartOnOtherReplicas(part_name); + if (is_lost) + onPartIsLostForever(part_name); + + break; + } + } + + return result.status; +} + +void ReplicatedMergeTreePartCheckThread::onPartIsLostForever(const String & part_name) +{ + auto lost_part_info = MergeTreePartInfo::fromPartName(part_name, storage.format_version); + if (lost_part_info.level != 0 || lost_part_info.mutation != 0) + { + Strings source_parts; + bool part_in_queue = storage.queue.checkPartInQueueAndGetSourceParts(part_name, source_parts); + + /// If it's MERGE/MUTATION etc. we shouldn't replace result part with empty part + /// because some source parts can be lost, but some of them can exist. + if (part_in_queue && !source_parts.empty()) + { + LOG_ERROR(log, "Part {} found in queue and some source parts for it was lost. 
Will check all source parts.", part_name); + for (const String & source_part_name : source_parts) + enqueuePart(source_part_name); + + return; + } + } + + ThreadFuzzer::maybeInjectSleep(); + + if (storage.createEmptyPartInsteadOfLost(storage.getZooKeeper(), part_name)) + { + /** This situation is possible if on all the replicas where the part was, it deteriorated. + * For example, a replica that has just written it has power turned off and the data has not been written from cache to disk. + */ + LOG_ERROR(log, "Part {} is lost forever.", part_name); + ProfileEvents::increment(ProfileEvents::ReplicatedDataLoss); + } + else + { + LOG_WARNING(log, "Cannot create empty part {} instead of lost. Will retry later", part_name); + constexpr time_t retry_after_seconds = 30; + enqueuePart(part_name, retry_after_seconds); + } } @@ -524,7 +568,7 @@ void ReplicatedMergeTreePartCheckThread::run() if (selected == parts_queue.end()) return; - checkPart(selected->first); + checkPartAndFix(selected->first); if (need_stop) return; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h index b86191dbf50..0a8fbc75c05 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h @@ -18,6 +18,27 @@ namespace DB class StorageReplicatedMergeTree; +struct ReplicatedCheckResult +{ + enum Action + { + None, + + Cancelled, + DoNothing, + RecheckLater, + + DetachUnexpected, + TryFetchMissing, + }; + + CheckResult status; + Action action = None; + + bool exists_in_zookeeper; + MergeTreeDataPartPtr part; + time_t recheck_after = 0; +}; /** Checks the integrity of the parts requested for validation. * @@ -44,7 +65,9 @@ public: size_t size() const; /// Check part by name - CheckResult checkPart(const String & part_name); + CheckResult checkPartAndFix(const String & part_name); + + ReplicatedCheckResult checkPartImpl(const String & part_name); std::unique_lock pausePartsCheck(); @@ -54,26 +77,13 @@ public: private: void run(); - /// Search for missing part and queue fetch if possible. Otherwise - /// remove part from zookeeper and queue. - void searchForMissingPartAndFetchIfPossible(const String & part_name, bool exists_in_zookeeper); + void onPartIsLostForever(const String & part_name); std::pair findLocalPart(const String & part_name); - enum MissingPartSearchResult - { - /// We found this part on other replica, let's fetch it. - FoundAndNeedFetch, - /// We found covering part or source part with same min and max block number - /// don't need to fetch because we should do it during normal queue processing. - FoundAndDontNeedFetch, - /// Covering part not found anywhere and exact part_name doesn't found on other - /// replicas. - LostForever, - }; - /// Search for missing part on other replicas or covering part on all replicas (including our replica). - MissingPartSearchResult searchForMissingPartOnOtherReplicas(const String & part_name); + /// Returns false if the part is lost forever. 
+ bool searchForMissingPartOnOtherReplicas(const String & part_name) const; StorageReplicatedMergeTree & storage; String log_name; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index b1ba06c77f9..56b8d431588 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -3351,6 +3351,17 @@ bool StorageReplicatedMergeTree::canExecuteFetch(const ReplicatedMergeTreeLogEnt return false; } + if (entry.source_replica.empty()) + { + auto part = getPartIfExists(entry.new_part_name, {MergeTreeDataPartState::Active, MergeTreeDataPartState::Outdated, MergeTreeDataPartState::Deleting}); + if (part && part->was_removed_as_broken) + { + disable_reason = fmt::format("Not executing fetch of part {} because we still have broken part with that name. " + "Waiting for the broken part to be removed first.", entry.new_part_name); + return false; + } + } + return true; } @@ -3731,23 +3742,44 @@ void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_n { auto zookeeper = getZooKeeper(); + DataPartPtr broken_part; + auto outdate_broken_part = [this, &broken_part]() + { + if (broken_part) + return; + DataPartsLock lock = lockParts(); + if (broken_part->getState() == DataPartState::Active) + removePartsFromWorkingSet(NO_TRANSACTION_RAW, {broken_part}, true, &lock); + }; + /// We don't know exactly what happened to broken part /// and we are going to remove all covered log entries. /// It's quite dangerous, so clone covered parts to detached. auto broken_part_info = MergeTreePartInfo::fromPartName(part_name, format_version); - auto partition_range = getVisibleDataPartsVectorInPartition(getContext(), broken_part_info.partition_id); + auto partition_range = getDataPartsVectorInPartitionForInternalUsage({MergeTreeDataPartState::Active, MergeTreeDataPartState::Outdated}, + broken_part_info.partition_id); for (const auto & part : partition_range) { if (!broken_part_info.contains(part->info)) continue; - /// Broken part itself either already moved to detached or does not exist. - assert(broken_part_info != part->info); - part->makeCloneInDetached("covered-by-broken", getInMemoryMetadataPtr()); + if (broken_part_info == part->info) + { + chassert(!broken_part); + chassert(!storage_init); + part->was_removed_as_broken = true; + part->makeCloneInDetached("broken", getInMemoryMetadataPtr()); + broken_part = part; + } + else + { + part->makeCloneInDetached("covered-by-broken", getInMemoryMetadataPtr()); + } } ThreadFuzzer::maybeInjectSleep(); + ThreadFuzzer::maybeInjectMemoryLimitException(); /// It's possible that queue contains entries covered by part_name. /// For example, we had GET_PART all_1_42_5 and MUTATE_PART all_1_42_5_63, @@ -3762,6 +3794,7 @@ void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_n queue.removePartProducingOpsInRange(zookeeper, broken_part_info, /* covering_entry= */ {}); ThreadFuzzer::maybeInjectSleep(); + ThreadFuzzer::maybeInjectMemoryLimitException(); String part_path = fs::path(replica_path) / "parts" / part_name; @@ -3780,7 +3813,7 @@ void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_n /// but we are going to remove it from /parts and add to queue again. 
Coordination::Stat is_lost_stat; String is_lost_value = zookeeper->get(replica_path + "/is_lost", &is_lost_stat); - assert(is_lost_value == "0"); + chassert(is_lost_value == "0"); ops.emplace_back(zkutil::makeSetRequest(replica_path + "/is_lost", is_lost_value, is_lost_stat.version)); part_create_time = stat.ctime / 1000; @@ -3802,12 +3835,8 @@ void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_n ReplicatedMergeTreeMergePredicate merge_pred = queue.getMergePredicate(zookeeper, PartitionIdsHint{broken_part_info.partition_id}); if (merge_pred.isGoingToBeDropped(broken_part_info)) { - LOG_INFO(log, "Broken part {} is covered by drop range, don't need to fetch it, removing it from ZooKeeper", part_name); - - /// But we have to remove it from ZooKeeper because broken parts are not removed from ZK during Outdated parts cleanup - /// There's a chance that DROP_RANGE will remove it, but only if it was not already removed by cleanup thread - if (exists_in_zookeeper) - removePartsFromZooKeeperWithRetries({part_name}); + LOG_INFO(log, "Broken part {} is covered by drop range, don't need to fetch it", part_name); + outdate_broken_part(); return; } @@ -3836,10 +3865,11 @@ void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_n zkutil::KeeperMultiException::check(rc, ops, results); - String path_created = dynamic_cast(*results.back()).path_created; - log_entry->znode_name = path_created.substr(path_created.find_last_of('/') + 1); - queue.insert(zookeeper, log_entry); - break; + /// Make the part outdated after creating the log entry. + /// Otherwise, if we failed to create the entry, cleanup thread could remove the part from ZooKeeper (leading to diverged replicas) + outdate_broken_part(); + queue_updating_task->schedule(); + return; } } @@ -6841,10 +6871,10 @@ void StorageReplicatedMergeTree::clearOldPartsAndRemoveFromZKImpl(zkutil::ZooKee { /// Broken part can be removed from zk by removePartAndEnqueueFetch(...) only. /// Removal without enqueueing a fetch leads to intersecting parts. 
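            /// (Otherwise another replica may assign a merge over the freed block range, and its
            /// result could intersect parts that still exist locally on this replica.)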
- if (part->is_duplicate || part->outdated_because_broken) + if (part->is_duplicate || part->is_unexpected_local_part) { - LOG_WARNING(log, "Will not remove part {} from ZooKeeper (is_duplicate: {}, outdated_because_broken: {})", - part->name, part->is_duplicate, part->outdated_because_broken); + LOG_WARNING(log, "Will not remove part {} from ZooKeeper (is_duplicate: {}, is_unexpected_local_part: {})", + part->name, part->is_duplicate, part->is_unexpected_local_part); parts_to_delete_only_from_filesystem.emplace_back(part); } else @@ -8189,7 +8219,7 @@ CheckResults StorageReplicatedMergeTree::checkData(const ASTPtr & query, Context { try { - results.push_back(part_check_thread.checkPart(part->name)); + results.push_back(part_check_thread.checkPartAndFix(part->name)); } catch (const Exception & ex) { From 289d9849d408d9bd38e95d89b6434a8d6bf57664 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 22 Jun 2023 22:50:09 +0200 Subject: [PATCH 1185/1997] Allow SQL standard FETCH without OFFSET --- src/Parsers/ParserSelectQuery.cpp | 94 ++++++++++--------- .../02790_sql_standard_fetch.reference | 36 +++++++ .../0_stateless/02790_sql_standard_fetch.sql | 31 ++++++ 3 files changed, 119 insertions(+), 42 deletions(-) create mode 100644 tests/queries/0_stateless/02790_sql_standard_fetch.reference create mode 100644 tests/queries/0_stateless/02790_sql_standard_fetch.sql diff --git a/src/Parsers/ParserSelectQuery.cpp b/src/Parsers/ParserSelectQuery.cpp index 1c48f773823..341c1ef60b4 100644 --- a/src/Parsers/ParserSelectQuery.cpp +++ b/src/Parsers/ParserSelectQuery.cpp @@ -292,6 +292,9 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) /// This is needed for TOP expression, because it can also use WITH TIES. bool limit_with_ties_occured = false; + bool has_offset_clause = false; + bool offset_clause_has_sql_standard_row_or_rows = false; /// OFFSET offset_row_count {ROW | ROWS} + /// LIMIT length | LIMIT offset, length | LIMIT count BY expr-list | LIMIT offset, length BY expr-list if (s_limit.ignore(pos, expected)) { @@ -316,6 +319,8 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { if (!exp_elem.parse(pos, limit_offset, expected)) return false; + + has_offset_clause = true; } else if (s_with_ties.ignore(pos, expected)) { @@ -351,60 +356,65 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } else if (s_offset.ignore(pos, expected)) { - /// OFFSET offset_row_count {ROW | ROWS} FETCH {FIRST | NEXT} fetch_row_count {ROW | ROWS} {ONLY | WITH TIES} - bool offset_with_fetch_maybe = false; + /// OFFSET without LIMIT + + has_offset_clause = true; if (!exp_elem.parse(pos, limit_offset, expected)) return false; + /// SQL standard OFFSET N ROW[S] ... 
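+            /// Examples of what is accepted from this point on (cf. 02790_sql_standard_fetch.sql below):
+            ///     OFFSET 3 ROWS
+            ///     OFFSET 3 ROWS FETCH NEXT 5 ROWS ONLY
+            ///     FETCH FIRST 5 ROWS WITH TIES    -- FETCH without OFFSET is the new part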
+ + if (s_row.ignore(pos, expected)) + offset_clause_has_sql_standard_row_or_rows = true; + + if (s_rows.ignore(pos, expected)) + { + if (offset_clause_has_sql_standard_row_or_rows) + throw Exception(ErrorCodes::ROW_AND_ROWS_TOGETHER, "Can not use ROW and ROWS together"); + + offset_clause_has_sql_standard_row_or_rows = true; + } + } + + /// SQL standard FETCH (either following SQL standard OFFSET or following ORDER BY) + if ((!has_offset_clause || offset_clause_has_sql_standard_row_or_rows) + && s_fetch.ignore(pos, expected)) + { + /// FETCH clause must exist with "ORDER BY" + if (!order_expression_list) + throw Exception(ErrorCodes::OFFSET_FETCH_WITHOUT_ORDER_BY, "Can not use OFFSET FETCH clause without ORDER BY"); + + if (s_first.ignore(pos, expected)) + { + if (s_next.ignore(pos, expected)) + throw Exception(ErrorCodes::FIRST_AND_NEXT_TOGETHER, "Can not use FIRST and NEXT together"); + } + else if (!s_next.ignore(pos, expected)) + return false; + + if (!exp_elem.parse(pos, limit_length, expected)) + return false; + if (s_row.ignore(pos, expected)) { if (s_rows.ignore(pos, expected)) throw Exception(ErrorCodes::ROW_AND_ROWS_TOGETHER, "Can not use ROW and ROWS together"); - offset_with_fetch_maybe = true; } - else if (s_rows.ignore(pos, expected)) + else if (!s_rows.ignore(pos, expected)) + return false; + + if (s_with_ties.ignore(pos, expected)) { - offset_with_fetch_maybe = true; + select_query->limit_with_ties = true; } - - if (offset_with_fetch_maybe && s_fetch.ignore(pos, expected)) + else if (s_only.ignore(pos, expected)) { - /// OFFSET FETCH clause must exists with "ORDER BY" - if (!order_expression_list) - throw Exception(ErrorCodes::OFFSET_FETCH_WITHOUT_ORDER_BY, "Can not use OFFSET FETCH clause without ORDER BY"); - - if (s_first.ignore(pos, expected)) - { - if (s_next.ignore(pos, expected)) - throw Exception(ErrorCodes::FIRST_AND_NEXT_TOGETHER, "Can not use FIRST and NEXT together"); - } - else if (!s_next.ignore(pos, expected)) - return false; - - if (!exp_elem.parse(pos, limit_length, expected)) - return false; - - if (s_row.ignore(pos, expected)) - { - if (s_rows.ignore(pos, expected)) - throw Exception(ErrorCodes::ROW_AND_ROWS_TOGETHER, "Can not use ROW and ROWS together"); - } - else if (!s_rows.ignore(pos, expected)) - return false; - - if (s_with_ties.ignore(pos, expected)) - { - select_query->limit_with_ties = true; - } - else if (s_only.ignore(pos, expected)) - { - select_query->limit_with_ties = false; - } - else - { - return false; - } + select_query->limit_with_ties = false; + } + else + { + return false; } } diff --git a/tests/queries/0_stateless/02790_sql_standard_fetch.reference b/tests/queries/0_stateless/02790_sql_standard_fetch.reference new file mode 100644 index 00000000000..429eecbc936 --- /dev/null +++ b/tests/queries/0_stateless/02790_sql_standard_fetch.reference @@ -0,0 +1,36 @@ +┌─id─┬─name──┬─department─┬─salary─┐ +│ 25 │ Frank │ it │ 120 │ +│ 23 │ Henry │ it │ 104 │ +│ 24 │ Irene │ it │ 104 │ +│ 33 │ Alice │ sales │ 100 │ +│ 32 │ Dave │ sales │ 96 │ +└────┴───────┴────────────┴────────┘ +┌─id─┬─name──┬─department─┬─salary─┐ +│ 25 │ Frank │ it │ 120 │ +│ 23 │ Henry │ it │ 104 │ +│ 24 │ Irene │ it │ 104 │ +│ 33 │ Alice │ sales │ 100 │ +│ 32 │ Dave │ sales │ 96 │ +└────┴───────┴────────────┴────────┘ +┌─id─┬─name──┬─department─┬─salary─┐ +│ 25 │ Frank │ it │ 120 │ +│ 23 │ Henry │ it │ 104 │ +│ 24 │ Irene │ it │ 104 │ +│ 33 │ Alice │ sales │ 100 │ +│ 31 │ Cindy │ sales │ 96 │ +│ 32 │ Dave │ sales │ 96 │ +└────┴───────┴────────────┴────────┘ 
+┌─id─┬─name──┬─department─┬─salary─┐ +│ 33 │ Alice │ sales │ 100 │ +│ 31 │ Cindy │ sales │ 96 │ +│ 32 │ Dave │ sales │ 96 │ +│ 22 │ Grace │ it │ 90 │ +│ 21 │ Emma │ it │ 84 │ +└────┴───────┴────────────┴────────┘ +┌─id─┬─name──┬─department─┬─salary─┐ +│ 33 │ Alice │ sales │ 100 │ +│ 31 │ Cindy │ sales │ 96 │ +│ 32 │ Dave │ sales │ 96 │ +│ 22 │ Grace │ it │ 90 │ +│ 21 │ Emma │ it │ 84 │ +└────┴───────┴────────────┴────────┘ diff --git a/tests/queries/0_stateless/02790_sql_standard_fetch.sql b/tests/queries/0_stateless/02790_sql_standard_fetch.sql new file mode 100644 index 00000000000..58ffa035d47 --- /dev/null +++ b/tests/queries/0_stateless/02790_sql_standard_fetch.sql @@ -0,0 +1,31 @@ +# https://antonz.org/sql-fetch/ + +CREATE TEMPORARY TABLE employees (id UInt64, name String, department String, salary UInt64); +INSERT INTO employees VALUES (23, 'Henry', 'it', 104), (24, 'Irene', 'it', 104), (25, 'Frank', 'it', 120), (31, 'Cindy', 'sales', 96), (33, 'Alice', 'sales', 100), (32, 'Dave', 'sales', 96), (22, 'Grace', 'it', 90), (21, 'Emma', 'it', '84'); + +select * from employees +order by salary desc +limit 5 +format PrettyCompactNoEscapes; + +select * from employees +order by salary desc +fetch first 5 rows only +format PrettyCompactNoEscapes; + +select * from employees +order by salary desc +fetch first 5 rows with ties +format PrettyCompactNoEscapes; + +select * from employees +order by salary desc +offset 3 rows +fetch next 5 rows only +format PrettyCompactNoEscapes; + +select * from employees +order by salary desc +offset 3 rows +fetch first 5 rows only +format PrettyCompactNoEscapes; From caa75a7fc38ab95a405488b3826022683160eed9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 4 Jul 2023 20:59:28 +0200 Subject: [PATCH 1186/1997] Make the test stable --- .../0_stateless/02790_sql_standard_fetch.sql | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/02790_sql_standard_fetch.sql b/tests/queries/0_stateless/02790_sql_standard_fetch.sql index 58ffa035d47..4204279a746 100644 --- a/tests/queries/0_stateless/02790_sql_standard_fetch.sql +++ b/tests/queries/0_stateless/02790_sql_standard_fetch.sql @@ -1,30 +1,33 @@ -# https://antonz.org/sql-fetch/ +-- https://antonz.org/sql-fetch/ CREATE TEMPORARY TABLE employees (id UInt64, name String, department String, salary UInt64); INSERT INTO employees VALUES (23, 'Henry', 'it', 104), (24, 'Irene', 'it', 104), (25, 'Frank', 'it', 120), (31, 'Cindy', 'sales', 96), (33, 'Alice', 'sales', 100), (32, 'Dave', 'sales', 96), (22, 'Grace', 'it', 90), (21, 'Emma', 'it', '84'); -select * from employees +-- Determinism +SET max_threads = 1, parallelize_output_from_storages = 0; + +select * from (SELECT * FROM employees ORDER BY id, name, department, salary) order by salary desc limit 5 format PrettyCompactNoEscapes; -select * from employees +select * from (SELECT * FROM employees ORDER BY id, name, department, salary) order by salary desc fetch first 5 rows only format PrettyCompactNoEscapes; -select * from employees +select * from (SELECT * FROM employees ORDER BY id, name, department, salary) order by salary desc fetch first 5 rows with ties format PrettyCompactNoEscapes; -select * from employees +select * from (SELECT * FROM employees ORDER BY id, name, department, salary) order by salary desc offset 3 rows fetch next 5 rows only format PrettyCompactNoEscapes; -select * from employees +select * from (SELECT * FROM employees ORDER BY id, name, department, salary) order by salary desc offset 3 
rows fetch first 5 rows only From 99f02e0f6bfb4682e972b8258fea7be02c0f2691 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Tue, 4 Jul 2023 22:06:17 +0300 Subject: [PATCH 1187/1997] Add const to trying to fix build --- src/Functions/array/range.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/array/range.cpp b/src/Functions/array/range.cpp index 47e90de2e2b..b14f2baca15 100644 --- a/src/Functions/array/range.cpp +++ b/src/Functions/array/range.cpp @@ -414,7 +414,7 @@ private: if (arguments.size() == 1) { throwIfNullValue(arguments[0]); - auto * col = arguments[0].column.get(); + const auto * col = arguments[0].column.get(); if (arguments[0].type->isNullable()) { const auto * nullable = checkAndGetColumn(*arguments[0].column); From bb5b47cacf30c84f51e3c8a70040bf5707a5e742 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Tue, 4 Jul 2023 19:07:11 +0000 Subject: [PATCH 1188/1997] do not access Exception::thread_frame_pointers if not initialized --- src/Daemon/BaseDaemon.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index 6d29523a354..a75aac7a08e 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -154,7 +154,10 @@ static void signalHandler(int sig, siginfo_t * info, void * context) writePODBinary(*info, out); writePODBinary(signal_context, out); writePODBinary(stack_trace, out); - writeVectorBinary(Exception::thread_frame_pointers, out); + if (Exception::enable_job_stack_trace) + writeVectorBinary(Exception::thread_frame_pointers, out); + else + writeVarUInt(0, out); writeBinary(static_cast(getThreadId()), out); writePODBinary(current_thread, out); From 8551a38e73df8fb4ee29ebbe0825e2c57468a70b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 4 Jul 2023 21:19:30 +0200 Subject: [PATCH 1189/1997] Update some tests --- .../test_on_cluster_timeouts/configs/users_config.xml | 1 + tests/integration/test_quorum_inserts_parallel/test.py | 2 +- .../integration/test_read_temporary_tables_on_failure/test.py | 2 +- tests/integration/test_rename_column/test.py | 4 ++-- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_on_cluster_timeouts/configs/users_config.xml b/tests/integration/test_on_cluster_timeouts/configs/users_config.xml index 76f01279ce4..85bd9b7215a 100644 --- a/tests/integration/test_on_cluster_timeouts/configs/users_config.xml +++ b/tests/integration/test_on_cluster_timeouts/configs/users_config.xml @@ -2,6 +2,7 @@ 2 + 0 diff --git a/tests/integration/test_quorum_inserts_parallel/test.py b/tests/integration/test_quorum_inserts_parallel/test.py index 99548e37a54..dabd0e5ee43 100644 --- a/tests/integration/test_quorum_inserts_parallel/test.py +++ b/tests/integration/test_quorum_inserts_parallel/test.py @@ -27,7 +27,7 @@ def started_cluster(): def test_parallel_quorum_actually_parallel(started_cluster): - settings = {"insert_quorum": "3", "insert_quorum_parallel": "1"} + settings = {"insert_quorum": "3", "insert_quorum_parallel": "1", "function_sleep_max_microseconds_per_block": "0"} for i, node in enumerate([node1, node2, node3]): node.query( "CREATE TABLE r (a UInt64, b String) ENGINE=ReplicatedMergeTree('/test/r', '{num}') ORDER BY tuple()".format( diff --git a/tests/integration/test_read_temporary_tables_on_failure/test.py b/tests/integration/test_read_temporary_tables_on_failure/test.py index fd1d92eff92..b137ebc8c94 100644 --- a/tests/integration/test_read_temporary_tables_on_failure/test.py +++ 
b/tests/integration/test_read_temporary_tables_on_failure/test.py @@ -19,7 +19,7 @@ def start_cluster(): def test_different_versions(start_cluster): with pytest.raises(QueryTimeoutExceedException): - node.query("SELECT sleepEachRow(3) FROM numbers(10)", timeout=5) + node.query("SELECT sleepEachRow(3) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 0", timeout=5) with pytest.raises(QueryRuntimeException): node.query("SELECT 1", settings={"max_concurrent_queries_for_user": 1}) assert node.contains_in_log("Too many simultaneous queries for user") diff --git a/tests/integration/test_rename_column/test.py b/tests/integration/test_rename_column/test.py index 8dc57cf08ff..1c87b101b11 100644 --- a/tests/integration/test_rename_column/test.py +++ b/tests/integration/test_rename_column/test.py @@ -159,7 +159,7 @@ def insert( ) elif slow: query.append( - "INSERT INTO {table_name} ({col0}, {col1}) SELECT number + sleepEachRow(0.001) AS {col0}, number + 1 AS {col1} FROM numbers_mt({chunk})".format( + "INSERT INTO {table_name} ({col0}, {col1}) SELECT number + sleepEachRow(0.001) AS {col0}, number + 1 AS {col1} FROM numbers_mt({chunk}) SETTINGS function_sleep_max_microseconds_per_block = 0".format( table_name=table_name, chunk=chunk, col0=col_names[0], @@ -198,7 +198,7 @@ def select( try: if slow: r = node.query( - "SELECT count() FROM (SELECT num2, sleepEachRow(0.5) FROM {} WHERE {} % 1000 > 0)".format( + "SELECT count() FROM (SELECT num2, sleepEachRow(0.5) FROM {} WHERE {} % 1000 > 0) SETTINGS function_sleep_max_microseconds_per_block = 0".format( table_name, col_name ) ) From 966e93b9084541c311ddb482c7e767413ceb359f Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 4 Jul 2023 17:02:00 +0000 Subject: [PATCH 1190/1997] Point to existing system tables for alternatives --- docs/en/sql-reference/statements/show.md | 32 ++++---- .../InterpreterShowIndexesQuery.cpp | 8 +- .../0_stateless/02724_show_indexes.reference | 76 +++++++++---------- 3 files changed, 58 insertions(+), 58 deletions(-) diff --git a/docs/en/sql-reference/statements/show.md b/docs/en/sql-reference/statements/show.md index d30ded6f3dc..b5bacef7b1f 100644 --- a/docs/en/sql-reference/statements/show.md +++ b/docs/en/sql-reference/statements/show.md @@ -286,21 +286,21 @@ equivalent. If no database is specified, the query assumes the current database The optional keyword `EXTENDED` currently has no effect, it only exists for MySQL compatibility. The statement produces a result table with the following structure: -- table - The name of the table (String) +- table - The name of the table. (String) - non_unique - Always `1` as ClickHouse does not support uniqueness constraints. (UInt8) -- key_name - The name of the index, `PRIMARY` if the index is a primary key index (String) -- seq_in_index - Currently unused -- column_name - Currently unused -- collation - The sorting of the column in the index, `A` if ascending, `D` if descending, `NULL` if unsorted (Nullable(String)) +- key_name - The name of the index, `PRIMARY` if the index is a primary key index. (String) +- seq_in_index - Currently always `1`. (In MySQL, this field denotes the position of the column in a non-functional index.) (UInt8) +- column_name - Currently always `` (empty string), also see field `expression`. (In MySQL, this field denotes the name of the column in a non-functional index.) (String) +- collation - The sorting of the column in the index: `A` if ascending, `D` if descending, `NULL` if unsorted. 
(Nullable(String)) - cardinality - An estimation of the index cardinality (number of unique values in the index). Currently always 0. (UInt64) -- sub_part - Always `NULL` because ClickHouse does not support index prefixes like MySQL (Nullable(String)) -- packed - Always `NULL` because ClickHouse does not support packed (prefix-compressed) indexes like MySQL (Nullable(String)) +- sub_part - Always `NULL` because ClickHouse does not support index prefixes like MySQL. (Nullable(String)) +- packed - Always `NULL` because ClickHouse does not support packed indexes (like MySQL). (Nullable(String)) - null - Currently unused - index_type - The index type, e.g. `PRIMARY`, `MINMAX`, `BLOOM_FILTER` etc. (String) -- comment - Additional information about the index, currently always `` (empty string) (String) -- index_comment - `` (empty string) because indexes in ClickHouse cannot have a `COMMENT` field like in MySQL (String) -- visible - If the index is visible to the optimizer, always `YES` (String) -- expression - The index expression (String) +- comment - Additional information about the index, currently always `` (empty string). (String) +- index_comment - `` (empty string) because indexes in ClickHouse cannot have a `COMMENT` field (like in MySQL). (String) +- visible - If the index is visible to the optimizer, always `YES`. (String) +- expression - The index expression. (In MySQL this field is only used for functional-indexes.) (String) **Examples** @@ -314,11 +314,11 @@ Result: ``` text ┌─table─┬─non_unique─┬─key_name─┬─seq_in_index─┬─column_name─┬─collation─┬─cardinality─┬─sub_part─┬─packed─┬─null─┬─index_type───┬─comment─┬─index_comment─┬─visible─┬─expression─┐ -│ tbl │ 1 │ blf_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ BLOOM_FILTER │ │ │ YES │ d, b │ -│ tbl │ 1 │ mm1_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ MINMAX │ │ │ YES │ a, c, d │ -│ tbl │ 1 │ mm2_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ MINMAX │ │ │ YES │ c, d, e │ -│ tbl │ 1 │ PRIMARY │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ A │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ PRIMARY │ │ │ YES │ c, a │ -│ tbl │ 1 │ set_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ SET │ │ │ YES │ e │ +│ tbl │ 1 │ blf_idx │ 1 │ │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ BLOOM_FILTER │ │ │ YES │ d, b │ +│ tbl │ 1 │ mm1_idx │ 1 │ │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ MINMAX │ │ │ YES │ a, c, d │ +│ tbl │ 1 │ mm2_idx │ 1 │ │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ MINMAX │ │ │ YES │ c, d, e │ +│ tbl │ 1 │ PRIMARY │ 1 │ │ A │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ PRIMARY │ │ │ YES │ c, a │ +│ tbl │ 1 │ set_idx │ 1 │ │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ SET │ │ │ YES │ e │ └───────┴────────────┴──────────┴──────────────┴─────────────┴───────────┴─────────────┴──────────┴────────┴──────┴──────────────┴─────────┴───────────────┴─────────┴────────────┘ ``` diff --git a/src/Interpreters/InterpreterShowIndexesQuery.cpp b/src/Interpreters/InterpreterShowIndexesQuery.cpp index 3c001329ae3..5aafc22389f 100644 --- a/src/Interpreters/InterpreterShowIndexesQuery.cpp +++ b/src/Interpreters/InterpreterShowIndexesQuery.cpp @@ -42,8 +42,8 @@ FROM ( name AS table, 1 AS non_unique, 'PRIMARY' AS key_name, - NULL AS seq_in_index, - NULL AS column_name, + 1 AS seq_in_index, + '' AS column_name, 'A' AS collation, 0 AS cardinality, NULL AS sub_part, @@ -63,8 +63,8 @@ FROM ( table AS table, 1 AS non_unique, name AS key_name, - NULL AS seq_in_index, - NULL AS column_name, + 1 AS seq_in_index, + '' AS column_name, NULL AS collation, 0 AS cardinality, NULL AS sub_part, diff --git 
a/tests/queries/0_stateless/02724_show_indexes.reference b/tests/queries/0_stateless/02724_show_indexes.reference index 69cd405ec86..c5b7883e17e 100644 --- a/tests/queries/0_stateless/02724_show_indexes.reference +++ b/tests/queries/0_stateless/02724_show_indexes.reference @@ -1,45 +1,45 @@ --- Aliases of SHOW INDEX -tbl 1 blf_idx \N \N \N 0 \N \N \N BLOOM_FILTER YES d, b -tbl 1 mm1_idx \N \N \N 0 \N \N \N MINMAX YES a, c, d -tbl 1 mm2_idx \N \N \N 0 \N \N \N MINMAX YES c, d, e -tbl 1 PRIMARY \N \N A 0 \N \N \N PRIMARY YES c, a -tbl 1 set_idx \N \N \N 0 \N \N \N SET YES e -tbl 1 blf_idx \N \N \N 0 \N \N \N BLOOM_FILTER YES d, b -tbl 1 mm1_idx \N \N \N 0 \N \N \N MINMAX YES a, c, d -tbl 1 mm2_idx \N \N \N 0 \N \N \N MINMAX YES c, d, e -tbl 1 PRIMARY \N \N A 0 \N \N \N PRIMARY YES c, a -tbl 1 set_idx \N \N \N 0 \N \N \N SET YES e -tbl 1 blf_idx \N \N \N 0 \N \N \N BLOOM_FILTER YES d, b -tbl 1 mm1_idx \N \N \N 0 \N \N \N MINMAX YES a, c, d -tbl 1 mm2_idx \N \N \N 0 \N \N \N MINMAX YES c, d, e -tbl 1 PRIMARY \N \N A 0 \N \N \N PRIMARY YES c, a -tbl 1 set_idx \N \N \N 0 \N \N \N SET YES e -tbl 1 blf_idx \N \N \N 0 \N \N \N BLOOM_FILTER YES d, b -tbl 1 mm1_idx \N \N \N 0 \N \N \N MINMAX YES a, c, d -tbl 1 mm2_idx \N \N \N 0 \N \N \N MINMAX YES c, d, e -tbl 1 PRIMARY \N \N A 0 \N \N \N PRIMARY YES c, a -tbl 1 set_idx \N \N \N 0 \N \N \N SET YES e +tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b +tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d +tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e +tbl 1 PRIMARY 1 A 0 \N \N \N PRIMARY YES c, a +tbl 1 set_idx 1 \N 0 \N \N \N SET YES e +tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b +tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d +tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e +tbl 1 PRIMARY 1 A 0 \N \N \N PRIMARY YES c, a +tbl 1 set_idx 1 \N 0 \N \N \N SET YES e +tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b +tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d +tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e +tbl 1 PRIMARY 1 A 0 \N \N \N PRIMARY YES c, a +tbl 1 set_idx 1 \N 0 \N \N \N SET YES e +tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b +tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d +tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e +tbl 1 PRIMARY 1 A 0 \N \N \N PRIMARY YES c, a +tbl 1 set_idx 1 \N 0 \N \N \N SET YES e --- EXTENDED -tbl 1 blf_idx \N \N \N 0 \N \N \N BLOOM_FILTER YES d, b -tbl 1 mm1_idx \N \N \N 0 \N \N \N MINMAX YES a, c, d -tbl 1 mm2_idx \N \N \N 0 \N \N \N MINMAX YES c, d, e -tbl 1 PRIMARY \N \N A 0 \N \N \N PRIMARY YES c, a -tbl 1 set_idx \N \N \N 0 \N \N \N SET YES e +tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b +tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d +tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e +tbl 1 PRIMARY 1 A 0 \N \N \N PRIMARY YES c, a +tbl 1 set_idx 1 \N 0 \N \N \N SET YES e --- WHERE --- Check with weird table names -$4@^7 1 PRIMARY \N \N A 0 \N \N \N PRIMARY YES c -NULL 1 PRIMARY \N \N A 0 \N \N \N PRIMARY YES c -\' 1 PRIMARY \N \N A 0 \N \N \N PRIMARY YES c -\' 1 PRIMARY \N \N A 0 \N \N \N PRIMARY YES c +$4@^7 1 PRIMARY 1 A 0 \N \N \N PRIMARY YES c +NULL 1 PRIMARY 1 A 0 \N \N \N PRIMARY YES c +\' 1 PRIMARY 1 A 0 \N \N \N PRIMARY YES c +\' 1 PRIMARY 1 A 0 \N \N \N PRIMARY YES c --- Original table -tbl 1 blf_idx \N \N \N 0 \N \N \N BLOOM_FILTER YES d, b -tbl 1 mm1_idx \N \N \N 0 \N \N \N MINMAX YES a, c, d -tbl 1 mm2_idx \N \N \N 0 \N \N \N MINMAX YES c, d, e -tbl 1 PRIMARY \N \N A 0 \N \N \N PRIMARY YES c, a -tbl 1 set_idx \N \N \N 0 \N \N \N SET YES e +tbl 1 blf_idx 1 \N 
0 \N \N \N BLOOM_FILTER YES d, b +tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d +tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e +tbl 1 PRIMARY 1 A 0 \N \N \N PRIMARY YES c, a +tbl 1 set_idx 1 \N 0 \N \N \N SET YES e --- Equally named table in other database -tbl 1 mmi_idx \N \N \N 0 \N \N \N MINMAX YES b -tbl 1 PRIMARY \N \N A 0 \N \N \N PRIMARY YES a +tbl 1 mmi_idx 1 \N 0 \N \N \N MINMAX YES b +tbl 1 PRIMARY 1 A 0 \N \N \N PRIMARY YES a --- Short form -tbl 1 mmi_idx \N \N \N 0 \N \N \N MINMAX YES b -tbl 1 PRIMARY \N \N A 0 \N \N \N PRIMARY YES a +tbl 1 mmi_idx 1 \N 0 \N \N \N MINMAX YES b +tbl 1 PRIMARY 1 A 0 \N \N \N PRIMARY YES a From 13cc329bfbc0f8e7506f082ba7a02218fc09f70d Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 4 Jul 2023 19:32:43 +0000 Subject: [PATCH 1191/1997] Automatic style fix --- tests/integration/test_quorum_inserts_parallel/test.py | 6 +++++- .../test_read_temporary_tables_on_failure/test.py | 5 ++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_quorum_inserts_parallel/test.py b/tests/integration/test_quorum_inserts_parallel/test.py index dabd0e5ee43..7f8784d822c 100644 --- a/tests/integration/test_quorum_inserts_parallel/test.py +++ b/tests/integration/test_quorum_inserts_parallel/test.py @@ -27,7 +27,11 @@ def started_cluster(): def test_parallel_quorum_actually_parallel(started_cluster): - settings = {"insert_quorum": "3", "insert_quorum_parallel": "1", "function_sleep_max_microseconds_per_block": "0"} + settings = { + "insert_quorum": "3", + "insert_quorum_parallel": "1", + "function_sleep_max_microseconds_per_block": "0", + } for i, node in enumerate([node1, node2, node3]): node.query( "CREATE TABLE r (a UInt64, b String) ENGINE=ReplicatedMergeTree('/test/r', '{num}') ORDER BY tuple()".format( diff --git a/tests/integration/test_read_temporary_tables_on_failure/test.py b/tests/integration/test_read_temporary_tables_on_failure/test.py index b137ebc8c94..77c8f3cf26b 100644 --- a/tests/integration/test_read_temporary_tables_on_failure/test.py +++ b/tests/integration/test_read_temporary_tables_on_failure/test.py @@ -19,7 +19,10 @@ def start_cluster(): def test_different_versions(start_cluster): with pytest.raises(QueryTimeoutExceedException): - node.query("SELECT sleepEachRow(3) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 0", timeout=5) + node.query( + "SELECT sleepEachRow(3) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 0", + timeout=5, + ) with pytest.raises(QueryRuntimeException): node.query("SELECT 1", settings={"max_concurrent_queries_for_user": 1}) assert node.contains_in_log("Too many simultaneous queries for user") From 3021180e0133c8904a29cfc1d4254a0504f9a5fb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 4 Jul 2023 21:33:12 +0200 Subject: [PATCH 1192/1997] Update --- .github/workflows/master.yml | 23 +++++++++-------------- .github/workflows/pull_request.yml | 24 ++++++++++-------------- tests/ci/ci_config.py | 2 +- 3 files changed, 20 insertions(+), 29 deletions(-) diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index c6270af0efa..6996221e1aa 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -851,8 +851,8 @@ jobs: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinRISCV64: - needs: [DockerHubPush] - runs-on: [self-hosted, builder] + needs: [ DockerHubPush ] + runs-on: [ self-hosted, builder ] steps: - name: Set envs run: 
| @@ -864,38 +864,33 @@ jobs: BUILD_NAME=binary_riscv64 EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | - # shellcheck disable=SC2046 - docker kill $(docker ps -q) ||: - # shellcheck disable=SC2046 - docker rm -f $(docker ps -a -q) ||: + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" ############################################################################################ ##################################### Docker images ####################################### diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 0ec4d997a4d..fe7c3bba410 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -912,8 +912,8 @@ jobs: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinRISCV64: - needs: [DockerHubPush, FastTest, StyleCheck] - runs-on: [self-hosted, builder] + needs: [ DockerHubPush, FastTest, StyleCheck ] + runs-on: [ self-hosted, builder ] steps: - name: Set envs run: | @@ -925,36 +925,32 @@ jobs: BUILD_NAME=binary_riscv64 EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | - # shellcheck disable=SC2046 - docker kill $(docker ps -q) ||: - # shellcheck disable=SC2046 - docker rm -f $(docker ps -a -q) ||: + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" 
############################################################################################ ##################################### Docker images ####################################### diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 6f86c24184b..1777180a76e 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -174,7 +174,7 @@ CI_CONFIG = { "comment": "SSE2-only build", }, "binary_riscv64": { - "compiler": "clang-14-riscv64", + "compiler": "clang-16-riscv64", "build_type": "", "sanitizer": "", "package_type": "binary", From 6345879cdf4ba9c33f121a17a16e389761791de5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 4 Jul 2023 22:56:58 +0300 Subject: [PATCH 1193/1997] Update src/Disks/VolumeJBOD.cpp Co-authored-by: Sergei Trifonov --- src/Disks/VolumeJBOD.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/VolumeJBOD.cpp b/src/Disks/VolumeJBOD.cpp index 885b1d56b0d..519f3378c4c 100644 --- a/src/Disks/VolumeJBOD.cpp +++ b/src/Disks/VolumeJBOD.cpp @@ -46,11 +46,11 @@ VolumeJBOD::VolumeJBOD( for (const auto & disk : disks) { auto size = disk->getTotalSpace(); - sizes.push_back(*size); if (size) sum_size += *size; else break; + sizes.push_back(*size); } if (sizes.size() == disks.size()) { From 5a3299572626c5ce5fcd53759b134de49287a4e3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 4 Jul 2023 22:57:39 +0300 Subject: [PATCH 1194/1997] Update src/Disks/IVolume.cpp Co-authored-by: Sergei Trifonov --- src/Disks/IVolume.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/IVolume.cpp b/src/Disks/IVolume.cpp index 15b52acb422..43caf07d70a 100644 --- a/src/Disks/IVolume.cpp +++ b/src/Disks/IVolume.cpp @@ -51,7 +51,7 @@ IVolume::IVolume( std::optional IVolume::getMaxUnreservedFreeSpace() const { - std::optional res = 0; + std::optional res; for (const auto & disk : disks) res = std::max(res, disk->getUnreservedSpace()); return res; From c76cf53391426471d2a374b63c302e2a383258a5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 4 Jul 2023 22:14:37 +0200 Subject: [PATCH 1195/1997] Address review comments --- src/Disks/IVolume.cpp | 9 ++++++++- src/Disks/StoragePolicy.cpp | 13 ++++++++----- src/Disks/loadLocalDiskConfig.cpp | 2 +- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/src/Disks/IVolume.cpp b/src/Disks/IVolume.cpp index 43caf07d70a..0b072e6ba8b 100644 --- a/src/Disks/IVolume.cpp +++ b/src/Disks/IVolume.cpp @@ -53,7 +53,14 @@ std::optional IVolume::getMaxUnreservedFreeSpace() const { std::optional res; for (const auto & disk : disks) - res = std::max(res, disk->getUnreservedSpace()); + { + auto disk_unreserved_space = disk->getUnreservedSpace(); + if (!disk_unreserved_space) + return std::nullopt; /// There is at least one unlimited disk. 
+ + if (!res || *disk_unreserved_space > *res) + res = disk_unreserved_space; + } return res; } diff --git a/src/Disks/StoragePolicy.cpp b/src/Disks/StoragePolicy.cpp index 92cca23ca76..6b8d7186a15 100644 --- a/src/Disks/StoragePolicy.cpp +++ b/src/Disks/StoragePolicy.cpp @@ -209,14 +209,17 @@ DiskPtr StoragePolicy::tryGetDiskByName(const String & disk_name) const UInt64 StoragePolicy::getMaxUnreservedFreeSpace() const { - UInt64 res = 0; + std::optional res; for (const auto & volume : volumes) { - auto max_unreserved_for_volume = volume->getMaxUnreservedFreeSpace(); - if (max_unreserved_for_volume) - res = std::max(res, *max_unreserved_for_volume); + auto volume_unreserved_space = volume->getMaxUnreservedFreeSpace(); + if (!volume_unreserved_space) + return -1ULL; /// There is at least one unlimited disk. + + if (!res || *volume_unreserved_space > *res) + res = volume_unreserved_space; } - return res; + return res.value_or(-1ULL); } diff --git a/src/Disks/loadLocalDiskConfig.cpp b/src/Disks/loadLocalDiskConfig.cpp index 0e5eca17ca7..0c4a9e7af32 100644 --- a/src/Disks/loadLocalDiskConfig.cpp +++ b/src/Disks/loadLocalDiskConfig.cpp @@ -56,7 +56,7 @@ void loadDiskLocalConfig(const String & name, tmp_path = context->getPath(); // Create tmp disk for getting total disk space. - keep_free_space_bytes = static_cast(DiskLocal("tmp", tmp_path, 0).getTotalSpace() * ratio); + keep_free_space_bytes = static_cast(*DiskLocal("tmp", tmp_path, 0).getTotalSpace() * ratio); } } From 1ea526101242c4c0c64e6b0892b7fa2985f7f684 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 4 Jul 2023 22:19:53 +0200 Subject: [PATCH 1196/1997] Intermediate version --- src/Storages/MergeTree/DataPartsExchange.cpp | 2 + src/Storages/StorageReplicatedMergeTree.cpp | 105 +++++++++++++++++++ src/Storages/StorageReplicatedMergeTree.h | 17 ++- 3 files changed, 123 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 23bbc1c7f9d..7424a248491 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -203,6 +203,8 @@ void Service::processQuery(const HTMLForm & params, ReadBuffer & /*body*/, Write sendPartFromMemory(part, out, send_projections); else sendPartFromDisk(part, out, client_protocol_version, false, send_projections); + + data.addLastSentPart(part->name); } catch (const NetException &) { diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index dac9e6923a5..7f282b6c0e6 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -3928,6 +3928,111 @@ String StorageReplicatedMergeTree::findReplicaHavingPart(const String & part_nam return {}; } +void StorageReplicatedMergeTree::addLastSentPart(const MergeTreePartInfo & info) +{ + { + std::lock_guard lock(last_sent_parts_mutex); + last_sent_parts.emplace_back(info); + while (last_sent_parts.size() > LAST_SENT_PARS_WINDOW_SIZE) + last_sent_parts.pop_front(); + } + + last_sent_parts_cv.notify_all(); +} + +void StorageReplicatedMergeTree::waitForUniquePartsToBeFetchedByOtherReplicas(size_t wait_ms) +{ + if (wait_ms == 0) + { + LOG_INFO(log, "Will not wait for unique parts to be fetched by other replicas because wait time is zero"); + return; + } + + auto zookeeper = getZooKeeper(); + + auto unique_parts_set = findReplicaUniqueParts(replica_name, zookeeper_path, format_version, zookeeper); + if (unique_parts_set.empty()) + { + LOG_INFO(log, "Will 
not wait for unique parts to be fetched because we don't have any unique parts"); + return; + } + + auto wait_predicate = [&] () -> void + { + bool all_fetched = true; + for (const auto & part : unique_parts_set) + { + bool found = false; + for (const auto & sent_part : last_sent_parts) + { + if (sent_part.contains(part)) + { + found = true; + break; + } + } + if (!found) + { + all_fetched = false; + break; + } + } + return all_fetched; + }; + + std::unique_lock lock(last_sent_parts_mutex); + if (!last_sent_parts_cv.wait_for(last_sent_parts_cv, std::chrono::duration_cast(wait_ms), wait_predicate)) + LOG_WARNING(log, "Failed to wait for unqiue parts to be fetched in {} ms, {} parts can be left on this replica", wait_ms, unqiue_parts_set.size()); +} + +std::vector StorageReplicatedMergeTree::findReplicaUniqueParts(const String & replica_name_, const String & zookeeper_path_, MergeTreeDataFormatVersion format_version_, zkutil::ZooKeeper::Ptr zookeeper_) +{ + if (zookeeper_->exists(fs::path(zookeeper_path_) / "replicas" / replica_name_ / "is_active")) + return {}; + + Strings replicas = zookeeper_->getChildren(fs::path(zookeeper_path_) / "replicas"); + Strings our_parts; + std::vector data_parts_on_replicas; + for (const String & replica : replicas) + { + if (!zookeeper_->exists(fs::path(zookeeper_path_) / "replicas" / replica / "is_active")) + continue; + + Strings parts = zookeeper_->getChildren(fs::path(zookeeper_path_) / "replicas" / replica / "parts"); + if (replica == replica_name_) + { + our_parts = parts; + } + else + { + data_parts_on_replicas.emplace_back(format_version_); + for (const auto & part : parts) + { + if (!data_parts_on_replicas.back().getContainingPart(part).empty()) + data_parts_on_replicas.back().add(part); + } + } + } + + NameSet our_unique_parts; + for (const auto & part : our_parts) + { + bool found = false; + for (const auto & active_parts_set : data_parts_on_replicas) + { + if (!active_parts_set.getContainingPart(part).empty()) + { + found = true; + break; + } + } + if (!found) + our_unique_parts.insert(MergeTreePartInfo::fromPartName(part, format_version)); + } + + return our_unique_parts; +} + String StorageReplicatedMergeTree::findReplicaHavingCoveringPart(LogEntry & entry, bool active) { auto zookeeper = getZooKeeper(); diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index bdd3f0da5bf..4661f0a56da 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -340,6 +340,15 @@ public: /// Get a sequential consistent view of current parts. ReplicatedMergeTreeQuorumAddedParts::PartitionIdToMaxBlock getMaxAddedBlocks() const; + void addLastSentPart(const MergeTreePartInfo & info); + std::deque getLastSentParts() const + { + std::lock_guard lock(last_sent_parts_mutex); + return last_sent_parts; + } + + void waitForUniquePartsToBeFetchedByOtherReplicas(size_t wait_ms); + private: std::atomic_bool are_restoring_replica {false}; @@ -444,9 +453,14 @@ private: Poco::Event partial_shutdown_event {false}; /// Poco::Event::EVENT_MANUALRESET std::atomic shutdown_called {false}; - std::atomic flush_called {false}; + + static constexpr size_t LAST_SENT_PARS_WINDOW_SIZE = 1000; + std::mutex last_sent_parts_mutex; + std::condition_variable last_sent_parts_cv; + std::deque last_sent_parts; /// Threads. + /// /// A task that keeps track of the updates in the logs of all replicas and loads them into the queue. 
bool queue_update_in_progress = false; @@ -697,6 +711,7 @@ private: */ String findReplicaHavingCoveringPart(LogEntry & entry, bool active); String findReplicaHavingCoveringPart(const String & part_name, bool active, String & found_part_name); + static std::vector findReplicaUniqueParts(const String & replica_name_, const String & zookeeper_path_, MergeTreeDataFormatVersion format_version_, zkutil::ZooKeeper::Ptr zookeeper_); /** Download the specified part from the specified replica. * If `to_detached`, the part is placed in the `detached` directory. From ead43836f7b9f1eb04e8cd4e9c293f39ddf1ec1a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 4 Jul 2023 22:35:01 +0200 Subject: [PATCH 1197/1997] Fix the test --- .../02796_calculate_text_stack_trace.sql | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/02796_calculate_text_stack_trace.sql b/tests/queries/0_stateless/02796_calculate_text_stack_trace.sql index 601bd16fb39..52d55bdbe11 100644 --- a/tests/queries/0_stateless/02796_calculate_text_stack_trace.sql +++ b/tests/queries/0_stateless/02796_calculate_text_stack_trace.sql @@ -1,16 +1,20 @@ -- Tags: no-parallel -TRUNCATE TABLE system.text_log; - SELECT 'Hello', throwIf(1); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } SYSTEM FLUSH LOGS; -SELECT length(stack_trace) > 1000 FROM system.query_log WHERE current_database = currentDatabase() AND query LIKE '%SELECT \'Hello\', throwIf(1)%' AND query NOT LIKE '%system%' ORDER BY event_time_microseconds DESC LIMIT 1; -SELECT message LIKE '%Stack trace%' FROM system.text_log WHERE level = 'Error' AND message LIKE '%Exception%throwIf%' ORDER BY event_time_microseconds DESC LIMIT 10; -TRUNCATE TABLE system.text_log; +SELECT length(stack_trace) > 1000 FROM system.query_log WHERE current_database = currentDatabase() AND query LIKE '%SELECT \'Hello\', throwIf(1)%' AND query NOT LIKE '%system%' ORDER BY event_time_microseconds DESC LIMIT 1; + +SELECT message LIKE '%Stack trace%' FROM system.text_log WHERE level = 'Error' AND message LIKE '%Exception%throwIf%' + AND query_id = (SELECT query_id FROM system.query_log WHERE current_database = currentDatabase() AND query LIKE '%SELECT \'Hello\', throwIf(1)%' AND query NOT LIKE '%system%' ORDER BY event_time_microseconds DESC LIMIT 1) + ORDER BY event_time_microseconds DESC LIMIT 10; SET calculate_text_stack_trace = 0; SELECT 'World', throwIf(1); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } SYSTEM FLUSH LOGS; + SELECT length(stack_trace) FROM system.query_log WHERE current_database = currentDatabase() AND query LIKE '%SELECT \'World\', throwIf(1)%' AND query NOT LIKE '%system%' ORDER BY event_time_microseconds DESC LIMIT 1; -SELECT message LIKE '%Stack trace%' FROM system.text_log WHERE level = 'Error' AND message LIKE '%Exception%throwIf%' ORDER BY event_time_microseconds DESC LIMIT 10; + +SELECT message LIKE '%Stack trace%' FROM system.text_log WHERE level = 'Error' AND message LIKE '%Exception%throwIf%' + AND query_id = (SELECT query_id FROM system.query_log WHERE current_database = currentDatabase() AND query LIKE '%SELECT \'World\', throwIf(1)%' AND query NOT LIKE '%system%' ORDER BY event_time_microseconds DESC LIMIT 1) + ORDER BY event_time_microseconds DESC LIMIT 10; From 607a8a1c465baf85818ec41b8229f7afda8d6fb8 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 4 Jul 2023 22:52:59 +0200 Subject: [PATCH 1198/1997] fix --- src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp | 4 ++-- 1 file 
changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp index d6f8dbac883..1cc3736bd2e 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp @@ -472,8 +472,8 @@ CheckResult ReplicatedMergeTreePartCheckThread::checkPartAndFix(const String & p /// Part is not in ZooKeeper and not on disk (so there's nothing to detach or remove from ZooKeeper). /// Probably we cannot execute some entry from the replication queue (so don't need to enqueue another one). /// Either all replicas having the part are not active, or the part is lost forever. - bool is_lost = searchForMissingPartOnOtherReplicas(part_name); - if (is_lost) + bool found_something = searchForMissingPartOnOtherReplicas(part_name); + if (!found_something) onPartIsLostForever(part_name); break; From da105d491661d4a7a564263d11499c74126f0453 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 4 Jul 2023 23:01:06 +0200 Subject: [PATCH 1199/1997] impl --- src/Functions/FunctionsHashing.h | 5 ++++- tests/queries/0_stateless/02790_keyed_hash_bug.reference | 1 + tests/queries/0_stateless/02790_keyed_hash_bug.sql | 2 ++ 3 files changed, 7 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02790_keyed_hash_bug.reference create mode 100644 tests/queries/0_stateless/02790_keyed_hash_bug.sql diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index a4d4fbd085d..f20cf4a5ff4 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -81,7 +81,7 @@ namespace impl static SipHashKey parseSipHashKey(const ColumnWithTypeAndName & key) { - SipHashKey ret; + SipHashKey ret{}; const auto * tuple = checkAndGetColumn(key.column.get()); if (!tuple) @@ -90,6 +90,9 @@ namespace impl if (tuple->tupleSize() != 2) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "wrong tuple size: key must be a tuple of 2 UInt64"); + if (tuple->empty()) + return ret; + if (const auto * key0col = checkAndGetColumn(&(tuple->getColumn(0)))) ret.key0 = key0col->get64(0); else diff --git a/tests/queries/0_stateless/02790_keyed_hash_bug.reference b/tests/queries/0_stateless/02790_keyed_hash_bug.reference new file mode 100644 index 00000000000..a321a9052d0 --- /dev/null +++ b/tests/queries/0_stateless/02790_keyed_hash_bug.reference @@ -0,0 +1 @@ +16324913028386710556 diff --git a/tests/queries/0_stateless/02790_keyed_hash_bug.sql b/tests/queries/0_stateless/02790_keyed_hash_bug.sql new file mode 100644 index 00000000000..409e284d0d5 --- /dev/null +++ b/tests/queries/0_stateless/02790_keyed_hash_bug.sql @@ -0,0 +1,2 @@ +--- previously caused MemorySanitizer: use-of-uninitialized-value, because we tried to read hash key from empty tuple column during interpretation +SELECT sipHash64Keyed((1111111111111111111, toUInt64(222222222222223))) group by toUInt64(222222222222223); From f9a3856715a0de7ad8bb1136b8d22ded277bafd4 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 4 Jul 2023 21:02:30 +0000 Subject: [PATCH 1200/1997] Poor man's tuple parsing --- .../InterpreterShowIndexesQuery.cpp | 23 +++++- .../0_stateless/02724_show_indexes.reference | 82 ++++++++++--------- 2 files changed, 63 insertions(+), 42 deletions(-) diff --git a/src/Interpreters/InterpreterShowIndexesQuery.cpp b/src/Interpreters/InterpreterShowIndexesQuery.cpp index 5aafc22389f..2f65cc3ec3a 100644 --- 
a/src/Interpreters/InterpreterShowIndexesQuery.cpp +++ b/src/Interpreters/InterpreterShowIndexesQuery.cpp @@ -42,8 +42,8 @@ FROM ( name AS table, 1 AS non_unique, 'PRIMARY' AS key_name, - 1 AS seq_in_index, - '' AS column_name, + arrayJoin(splitByString(', ', primary_key)) AS column_name, + row_number() over (order by column_name) AS seq_in_index, 'A' AS collation, 0 AS cardinality, NULL AS sub_part, @@ -53,7 +53,7 @@ FROM ( '' AS comment, '' AS index_comment, 'YES' AS visible, - primary_key AS expression + '' AS expression FROM system.tables WHERE database = '{0}' @@ -63,8 +63,8 @@ FROM ( table AS table, 1 AS non_unique, name AS key_name, - 1 AS seq_in_index, '' AS column_name, + 1 AS seq_in_index, NULL AS collation, 0 AS cardinality, NULL AS sub_part, @@ -86,6 +86,21 @@ ORDER BY index_type, expression;)", database, table, where_expression); /// sort the output of SHOW INDEXES otherwise (SELECT * FROM (SHOW INDEXES ...) ORDER BY ...) is rejected) and 3. some /// SQL tests can take advantage of this. + /// Note about compatibility of fields 'column_name', 'seq_in_index' and 'expression' with MySQL: + /// MySQL has non-functional and functional indexes. + /// - Non-functional indexes only reference columns, e.g. 'col1, col2'. In this case, `SHOW INDEX` produces as many result rows as there + /// are indexed columns. 'column_name' and 'seq_in_index' (an ascending integer 1, 2, ...) are filled, 'expression' is empty. + /// - Functional indexes can reference arbitrary expressions, e.g. 'col1 + 1, concat(col2, col3)'. 'SHOW INDEX' produces a single row + /// with `column_name` and `seq_in_index` empty and `expression` filled with the entire index expression. Only non-primary-key indexes + /// can be functional indexes. + /// Above SELECT tries to emulate that. Caveats: + /// 1. The primary key index sub-SELECT assumes the primary key expression is non-functional. Non-functional primary key indexes in + /// ClickHouse are possible but quiete obscure. In MySQL they are not possible at all. + /// 2. Related to 1.: Poor man's tuple parsing with splitByString() in the PK sub-SELECT messes up for functional primary key index + /// expressions where the comma is not only used as separator between tuple components, e.g. in 'col1 + 1, concat(col2, col3)'. + /// 3. The data skipping index sub-SELECT assumes the index expression is functional. 3rd party tools that expect MySQL semantics from + /// SHOW INDEX will probably not care as MySQL has no skipping indexes and they only use the result to figure out the primary key. 
+ return rewritten_query; } diff --git a/tests/queries/0_stateless/02724_show_indexes.reference b/tests/queries/0_stateless/02724_show_indexes.reference index c5b7883e17e..063105e3332 100644 --- a/tests/queries/0_stateless/02724_show_indexes.reference +++ b/tests/queries/0_stateless/02724_show_indexes.reference @@ -1,45 +1,51 @@ --- Aliases of SHOW INDEX -tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b -tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d -tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e -tbl 1 PRIMARY 1 A 0 \N \N \N PRIMARY YES c, a -tbl 1 set_idx 1 \N 0 \N \N \N SET YES e -tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b -tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d -tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e -tbl 1 PRIMARY 1 A 0 \N \N \N PRIMARY YES c, a -tbl 1 set_idx 1 \N 0 \N \N \N SET YES e -tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b -tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d -tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e -tbl 1 PRIMARY 1 A 0 \N \N \N PRIMARY YES c, a -tbl 1 set_idx 1 \N 0 \N \N \N SET YES e -tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b -tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d -tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e -tbl 1 PRIMARY 1 A 0 \N \N \N PRIMARY YES c, a -tbl 1 set_idx 1 \N 0 \N \N \N SET YES e +tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b +tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d +tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e +tbl 1 PRIMARY a 1 A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY c 2 A 0 \N \N \N PRIMARY YES +tbl 1 set_idx 1 \N 0 \N \N \N SET YES e +tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b +tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d +tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e +tbl 1 PRIMARY a 1 A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY c 2 A 0 \N \N \N PRIMARY YES +tbl 1 set_idx 1 \N 0 \N \N \N SET YES e +tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b +tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d +tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e +tbl 1 PRIMARY a 1 A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY c 2 A 0 \N \N \N PRIMARY YES +tbl 1 set_idx 1 \N 0 \N \N \N SET YES e +tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b +tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d +tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e +tbl 1 PRIMARY c 2 A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY a 1 A 0 \N \N \N PRIMARY YES +tbl 1 set_idx 1 \N 0 \N \N \N SET YES e --- EXTENDED -tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b -tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d -tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e -tbl 1 PRIMARY 1 A 0 \N \N \N PRIMARY YES c, a -tbl 1 set_idx 1 \N 0 \N \N \N SET YES e +tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b +tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d +tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e +tbl 1 PRIMARY a 1 A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY c 2 A 0 \N \N \N PRIMARY YES +tbl 1 set_idx 1 \N 0 \N \N \N SET YES e --- WHERE --- Check with weird table names -$4@^7 1 PRIMARY 1 A 0 \N \N \N PRIMARY YES c -NULL 1 PRIMARY 1 A 0 \N \N \N PRIMARY YES c -\' 1 PRIMARY 1 A 0 \N \N \N PRIMARY YES c -\' 1 PRIMARY 1 A 0 \N \N \N PRIMARY YES c +$4@^7 1 PRIMARY c 1 A 0 \N \N \N PRIMARY YES +NULL 1 PRIMARY c 1 A 0 \N \N \N PRIMARY YES +\' 1 PRIMARY c 1 A 0 \N \N \N PRIMARY YES +\' 1 PRIMARY c 1 A 0 \N \N \N PRIMARY YES --- Original table -tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b -tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d -tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e 
-tbl 1 PRIMARY 1 A 0 \N \N \N PRIMARY YES c, a -tbl 1 set_idx 1 \N 0 \N \N \N SET YES e +tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b +tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d +tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e +tbl 1 PRIMARY c 2 A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY a 1 A 0 \N \N \N PRIMARY YES +tbl 1 set_idx 1 \N 0 \N \N \N SET YES e --- Equally named table in other database -tbl 1 mmi_idx 1 \N 0 \N \N \N MINMAX YES b -tbl 1 PRIMARY 1 A 0 \N \N \N PRIMARY YES a +tbl 1 mmi_idx 1 \N 0 \N \N \N MINMAX YES b +tbl 1 PRIMARY a 1 A 0 \N \N \N PRIMARY YES --- Short form -tbl 1 mmi_idx 1 \N 0 \N \N \N MINMAX YES b -tbl 1 PRIMARY 1 A 0 \N \N \N PRIMARY YES a +tbl 1 mmi_idx 1 \N 0 \N \N \N MINMAX YES b +tbl 1 PRIMARY a 1 A 0 \N \N \N PRIMARY YES From 7f1ee68c87160089d70f4cef04c975c38b01218e Mon Sep 17 00:00:00 2001 From: Han Fei Date: Tue, 4 Jul 2023 23:08:54 +0200 Subject: [PATCH 1201/1997] refine --- src/Functions/DateTimeTransforms.h | 8 +++---- src/Functions/IFunction.h | 4 ++++ ...OrDateTimeConverterWithPreimageVisitor.cpp | 21 +++++++++---------- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index 84c71c89b11..e59a9046277 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -734,11 +734,11 @@ struct ToYearImpl const DateLUTImpl & date_lut = DateLUT::instance(); - auto start_time = date_lut.makeDateTime(year, 1, 1, 0, 0, 0); + auto start_time = date_lut.makeDayNum(year, 1, 1); auto end_time = date_lut.addYears(start_time, 1); if (isDateOrDate32(type) || isDateTime(type) || isDateTime64(type)) - return {std::make_pair(Field(start_time), Field(end_time))}; + return {std::make_pair(Field(Int32(start_time)), Field(Int32(end_time)))}; else throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}. Should be Date, Date32, DateTime or DateTime64", @@ -1412,11 +1412,11 @@ struct ToYYYYMMImpl const DateLUTImpl & date_lut = DateLUT::instance(); - auto start_time = date_lut.makeDateTime(year, month, 1, 0, 0, 0); + auto start_time = date_lut.makeDayNum(year, month, 1); auto end_time = date_lut.addMonths(start_time, 1); if (isDateOrDate32(type) || isDateTime(type) || isDateTime64(type)) - return {std::make_pair(Field(start_time), Field(end_time))}; + return {std::make_pair(Field(Int32(start_time)), Field(Int32(end_time)))}; else throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}. Should be Date, Date32, DateTime or DateTime64", diff --git a/src/Functions/IFunction.h b/src/Functions/IFunction.h index 433cb61d04e..928475652f4 100644 --- a/src/Functions/IFunction.h +++ b/src/Functions/IFunction.h @@ -11,6 +11,10 @@ #include "config.h" +#if USE_EMBEDDED_COMPILER +# include +#endif + #include /// This file contains user interface for functions. 
diff --git a/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp b/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp index a377bb4bba6..9c2fdf6dee9 100644 --- a/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp +++ b/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp @@ -4,6 +4,7 @@ #include #include #include +#include "base/DayNum.h" #include #include #include @@ -37,20 +38,18 @@ ASTPtr generateOptimizedDateFilterAST(const String & comparator, const NameAndTy const DateLUTImpl & date_lut = DateLUT::instance(); const String & column_name = column.name; - String start_date_or_date_time; - String end_date_or_date_time; - if (isDateOrDate32(column.type.get())) + auto start_date = range.first.get(); + auto end_date = range.second.get(); + String start_date_or_date_time = date_lut.dateToString(ExtendedDayNum(static_cast(start_date))); + String end_date_or_date_time = date_lut.dateToString(ExtendedDayNum(static_cast(end_date))); + + if (isDateTime(column.type.get()) || isDateTime64(column.type.get())) { - start_date_or_date_time = date_lut.dateToString(range.first.get()); - end_date_or_date_time = date_lut.dateToString(range.second.get()); + start_date_or_date_time += " 00:00:00"; + end_date_or_date_time += " 00:00:00"; } - else if (isDateTime(column.type.get()) || isDateTime64(column.type.get())) - { - start_date_or_date_time = date_lut.timeToString(range.first.get()); - end_date_or_date_time = date_lut.timeToString(range.second.get()); - } - else [[unlikely]] return {}; + else if (!isDateOrDate32(column.type.get())) return {}; if (comparator == "equals") { From 2460268e3c260254021902f57e0e21e40d8d9d29 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 4 Jul 2023 23:22:08 +0200 Subject: [PATCH 1202/1997] Remove templates --- src/Functions/GregorianDate.cpp | 272 ++++++++++++++ src/Functions/GregorianDate.h | 481 +++++------------------- src/Functions/fromModifiedJulianDay.cpp | 5 +- src/Functions/toModifiedJulianDay.cpp | 8 +- 4 files changed, 376 insertions(+), 390 deletions(-) create mode 100644 src/Functions/GregorianDate.cpp diff --git a/src/Functions/GregorianDate.cpp b/src/Functions/GregorianDate.cpp new file mode 100644 index 00000000000..0f8a95ff3e7 --- /dev/null +++ b/src/Functions/GregorianDate.cpp @@ -0,0 +1,272 @@ +#include + +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED; + extern const int CANNOT_PARSE_DATE; + extern const int CANNOT_FORMAT_DATETIME; + extern const int LOGICAL_ERROR; +} + +namespace gd +{ + static inline constexpr bool is_leap_year(int32_t year) + { + return (year % 4 == 0) && ((year % 400 == 0) || (year % 100 != 0)); + } + + static inline constexpr uint8_t monthLength(bool is_leap_year, uint8_t month) + { + switch (month) + { + case 1: return 31; + case 2: return is_leap_year ? 29 : 28; + case 3: return 31; + case 4: return 30; + case 5: return 31; + case 6: return 30; + case 7: return 31; + case 8: return 31; + case 9: return 30; + case 10: return 31; + case 11: return 30; + case 12: return 31; + default: + std::terminate(); + } + } + + /** Integer division truncated toward negative infinity. 
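+     * For example, div(-7, 2) == -4 (the floor of -3.5), whereas the built-in
+     * operator/ truncates toward zero, so -7 / 2 == -3 in plain C++.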
+     */
+    template <typename I, typename J>
+    static inline constexpr I div(I x, J y)
+    {
+        const auto y_cast = static_cast<I>(y);
+        if (x > 0 && y_cast < 0)
+            return ((x - 1) / y_cast) - 1;
+        else if (x < 0 && y_cast > 0)
+            return ((x + 1) / y_cast) - 1;
+        else
+            return x / y_cast;
+    }
+
+    /** Integer modulus, satisfying div(x, y)*y + mod(x, y) == x.
+     */
+    template <typename I, typename J>
+    static inline constexpr I mod(I x, J y)
+    {
+        const auto y_cast = static_cast<I>(y);
+        const auto r = x % y_cast;
+        if ((x > 0 && y_cast < 0) || (x < 0 && y_cast > 0))
+            return r == 0 ? static_cast<I>(0) : r + y_cast;
+        else
+            return r;
+    }
+
+    /** Like std::min(), but the type of operands may differ.
+     */
+    template <typename I, typename J>
+    static inline constexpr I min(I x, J y)
+    {
+        const auto y_cast = static_cast<I>(y);
+        return x < y_cast ? x : y_cast;
+    }
+
+    static inline char readDigit(ReadBuffer & in)
+    {
+        char c;
+        if (!in.read(c))
+            throw Exception(ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED, "Cannot parse input: expected a digit at the end of stream");
+        else if (c < '0' || c > '9')
+            throw Exception(ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED, "Cannot read input: expected a digit but got something else");
+        else
+            return c - '0';
+    }
+}
+
+GregorianDate::GregorianDate(ReadBuffer & in)
+{
+    year_ = gd::readDigit(in) * 1000
+          + gd::readDigit(in) * 100
+          + gd::readDigit(in) * 10
+          + gd::readDigit(in);
+
+    assertChar('-', in);
+
+    month_ = gd::readDigit(in) * 10
+           + gd::readDigit(in);
+
+    assertChar('-', in);
+
+    day_of_month_ = gd::readDigit(in) * 10
+                  + gd::readDigit(in);
+
+    assertEOF(in);
+
+    if (month_ < 1 || month_ > 12 || day_of_month_ < 1 || day_of_month_ > gd::monthLength(gd::is_leap_year(year_), month_))
+        throw Exception(ErrorCodes::CANNOT_PARSE_DATE, "Invalid date: {}", toString());
+}
+
+GregorianDate::GregorianDate(int64_t modified_julian_day)
+{
+    const OrdinalDate ord(modified_julian_day);
+    const MonthDay md(gd::is_leap_year(ord.year()), ord.dayOfYear());
+
+    year_ = ord.year();
+    month_ = md.month();
+    day_of_month_ = md.dayOfMonth();
+}
+
+int64_t GregorianDate::toModifiedJulianDay() const
+{
+    const MonthDay md(month_, day_of_month_);
+    const auto day_of_year = md.dayOfYear(gd::is_leap_year(year_));
+    const OrdinalDate ord(year_, day_of_year);
+    return ord.toModifiedJulianDay();
+}
+
+template <typename ReturnType>
+ReturnType GregorianDate::writeImpl(WriteBuffer & buf) const
+{
+    if (year_ < 0 || year_ > 9999)
+    {
+        if constexpr (std::is_same_v<ReturnType, void>)
+            throw Exception(ErrorCodes::CANNOT_FORMAT_DATETIME,
+                "Impossible to stringify: year too big or small: {}", DB::toString(year_));
+        else
+            return false;
+    }
+    else
+    {
+        auto y = year_;
+        writeChar('0' + y / 1000, buf); y %= 1000;
+        writeChar('0' + y / 100, buf); y %= 100;
+        writeChar('0' + y / 10, buf); y %= 10;
+        writeChar('0' + y , buf);
+
+        writeChar('-', buf);
+
+        auto m = month_;
+        writeChar('0' + m / 10, buf); m %= 10;
+        writeChar('0' + m , buf);
+
+        writeChar('-', buf);
+
+        auto d = day_of_month_;
+        writeChar('0' + d / 10, buf); d %= 10;
+        writeChar('0' + d , buf);
+    }
+
+    return ReturnType(true);
+}
+
+std::string GregorianDate::toString() const
+{
+    WriteBufferFromOwnString buf;
+    write(buf);
+    return buf.str();
+}
+
+OrdinalDate::OrdinalDate(int32_t year, uint16_t day_of_year)
+    : year_(year)
+    , day_of_year_(day_of_year)
+{
+    if (day_of_year < 1 || day_of_year > (gd::is_leap_year(year) ?
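+        /* 366 days in a leap year, 365 otherwise */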
366 : 365)) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid ordinal date: {}-{}", toString(year), toString(day_of_year)); + } +} + +OrdinalDate::OrdinalDate(int64_t modified_julian_day) +{ + /// This function supports day number from -678941 to 2973119 (which represent 0000-01-01 and 9999-12-31 respectively). + + if (modified_julian_day < -678941) + throw Exception( + ErrorCodes::CANNOT_FORMAT_DATETIME, + "Value cannot be represented as date because it's out of range"); + + if (modified_julian_day > 2973119) + throw Exception( + ErrorCodes::CANNOT_FORMAT_DATETIME, + "Value cannot be represented as date because it's out of range"); + + const auto a = modified_julian_day + 678575; + const auto quad_cent = gd::div(a, 146097); + const auto b = gd::mod(a, 146097); + const auto cent = gd::min(gd::div(b, 36524), 3); + const auto c = b - cent * 36524; + const auto quad = gd::div(c, 1461); + const auto d = gd::mod(c, 1461); + const auto y = gd::min(gd::div(d, 365), 3); + + day_of_year_ = d - y * 365 + 1; + year_ = static_cast(quad_cent * 400 + cent * 100 + quad * 4 + y + 1); +} + +int64_t OrdinalDate::toModifiedJulianDay() const noexcept +{ + const auto y = year_ - 1; + return day_of_year_ + + 365 * y + + gd::div(y, 4) + - gd::div(y, 100) + + gd::div(y, 400) + - 678576; +} + +MonthDay::MonthDay(uint8_t month, uint8_t day_of_month) + : month_(month) + , day_of_month_(day_of_month) +{ + if (month < 1 || month > 12) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid month: {}", DB::toString(month)); + /* We can't validate day_of_month here, because we don't know if + * it's a leap year. */ +} + +MonthDay::MonthDay(bool is_leap_year, uint16_t day_of_year) +{ + if (day_of_year < 1 || day_of_year > (is_leap_year ? 366 : 365)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid day of year: {}{}", + (is_leap_year ? "leap, " : "non-leap, "), DB::toString(day_of_year)); + + month_ = 1; + uint16_t d = day_of_year; + while (true) + { + const auto len = gd::monthLength(is_leap_year, month_); + if (d <= len) + break; + month_++; + d -= len; + } + day_of_month_ = d; +} + +uint16_t MonthDay::dayOfYear(bool is_leap_year) const +{ + if (day_of_month_ < 1 || day_of_month_ > gd::monthLength(is_leap_year, month_)) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid day of month: {}{}-{}", + (is_leap_year ? "leap, " : "non-leap, "), DB::toString(month_), DB::toString(day_of_month_)); + } + const auto k = month_ <= 2 ? 0 : is_leap_year ? -1 :-2; + return (367 * month_ - 362) / 12 + k + day_of_month_; +} + +template void GregorianDate::writeImpl(WriteBuffer & buf) const; +template bool GregorianDate::writeImpl(WriteBuffer & buf) const; + +} diff --git a/src/Functions/GregorianDate.h b/src/Functions/GregorianDate.h index 16fcb5ea061..4a0cbec5afe 100644 --- a/src/Functions/GregorianDate.h +++ b/src/Functions/GregorianDate.h @@ -1,425 +1,138 @@ #pragma once -#include -#include #include -#include -#include -#include -#include - -#include namespace DB { - namespace ErrorCodes - { - extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED; - extern const int CANNOT_PARSE_DATE; - extern const int CANNOT_FORMAT_DATETIME; - extern const int LOGICAL_ERROR; - } - /** Proleptic Gregorian calendar date. YearT is an integral type +class ReadBuffer; +class WriteBuffer; + +/// Proleptic Gregorian calendar date. +class GregorianDate +{ +public: + /** Construct from date in text form 'YYYY-MM-DD' by reading from + * ReadBuffer. + */ + explicit GregorianDate(ReadBuffer & in); + + /** Construct from Modified Julian Day. 
The type T is an + * integral type which should be at least 32 bits wide, and + * should preferably signed. + */ + explicit GregorianDate(int64_t modified_julian_day); + + /** Convert to Modified Julian Day. The type T is an integral type * which should be at least 32 bits wide, and should preferably - * be signed. - */ - template - class GregorianDate - { - public: - /** Construct from date in text form 'YYYY-MM-DD' by reading from - * ReadBuffer. - */ - explicit GregorianDate(ReadBuffer & in); - - /** Construct from Modified Julian Day. The type T is an - * integral type which should be at least 32 bits wide, and - * should preferably signed. - */ - explicit GregorianDate(is_integer auto modified_julian_day); - - /** Convert to Modified Julian Day. The type T is an integral type - * which should be at least 32 bits wide, and should preferably - * signed. - */ - template - T toModifiedJulianDay() const; - - /** Write the date in text form 'YYYY-MM-DD' to a buffer. - */ - void write(WriteBuffer & buf) const - { - writeImpl(buf); - } - - bool tryWrite(WriteBuffer & buf) const - { - return writeImpl(buf); - } - - /** Convert to a string in text form 'YYYY-MM-DD'. - */ - std::string toString() const; - - YearT year() const noexcept - { - return year_; - } - - uint8_t month() const noexcept - { - return month_; - } - - uint8_t dayOfMonth() const noexcept - { - return day_of_month_; - } - - private: - YearT year_ = 0; - uint8_t month_ = 0; - uint8_t day_of_month_ = 0; - - template - ReturnType writeImpl(WriteBuffer & buf) const; - }; - - /** ISO 8601 Ordinal Date. YearT is an integral type which should - * be at least 32 bits wide, and should preferably signed. - */ - template - class OrdinalDate - { - public: - OrdinalDate(YearT year, uint16_t day_of_year); - - /** Construct from Modified Julian Day. The type T is an - * integral type which should be at least 32 bits wide, and - * should preferably signed. - */ - template - explicit OrdinalDate(DayT modified_julian_day); - - /** Convert to Modified Julian Day. The type T is an integral - * type which should be at least 32 bits wide, and should - * preferably be signed. - */ - template - T toModifiedJulianDay() const noexcept; - - YearT year() const noexcept - { - return year_; - } - - uint16_t dayOfYear() const noexcept - { - return day_of_year_; - } - - private: - YearT year_ = 0; - uint16_t day_of_year_ = 0; - }; - - class MonthDay - { - public: - /** Construct from month and day. */ - MonthDay(uint8_t month, uint8_t day_of_month); - - /** Construct from day of year in Gregorian or Julian - * calendars to month and day. - */ - MonthDay(bool is_leap_year, uint16_t day_of_year); - - /** Convert month and day in Gregorian or Julian calendars to - * day of year. - */ - uint16_t dayOfYear(bool is_leap_year) const; - - uint8_t month() const noexcept - { - return month_; - } - - uint8_t dayOfMonth() const noexcept - { - return day_of_month_; - } - - private: - uint8_t month_ = 0; - uint8_t day_of_month_ = 0; - }; -} - - -namespace gd -{ - using namespace DB; - - template - static inline constexpr bool is_leap_year(YearT year) - { - return (year % 4 == 0) && ((year % 400 == 0) || (year % 100 != 0)); - } - - static inline constexpr uint8_t monthLength(bool is_leap_year, uint8_t month) - { - switch (month) - { - case 1: return 31; - case 2: return is_leap_year ? 
29 : 28; - case 3: return 31; - case 4: return 30; - case 5: return 31; - case 6: return 30; - case 7: return 31; - case 8: return 31; - case 9: return 30; - case 10: return 31; - case 11: return 30; - case 12: return 31; - default: - std::terminate(); - } - } - - /** Integer division truncated toward negative infinity. + * signed. */ - template - static inline constexpr I div(I x, J y) - { - const auto y_cast = static_cast(y); - if (x > 0 && y_cast < 0) - return ((x - 1) / y_cast) - 1; - else if (x < 0 && y_cast > 0) - return ((x + 1) / y_cast) - 1; - else - return x / y_cast; - } + int64_t toModifiedJulianDay() const; - /** Integer modulus, satisfying div(x, y)*y + mod(x, y) == x. + /** Write the date in text form 'YYYY-MM-DD' to a buffer. */ - template - static inline constexpr I mod(I x, J y) + void write(WriteBuffer & buf) const { - const auto y_cast = static_cast(y); - const auto r = x % y_cast; - if ((x > 0 && y_cast < 0) || (x < 0 && y_cast > 0)) - return r == 0 ? static_cast(0) : r + y_cast; - else - return r; + writeImpl(buf); } - /** Like std::min(), but the type of operands may differ. + bool tryWrite(WriteBuffer & buf) const + { + return writeImpl(buf); + } + + /** Convert to a string in text form 'YYYY-MM-DD'. */ - template - static inline constexpr I min(I x, J y) + std::string toString() const; + + int32_t year() const noexcept { - const auto y_cast = static_cast(y); - return x < y_cast ? x : y_cast; + return year_; } - static inline char readDigit(ReadBuffer & in) + uint8_t month() const noexcept { - char c; - if (!in.read(c)) - throw Exception(ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED, "Cannot parse input: expected a digit at the end of stream"); - else if (c < '0' || c > '9') - throw Exception(ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED, "Cannot read input: expected a digit but got something else"); - else - return c - '0'; - } -} - -namespace DB -{ - template - GregorianDate::GregorianDate(ReadBuffer & in) - { - year_ = gd::readDigit(in) * 1000 - + gd::readDigit(in) * 100 - + gd::readDigit(in) * 10 - + gd::readDigit(in); - - assertChar('-', in); - - month_ = gd::readDigit(in) * 10 - + gd::readDigit(in); - - assertChar('-', in); - - day_of_month_ = gd::readDigit(in) * 10 - + gd::readDigit(in); - - assertEOF(in); - - if (month_ < 1 || month_ > 12 || day_of_month_ < 1 || day_of_month_ > gd::monthLength(gd::is_leap_year(year_), month_)) - throw Exception(ErrorCodes::CANNOT_PARSE_DATE, "Invalid date: {}", toString()); + return month_; } - template - GregorianDate::GregorianDate(is_integer auto modified_julian_day) + uint8_t dayOfMonth() const noexcept { - const OrdinalDate ord(modified_julian_day); - const MonthDay md(gd::is_leap_year(ord.year()), ord.dayOfYear()); - - year_ = ord.year(); - month_ = md.month(); - day_of_month_ = md.dayOfMonth(); + return day_of_month_; } - template - template - T GregorianDate::toModifiedJulianDay() const - { - const MonthDay md(month_, day_of_month_); - const auto day_of_year = md.dayOfYear(gd::is_leap_year(year_)); - const OrdinalDate ord(year_, day_of_year); - return ord.template toModifiedJulianDay(); - } +private: + int32_t year_ = 0; + uint8_t month_ = 0; + uint8_t day_of_month_ = 0; - template template - ReturnType GregorianDate::writeImpl(WriteBuffer & buf) const + ReturnType writeImpl(WriteBuffer & buf) const; +}; + +/** ISO 8601 Ordinal Date. + */ +class OrdinalDate +{ +public: + OrdinalDate(int32_t year, uint16_t day_of_year); + + /** Construct from Modified Julian Day. 
The type T is an + * integral type which should be at least 32 bits wide, and + * should preferably signed. + */ + explicit OrdinalDate(int64_t modified_julian_day); + + /** Convert to Modified Julian Day. The type T is an integral + * type which should be at least 32 bits wide, and should + * preferably be signed. + */ + int64_t toModifiedJulianDay() const noexcept; + + int32_t year() const noexcept { - if (year_ < 0 || year_ > 9999) - { - if constexpr (std::is_same_v) - throw Exception(ErrorCodes::CANNOT_FORMAT_DATETIME, - "Impossible to stringify: year too big or small: {}", DB::toString(year_)); - else - return false; - } - else - { - auto y = year_; - writeChar('0' + y / 1000, buf); y %= 1000; - writeChar('0' + y / 100, buf); y %= 100; - writeChar('0' + y / 10, buf); y %= 10; - writeChar('0' + y , buf); - - writeChar('-', buf); - - auto m = month_; - writeChar('0' + m / 10, buf); m %= 10; - writeChar('0' + m , buf); - - writeChar('-', buf); - - auto d = day_of_month_; - writeChar('0' + d / 10, buf); d %= 10; - writeChar('0' + d , buf); - } - - return ReturnType(true); + return year_; } - template - std::string GregorianDate::toString() const + uint16_t dayOfYear() const noexcept { - WriteBufferFromOwnString buf; - write(buf); - return buf.str(); + return day_of_year_; } - template - OrdinalDate::OrdinalDate(YearT year, uint16_t day_of_year) - : year_(year) - , day_of_year_(day_of_year) +private: + int32_t year_ = 0; + uint16_t day_of_year_ = 0; +}; + +class MonthDay +{ +public: + /** Construct from month and day. */ + MonthDay(uint8_t month, uint8_t day_of_month); + + /** Construct from day of year in Gregorian or Julian + * calendars to month and day. + */ + MonthDay(bool is_leap_year, uint16_t day_of_year); + + /** Convert month and day in Gregorian or Julian calendars to + * day of year. + */ + uint16_t dayOfYear(bool is_leap_year) const; + + uint8_t month() const noexcept { - if (day_of_year < 1 || day_of_year > (gd::is_leap_year(year) ? 366 : 365)) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid ordinal date: {}-{}", toString(year), toString(day_of_year)); - } + return month_; } - template - template - OrdinalDate::OrdinalDate(DayT modified_julian_day) + uint8_t dayOfMonth() const noexcept { - /// This function supports day number from -678941 to 2973119 (which represent 0000-01-01 and 9999-12-31 respectively). 
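/// A quick sanity check: Modified Julian Day 0 corresponds to 1858-11-17; for that
/// date day_of_year = 321, and the conversion formula below computes
/// 321 + 365 * 1857 + 464 - 18 + 4 - 678576 = 0.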
- - if constexpr (is_signed_v && std::numeric_limits::lowest() < -678941) - if (modified_julian_day < -678941) - throw Exception( - ErrorCodes::CANNOT_FORMAT_DATETIME, - "Value cannot be represented as date because it's out of range"); - - if constexpr (std::numeric_limits::max() > 2973119) - if (modified_julian_day > 2973119) - throw Exception( - ErrorCodes::CANNOT_FORMAT_DATETIME, - "Value cannot be represented as date because it's out of range"); - - const auto a = modified_julian_day + 678575; - const auto quad_cent = gd::div(a, 146097); - const auto b = gd::mod(a, 146097); - const auto cent = gd::min(gd::div(b, 36524), 3); - const auto c = b - cent * 36524; - const auto quad = gd::div(c, 1461); - const auto d = gd::mod(c, 1461); - const auto y = gd::min(gd::div(d, 365), 3); - - day_of_year_ = d - y * 365 + 1; - year_ = static_cast(quad_cent * 400 + cent * 100 + quad * 4 + y + 1); + return day_of_month_; } - template - template - T OrdinalDate::toModifiedJulianDay() const noexcept - { - const auto y = year_ - 1; - return day_of_year_ - + 365 * y - + gd::div(y, 4) - - gd::div(y, 100) - + gd::div(y, 400) - - 678576; - } +private: + uint8_t month_ = 0; + uint8_t day_of_month_ = 0; +}; - inline MonthDay::MonthDay(uint8_t month, uint8_t day_of_month) - : month_(month) - , day_of_month_(day_of_month) - { - if (month < 1 || month > 12) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid month: {}", DB::toString(month)); - /* We can't validate day_of_month here, because we don't know if - * it's a leap year. */ - } - - inline MonthDay::MonthDay(bool is_leap_year, uint16_t day_of_year) - { - if (day_of_year < 1 || day_of_year > (is_leap_year ? 366 : 365)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid day of year: {}{}", - (is_leap_year ? "leap, " : "non-leap, "), DB::toString(day_of_year)); - - month_ = 1; - uint16_t d = day_of_year; - while (true) - { - const auto len = gd::monthLength(is_leap_year, month_); - if (d <= len) - break; - month_++; - d -= len; - } - day_of_month_ = d; - } - - inline uint16_t MonthDay::dayOfYear(bool is_leap_year) const - { - if (day_of_month_ < 1 || day_of_month_ > gd::monthLength(is_leap_year, month_)) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid day of month: {}{}-{}", - (is_leap_year ? "leap, " : "non-leap, "), DB::toString(month_), DB::toString(day_of_month_)); - } - const auto k = month_ <= 2 ? 0 : is_leap_year ? 
-1 :-2; - return (367 * month_ - 362) / 12 + k + day_of_month_; - } } diff --git a/src/Functions/fromModifiedJulianDay.cpp b/src/Functions/fromModifiedJulianDay.cpp index a7c2c04bf01..bad0696e503 100644 --- a/src/Functions/fromModifiedJulianDay.cpp +++ b/src/Functions/fromModifiedJulianDay.cpp @@ -13,6 +13,7 @@ #include #include + namespace DB { @@ -56,14 +57,14 @@ namespace DB { if constexpr (nullOnErrors) { - const GregorianDate<> gd(vec_from[i]); + const GregorianDate gd(vec_from[i]); (*vec_null_map_to)[i] = gd.tryWrite(write_buffer); writeChar(0, write_buffer); offsets_to[i] = write_buffer.count(); } else { - const GregorianDate<> gd(vec_from[i]); + const GregorianDate gd(vec_from[i]); gd.write(write_buffer); writeChar(0, write_buffer); offsets_to[i] = write_buffer.count(); diff --git a/src/Functions/toModifiedJulianDay.cpp b/src/Functions/toModifiedJulianDay.cpp index 0d854bcc110..f800b279385 100644 --- a/src/Functions/toModifiedJulianDay.cpp +++ b/src/Functions/toModifiedJulianDay.cpp @@ -80,8 +80,8 @@ namespace DB { try { - const GregorianDate<> date(read_buffer); - vec_to[i] = date.toModifiedJulianDay(); + const GregorianDate date(read_buffer); + vec_to[i] = static_cast(date.toModifiedJulianDay()); vec_null_map_to[i] = false; } catch (const Exception & e) @@ -97,8 +97,8 @@ namespace DB } else { - const GregorianDate<> date(read_buffer); - vec_to[i] = date.toModifiedJulianDay(); + const GregorianDate date(read_buffer); + vec_to[i] = static_cast(date.toModifiedJulianDay()); } } From 2a6b5e4ec6134e5c6451301ddcfa5d6acd949567 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 4 Jul 2023 23:28:45 +0200 Subject: [PATCH 1203/1997] Fixed bad code --- src/Functions/GregorianDate.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Functions/GregorianDate.cpp b/src/Functions/GregorianDate.cpp index 0f8a95ff3e7..38ed3e2ddf8 100644 --- a/src/Functions/GregorianDate.cpp +++ b/src/Functions/GregorianDate.cpp @@ -115,7 +115,7 @@ GregorianDate::GregorianDate(ReadBuffer & in) assertEOF(in); if (month_ < 1 || month_ > 12 || day_of_month_ < 1 || day_of_month_ > gd::monthLength(gd::is_leap_year(year_), month_)) - throw Exception(ErrorCodes::CANNOT_PARSE_DATE, "Invalid date: {}", toString()); + throw Exception(ErrorCodes::CANNOT_PARSE_DATE, "Invalid date"); } GregorianDate::GregorianDate(int64_t modified_julian_day) @@ -143,7 +143,7 @@ ReturnType GregorianDate::writeImpl(WriteBuffer & buf) const { if constexpr (std::is_same_v) throw Exception(ErrorCodes::CANNOT_FORMAT_DATETIME, - "Impossible to stringify: year too big or small: {}", DB::toString(year_)); + "Impossible to stringify: year too big or small: {}", year_); else return false; } @@ -231,7 +231,7 @@ MonthDay::MonthDay(uint8_t month, uint8_t day_of_month) , day_of_month_(day_of_month) { if (month < 1 || month > 12) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid month: {}", DB::toString(month)); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid month: {}", month); /* We can't validate day_of_month here, because we don't know if * it's a leap year. */ } @@ -240,7 +240,7 @@ MonthDay::MonthDay(bool is_leap_year, uint16_t day_of_year) { if (day_of_year < 1 || day_of_year > (is_leap_year ? 366 : 365)) throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid day of year: {}{}", - (is_leap_year ? "leap, " : "non-leap, "), DB::toString(day_of_year)); + (is_leap_year ? 
"leap, " : "non-leap, "), day_of_year); month_ = 1; uint16_t d = day_of_year; @@ -249,7 +249,7 @@ MonthDay::MonthDay(bool is_leap_year, uint16_t day_of_year) const auto len = gd::monthLength(is_leap_year, month_); if (d <= len) break; - month_++; + ++month_; d -= len; } day_of_month_ = d; @@ -260,7 +260,7 @@ uint16_t MonthDay::dayOfYear(bool is_leap_year) const if (day_of_month_ < 1 || day_of_month_ > gd::monthLength(is_leap_year, month_)) { throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid day of month: {}{}-{}", - (is_leap_year ? "leap, " : "non-leap, "), DB::toString(month_), DB::toString(day_of_month_)); + (is_leap_year ? "leap, " : "non-leap, "), month_, day_of_month_); } const auto k = month_ <= 2 ? 0 : is_leap_year ? -1 :-2; return (367 * month_ - 362) / 12 + k + day_of_month_; From 04d745a5e012df5f5d438136636b479fe6c173e0 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Tue, 4 Jul 2023 22:39:51 +0200 Subject: [PATCH 1204/1997] Repro test --- ...parallel_replicas_prewhere_count.reference | 4 ++++ ...02811_parallel_replicas_prewhere_count.sql | 24 +++++++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 tests/queries/0_stateless/02811_parallel_replicas_prewhere_count.reference create mode 100644 tests/queries/0_stateless/02811_parallel_replicas_prewhere_count.sql diff --git a/tests/queries/0_stateless/02811_parallel_replicas_prewhere_count.reference b/tests/queries/0_stateless/02811_parallel_replicas_prewhere_count.reference new file mode 100644 index 00000000000..fe8f022b908 --- /dev/null +++ b/tests/queries/0_stateless/02811_parallel_replicas_prewhere_count.reference @@ -0,0 +1,4 @@ +-- count() ------------------------------ +2 +-- count() with parallel replicas ------- +2 diff --git a/tests/queries/0_stateless/02811_parallel_replicas_prewhere_count.sql b/tests/queries/0_stateless/02811_parallel_replicas_prewhere_count.sql new file mode 100644 index 00000000000..0cf53158646 --- /dev/null +++ b/tests/queries/0_stateless/02811_parallel_replicas_prewhere_count.sql @@ -0,0 +1,24 @@ +CREATE TABLE users (uid Int16, name String, age Int16) ENGINE=MergeTree() ORDER BY uid; + +INSERT INTO users VALUES (111, 'JFK', 33); +INSERT INTO users VALUES (6666, 'KLM', 48); +INSERT INTO users VALUES (88888, 'AMS', 50); + + +SELECT '-- count() ------------------------------'; +SELECT count() FROM users PREWHERE uid > 2000; + + +-- enable parallel replicas but with high granules threshold +SET +skip_unavailable_shards=1, +allow_experimental_parallel_reading_from_replicas=1, +max_parallel_replicas=3, +use_hedged_requests=0, +cluster_for_parallel_replicas='parallel_replicas', +parallel_replicas_for_non_replicated_merge_tree=1, +parallel_replicas_min_number_of_granules_to_enable=1000; + + +SELECT '-- count() with parallel replicas -------'; +SELECT count() FROM users PREWHERE uid > 2000; From c0629a38d1679c5fc0ccbe7eb92a3b36a3830144 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Tue, 4 Jul 2023 23:09:25 +0200 Subject: [PATCH 1205/1997] Fix for incorrect result when parallel replicas not completely disabled --- src/Storages/MergeTree/MergeTreeData.cpp | 3 +++ src/Storages/SelectQueryInfo.h | 2 ++ src/Storages/StorageMergeTree.cpp | 9 +++++++-- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 6bbf80944a7..fa9bfd38a23 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ 
b/src/Storages/MergeTree/MergeTreeData.cpp @@ -7196,7 +7196,10 @@ QueryProcessingStage::Enum MergeTreeData::getQueryProcessingStage( if (query_context->canUseParallelReplicasOnInitiator() && to_stage >= QueryProcessingStage::WithMergeableState) { if (!canUseParallelReplicasBasedOnPKAnalysis(query_context, storage_snapshot, query_info)) + { + query_info.parallel_replicas_disabled = true; return QueryProcessingStage::Enum::FetchColumns; + } /// ReplicatedMergeTree if (supportsReplication()) diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index 8fbc64b7a24..13d6909fd52 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -255,6 +255,8 @@ struct SelectQueryInfo Block minmax_count_projection_block; MergeTreeDataSelectAnalysisResultPtr merge_tree_select_result_ptr; + bool parallel_replicas_disabled = false; + bool is_parameterized_view = false; NameToNameMap parameterized_view_values; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 4c0c0c8e3fa..ffd03df1c7b 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -209,7 +209,9 @@ void StorageMergeTree::read( size_t max_block_size, size_t num_streams) { - if (local_context->canUseParallelReplicasOnInitiator() && local_context->getSettingsRef().parallel_replicas_for_non_replicated_merge_tree) + if (!query_info.parallel_replicas_disabled && + local_context->canUseParallelReplicasOnInitiator() && + local_context->getSettingsRef().parallel_replicas_for_non_replicated_merge_tree) { auto table_id = getStorageID(); @@ -240,7 +242,10 @@ void StorageMergeTree::read( } else { - const bool enable_parallel_reading = local_context->canUseParallelReplicasOnFollower() && local_context->getSettingsRef().parallel_replicas_for_non_replicated_merge_tree; + const bool enable_parallel_reading = + !query_info.parallel_replicas_disabled && + local_context->canUseParallelReplicasOnFollower() && + local_context->getSettingsRef().parallel_replicas_for_non_replicated_merge_tree; if (auto plan = reader.read( column_names, storage_snapshot, query_info, From 24b9c430f83b938329d228abd62ed44845fa63fc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 5 Jul 2023 00:39:10 +0200 Subject: [PATCH 1206/1997] Continuation --- src/Functions/GregorianDate.cpp | 198 ++++++++++++++++++------ src/Functions/GregorianDate.h | 17 ++ src/Functions/fromModifiedJulianDay.cpp | 6 +- src/Functions/toModifiedJulianDay.cpp | 23 +-- 4 files changed, 178 insertions(+), 66 deletions(-) diff --git a/src/Functions/GregorianDate.cpp b/src/Functions/GregorianDate.cpp index 38ed3e2ddf8..da1172c8916 100644 --- a/src/Functions/GregorianDate.cpp +++ b/src/Functions/GregorianDate.cpp @@ -1,7 +1,6 @@ #include #include -#include #include #include #include @@ -19,7 +18,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -namespace gd +namespace { static inline constexpr bool is_leap_year(int32_t year) { @@ -93,49 +92,129 @@ namespace gd else return c - '0'; } + + static inline bool tryReadDigit(ReadBuffer & in, char & c) + { + if (in.read(c) && c >= '0' && c <= '9') + { + c -= '0'; + return true; + } + + return false; + } +} + +void GregorianDate::init(ReadBuffer & in) +{ + year_ = readDigit(in) * 1000 + + readDigit(in) * 100 + + readDigit(in) * 10 + + readDigit(in); + + assertChar('-', in); + + month_ = readDigit(in) * 10 + + readDigit(in); + + assertChar('-', in); + + day_of_month_ = readDigit(in) * 10 + + readDigit(in); + + assertEOF(in); + + if (month_ < 1 || 
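+        /* reject anything that is not a real calendar date, e.g. 2023-02-29 */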
month_ > 12 || day_of_month_ < 1 || day_of_month_ > monthLength(is_leap_year(year_), month_)) + throw Exception(ErrorCodes::CANNOT_PARSE_DATE, "Invalid date"); +} + +bool GregorianDate::tryInit(ReadBuffer & in) +{ + char c[8]; + + if ( !tryReadDigit(in, c[0]) + || !tryReadDigit(in, c[1]) + || !tryReadDigit(in, c[2]) + || !tryReadDigit(in, c[3]) + || !checkChar('-', in) + || !tryReadDigit(in, c[4]) + || !tryReadDigit(in, c[5]) + || !checkChar('-', in) + || !tryReadDigit(in, c[6]) + || !tryReadDigit(in, c[7]) + || !in.eof()) + { + return false; + } + + year_ = c[0] * 1000 + c[1] * 100 + c[2] * 10 + c[3]; + month_ = c[4] * 10 + c[5]; + day_of_month_ = c[6] * 10 + c[7]; + + if (month_ < 1 || month_ > 12 || day_of_month_ < 1 || day_of_month_ > monthLength(is_leap_year(year_), month_)) + return false; + + return true; } GregorianDate::GregorianDate(ReadBuffer & in) { - year_ = gd::readDigit(in) * 1000 - + gd::readDigit(in) * 100 - + gd::readDigit(in) * 10 - + gd::readDigit(in); + init(in); +} - assertChar('-', in); +void GregorianDate::init(int64_t modified_julian_day) +{ + const OrdinalDate ord(modified_julian_day); + const MonthDay md(is_leap_year(ord.year()), ord.dayOfYear()); - month_ = gd::readDigit(in) * 10 - + gd::readDigit(in); + year_ = ord.year(); + month_ = md.month(); + day_of_month_ = md.dayOfMonth(); +} - assertChar('-', in); +bool GregorianDate::tryInit(int64_t modified_julian_day) +{ + OrdinalDate ord; + if (!ord.tryInit(modified_julian_day)) + return false; - day_of_month_ = gd::readDigit(in) * 10 - + gd::readDigit(in); + MonthDay md(is_leap_year(ord.year()), ord.dayOfYear()); - assertEOF(in); + year_ = ord.year(); + month_ = md.month(); + day_of_month_ = md.dayOfMonth(); - if (month_ < 1 || month_ > 12 || day_of_month_ < 1 || day_of_month_ > gd::monthLength(gd::is_leap_year(year_), month_)) - throw Exception(ErrorCodes::CANNOT_PARSE_DATE, "Invalid date"); + return true; } GregorianDate::GregorianDate(int64_t modified_julian_day) { - const OrdinalDate ord(modified_julian_day); - const MonthDay md(gd::is_leap_year(ord.year()), ord.dayOfYear()); - - year_ = ord.year(); - month_ = md.month(); - day_of_month_ = md.dayOfMonth(); + init(modified_julian_day); } int64_t GregorianDate::toModifiedJulianDay() const { const MonthDay md(month_, day_of_month_); - const auto day_of_year = md.dayOfYear(gd::is_leap_year(year_)); + + const auto day_of_year = md.dayOfYear(is_leap_year(year_)); + const OrdinalDate ord(year_, day_of_year); return ord.toModifiedJulianDay(); } +bool GregorianDate::tryToModifiedJulianDay(int64_t & res) const +{ + const MonthDay md(month_, day_of_month_); + const auto day_of_year = md.dayOfYear(is_leap_year(year_)); + OrdinalDate ord; + + if (!ord.tryInit(year_, day_of_year)) + return false; + + res = ord.toModifiedJulianDay(); + return true; +} + template ReturnType GregorianDate::writeImpl(WriteBuffer & buf) const { @@ -178,51 +257,76 @@ std::string GregorianDate::toString() const return buf.str(); } -OrdinalDate::OrdinalDate(int32_t year, uint16_t day_of_year) - : year_(year) - , day_of_year_(day_of_year) +void OrdinalDate::init(int32_t year, uint16_t day_of_year) { - if (day_of_year < 1 || day_of_year > (gd::is_leap_year(year) ? 366 : 365)) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid ordinal date: {}-{}", toString(year), toString(day_of_year)); - } + year_ = year; + day_of_year_ = day_of_year; + + if (day_of_year < 1 || day_of_year > (is_leap_year(year) ? 
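+        /* an ordinal date counts days within the year: 1..365, or 1..366 in a leap year */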
366 : 365)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid ordinal date: {}-{}", year, day_of_year); } -OrdinalDate::OrdinalDate(int64_t modified_julian_day) +bool OrdinalDate::tryInit(int32_t year, uint16_t day_of_year) +{ + year_ = year; + day_of_year_ = day_of_year; + + return !(day_of_year < 1 || day_of_year > (is_leap_year(year) ? 366 : 365)); +} + +void OrdinalDate::init(int64_t modified_julian_day) +{ + if (!tryInit(modified_julian_day)) + throw Exception( + ErrorCodes::CANNOT_FORMAT_DATETIME, + "Value cannot be represented as date because it's out of range"); +} + +bool OrdinalDate::tryInit(int64_t modified_julian_day) { /// This function supports day number from -678941 to 2973119 (which represent 0000-01-01 and 9999-12-31 respectively). if (modified_julian_day < -678941) - throw Exception( - ErrorCodes::CANNOT_FORMAT_DATETIME, - "Value cannot be represented as date because it's out of range"); + return false; if (modified_julian_day > 2973119) - throw Exception( - ErrorCodes::CANNOT_FORMAT_DATETIME, - "Value cannot be represented as date because it's out of range"); + return false; const auto a = modified_julian_day + 678575; - const auto quad_cent = gd::div(a, 146097); - const auto b = gd::mod(a, 146097); - const auto cent = gd::min(gd::div(b, 36524), 3); + const auto quad_cent = div(a, 146097); + const auto b = mod(a, 146097); + const auto cent = min(div(b, 36524), 3); const auto c = b - cent * 36524; - const auto quad = gd::div(c, 1461); - const auto d = gd::mod(c, 1461); - const auto y = gd::min(gd::div(d, 365), 3); + const auto quad = div(c, 1461); + const auto d = mod(c, 1461); + const auto y = min(div(d, 365), 3); day_of_year_ = d - y * 365 + 1; year_ = static_cast(quad_cent * 400 + cent * 100 + quad * 4 + y + 1); + + return true; +} + + +OrdinalDate::OrdinalDate(int32_t year, uint16_t day_of_year) +{ + init(year, day_of_year); +} + +OrdinalDate::OrdinalDate(int64_t modified_julian_day) +{ + init(modified_julian_day); } int64_t OrdinalDate::toModifiedJulianDay() const noexcept { const auto y = year_ - 1; + return day_of_year_ + 365 * y - + gd::div(y, 4) - - gd::div(y, 100) - + gd::div(y, 400) + + div(y, 4) + - div(y, 100) + + div(y, 400) - 678576; } @@ -246,7 +350,7 @@ MonthDay::MonthDay(bool is_leap_year, uint16_t day_of_year) uint16_t d = day_of_year; while (true) { - const auto len = gd::monthLength(is_leap_year, month_); + const auto len = monthLength(is_leap_year, month_); if (d <= len) break; ++month_; @@ -257,7 +361,7 @@ MonthDay::MonthDay(bool is_leap_year, uint16_t day_of_year) uint16_t MonthDay::dayOfYear(bool is_leap_year) const { - if (day_of_month_ < 1 || day_of_month_ > gd::monthLength(is_leap_year, month_)) + if (day_of_month_ < 1 || day_of_month_ > monthLength(is_leap_year, month_)) { throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid day of month: {}{}-{}", (is_leap_year ? "leap, " : "non-leap, "), month_, day_of_month_); diff --git a/src/Functions/GregorianDate.h b/src/Functions/GregorianDate.h index 4a0cbec5afe..2528223443e 100644 --- a/src/Functions/GregorianDate.h +++ b/src/Functions/GregorianDate.h @@ -13,11 +13,19 @@ class WriteBuffer; class GregorianDate { public: + GregorianDate() {} + + void init(ReadBuffer & in); + bool tryInit(ReadBuffer & in); + /** Construct from date in text form 'YYYY-MM-DD' by reading from * ReadBuffer. */ explicit GregorianDate(ReadBuffer & in); + void init(int64_t modified_julian_day); + bool tryInit(int64_t modified_julian_day); + /** Construct from Modified Julian Day. 
The type T is an * integral type which should be at least 32 bits wide, and * should preferably signed. @@ -29,6 +37,7 @@ public: * signed. */ int64_t toModifiedJulianDay() const; + bool tryToModifiedJulianDay(int64_t & res) const; /** Write the date in text form 'YYYY-MM-DD' to a buffer. */ @@ -75,6 +84,14 @@ private: class OrdinalDate { public: + OrdinalDate() {} + + void init(int32_t year, uint16_t day_of_year); + bool tryInit(int32_t year, uint16_t day_of_year); + + void init(int64_t modified_julian_day); + bool tryInit(int64_t modified_julian_day); + OrdinalDate(int32_t year, uint16_t day_of_year); /** Construct from Modified Julian Day. The type T is an diff --git a/src/Functions/fromModifiedJulianDay.cpp b/src/Functions/fromModifiedJulianDay.cpp index bad0696e503..8736b1fce7f 100644 --- a/src/Functions/fromModifiedJulianDay.cpp +++ b/src/Functions/fromModifiedJulianDay.cpp @@ -57,14 +57,14 @@ namespace DB { if constexpr (nullOnErrors) { - const GregorianDate gd(vec_from[i]); - (*vec_null_map_to)[i] = gd.tryWrite(write_buffer); + GregorianDate gd; + (*vec_null_map_to)[i] = !(gd.tryInit(vec_from[i]) && gd.tryWrite(write_buffer)); writeChar(0, write_buffer); offsets_to[i] = write_buffer.count(); } else { - const GregorianDate gd(vec_from[i]); + GregorianDate gd(vec_from[i]); gd.write(write_buffer); writeChar(0, write_buffer); offsets_to[i] = write_buffer.count(); diff --git a/src/Functions/toModifiedJulianDay.cpp b/src/Functions/toModifiedJulianDay.cpp index f800b279385..5b4cd34141c 100644 --- a/src/Functions/toModifiedJulianDay.cpp +++ b/src/Functions/toModifiedJulianDay.cpp @@ -78,22 +78,13 @@ namespace DB if constexpr (nullOnErrors) { - try - { - const GregorianDate date(read_buffer); - vec_to[i] = static_cast(date.toModifiedJulianDay()); - vec_null_map_to[i] = false; - } - catch (const Exception & e) - { - if (e.code() == ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED || e.code() == ErrorCodes::CANNOT_PARSE_DATE) - { - vec_to[i] = static_cast(0); - vec_null_map_to[i] = true; - } - else - throw; - } + GregorianDate date; + + int64_t res = 0; + bool success = date.tryInit(read_buffer) && date.tryToModifiedJulianDay(res); + + vec_to[i] = static_cast(res); + vec_null_map_to[i] = !success; } else { From 39199fd1168816c0e46da0011e21ad20573517e8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 5 Jul 2023 00:49:10 +0200 Subject: [PATCH 1207/1997] Update test --- .../0_stateless/02789_object_type_invalid_num_of_rows.reference | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference b/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference index 8b137891791..e69de29bb2d 100644 --- a/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference +++ b/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference @@ -1 +0,0 @@ - From 94f0bd6b84dbaa3961ac689ecc1354a9385ca339 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 5 Jul 2023 02:12:38 +0300 Subject: [PATCH 1208/1997] Update 00474_readonly_settings.sh --- tests/queries/0_stateless/00474_readonly_settings.sh | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/00474_readonly_settings.sh b/tests/queries/0_stateless/00474_readonly_settings.sh index 9432579f9e6..3a857d81a74 100755 --- a/tests/queries/0_stateless/00474_readonly_settings.sh +++ b/tests/queries/0_stateless/00474_readonly_settings.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-parallel CURDIR=$(cd 
"$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -10,12 +11,8 @@ $CLICKHOUSE_CLIENT --query="select toUInt64(pow(2, 62)) as value format JSON" -- $CLICKHOUSE_CLIENT --readonly=1 --multiquery --query="set output_format_json_quote_64bit_integers=1 ; select toUInt64(pow(2, 63)) as value format JSON" --server_logs_file=/dev/null 2>&1 | grep -o -q 'value\|Cannot modify .* setting in readonly mode' && echo "OK" || echo "FAIL" $CLICKHOUSE_CLIENT --readonly=1 --multiquery --query="set output_format_json_quote_64bit_integers=0 ; select toUInt64(pow(2, 63)) as value format JSON" --server_logs_file=/dev/null 2>&1 | grep -o -q 'value\|Cannot modify .* setting in readonly mode' && echo "OK" || echo "FAIL" - ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=SELECT+toUInt64(pow(2,+63))+as+value+format+JSON&output_format_json_quote_64bit_integers=1" | grep value ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=SELECT+toUInt64(pow(2,+63))+as+value+format+JSON&output_format_json_quote_64bit_integers=0" | grep value -#${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=readonly&session_timeout=3600" -d 'SET readonly = 1' - -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=readonly&query=SELECT+toUInt64(pow(2,+63))+as+value+format+JSON&output_format_json_quote_64bit_integers=1" 2>&1 | grep -o -q 'value\|Cannot modify .* setting in readonly mode.' && echo "OK" || echo "FAIL" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=readonly&query=SELECT+toUInt64(pow(2,+63))+as+value+format+JSON&output_format_json_quote_64bit_integers=1" 2>&1 | grep -o -q 'value\|Cannot modify .* setting in readonly mode' && echo "OK" || echo "FAIL" ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=readonly&query=SELECT+toUInt64(pow(2,+63))+as+value+format+JSON&output_format_json_quote_64bit_integers=0" 2>&1 | grep -o -q 'value\|Cannot modify .* setting in readonly mode' && echo "OK" || echo "FAIL" - From ede63a0f4e8239c56999a72bfe3af3f59e63dfb2 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 5 Jul 2023 00:30:28 +0000 Subject: [PATCH 1209/1997] fix drop column with enabled sparse columns --- src/Storages/MergeTree/MutateTask.cpp | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index f4a071b8f27..f23ef82fca8 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -67,7 +67,9 @@ static void splitAndModifyMutationCommands( if (!isWidePart(part) || !isFullPartStorage(part->getDataPartStorage())) { - NameSet mutated_columns, dropped_columns; + NameSet mutated_columns; + NameSet dropped_columns; + for (const auto & command : commands) { if (command.type == MutationCommand::Type::MATERIALIZE_INDEX @@ -258,6 +260,10 @@ getColumnsForNewDataPart( storage_columns.emplace_back(column); } + NameSet storage_columns_set; + for (const auto & [name, _] : storage_columns) + storage_columns_set.insert(name); + for (const auto & command : all_commands) { if (command.type == MutationCommand::UPDATE) @@ -292,13 +298,15 @@ getColumnsForNewDataPart( SerializationInfoByName new_serialization_infos; for (const auto & [name, old_info] : serialization_infos) { - if (removed_columns.contains(name)) - continue; - auto it = renamed_columns_from_to.find(name); auto new_name = it == renamed_columns_from_to.end() ? 
name : it->second; - if (!updated_header.has(new_name)) + if (!storage_columns_set.contains(new_name)) + continue; + + /// In compact part we read all columns and all of them are in @updated_header. + /// But in wide part we must keep serialization infos for columns that are not touched by mutation. + if (!updated_header.has(new_name) && isWidePart(source_part)) { new_serialization_infos.emplace(new_name, old_info); continue; From 759b8b9a7685f566a88e86f5db5ebccb0db34869 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 5 Jul 2023 07:17:52 +0000 Subject: [PATCH 1210/1997] Fix more tests --- .../00941_system_columns_race_condition.sh | 46 +++++++++++-------- .../0_stateless/02470_mutation_sync_race.sh | 8 ++-- 2 files changed, 32 insertions(+), 22 deletions(-) diff --git a/tests/queries/0_stateless/00941_system_columns_race_condition.sh b/tests/queries/0_stateless/00941_system_columns_race_condition.sh index 69dfb30cd2c..4f2cd6ee91b 100755 --- a/tests/queries/0_stateless/00941_system_columns_race_condition.sh +++ b/tests/queries/0_stateless/00941_system_columns_race_condition.sh @@ -14,35 +14,43 @@ $CLICKHOUSE_CLIENT -q "CREATE TABLE alter_table (a UInt8, b Int16, c Float32, d function thread1() { - # NOTE: database = $CLICKHOUSE_DATABASE is unwanted - while true; do $CLICKHOUSE_CLIENT --query "SELECT name FROM system.columns UNION ALL SELECT name FROM system.columns FORMAT Null"; done + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do + # NOTE: database = $CLICKHOUSE_DATABASE is unwanted + $CLICKHOUSE_CLIENT --query "SELECT name FROM system.columns UNION ALL SELECT name FROM system.columns FORMAT Null"; + done } function thread2() { - while true; do $CLICKHOUSE_CLIENT -n --query "ALTER TABLE alter_table ADD COLUMN h String; ALTER TABLE alter_table MODIFY COLUMN h UInt64; ALTER TABLE alter_table DROP COLUMN h;"; done + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do + $CLICKHOUSE_CLIENT -n --query "ALTER TABLE alter_table ADD COLUMN h String; ALTER TABLE alter_table MODIFY COLUMN h UInt64; ALTER TABLE alter_table DROP COLUMN h;"; + done } # https://stackoverflow.com/questions/9954794/execute-a-shell-function-with-timeout export -f thread1; export -f thread2; -timeout 15 bash -c thread1 2> /dev/null & -timeout 15 bash -c thread1 2> /dev/null & -timeout 15 bash -c thread1 2> /dev/null & -timeout 15 bash -c thread1 2> /dev/null & -timeout 15 bash -c thread2 2> /dev/null & -timeout 15 bash -c thread2 2> /dev/null & -timeout 15 bash -c thread2 2> /dev/null & -timeout 15 bash -c thread2 2> /dev/null & -timeout 15 bash -c thread1 2> /dev/null & -timeout 15 bash -c thread1 2> /dev/null & -timeout 15 bash -c thread1 2> /dev/null & -timeout 15 bash -c thread1 2> /dev/null & -timeout 15 bash -c thread2 2> /dev/null & -timeout 15 bash -c thread2 2> /dev/null & -timeout 15 bash -c thread2 2> /dev/null & -timeout 15 bash -c thread2 2> /dev/null & +TIMEOUT=15 + +thread1 $TIMEOUT 2> /dev/null & +thread1 $TIMEOUT 2> /dev/null & +thread1 $TIMEOUT 2> /dev/null & +thread1 $TIMEOUT 2> /dev/null & +thread2 $TIMEOUT 2> /dev/null & +thread2 $TIMEOUT 2> /dev/null & +thread2 $TIMEOUT 2> /dev/null & +thread2 $TIMEOUT 2> /dev/null & +thread1 $TIMEOUT 2> /dev/null & +thread1 $TIMEOUT 2> /dev/null & +thread1 $TIMEOUT 2> /dev/null & +thread1 $TIMEOUT 2> /dev/null & +thread2 $TIMEOUT 2> /dev/null & +thread2 $TIMEOUT 2> /dev/null & +thread2 $TIMEOUT 2> /dev/null & +thread2 $TIMEOUT 2> /dev/null & wait diff --git 
a/tests/queries/0_stateless/02470_mutation_sync_race.sh b/tests/queries/0_stateless/02470_mutation_sync_race.sh index 6c259e46cb1..37e99663ab5 100755 --- a/tests/queries/0_stateless/02470_mutation_sync_race.sh +++ b/tests/queries/0_stateless/02470_mutation_sync_race.sh @@ -12,7 +12,11 @@ $CLICKHOUSE_CLIENT -q "insert into src values (0)" function thread() { + local TIMELIMIT=$((SECONDS+$1)) for i in $(seq 1000); do + if [ $SECONDS -ge "$TIMELIMIT" ]; then + return + fi $CLICKHOUSE_CLIENT -q "alter table src detach partition tuple()" $CLICKHOUSE_CLIENT -q "alter table src attach partition tuple()" $CLICKHOUSE_CLIENT -q "alter table src update A = ${i} where 1 settings mutations_sync=2" @@ -20,8 +24,6 @@ function thread() done } -export -f thread; - TIMEOUT=30 -timeout $TIMEOUT bash -c thread || true +thread $TIMEOUT || true \ No newline at end of file From bcc569b9e322d0456e25dbd99e0a951a5831ecce Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Mon, 3 Jul 2023 18:14:02 +0200 Subject: [PATCH 1211/1997] Add temporary pin for docker-ce --- docker/test/integration/runner/Dockerfile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index 38d8ed5f223..d6c127c8421 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -47,11 +47,13 @@ ENV TZ=Etc/UTC RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone ENV DOCKER_CHANNEL stable +# Unpin the docker version after the release 24.0.3 is released +# https://github.com/moby/moby/issues/45770#issuecomment-1618255130 RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add - \ && add-apt-repository "deb https://download.docker.com/linux/ubuntu $(lsb_release -c -s) ${DOCKER_CHANNEL}" \ && apt-get update \ && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \ - docker-ce \ + docker-ce='5:23.*' \ && rm -rf \ /var/lib/apt/lists/* \ /var/cache/debconf \ From b3edfbaab63af0b2168ce5d68ce63264e2093de4 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 4 Jul 2023 21:10:25 +0000 Subject: [PATCH 1212/1997] Update docs --- docs/en/sql-reference/statements/show.md | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/docs/en/sql-reference/statements/show.md b/docs/en/sql-reference/statements/show.md index b5bacef7b1f..1a1e4dbd2c7 100644 --- a/docs/en/sql-reference/statements/show.md +++ b/docs/en/sql-reference/statements/show.md @@ -289,8 +289,8 @@ The statement produces a result table with the following structure: - table - The name of the table. (String) - non_unique - Always `1` as ClickHouse does not support uniqueness constraints. (UInt8) - key_name - The name of the index, `PRIMARY` if the index is a primary key index. (String) -- seq_in_index - Currently always `1`. (In MySQL, this field denotes the position of the column in a non-functional index.) (UInt8) -- column_name - Currently always `` (empty string), also see field `expression`. (In MySQL, this field denotes the name of the column in a non-functional index.) (String) +- column_name - For a primary key index, the name of the column. For a data skipping index: '' (empty string), see field "expression". (String) +- seq_in_index - For a primary key index, the position of the column starting from `1`. For a data skipping index: always `1`. 
(UInt8) - collation - The sorting of the column in the index: `A` if ascending, `D` if descending, `NULL` if unsorted. (Nullable(String)) - cardinality - An estimation of the index cardinality (number of unique values in the index). Currently always 0. (UInt64) - sub_part - Always `NULL` because ClickHouse does not support index prefixes like MySQL. (Nullable(String)) @@ -300,7 +300,7 @@ The statement produces a result table with the following structure: - comment - Additional information about the index, currently always `` (empty string). (String) - index_comment - `` (empty string) because indexes in ClickHouse cannot have a `COMMENT` field (like in MySQL). (String) - visible - If the index is visible to the optimizer, always `YES`. (String) -- expression - The index expression. (In MySQL this field is only used for functional-indexes.) (String) +- expression - For a data skipping index, the index expression. For a primary key index: '' (empty string). (String) **Examples** @@ -313,13 +313,14 @@ SHOW INDEX FROM 'tbl' Result: ``` text -┌─table─┬─non_unique─┬─key_name─┬─seq_in_index─┬─column_name─┬─collation─┬─cardinality─┬─sub_part─┬─packed─┬─null─┬─index_type───┬─comment─┬─index_comment─┬─visible─┬─expression─┐ -│ tbl │ 1 │ blf_idx │ 1 │ │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ BLOOM_FILTER │ │ │ YES │ d, b │ -│ tbl │ 1 │ mm1_idx │ 1 │ │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ MINMAX │ │ │ YES │ a, c, d │ -│ tbl │ 1 │ mm2_idx │ 1 │ │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ MINMAX │ │ │ YES │ c, d, e │ -│ tbl │ 1 │ PRIMARY │ 1 │ │ A │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ PRIMARY │ │ │ YES │ c, a │ -│ tbl │ 1 │ set_idx │ 1 │ │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ SET │ │ │ YES │ e │ -└───────┴────────────┴──────────┴──────────────┴─────────────┴───────────┴─────────────┴──────────┴────────┴──────┴──────────────┴─────────┴───────────────┴─────────┴────────────┘ +┌─table─┬─non_unique─┬─key_name─┬─column_name─┬─seq_in_index─┬─collation─┬─cardinality─┬─sub_part─┬─packed─┬─null─┬─index_type───┬─comment─┬─index_comment─┬─visible─┬─expression─┐ +│ tbl │ 1 │ blf_idx │ 1 │ 1 │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ BLOOM_FILTER │ │ │ YES │ d, b │ +│ tbl │ 1 │ mm1_idx │ 1 │ 1 │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ MINMAX │ │ │ YES │ a, c, d │ +│ tbl │ 1 │ mm2_idx │ 1 │ 1 │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ MINMAX │ │ │ YES │ c, d, e │ +│ tbl │ 1 │ PRIMARY │ c │ 1 │ A │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ PRIMARY │ │ │ YES │ │ +│ tbl │ 1 │ PRIMARY │ a │ 2 │ A │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ PRIMARY │ │ │ YES │ │ +│ tbl │ 1 │ set_idx │ 1 │ 1 │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ SET │ │ │ YES │ e │ +└───────┴────────────┴──────────┴─────────────┴──────────────┴───────────┴─────────────┴──────────┴────────┴──────┴──────────────┴─────────┴───────────────┴─────────┴────────────┘ ``` **See also** From e3796e30546a8a56ba06d76ae57317b5fc1abd7c Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 5 Jul 2023 09:01:09 +0000 Subject: [PATCH 1213/1997] Update ORDER BY for more stable test results --- src/Interpreters/InterpreterShowIndexesQuery.cpp | 2 +- tests/queries/0_stateless/02724_show_indexes.reference | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/InterpreterShowIndexesQuery.cpp b/src/Interpreters/InterpreterShowIndexesQuery.cpp index 2f65cc3ec3a..35f32a79310 100644 --- a/src/Interpreters/InterpreterShowIndexesQuery.cpp +++ b/src/Interpreters/InterpreterShowIndexesQuery.cpp @@ -80,7 +80,7 @@ FROM ( database = '{0}' AND table = '{1}')) {2} -ORDER BY index_type, expression;)", database, table, where_expression); +ORDER BY index_type, 
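-- column_name and seq_in_index keep multi-column primary keys in a deterministic order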
expression, column_name, seq_in_index;)", database, table, where_expression); /// Sorting is strictly speaking not necessary but 1. it is convenient for users, 2. SQL currently does not allow to /// sort the output of SHOW INDEXES otherwise (SELECT * FROM (SHOW INDEXES ...) ORDER BY ...) is rejected) and 3. some diff --git a/tests/queries/0_stateless/02724_show_indexes.reference b/tests/queries/0_stateless/02724_show_indexes.reference index 063105e3332..cee0598d625 100644 --- a/tests/queries/0_stateless/02724_show_indexes.reference +++ b/tests/queries/0_stateless/02724_show_indexes.reference @@ -20,8 +20,8 @@ tbl 1 set_idx 1 \N 0 \N \N \N SET YES e tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e -tbl 1 PRIMARY c 2 A 0 \N \N \N PRIMARY YES tbl 1 PRIMARY a 1 A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY c 2 A 0 \N \N \N PRIMARY YES tbl 1 set_idx 1 \N 0 \N \N \N SET YES e --- EXTENDED tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b @@ -40,8 +40,8 @@ NULL 1 PRIMARY c 1 A 0 \N \N \N PRIMARY YES tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e -tbl 1 PRIMARY c 2 A 0 \N \N \N PRIMARY YES tbl 1 PRIMARY a 1 A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY c 2 A 0 \N \N \N PRIMARY YES tbl 1 set_idx 1 \N 0 \N \N \N SET YES e --- Equally named table in other database tbl 1 mmi_idx 1 \N 0 \N \N \N MINMAX YES b From 9544c035b9d8b4646defd770b829715043b145d7 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Wed, 5 Jul 2023 11:15:31 +0200 Subject: [PATCH 1214/1997] Revert "refine" This reverts commit 7f1ee68c87160089d70f4cef04c975c38b01218e. --- src/Functions/DateTimeTransforms.h | 8 +++---- src/Functions/IFunction.h | 4 ---- ...OrDateTimeConverterWithPreimageVisitor.cpp | 21 ++++++++++--------- 3 files changed, 15 insertions(+), 18 deletions(-) diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index e59a9046277..84c71c89b11 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -734,11 +734,11 @@ struct ToYearImpl const DateLUTImpl & date_lut = DateLUT::instance(); - auto start_time = date_lut.makeDayNum(year, 1, 1); + auto start_time = date_lut.makeDateTime(year, 1, 1, 0, 0, 0); auto end_time = date_lut.addYears(start_time, 1); if (isDateOrDate32(type) || isDateTime(type) || isDateTime64(type)) - return {std::make_pair(Field(Int32(start_time)), Field(Int32(end_time)))}; + return {std::make_pair(Field(start_time), Field(end_time))}; else throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}. Should be Date, Date32, DateTime or DateTime64", @@ -1412,11 +1412,11 @@ struct ToYYYYMMImpl const DateLUTImpl & date_lut = DateLUT::instance(); - auto start_time = date_lut.makeDayNum(year, month, 1); + auto start_time = date_lut.makeDateTime(year, month, 1, 0, 0, 0); auto end_time = date_lut.addMonths(start_time, 1); if (isDateOrDate32(type) || isDateTime(type) || isDateTime64(type)) - return {std::make_pair(Field(Int32(start_time)), Field(Int32(end_time)))}; + return {std::make_pair(Field(start_time), Field(end_time))}; else throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}. 
Should be Date, Date32, DateTime or DateTime64", diff --git a/src/Functions/IFunction.h b/src/Functions/IFunction.h index 928475652f4..433cb61d04e 100644 --- a/src/Functions/IFunction.h +++ b/src/Functions/IFunction.h @@ -11,10 +11,6 @@ #include "config.h" -#if USE_EMBEDDED_COMPILER -# include -#endif - #include /// This file contains user interface for functions. diff --git a/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp b/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp index 9c2fdf6dee9..a377bb4bba6 100644 --- a/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp +++ b/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp @@ -4,7 +4,6 @@ #include #include #include -#include "base/DayNum.h" #include #include #include @@ -38,18 +37,20 @@ ASTPtr generateOptimizedDateFilterAST(const String & comparator, const NameAndTy const DateLUTImpl & date_lut = DateLUT::instance(); const String & column_name = column.name; + String start_date_or_date_time; + String end_date_or_date_time; - auto start_date = range.first.get(); - auto end_date = range.second.get(); - String start_date_or_date_time = date_lut.dateToString(ExtendedDayNum(static_cast(start_date))); - String end_date_or_date_time = date_lut.dateToString(ExtendedDayNum(static_cast(end_date))); - - if (isDateTime(column.type.get()) || isDateTime64(column.type.get())) + if (isDateOrDate32(column.type.get())) { - start_date_or_date_time += " 00:00:00"; - end_date_or_date_time += " 00:00:00"; + start_date_or_date_time = date_lut.dateToString(range.first.get()); + end_date_or_date_time = date_lut.dateToString(range.second.get()); } - else if (!isDateOrDate32(column.type.get())) return {}; + else if (isDateTime(column.type.get()) || isDateTime64(column.type.get())) + { + start_date_or_date_time = date_lut.timeToString(range.first.get()); + end_date_or_date_time = date_lut.timeToString(range.second.get()); + } + else [[unlikely]] return {}; if (comparator == "equals") { From d8a66a81233441676fdd8f0c786060c2b1aacd56 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 5 Jul 2023 17:49:01 +0800 Subject: [PATCH 1215/1997] fix asan error --- src/Functions/substringIndex.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/substringIndex.cpp b/src/Functions/substringIndex.cpp index d1791c9696b..5f3f054b624 100644 --- a/src/Functions/substringIndex.cpp +++ b/src/Functions/substringIndex.cpp @@ -208,7 +208,7 @@ namespace { size_t res_offset = res_data.size(); res_data.resize(res_offset + res_ref.size + 1); - memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], res_ref.data, res_ref.size); + memcpy(&res_data[res_offset], res_ref.data, res_ref.size); res_offset += res_ref.size; res_data[res_offset] = 0; ++res_offset; From 2e5643cc4133f207b46534a4cf8a7875d7c18a8e Mon Sep 17 00:00:00 2001 From: Han Fei Date: Wed, 5 Jul 2023 11:57:18 +0200 Subject: [PATCH 1216/1997] use UTC LUT --- src/Functions/DateTimeTransforms.h | 4 ++-- src/Functions/IFunction.h | 4 ++++ .../OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index 84c71c89b11..510a88db2b6 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -732,7 +732,7 @@ struct ToYearImpl auto year = point.get(); if (year < DATE_LUT_MIN_YEAR || year >= DATE_LUT_MAX_YEAR) return std::nullopt; - const 
DateLUTImpl & date_lut = DateLUT::instance(); + const DateLUTImpl & date_lut = DateLUT::instance("UTC"); auto start_time = date_lut.makeDateTime(year, 1, 1, 0, 0, 0); auto end_time = date_lut.addYears(start_time, 1); @@ -1410,7 +1410,7 @@ struct ToYYYYMMImpl if (year < DATE_LUT_MIN_YEAR || year > DATE_LUT_MAX_YEAR || month < 1 || month > 12 || (year == DATE_LUT_MAX_YEAR && month == 12)) return std::nullopt; - const DateLUTImpl & date_lut = DateLUT::instance(); + const DateLUTImpl & date_lut = DateLUT::instance("UTC"); auto start_time = date_lut.makeDateTime(year, month, 1, 0, 0, 0); auto end_time = date_lut.addMonths(start_time, 1); diff --git a/src/Functions/IFunction.h b/src/Functions/IFunction.h index 433cb61d04e..09758d59e4a 100644 --- a/src/Functions/IFunction.h +++ b/src/Functions/IFunction.h @@ -13,6 +13,10 @@ #include +#if USE_EMBEDDED_COMPILER +# include +#endif + /// This file contains user interface for functions. namespace llvm diff --git a/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp b/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp index a377bb4bba6..6a9251cec49 100644 --- a/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp +++ b/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp @@ -34,7 +34,7 @@ namespace ErrorCodes */ ASTPtr generateOptimizedDateFilterAST(const String & comparator, const NameAndTypePair & column, const std::pair& range) { - const DateLUTImpl & date_lut = DateLUT::instance(); + const DateLUTImpl & date_lut = DateLUT::instance("UTC"); const String & column_name = column.name; String start_date_or_date_time; From e5fa845dd30ca7434be17844b5795ea2f7599cfb Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 5 Jul 2023 12:11:08 +0200 Subject: [PATCH 1217/1997] Fix --- tests/config/config.d/storage_conf.xml | 9 --------- 1 file changed, 9 deletions(-) diff --git a/tests/config/config.d/storage_conf.xml b/tests/config/config.d/storage_conf.xml index 402ffc77f2b..1c7f7f8d5d3 100644 --- a/tests/config/config.d/storage_conf.xml +++ b/tests/config/config.d/storage_conf.xml @@ -1,7 +1,6 @@ - s3 s3_disk/ @@ -10,7 +9,6 @@ clickhouse 20000 - cache s3_disk @@ -88,13 +86,6 @@ - - -
- s3_cache_small -
-
-
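
A brief aside on the preimage optimization touched by the Revert "refine" and "use UTC LUT" patches above: OptimizeDateOrDateTimeConverterWithPreimageVisitor rewrites a monotonic date function applied to a column into an equivalent half-open range over the raw column, so the condition stays usable for index analysis. Below is a minimal sketch of the intended rewrite, assuming a hypothetical table `t` with a DateTime column `ts` (both names are illustrative, not taken from these patches, and the rewritten form shown is approximate):

``` sql
-- Predicate as the user writes it:
SELECT count() FROM t WHERE toYear(ts) = 2023;

-- For the "equals" comparator the visitor substitutes a half-open
-- [start, end) range computed from the date LUT, roughly:
SELECT count() FROM t WHERE ts >= '2023-01-01 00:00:00' AND ts < '2024-01-01 00:00:00';
```

Pinning the LUT with `DateLUT::instance("UTC")`, as the second patch does, keeps the generated range boundaries independent of the server's default time zone.
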
From 47cffa6f1ed6832e38d30a95f2c63e26506b0a10 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 5 Jul 2023 09:40:53 +0000 Subject: [PATCH 1218/1997] Properly check the first part disk --- tests/integration/test_multiple_disks/test.py | 86 +++++++++---------- 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/tests/integration/test_multiple_disks/test.py b/tests/integration/test_multiple_disks/test.py index b5606ee8bc2..c0fbe39196d 100644 --- a/tests/integration/test_multiple_disks/test.py +++ b/tests/integration/test_multiple_disks/test.py @@ -49,6 +49,18 @@ def start_cluster(): cluster.shutdown() +def get_oldest_part(node, table_name): + return node.query( + f"SELECT name FROM system.parts WHERE table = '{table_name}' and active = 1 ORDER BY modification_time LIMIT 1" + ).strip() + + +def get_disk_for_part(node, table_name, part): + return node.query( + f"SELECT disk_name FROM system.parts WHERE table == '{table_name}' and active = 1 and name = '{part}' ORDER BY modification_time" + ).strip() + + def test_system_tables(start_cluster): expected_disks_data = [ { @@ -694,15 +706,13 @@ def test_jbod_overflow(start_cluster, name, engine): def test_background_move(start_cluster, name, engine): try: node1.query_with_retry( - """ + f""" CREATE TABLE IF NOT EXISTS {name} ( s1 String ) ENGINE = {engine} ORDER BY tuple() SETTINGS storage_policy='moving_jbod_with_external' - """.format( - name=name, engine=engine - ) + """ ) node1.query(f"SYSTEM STOP MERGES {name}") @@ -718,25 +728,27 @@ def test_background_move(start_cluster, name, engine): ) ) + first_part = get_oldest_part(node1, name) + used_disks = get_used_disks_for_table(node1, name) retry = 20 i = 0 - while not sum(1 for x in used_disks if x == "jbod1") <= 2 and i < retry: + # multiple moves can be assigned in parallel so we can move later parts before the oldest + # we need to wait explicitly until the oldest part is moved + while get_disk_for_part(node1, name, first_part) != "external" and i < retry: time.sleep(0.5) - used_disks = get_used_disks_for_table(node1, name) i += 1 + used_disks = get_used_disks_for_table(node1, name) assert sum(1 for x in used_disks if x == "jbod1") <= 2 # first (oldest) part was moved to external - assert used_disks[0] == "external" + assert get_disk_for_part(node1, name, first_part) == "external" node1.query("SYSTEM FLUSH LOGS") path = node1.query( - "SELECT path_on_disk FROM system.part_log WHERE table = '{}' AND event_type='MovePart' AND part_name = 'all_1_1_0'".format( - name - ) + f"SELECT path_on_disk FROM system.part_log WHERE table = '{name}' AND event_type='MovePart' AND part_name = '{first_part}'" ) # first (oldest) part was moved to external @@ -762,36 +774,28 @@ def test_background_move(start_cluster, name, engine): def test_start_stop_moves(start_cluster, name, engine): try: node1.query_with_retry( - """ + f""" CREATE TABLE IF NOT EXISTS {name} ( s1 String ) ENGINE = {engine} ORDER BY tuple() SETTINGS storage_policy='moving_jbod_with_external' - """.format( - name=name, engine=engine - ) + """ ) - node1.query_with_retry("INSERT INTO {} VALUES ('HELLO')".format(name)) - node1.query_with_retry("INSERT INTO {} VALUES ('WORLD')".format(name)) + node1.query_with_retry(f"INSERT INTO {name} VALUES ('HELLO')") + node1.query_with_retry(f"INSERT INTO {name} VALUES ('WORLD')") used_disks = get_used_disks_for_table(node1, name) assert all(d == "jbod1" for d in used_disks), "All writes shoud go to jbods" - first_part = node1.query( - "SELECT name FROM system.parts WHERE table = '{}' and active 
= 1 ORDER BY modification_time LIMIT 1".format( - name - ) - ).strip() + first_part = get_oldest_part(node1, name) node1.query("SYSTEM STOP MOVES") with pytest.raises(QueryRuntimeException): node1.query( - "ALTER TABLE {} MOVE PART '{}' TO VOLUME 'external'".format( - name, first_part - ) + f"ALTER TABLE {name} MOVE PART '{first_part}' TO VOLUME 'external'" ) used_disks = get_used_disks_for_table(node1, name) @@ -801,24 +805,18 @@ def test_start_stop_moves(start_cluster, name, engine): node1.query("SYSTEM START MOVES") - node1.query( - "ALTER TABLE {} MOVE PART '{}' TO VOLUME 'external'".format( - name, first_part - ) - ) + node1.query(f"ALTER TABLE {name} MOVE PART '{first_part}' TO VOLUME 'external'") disk = node1.query( - "SELECT disk_name FROM system.parts WHERE table = '{}' and name = '{}' and active = 1".format( - name, first_part - ) + f"SELECT disk_name FROM system.parts WHERE table = '{name}' and name = '{first_part}' and active = 1" ).strip() assert disk == "external" - node1.query_with_retry("TRUNCATE TABLE {}".format(name)) + node1.query_with_retry(f"TRUNCATE TABLE {name}") - node1.query("SYSTEM STOP MOVES {}".format(name)) - node1.query("SYSTEM STOP MERGES {}".format(name)) + node1.query(f"SYSTEM STOP MOVES {name}") + node1.query(f"SYSTEM STOP MERGES {name}") for i in range(5): data = [] # 5MB in total @@ -831,6 +829,8 @@ def test_start_stop_moves(start_cluster, name, engine): ) ) + first_part = get_oldest_part(node1, name) + used_disks = get_used_disks_for_table(node1, name) retry = 5 @@ -843,23 +843,23 @@ def test_start_stop_moves(start_cluster, name, engine): # first (oldest) part doesn't move anywhere assert used_disks[0] == "jbod1" - node1.query("SYSTEM START MOVES {}".format(name)) + node1.query(f"SYSTEM START MOVES {name}") - # wait sometime until background backoff finishes + # multiple moves can be assigned in parallel so we can move later parts before the oldest + # we need to wait explicitly until the oldest part is moved retry = 60 i = 0 - while not sum(1 for x in used_disks if x == "jbod1") <= 2 and i < retry: + while get_disk_for_part(node1, name, first_part) != "external" and i < retry: time.sleep(1) - used_disks = get_used_disks_for_table(node1, name) i += 1 - node1.query("SYSTEM START MERGES {}".format(name)) + # first (oldest) part moved to external + assert get_disk_for_part(node1, name, first_part) == "external" + used_disks = get_used_disks_for_table(node1, name) assert sum(1 for x in used_disks if x == "jbod1") <= 2 - # first (oldest) part moved to external - assert used_disks[0] == "external" - + node1.query(f"SYSTEM START MERGES {name}") finally: node1.query_with_retry(f"DROP TABLE IF EXISTS {name} SYNC") From e6422f814418fce9e020e5f32029192e8f6a5dd7 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 5 Jul 2023 11:52:46 +0200 Subject: [PATCH 1219/1997] Delete comment, rename variable --- src/Interpreters/FilesystemCacheLog.h | 11 +---------- src/Interpreters/InterpreterSystemQuery.cpp | 8 ++++---- src/Parsers/ASTSystemQuery.cpp | 8 ++++---- src/Parsers/ASTSystemQuery.h | 4 ++-- src/Parsers/ParserSystemQuery.cpp | 4 ++-- 5 files changed, 13 insertions(+), 22 deletions(-) diff --git a/src/Interpreters/FilesystemCacheLog.h b/src/Interpreters/FilesystemCacheLog.h index d6dd00e5463..0d088a922e0 100644 --- a/src/Interpreters/FilesystemCacheLog.h +++ b/src/Interpreters/FilesystemCacheLog.h @@ -11,16 +11,7 @@ namespace DB { -/// -/// -------- Column --------- Type ------ -/// | event_date | DateTime | -/// | event_time | UInt64 | -/// | query_id | String | -/// 
| remote_file_path | String | -/// | segment_range | Tuple | -/// | read_type | String | -/// ------------------------------------- -/// + struct FilesystemCacheLogElement { enum class CacheType diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index e1ff8676bc7..1c2eb66923e 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -370,15 +370,15 @@ BlockIO InterpreterSystemQuery::execute() else { auto cache = FileCacheFactory::instance().getByName(query.filesystem_cache_name).cache; - if (query.delete_key.empty()) + if (query.key_to_drop.empty()) { cache->removeAllReleasable(); } else { - auto key = FileCacheKey::fromKeyString(query.delete_key); - if (query.delete_offset.has_value()) - cache->removeFileSegment(key, query.delete_offset.value()); + auto key = FileCacheKey::fromKeyString(query.key_to_drop); + if (query.offset_to_drop.has_value()) + cache->removeFileSegment(key, query.offset_to_drop.value()); else cache->removeKey(key); } diff --git a/src/Parsers/ASTSystemQuery.cpp b/src/Parsers/ASTSystemQuery.cpp index 9c5e7bff61e..22244a7075c 100644 --- a/src/Parsers/ASTSystemQuery.cpp +++ b/src/Parsers/ASTSystemQuery.cpp @@ -212,11 +212,11 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &, if (!filesystem_cache_name.empty()) { settings.ostr << (settings.hilite ? hilite_none : "") << " " << filesystem_cache_name; - if (!delete_key.empty()) + if (!key_to_drop.empty()) { - settings.ostr << (settings.hilite ? hilite_none : "") << " KEY " << delete_key; - if (delete_offset.has_value()) - settings.ostr << (settings.hilite ? hilite_none : "") << " OFFSET " << delete_offset.value(); + settings.ostr << (settings.hilite ? hilite_none : "") << " KEY " << key_to_drop; + if (offset_to_drop.has_value()) + settings.ostr << (settings.hilite ? 
hilite_none : "") << " OFFSET " << offset_to_drop.value(); } } } diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index ebc3e9cd430..6c81162f103 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -107,8 +107,8 @@ public: UInt64 seconds{}; String filesystem_cache_name; - std::string delete_key; - std::optional delete_offset; + std::string key_to_drop; + std::optional offset_to_drop; String backup_name; diff --git a/src/Parsers/ParserSystemQuery.cpp b/src/Parsers/ParserSystemQuery.cpp index ef71e994d56..09c86876b48 100644 --- a/src/Parsers/ParserSystemQuery.cpp +++ b/src/Parsers/ParserSystemQuery.cpp @@ -409,9 +409,9 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & res->filesystem_cache_name = ast->as()->value.safeGet(); if (ParserKeyword{"KEY"}.ignore(pos, expected) && ParserIdentifier().parse(pos, ast, expected)) { - res->delete_key = ast->as()->name(); + res->key_to_drop = ast->as()->name(); if (ParserKeyword{"OFFSET"}.ignore(pos, expected) && ParserLiteral().parse(pos, ast, expected)) - res->delete_offset = ast->as()->value.safeGet(); + res->offset_to_drop = ast->as()->value.safeGet(); } } if (!parseQueryWithOnCluster(res, pos, expected)) From 3dc0afd52f89f09fccb6b75dde4e92a17edb278a Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 5 Jul 2023 11:19:15 +0000 Subject: [PATCH 1220/1997] Correctly grep archives --- tests/ci/stress_tests.lib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/stress_tests.lib b/tests/ci/stress_tests.lib index 2b8ac77b952..190f3f39f9e 100644 --- a/tests/ci/stress_tests.lib +++ b/tests/ci/stress_tests.lib @@ -243,7 +243,7 @@ function check_logs_for_critical_errors() # Remove file fatal_messages.txt if it's empty [ -s /test_output/fatal_messages.txt ] || rm /test_output/fatal_messages.txt - rg -Fa "########################################" /test_output/* > /dev/null \ + rg -Faz "########################################" /test_output/* > /dev/null \ && echo -e "Killed by signal (output files)$FAIL" >> /test_output/test_results.tsv function get_gdb_log_context() From 7c3c48c8c44a1c53902dd24d540e25e2634a986b Mon Sep 17 00:00:00 2001 From: flynn Date: Wed, 5 Jul 2023 11:20:03 +0000 Subject: [PATCH 1221/1997] Fix use_structure_from_insertion_table_in_table_functions does not work for materialized and alias columns --- src/Interpreters/Context.cpp | 6 +++++- .../0_stateless/02811_insert_schema_inference.reference | 0 .../0_stateless/02811_insert_schema_inference.sql | 9 +++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02811_insert_schema_inference.reference create mode 100644 tests/queries/0_stateless/02811_insert_schema_inference.sql diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 7482450d529..a0abab349b3 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1519,7 +1519,11 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const uint64_t use_structure_from_insertion_table_in_table_functions = getSettingsRef().use_structure_from_insertion_table_in_table_functions; if (use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint() && hasInsertionTable()) { - const auto & insert_structure = DatabaseCatalog::instance().getTable(getInsertionTable(), shared_from_this())->getInMemoryMetadataPtr()->getColumns(); + const auto & insert_structure = DatabaseCatalog::instance() + 
.getTable(getInsertionTable(), shared_from_this()) + ->getInMemoryMetadataPtr() + ->getColumns() + .getInsertable(); DB::ColumnsDescription structure_hint; bool use_columns_from_insert_query = true; diff --git a/tests/queries/0_stateless/02811_insert_schema_inference.reference b/tests/queries/0_stateless/02811_insert_schema_inference.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02811_insert_schema_inference.sql b/tests/queries/0_stateless/02811_insert_schema_inference.sql new file mode 100644 index 00000000000..9de710047f7 --- /dev/null +++ b/tests/queries/0_stateless/02811_insert_schema_inference.sql @@ -0,0 +1,9 @@ +drop table if exists test; +create table test +( + n1 UInt32, + n2 UInt32 alias murmurHash3_32(n1), + n3 UInt32 materialized n2 + 1 +)engine=MergeTree order by n1; +insert into test select * from generateRandom() limit 10; +drop table test; From cf809c25cd0052b1a7d51aea8d5179a1c9b741d2 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 5 Jul 2023 11:24:29 +0000 Subject: [PATCH 1222/1997] fix CLEAR COLUMN query --- src/Storages/MergeTree/MutateTask.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index f23ef82fca8..1346d5937f7 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -301,14 +301,16 @@ getColumnsForNewDataPart( auto it = renamed_columns_from_to.find(name); auto new_name = it == renamed_columns_from_to.end() ? name : it->second; - if (!storage_columns_set.contains(new_name)) + /// Column can be removed only in this data part by CLEAR COLUMN query. + if (!storage_columns_set.contains(new_name) || removed_columns.contains(new_name)) continue; /// In compact part we read all columns and all of them are in @updated_header. /// But in wide part we must keep serialization infos for columns that are not touched by mutation. - if (!updated_header.has(new_name) && isWidePart(source_part)) + if (!updated_header.has(new_name)) { - new_serialization_infos.emplace(new_name, old_info); + if (isWidePart(source_part)) + new_serialization_infos.emplace(new_name, old_info); continue; } From 86014a60a308ec41c7416bdbbfe6b360dcf1617b Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Wed, 5 Jul 2023 11:42:02 +0000 Subject: [PATCH 1223/1997] Fixed case with spaces before delimiter --- src/Processors/Formats/Impl/CSVRowInputFormat.cpp | 1 + tests/queries/0_stateless/00301_csv.sh | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index edbc33fb3c3..9731b4ba465 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -341,6 +341,7 @@ bool CSVFormatReader::readField( if (is_last_file_column && format_settings.csv.ignore_extra_columns) { // Skip all fields to next line. 
+ skipWhitespacesAndTabs(*buf, format_settings.csv.allow_whitespace_or_tab_as_delimiter); while (checkChar(format_settings.csv.delimiter, *buf)) { skipField(); diff --git a/tests/queries/0_stateless/00301_csv.sh b/tests/queries/0_stateless/00301_csv.sh index dc354433af9..7657745e9f7 100755 --- a/tests/queries/0_stateless/00301_csv.sh +++ b/tests/queries/0_stateless/00301_csv.sh @@ -49,7 +49,7 @@ echo '"Hello", 1, "String1" "Hello", 3, "String3", "2016-01-13" "Hello", 4, , "2016-01-14" "Hello", 5, "String5", "2016-01-15", "2016-01-16" -"Hello", 6, "String6", "line with a +"Hello", 6, "String6" , "line with a break"' | $CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=1 --input_format_csv_empty_as_default=1 --input_format_csv_ignore_extra_columns=1 --query="INSERT INTO csv FORMAT CSV"; $CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY s, n"; $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; From 7cb7e138c13406f05d733323141649ae13a7f615 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 5 Jul 2023 14:16:46 +0200 Subject: [PATCH 1224/1997] Update --- .github/workflows/master.yml | 4 ++-- .github/workflows/pull_request.yml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 6996221e1aa..0fbcb95fc12 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -851,8 +851,8 @@ jobs: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinRISCV64: - needs: [ DockerHubPush ] - runs-on: [ self-hosted, builder ] + needs: [DockerHubPush] + runs-on: [self-hosted, builder] steps: - name: Set envs run: | diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index fe7c3bba410..f898e764915 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -912,8 +912,8 @@ jobs: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinRISCV64: - needs: [ DockerHubPush, FastTest, StyleCheck ] - runs-on: [ self-hosted, builder ] + needs: [DockerHubPush, FastTest, StyleCheck] + runs-on: [self-hosted, builder] steps: - name: Set envs run: | From e4a7229b0985dd6774ec8cd5d7540522301b4092 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 5 Jul 2023 15:18:41 +0300 Subject: [PATCH 1225/1997] Update analyzer_tech_debt.txt --- tests/analyzer_tech_debt.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index 0872033aed0..f7cc13dd2e2 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -36,6 +36,7 @@ 01455_shard_leaf_max_rows_bytes_to_read 01495_subqueries_in_with_statement 01504_rocksdb +01526_client_start_and_exit 01527_dist_sharding_key_dictGet_reload 01528_allow_nondeterministic_optimize_skip_unused_shards 01540_verbatim_partition_pruning @@ -50,6 +51,7 @@ 01624_soft_constraints 01651_bugs_from_15889 01656_test_query_log_factories_info +01676_clickhouse_client_autocomplete 01681_bloom_filter_nullable_column 01700_system_zookeeper_path_in 01710_projection_additional_filters From e957600d5c287c52f93d0f631587852ad0869035 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Wed, 5 Jul 2023 12:28:27 +0000 Subject: [PATCH 1226/1997] wip --- src/Parsers/ASTColumnDeclaration.cpp | 5 ++ src/Parsers/ASTColumnDeclaration.h | 1 + src/Parsers/ASTCreateQuery.h | 3 +- 
src/Parsers/ParserCreateQuery.cpp | 26 +++++- src/Parsers/ParserCreateQuery.h | 11 ++- .../02811_primary_key_in_columns.sql | 83 +++++++++++++++++++ 6 files changed, 126 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/02811_primary_key_in_columns.sql diff --git a/src/Parsers/ASTColumnDeclaration.cpp b/src/Parsers/ASTColumnDeclaration.cpp index c2396708a73..12d000d5e9f 100644 --- a/src/Parsers/ASTColumnDeclaration.cpp +++ b/src/Parsers/ASTColumnDeclaration.cpp @@ -44,6 +44,7 @@ ASTPtr ASTColumnDeclaration::clone() const res->ttl = ttl->clone(); res->children.push_back(res->ttl); } + if (collation) { res->collation = collation->clone(); @@ -76,6 +77,10 @@ void ASTColumnDeclaration::formatImpl(const FormatSettings & settings, FormatSta << (*null_modifier ? "" : "NOT ") << "NULL" << (settings.hilite ? hilite_none : ""); } + if (primary_key_specifier) + settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") + << "PRIMARY KEY" << (settings.hilite ? hilite_none : ""); + if (default_expression) { settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << default_specifier << (settings.hilite ? hilite_none : ""); diff --git a/src/Parsers/ASTColumnDeclaration.h b/src/Parsers/ASTColumnDeclaration.h index 45814551db8..9d486667911 100644 --- a/src/Parsers/ASTColumnDeclaration.h +++ b/src/Parsers/ASTColumnDeclaration.h @@ -21,6 +21,7 @@ public: ASTPtr codec; ASTPtr ttl; ASTPtr collation; + bool primary_key_specifier = false; String getID(char delim) const override { return "ColumnDeclaration" + (delim + name); } diff --git a/src/Parsers/ASTCreateQuery.h b/src/Parsers/ASTCreateQuery.h index 230996f610e..ae45a244a03 100644 --- a/src/Parsers/ASTCreateQuery.h +++ b/src/Parsers/ASTCreateQuery.h @@ -56,6 +56,7 @@ public: ASTExpressionList * constraints = nullptr; ASTExpressionList * projections = nullptr; IAST * primary_key = nullptr; + IAST * primary_key_from_columns = nullptr; String getID(char) const override { return "Columns definition"; } @@ -76,7 +77,7 @@ public: f(reinterpret_cast(&primary_key)); f(reinterpret_cast(&constraints)); f(reinterpret_cast(&projections)); - f(reinterpret_cast(&primary_key)); + f(reinterpret_cast(&primary_key_from_columns)); } }; diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index adf3513ba40..1941bafab0d 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -300,11 +300,21 @@ bool ParserTablePropertiesDeclarationList::parseImpl(Pos & pos, ASTPtr & node, E ASTPtr constraints = std::make_shared(); ASTPtr projections = std::make_shared(); ASTPtr primary_key; + ASTPtr primary_key_from_columns; for (const auto & elem : list->children) { - if (elem->as()) + if (auto *cd = elem->as()) + { + if(cd->primary_key_specifier) + { + if(!primary_key_from_columns) + primary_key_from_columns = makeASTFunction("tuple"); + auto column_identifier = std::make_shared(cd->name); + primary_key_from_columns->children.push_back(column_identifier); + } columns->children.push_back(elem); + } else if (elem->as()) indices->children.push_back(elem); else if (elem->as()) @@ -336,6 +346,8 @@ bool ParserTablePropertiesDeclarationList::parseImpl(Pos & pos, ASTPtr & node, E res->set(res->projections, projections); if (primary_key) res->set(res->primary_key, primary_key); + if (primary_key_from_columns) + res->set(res->primary_key_from_columns, primary_key_from_columns); node = res; @@ -599,6 +611,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe /// List of 
columns. if (s_lparen.ignore(pos, expected)) { + /// Columns and all table properties (indices, constraints, projections, primary_key) if (!table_properties_p.parse(pos, columns_list, expected)) return false; @@ -699,6 +712,17 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe query->storage->primary_key = query->columns_list->primary_key; } + if (query->columns_list && (query->columns_list->primary_key_from_columns)) + { + /// If engine is not set will use default one + if (!query->storage) + query->set(query->storage, std::make_shared()); + else if (query->storage->primary_key) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Multiple primary keys are not allowed."); + + query->storage->primary_key = query->columns_list->primary_key_from_columns; + } + tryGetIdentifierNameInto(as_database, query->as_database); tryGetIdentifierNameInto(as_table, query->as_table); query->set(query->select, select); diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index 5f79a4b68f6..09935e2b608 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -135,6 +135,7 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E ParserKeyword s_remove{"REMOVE"}; ParserKeyword s_type{"TYPE"}; ParserKeyword s_collate{"COLLATE"}; + ParserKeyword s_primary_key{"PRIMARY KEY"}; ParserExpression expr_parser; ParserStringLiteral string_literal_parser; ParserLiteral literal_parser; @@ -177,6 +178,7 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E ASTPtr codec_expression; ASTPtr ttl_expression; ASTPtr collation_expression; + bool primary_key_specifier = false; auto null_check_without_moving = [&]() -> bool { @@ -198,6 +200,7 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E && !s_ephemeral.checkWithoutMoving(pos, expected) && !s_alias.checkWithoutMoving(pos, expected) && !s_auto_increment.checkWithoutMoving(pos, expected) + && !s_primary_key.checkWithoutMoving(pos, expected) && (require_type || (!s_comment.checkWithoutMoving(pos, expected) && !s_codec.checkWithoutMoving(pos, expected)))) @@ -266,7 +269,6 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E ParserDataType().parse(tmp_pos, type, tmp_expected); } } - /// This will rule out unusual expressions like *, t.* that cannot appear in DEFAULT if (default_expression && !dynamic_cast(default_expression.get())) return false; @@ -305,6 +307,11 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E return false; } + if (s_primary_key.ignore(pos, expected)) + { + primary_key_specifier = true; + } + node = column_declaration; if (type) @@ -346,6 +353,8 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E column_declaration->children.push_back(std::move(collation_expression)); } + column_declaration->primary_key_specifier = primary_key_specifier; + return true; } diff --git a/tests/queries/0_stateless/02811_primary_key_in_columns.sql b/tests/queries/0_stateless/02811_primary_key_in_columns.sql new file mode 100644 index 00000000000..df25fdd14ab --- /dev/null +++ b/tests/queries/0_stateless/02811_primary_key_in_columns.sql @@ -0,0 +1,83 @@ +DROP TABLE IF EXISTS pk_test1; +DROP TABLE IF EXISTS pk_test2; +DROP TABLE IF EXISTS pk_test3; +DROP TABLE IF EXISTS pk_test4; +DROP TABLE IF EXISTS pk_test5; +DROP TABLE IF EXISTS pk_test6; +DROP TABLE IF EXISTS pk_test7; +DROP TABLE IF EXISTS pk_test8; +DROP TABLE IF EXISTS pk_test9; +DROP TABLE IF EXISTS pk_test10; +DROP TABLE IF EXISTS 
pk_test11; +DROP TABLE IF EXISTS pk_test12; +DROP TABLE IF EXISTS pk_test12; +DROP TABLE IF EXISTS pk_test13; +DROP TABLE IF EXISTS pk_test14; +DROP TABLE IF EXISTS pk_test15; +DROP TABLE IF EXISTS pk_test16; +DROP TABLE IF EXISTS pk_test17; +DROP TABLE IF EXISTS pk_test18; +DROP TABLE IF EXISTS pk_test19; +DROP TABLE IF EXISTS pk_test20; +DROP TABLE IF EXISTS pk_test21; +DROP TABLE IF EXISTS pk_test22; +DROP TABLE IF EXISTS pk_test23; + +SET default_table_engine=MergeTree; + +CREATE TABLE pk_test1 (String a PRIMARY KEY, String b, String c); +CREATE TABLE pk_test2 (String a PRIMARY KEY, String b PRIMARY KEY, String c); +CREATE TABLE pk_test3 (String a PRIMARY KEY, String b PRIMARY KEY, String c PRIMARY KEY); + +CREATE TABLE pk_test4 (String a, String b PRIMARY KEY, String c PRIMARY KEY); +CREATE TABLE pk_test5 (String a, String b PRIMARY KEY, String c); +CREATE TABLE pk_test6 (String a, String b, String c PRIMARY KEY); + +CREATE TABLE pk_test7 (String a PRIMARY KEY, String b, String c, PRIMARY KEY (a)); +CREATE TABLE pk_test8 (String a PRIMARY KEY, String b PRIMARY KEY, String c, PRIMARY KEY (a)); +CREATE TABLE pk_test9 (String a PRIMARY KEY, String b PRIMARY KEY, String c PRIMARY KEY, PRIMARY KEY (a)); + +CREATE TABLE pk_test10 (String a, String b PRIMARY KEY, String c PRIMARY KEY, PRIMARY KEY (a)); +CREATE TABLE pk_test11 (String a, String b PRIMARY KEY, String c, PRIMARY KEY (a)); +CREATE TABLE pk_test12 (String a, String b, String c PRIMARY KEY, PRIMARY KEY (a)); + +CREATE TABLE pk_test12 (String a PRIMARY KEY, String b, String c) PRIMARY KEY (a,b,c); +CREATE TABLE pk_test13 (String a PRIMARY KEY, String b PRIMARY KEY, String c) PRIMARY KEY (a,b,c); +CREATE TABLE pk_test14 (String a PRIMARY KEY, String b PRIMARY KEY, String c PRIMARY KEY) PRIMARY KEY (a,b,c); + +CREATE TABLE pk_test15 (String a, String b PRIMARY KEY, String c PRIMARY KEY) PRIMARY KEY (a,b,c); +CREATE TABLE pk_test16 (String a, String b PRIMARY KEY, String c) PRIMARY KEY (a,b,c); +CREATE TABLE pk_test17 (String a, String b, String c PRIMARY KEY) PRIMARY KEY (a,b,c); + +CREATE TABLE pk_test18 (String a PRIMARY KEY, String b, String c) ORDER BY (a,b,c); +CREATE TABLE pk_test19 (String a PRIMARY KEY, String b PRIMARY KEY, String c) ORDER BY (a,b,c); +CREATE TABLE pk_test20 (String a PRIMARY KEY, String b PRIMARY KEY, String c PRIMARY KEY) ORDER BY (a,b,c); + +CREATE TABLE pk_test21 (String a, String b PRIMARY KEY, String c PRIMARY KEY) ORDER BY (a,b,c); +CREATE TABLE pk_test22 (String a, String b PRIMARY KEY, String c) ORDER BY (a,b,c); +CREATE TABLE pk_test23 (String a, String b, String c PRIMARY KEY) ORDER BY (a,b,c); + +DROP TABLE IF EXISTS pk_test1; +DROP TABLE IF EXISTS pk_test2; +DROP TABLE IF EXISTS pk_test3; +DROP TABLE IF EXISTS pk_test4; +DROP TABLE IF EXISTS pk_test5; +DROP TABLE IF EXISTS pk_test6; +DROP TABLE IF EXISTS pk_test7; +DROP TABLE IF EXISTS pk_test8; +DROP TABLE IF EXISTS pk_test9; +DROP TABLE IF EXISTS pk_test10; +DROP TABLE IF EXISTS pk_test11; +DROP TABLE IF EXISTS pk_test12; +DROP TABLE IF EXISTS pk_test12; +DROP TABLE IF EXISTS pk_test13; +DROP TABLE IF EXISTS pk_test14; +DROP TABLE IF EXISTS pk_test15; +DROP TABLE IF EXISTS pk_test16; +DROP TABLE IF EXISTS pk_test17; +DROP TABLE IF EXISTS pk_test18; +DROP TABLE IF EXISTS pk_test19; +DROP TABLE IF EXISTS pk_test20; +DROP TABLE IF EXISTS pk_test21; +DROP TABLE IF EXISTS pk_test22; +DROP TABLE IF EXISTS pk_test23; \ No newline at end of file From 5512c307db1d43b5902e00ec13fd007e0882a82c Mon Sep 17 00:00:00 2001 From: Alexander Gololobov 
<440544+davenger@users.noreply.github.com> Date: Fri, 30 Jun 2023 13:23:23 +0200 Subject: [PATCH 1227/1997] system.jemalloc_bins table --- src/CMakeLists.txt | 5 +- src/Storages/System/StorageSystemJemalloc.cpp | 125 ++++++++++++++++++ src/Storages/System/StorageSystemJemalloc.h | 34 +++++ src/Storages/System/attachSystemTables.cpp | 3 + 4 files changed, 164 insertions(+), 3 deletions(-) create mode 100644 src/Storages/System/StorageSystemJemalloc.cpp create mode 100644 src/Storages/System/StorageSystemJemalloc.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ca428fbff3a..f870993f080 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -206,11 +206,10 @@ add_library (clickhouse_new_delete STATIC Common/new_delete.cpp) target_link_libraries (clickhouse_new_delete PRIVATE clickhouse_common_io) if (TARGET ch_contrib::jemalloc) target_link_libraries (clickhouse_new_delete PRIVATE ch_contrib::jemalloc) + target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::jemalloc) + target_link_libraries (clickhouse_storages_system PRIVATE ch_contrib::jemalloc) endif() -if (TARGET ch_contrib::jemalloc) - target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::jemalloc) -endif() target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::sparsehash) add_subdirectory(Access/Common) diff --git a/src/Storages/System/StorageSystemJemalloc.cpp b/src/Storages/System/StorageSystemJemalloc.cpp new file mode 100644 index 00000000000..2cb666eb5c3 --- /dev/null +++ b/src/Storages/System/StorageSystemJemalloc.cpp @@ -0,0 +1,125 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "config.h" + +#if USE_JEMALLOC +# include +#endif + + +namespace DB +{ + +#if USE_JEMALLOC + +UInt64 getJeMallocValue(const char * name) +{ + UInt64 value{}; + size_t size = sizeof(value); + mallctl(name, &value, &size, nullptr, 0); + return value; +} + +void fillJemallocBins(MutableColumns & res_columns) +{ + /// Bins for small allocations + auto small_bins_count = getJeMallocValue("arenas.nbins"); + UInt16 bin_index = 0; + for (UInt64 bin = 0; bin < small_bins_count; ++bin, ++bin_index) + { + auto size = getJeMallocValue(fmt::format("arenas.bin.{}.size", bin).c_str()); + auto ndalloc = getJeMallocValue(fmt::format("stats.arenas.{}.bins.{}.ndalloc", MALLCTL_ARENAS_ALL, bin).c_str()); + auto nmalloc = getJeMallocValue(fmt::format("stats.arenas.{}.bins.{}.nmalloc", MALLCTL_ARENAS_ALL, bin).c_str()); + + size_t col_num = 0; + res_columns.at(col_num++)->insert(bin_index); + res_columns.at(col_num++)->insert(0); + res_columns.at(col_num++)->insert(size); + res_columns.at(col_num++)->insert(nmalloc); + res_columns.at(col_num++)->insert(ndalloc); + } + + /// Bins for large allocations + auto large_bins_count = getJeMallocValue("arenas.nlextents"); + for (UInt64 bin = 0; bin < large_bins_count; ++bin, ++bin_index) + { + auto size = getJeMallocValue(fmt::format("arenas.lextent.{}.size", bin).c_str()); + auto ndalloc = getJeMallocValue(fmt::format("stats.arenas.{}.lextents.{}.ndalloc", MALLCTL_ARENAS_ALL, bin).c_str()); + auto nmalloc = getJeMallocValue(fmt::format("stats.arenas.{}.lextents.{}.nmalloc", MALLCTL_ARENAS_ALL, bin).c_str()); + + size_t col_num = 0; + res_columns.at(col_num++)->insert(bin_index); + res_columns.at(col_num++)->insert(1); + res_columns.at(col_num++)->insert(size); + res_columns.at(col_num++)->insert(nmalloc); + res_columns.at(col_num++)->insert(ndalloc); + } +} + +#else + +void fillJemallocBins(MutableColumns &) +{ + 
LOG_INFO(&Poco::Logger::get("StorageSystemJemallocBins"), "jemalloc is not enabled"); +} + +#endif // USE_JEMALLOC + + +StorageSystemJemallocBins::StorageSystemJemallocBins(const StorageID & table_id_) + : IStorage(table_id_) +{ + StorageInMemoryMetadata storage_metadata; + ColumnsDescription desc; + auto columns = getNamesAndTypes(); + for (const auto & col : columns) + { + ColumnDescription col_desc(col.name, col.type); + desc.add(col_desc); + } + storage_metadata.setColumns(desc); + setInMemoryMetadata(storage_metadata); +} + +NamesAndTypesList StorageSystemJemallocBins::getNamesAndTypes() +{ + return { + { "index", std::make_shared() }, + { "large", std::make_shared() }, + { "size", std::make_shared() }, + { "nmalloc", std::make_shared() }, + { "ndalloc", std::make_shared() }, + }; +} + +Pipe StorageSystemJemallocBins::read( + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo &, + ContextPtr /*context*/, + QueryProcessingStage::Enum /*processed_stage*/, + const size_t /*max_block_size*/, + const size_t /*num_streams*/) +{ + storage_snapshot->check(column_names); + + auto header = storage_snapshot->metadata->getSampleBlockWithVirtuals(getVirtuals()); + MutableColumns res_columns = header.cloneEmptyColumns(); + + fillJemallocBins(res_columns); + + UInt64 num_rows = res_columns.at(0)->size(); + Chunk chunk(std::move(res_columns), num_rows); + + return Pipe(std::make_shared(std::move(header), std::move(chunk))); +} + +} diff --git a/src/Storages/System/StorageSystemJemalloc.h b/src/Storages/System/StorageSystemJemalloc.h new file mode 100644 index 00000000000..a4ac2fbcdcb --- /dev/null +++ b/src/Storages/System/StorageSystemJemalloc.h @@ -0,0 +1,34 @@ +#pragma once + +#include + + +namespace DB +{ + +class Context; + +class StorageSystemJemallocBins final : public IStorage +{ +public: + explicit StorageSystemJemallocBins(const StorageID & table_id_); + + std::string getName() const override { return "SystemJemallocBins"; } + + static NamesAndTypesList getNamesAndTypes(); + + Pipe read( + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + size_t num_streams) override; + + bool isSystemStorage() const override { return true; } + + bool supportsTransactions() const override { return true; } +}; + +} diff --git a/src/Storages/System/attachSystemTables.cpp b/src/Storages/System/attachSystemTables.cpp index 7d21d9e39d2..a9873c821ce 100644 --- a/src/Storages/System/attachSystemTables.cpp +++ b/src/Storages/System/attachSystemTables.cpp @@ -1,3 +1,4 @@ +#include "Storages/System/StorageSystemJemalloc.h" #include "config.h" #include @@ -82,6 +83,7 @@ #include #include #include +#include #ifdef OS_LINUX #include @@ -187,6 +189,7 @@ void attachSystemTablesServer(ContextPtr context, IDatabase & system_database, b attach(context, system_database, "certificates"); attach(context, system_database, "named_collections"); attach(context, system_database, "user_processes"); + attach(context, system_database, "jemalloc_bins"); if (has_zookeeper) { From ff0197543e568125c7f3c75f4930d750d741ff6d Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Mon, 3 Jul 2023 22:39:42 +0200 Subject: [PATCH 1228/1997] Basic test that stats are non-zero --- .../02810_system_jemalloc_bins.reference | 1 + .../0_stateless/02810_system_jemalloc_bins.sql | 13 +++++++++++++ 2 files changed, 14 
insertions(+) create mode 100644 tests/queries/0_stateless/02810_system_jemalloc_bins.reference create mode 100644 tests/queries/0_stateless/02810_system_jemalloc_bins.sql diff --git a/tests/queries/0_stateless/02810_system_jemalloc_bins.reference b/tests/queries/0_stateless/02810_system_jemalloc_bins.reference new file mode 100644 index 00000000000..50d4d226b46 --- /dev/null +++ b/tests/queries/0_stateless/02810_system_jemalloc_bins.reference @@ -0,0 +1 @@ +1 1 1 1 1 diff --git a/tests/queries/0_stateless/02810_system_jemalloc_bins.sql b/tests/queries/0_stateless/02810_system_jemalloc_bins.sql new file mode 100644 index 00000000000..8ecf47e51b5 --- /dev/null +++ b/tests/queries/0_stateless/02810_system_jemalloc_bins.sql @@ -0,0 +1,13 @@ +WITH + (SELECT value IN ('ON', '1') FROM system.build_options WHERE name = 'USE_JEMALLOC') AS jemalloc_enabled, + (SELECT count() FROM system.jemalloc_bins) AS total_bins, + (SELECT count() FROM system.jemalloc_bins WHERE large) AS large_bins, + (SELECT count() FROM system.jemalloc_bins WHERE NOT large) AS small_bins, + (SELECT sum(size * (nmalloc - ndalloc)) FROM system.jemalloc_bins WHERE large) AS large_allocated_bytes, + (SELECT sum(size * (nmalloc - ndalloc)) FROM system.jemalloc_bins WHERE NOT large) AS small_allocated_bytes +SELECT + (total_bins > 0) = jemalloc_enabled, + (large_bins > 0) = jemalloc_enabled, + (small_bins > 0) = jemalloc_enabled, + (large_allocated_bytes > 0) = jemalloc_enabled, + (small_allocated_bytes > 0) = jemalloc_enabled; From bb422b816894769860a60579aea04454f8f1c496 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Mon, 3 Jul 2023 23:23:06 +0200 Subject: [PATCH 1229/1997] Added doc --- .../operations/system-tables/jemalloc_bins.md | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 docs/en/operations/system-tables/jemalloc_bins.md diff --git a/docs/en/operations/system-tables/jemalloc_bins.md b/docs/en/operations/system-tables/jemalloc_bins.md new file mode 100644 index 00000000000..dfe2ddb01e2 --- /dev/null +++ b/docs/en/operations/system-tables/jemalloc_bins.md @@ -0,0 +1,45 @@ +--- +slug: /en/operations/system-tables/jemalloc_bins +--- +# jemalloc_bins + +Contains information about memory allocations done via jemalloc allocator in different size classes (bins) aggregated from all arenas. +These statistics might not be absolutely accurate because of thread local caching in jemalloc. + +Columns: + +- `index` (UInt64) — Index of the bin ordered by size +- `large` (Bool) — True for large allocations and False for small +- `size` (UInt64) — Size of allocations in this bin +- `nmalloc` (UInt64) — Number of allocations +- `ndalloc` (UInt64) — Number of deallocations + +**Example** + +Find the sizes of allocations that contributed the most to the current overall memory usage. 
+ +``` sql +SELECT + *, + nmalloc - ndalloc AS active_allocations, + size * active_allocations AS allocated_bytes +FROM system.jemalloc_bins +WHERE allocated_bytes > 0 +ORDER BY allocated_bytes DESC +LIMIT 10 +``` + +``` text +┌─index─┬─large─┬─────size─┬──nmalloc─┬──ndalloc─┬─active_allocations─┬─allocated_bytes─┐ +│ 82 │ 1 │ 50331648 │ 1 │ 0 │ 1 │ 50331648 │ +│ 10 │ 0 │ 192 │ 512336 │ 370710 │ 141626 │ 27192192 │ +│ 69 │ 1 │ 5242880 │ 6 │ 2 │ 4 │ 20971520 │ +│ 3 │ 0 │ 48 │ 16938224 │ 16559484 │ 378740 │ 18179520 │ +│ 28 │ 0 │ 4096 │ 122924 │ 119142 │ 3782 │ 15491072 │ +│ 61 │ 1 │ 1310720 │ 44569 │ 44558 │ 11 │ 14417920 │ +│ 39 │ 1 │ 28672 │ 1285 │ 913 │ 372 │ 10665984 │ +│ 4 │ 0 │ 64 │ 2837225 │ 2680568 │ 156657 │ 10026048 │ +│ 6 │ 0 │ 96 │ 2617803 │ 2531435 │ 86368 │ 8291328 │ +│ 36 │ 1 │ 16384 │ 22431 │ 21970 │ 461 │ 7553024 │ +└───────┴───────┴──────────┴──────────┴──────────┴────────────────────┴─────────────────┘ +``` From e175be1f42be8b67a30ac4ef6f44d2a8c150fb0e Mon Sep 17 00:00:00 2001 From: flynn Date: Wed, 5 Jul 2023 12:34:58 +0000 Subject: [PATCH 1230/1997] remove unused code --- src/Compression/CompressionCodecLZ4.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Compression/CompressionCodecLZ4.cpp b/src/Compression/CompressionCodecLZ4.cpp index a39052f80b7..3dbb6be9a99 100644 --- a/src/Compression/CompressionCodecLZ4.cpp +++ b/src/Compression/CompressionCodecLZ4.cpp @@ -42,7 +42,6 @@ private: UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override; mutable LZ4::PerformanceStatistics lz4_stat; - ASTPtr codec_desc; }; From 1124c7ae8bcbb0bcc151f4121219f87290a817ee Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 5 Jul 2023 12:42:36 +0000 Subject: [PATCH 1231/1997] Correctly move config --- docker/test/upgrade/run.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index 8fd514eaa93..82a88272df9 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -76,7 +76,8 @@ sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-serv # But we still need default disk because some tables loaded only into it sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml \ | sed "s|
s3
|
s3
default|" \ - > /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml + > /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp +mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml From 665b6d43342f418a9c32bc31d1a969b53766fc96 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 5 Jul 2023 14:50:15 +0200 Subject: [PATCH 1232/1997] Update aspell-dict.txt --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 526e674a154..636b7a9747d 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -761,6 +761,7 @@ Rollup RowBinary RowBinaryWithNames RowBinaryWithNamesAndTypes +RowBinaryWithDefaults Runtime SATA SELECTs @@ -2118,6 +2119,7 @@ rowNumberInBlock rowbinary rowbinarywithnames rowbinarywithnamesandtypes +rowbinarywithdefaults rsync rsyslog runnable From 8d5ddcbd3094182b44b3641f11acf6ba788faaf7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 5 Jul 2023 15:40:06 +0200 Subject: [PATCH 1233/1997] Remove coverity --- .github/workflows/nightly.yml | 45 ----------------------------------- 1 file changed, 45 deletions(-) diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index cf61012f2bc..9de0444bd83 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -75,51 +75,6 @@ jobs: Codebrowser: needs: [DockerHubPush] uses: ./.github/workflows/woboq.yml - BuilderCoverity: - needs: DockerHubPush - runs-on: [self-hosted, builder] - steps: - - name: Set envs - run: | - cat >> "$GITHUB_ENV" << 'EOF' - BUILD_NAME=coverity - CACHES_PATH=${{runner.temp}}/../ccaches - IMAGES_PATH=${{runner.temp}}/images_path - REPO_COPY=${{runner.temp}}/build_check/ClickHouse - TEMP_PATH=${{runner.temp}}/build_check - EOF - echo "COVERITY_TOKEN=${{ secrets.COVERITY_TOKEN }}" >> "$GITHUB_ENV" - - name: Download changed images - uses: actions/download-artifact@v3 - with: - name: changed_images - path: ${{ env.IMAGES_PATH }} - - name: Check out repository code - uses: ClickHouse/checkout@v1 - with: - clear-repository: true - submodules: true - - name: Build - run: | - sudo rm -fr "$TEMP_PATH" - mkdir -p "$TEMP_PATH" - cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - - name: Upload Coverity Analysis - if: ${{ success() || failure() }} - run: | - curl --form token="${COVERITY_TOKEN}" \ - --form email='security+coverity@clickhouse.com' \ - --form file="@$TEMP_PATH/$BUILD_NAME/coverity-scan.tar.gz" \ - --form version="${GITHUB_REF#refs/heads/}-${GITHUB_SHA::6}" \ - --form description="Nighly Scan: $(date +'%Y-%m-%dT%H:%M:%S')" \ - https://scan.coverity.com/builds?project=ClickHouse%2FClickHouse - - name: Cleanup - if: always() - run: | - docker ps --quiet | xargs --no-run-if-empty docker kill ||: - docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: - sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" SonarCloud: runs-on: 
[self-hosted, builder] env: From 1da413e64eaa092b2ab685253f4cb32a93dcc53e Mon Sep 17 00:00:00 2001 From: Duc Canh Le Date: Wed, 5 Jul 2023 14:56:11 +0000 Subject: [PATCH 1234/1997] fix segfault when create invalid EmbeddedRocksdb table --- src/Storages/checkAndGetLiteralArgument.cpp | 10 +++++++++- .../02811_invalid_embedded_rocksdb_create.reference | 0 .../02811_invalid_embedded_rocksdb_create.sql | 1 + 3 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02811_invalid_embedded_rocksdb_create.reference create mode 100644 tests/queries/0_stateless/02811_invalid_embedded_rocksdb_create.sql diff --git a/src/Storages/checkAndGetLiteralArgument.cpp b/src/Storages/checkAndGetLiteralArgument.cpp index 1aa942548a7..78ec1e55b64 100644 --- a/src/Storages/checkAndGetLiteralArgument.cpp +++ b/src/Storages/checkAndGetLiteralArgument.cpp @@ -12,7 +12,15 @@ namespace ErrorCodes template T checkAndGetLiteralArgument(const ASTPtr & arg, const String & arg_name) { - return checkAndGetLiteralArgument(*arg->as(), arg_name); + if (arg->as()) + return checkAndGetLiteralArgument(*arg->as(), arg_name); + + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Argument '{}' must be a literal, get {} (value: {})", + arg_name, + arg->getID(), + arg->formatForErrorMessage()); } template diff --git a/tests/queries/0_stateless/02811_invalid_embedded_rocksdb_create.reference b/tests/queries/0_stateless/02811_invalid_embedded_rocksdb_create.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02811_invalid_embedded_rocksdb_create.sql b/tests/queries/0_stateless/02811_invalid_embedded_rocksdb_create.sql new file mode 100644 index 00000000000..aac2652fbfa --- /dev/null +++ b/tests/queries/0_stateless/02811_invalid_embedded_rocksdb_create.sql @@ -0,0 +1 @@ +CREATE TABLE dict (`k` String, `v` String) ENGINE = EmbeddedRocksDB(k) PRIMARY KEY k; -- {serverError 36} \ No newline at end of file From 8f1ed5c90de4ada3764ea6384220459359eb7950 Mon Sep 17 00:00:00 2001 From: Duc Canh Le Date: Wed, 5 Jul 2023 15:04:38 +0000 Subject: [PATCH 1235/1997] add more check + line break --- src/Storages/checkAndGetLiteralArgument.cpp | 6 +++--- .../0_stateless/02811_invalid_embedded_rocksdb_create.sql | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Storages/checkAndGetLiteralArgument.cpp b/src/Storages/checkAndGetLiteralArgument.cpp index 78ec1e55b64..5baf47fe91a 100644 --- a/src/Storages/checkAndGetLiteralArgument.cpp +++ b/src/Storages/checkAndGetLiteralArgument.cpp @@ -12,15 +12,15 @@ namespace ErrorCodes template T checkAndGetLiteralArgument(const ASTPtr & arg, const String & arg_name) { - if (arg->as()) + if (arg && arg->as()) return checkAndGetLiteralArgument(*arg->as(), arg_name); throw Exception( ErrorCodes::BAD_ARGUMENTS, "Argument '{}' must be a literal, get {} (value: {})", arg_name, - arg->getID(), - arg->formatForErrorMessage()); + arg ? arg->getID() : "NULL", + arg ? 
arg->formatForErrorMessage() : "NULL"); } template diff --git a/tests/queries/0_stateless/02811_invalid_embedded_rocksdb_create.sql b/tests/queries/0_stateless/02811_invalid_embedded_rocksdb_create.sql index aac2652fbfa..bfe4ee0622e 100644 --- a/tests/queries/0_stateless/02811_invalid_embedded_rocksdb_create.sql +++ b/tests/queries/0_stateless/02811_invalid_embedded_rocksdb_create.sql @@ -1 +1 @@ -CREATE TABLE dict (`k` String, `v` String) ENGINE = EmbeddedRocksDB(k) PRIMARY KEY k; -- {serverError 36} \ No newline at end of file +CREATE TABLE dict (`k` String, `v` String) ENGINE = EmbeddedRocksDB(k) PRIMARY KEY k; -- {serverError 36} From 32e0348caa6ee34d1f631fceffbc6a93b09953d2 Mon Sep 17 00:00:00 2001 From: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> Date: Wed, 5 Jul 2023 17:12:56 +0200 Subject: [PATCH 1236/1997] Revert "Publish changes" This reverts commit ed6bfd66fee9aa4c2e06eaf25ca81d02e09f075e. --- docker/packager/binary/build.sh | 4 ---- docker/packager/packager | 1 - 2 files changed, 5 deletions(-) diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index 08a9b07f3ce..c0803c74147 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -15,10 +15,6 @@ if [ "$EXTRACT_TOOLCHAIN_DARWIN" = "1" ]; then mkdir -p /build/cmake/toolchain/darwin-x86_64 tar xJf /MacOSX11.0.sdk.tar.xz -C /build/cmake/toolchain/darwin-x86_64 --strip-components=1 ln -sf darwin-x86_64 /build/cmake/toolchain/darwin-aarch64 - - if [ "$EXPORT_SOURCES_WITH_SUBMODULES" = "1" ]; then - tar -c /build --exclude-vcs-ignores --exclude-vcs --exclude '/build/build' --exclude '/build/build_docker' --exclude '/build/debian' --exclude '/build/.cache' --exclude '/build/docs' --exclude '/build/tests/integration' | pigz -9 > /output/source_sub.tar.gz - fi fi # Uncomment to debug ccache. 
Don't put ccache log in /output right away, or it diff --git a/docker/packager/packager b/docker/packager/packager index 42dc52aa37f..1b3df858cd2 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -168,7 +168,6 @@ def parse_env_variables( "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-x86_64.cmake" ) result.append("EXTRACT_TOOLCHAIN_DARWIN=1") - result.append("EXPORT_SOURCES_WITH_SUBMODULES=1") elif is_cross_darwin_arm: cc = compiler[: -len(DARWIN_ARM_SUFFIX)] cmake_flags.append("-DCMAKE_AR:FILEPATH=/cctools/bin/aarch64-apple-darwin-ar") From baee73fd96d1b1974ac7ec637c3b22c4f63a27a4 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 5 Jul 2023 18:11:25 +0200 Subject: [PATCH 1237/1997] Make shutdown of replicated tables softer --- programs/server/Server.cpp | 64 +++++--- programs/server/Server.h | 11 +- src/Databases/DatabasesCommon.cpp | 2 +- src/Interpreters/InterpreterDropQuery.cpp | 2 +- src/Storages/IStorage.h | 8 +- src/Storages/MergeTree/DataPartsExchange.cpp | 2 +- src/Storages/MergeTree/MergeTreeSettings.h | 1 + .../ReplicatedMergeTreeRestartingThread.cpp | 2 +- src/Storages/StorageBuffer.cpp | 2 +- src/Storages/StorageBuffer.h | 2 +- src/Storages/StorageDistributed.cpp | 2 +- src/Storages/StorageDistributed.h | 2 +- src/Storages/StorageProxy.h | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 137 ++++++++++++------ src/Storages/StorageReplicatedMergeTree.h | 11 +- src/Storages/StorageTableFunction.h | 4 +- .../__init__.py | 1 + .../config/merge_tree_conf.xml | 5 + .../test.py | 74 ++++++++++ 19 files changed, 253 insertions(+), 81 deletions(-) create mode 100644 tests/integration/test_replicated_merge_tree_wait_on_shutdown/__init__.py create mode 100644 tests/integration/test_replicated_merge_tree_wait_on_shutdown/config/merge_tree_conf.xml create mode 100644 tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index d2d8a0d07fb..0a311fa4737 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1286,7 +1286,7 @@ try global_context->reloadAuxiliaryZooKeepersConfigIfChanged(config); std::lock_guard lock(servers_lock); - updateServers(*config, server_pool, async_metrics, servers); + updateServers(*config, server_pool, async_metrics, servers, servers_to_start_before_tables); } global_context->updateStorageConfiguration(*config); @@ -1388,10 +1388,15 @@ try } - for (auto & server : servers_to_start_before_tables) { - server.start(); - LOG_INFO(log, "Listening for {}", server.getDescription()); + std::lock_guard lock(servers_lock); + createInterserverServers(config(), interserver_listen_hosts, listen_try, server_pool, async_metrics, servers_to_start_before_tables, /* start_servers= */ false); + + for (auto & server : servers_to_start_before_tables) + { + server.start(); + LOG_INFO(log, "Listening for {}", server.getDescription()); + } } /// Initialize access storages. 
@@ -1688,7 +1693,7 @@ try { std::lock_guard lock(servers_lock); - createServers(config(), listen_hosts, interserver_listen_hosts, listen_try, server_pool, async_metrics, servers); + createServers(config(), listen_hosts, listen_try, server_pool, async_metrics, servers); if (servers.empty()) throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "No servers started (add valid listen_host and 'tcp_port' or 'http_port' " @@ -1954,7 +1959,6 @@ HTTPContextPtr Server::httpContext() const void Server::createServers( Poco::Util::AbstractConfiguration & config, const Strings & listen_hosts, - const Strings & interserver_listen_hosts, bool listen_try, Poco::ThreadPool & server_pool, AsynchronousMetrics & async_metrics, @@ -2176,6 +2180,23 @@ void Server::createServers( httpContext(), createHandlerFactory(*this, config, async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params)); }); } +} + +void Server::createInterserverServers( + Poco::Util::AbstractConfiguration & config, + const Strings & interserver_listen_hosts, + bool listen_try, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + std::vector & servers, + bool start_servers) +{ + const Settings & settings = global_context->getSettingsRef(); + + Poco::Timespan keep_alive_timeout(config.getUInt("keep_alive_timeout", 10), 0); + Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams; + http_params->setTimeout(settings.http_receive_timeout); + http_params->setKeepAliveTimeout(keep_alive_timeout); /// Now iterate over interserver_listen_hosts for (const auto & interserver_listen_host : interserver_listen_hosts) @@ -2224,14 +2245,14 @@ void Server::createServers( #endif }); } - } void Server::updateServers( Poco::Util::AbstractConfiguration & config, Poco::ThreadPool & server_pool, AsynchronousMetrics & async_metrics, - std::vector & servers) + std::vector & servers, + std::vector & servers_to_start_before_tables) { Poco::Logger * log = &logger(); @@ -2256,12 +2277,18 @@ void Server::updateServers( std::erase_if(servers, std::bind_front(check_server, " (from one of previous reload)")); Poco::Util::AbstractConfiguration & previous_config = latest_config ? *latest_config : this->config(); - + std::vector all_servers; for (auto & server : servers) + all_servers.push_back(&server); + + for (auto & server : servers_to_start_before_tables) + all_servers.push_back(&server); + + for (auto * server : all_servers) { - if (!server.isStopping()) + if (!server->isStopping()) { - std::string port_name = server.getPortName(); + std::string port_name = server->getPortName(); bool has_host = false; bool is_http = false; if (port_name.starts_with("protocols.")) @@ -2299,25 +2326,26 @@ void Server::updateServers( /// NOTE: better to compare using getPortName() over using /// dynamic_cast<> since HTTPServer is also used for prometheus and /// internal replication communications. 
- is_http = server.getPortName() == "http_port" || server.getPortName() == "https_port"; + is_http = server->getPortName() == "http_port" || server->getPortName() == "https_port"; } if (!has_host) - has_host = std::find(listen_hosts.begin(), listen_hosts.end(), server.getListenHost()) != listen_hosts.end(); + has_host = std::find(listen_hosts.begin(), listen_hosts.end(), server->getListenHost()) != listen_hosts.end(); bool has_port = !config.getString(port_name, "").empty(); bool force_restart = is_http && !isSameConfiguration(previous_config, config, "http_handlers"); if (force_restart) - LOG_TRACE(log, " had been changed, will reload {}", server.getDescription()); + LOG_TRACE(log, " had been changed, will reload {}", server->getDescription()); - if (!has_host || !has_port || config.getInt(server.getPortName()) != server.portNumber() || force_restart) + if (!has_host || !has_port || config.getInt(server->getPortName()) != server->portNumber() || force_restart) { - server.stop(); - LOG_INFO(log, "Stopped listening for {}", server.getDescription()); + server->stop(); + LOG_INFO(log, "Stopped listening for {}", server->getDescription()); } } } - createServers(config, listen_hosts, interserver_listen_hosts, listen_try, server_pool, async_metrics, servers, /* start_servers= */ true); + createServers(config, listen_hosts, listen_try, server_pool, async_metrics, servers, /* start_servers= */ true); + createInterserverServers(config, interserver_listen_hosts, listen_try, server_pool, async_metrics, servers, /* start_servers= */ true); std::erase_if(servers, std::bind_front(check_server, "")); } diff --git a/programs/server/Server.h b/programs/server/Server.h index e9ae6d8d937..d13378dcd65 100644 --- a/programs/server/Server.h +++ b/programs/server/Server.h @@ -102,6 +102,14 @@ private: void createServers( Poco::Util::AbstractConfiguration & config, const Strings & listen_hosts, + bool listen_try, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + std::vector & servers, + bool start_servers = false); + + void createInterserverServers( + Poco::Util::AbstractConfiguration & config, const Strings & interserver_listen_hosts, bool listen_try, Poco::ThreadPool & server_pool, @@ -113,7 +121,8 @@ private: Poco::Util::AbstractConfiguration & config, Poco::ThreadPool & server_pool, AsynchronousMetrics & async_metrics, - std::vector & servers); + std::vector & servers, + std::vector & servers_to_start_before_tables); }; } diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp index bb98e2bd3bb..4ba793d858d 100644 --- a/src/Databases/DatabasesCommon.cpp +++ b/src/Databases/DatabasesCommon.cpp @@ -292,7 +292,7 @@ void DatabaseWithOwnTablesBase::shutdown() for (const auto & kv : tables_snapshot) { - kv.second->flush(); + kv.second->flushAndPrepareForShutdown(); } for (const auto & kv : tables_snapshot) diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp index 0beb4492aef..84ecb1fc909 100644 --- a/src/Interpreters/InterpreterDropQuery.cpp +++ b/src/Interpreters/InterpreterDropQuery.cpp @@ -361,7 +361,7 @@ BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query, std::vector> tables_to_drop; for (auto iterator = database->getTablesIterator(table_context); iterator->isValid(); iterator->next()) { - iterator->table()->flush(); + iterator->table()->flushAndPrepareForShutdown(); tables_to_drop.push_back({iterator->name(), iterator->table()->isDictionary()}); } diff --git a/src/Storages/IStorage.h 
b/src/Storages/IStorage.h index b262d88db57..c0d36836444 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -549,15 +549,15 @@ public: /** * If the storage requires some complicated work on destroying, * then you have two virtual methods: - * - flush() + * - flushAndPrepareForShutdown() * - shutdown() * * @see shutdown() - * @see flush() + * @see flushAndPrepareForShutdown() */ void flushAndShutdown() { - flush(); + flushAndPrepareForShutdown(); shutdown(); } @@ -570,7 +570,7 @@ public: /// Called before shutdown() to flush data to underlying storage /// Data in memory needs to be persistent - virtual void flush() {} + virtual void flushAndPrepareForShutdown() {} /// Asks table to stop executing some action identified by action_type /// If table does not support such type of lock, an empty lock is returned diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 7424a248491..fc8f599a06e 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -204,7 +204,7 @@ void Service::processQuery(const HTMLForm & params, ReadBuffer & /*body*/, Write else sendPartFromDisk(part, out, client_protocol_version, false, send_projections); - data.addLastSentPart(part->name); + data.addLastSentPart(part->info); } catch (const NetException &) { diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index dc24327712c..60c3999f87a 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -119,6 +119,7 @@ struct Settings; M(Bool, detach_not_byte_identical_parts, false, "Do not remove non byte-identical parts for ReplicatedMergeTree, instead detach them (maybe useful for further analysis).", 0) \ M(UInt64, max_replicated_fetches_network_bandwidth, 0, "The maximum speed of data exchange over the network in bytes per second for replicated fetches. Zero means unlimited.", 0) \ M(UInt64, max_replicated_sends_network_bandwidth, 0, "The maximum speed of data exchange over the network in bytes per second for replicated sends. Zero means unlimited.", 0) \ + M(Milliseconds, wait_for_unique_parts_send_before_shutdown_ms, 0, "Before shutdown table will wait for the required amount of time for unique parts (exist only on current replica) to be fetched by other replicas (0 means disabled).", 0) \ \ /** Check delay of replicas settings.
*/ \ M(UInt64, min_relative_delay_to_measure, 120, "Calculate relative replica delay only if absolute delay is not less that this value.", 0) \ diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp index d7166b4a3b9..0e381654db0 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp @@ -330,7 +330,7 @@ void ReplicatedMergeTreeRestartingThread::activateReplica() void ReplicatedMergeTreeRestartingThread::partialShutdown(bool part_of_full_shutdown) { setReadonly(part_of_full_shutdown); - storage.partialShutdown(); + storage.partialShutdown(part_of_full_shutdown); } diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index d021667f771..9c05afd5284 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -682,7 +682,7 @@ void StorageBuffer::startup() } -void StorageBuffer::flush() +void StorageBuffer::flushAndPrepareForShutdown() { if (!flush_handle) return; diff --git a/src/Storages/StorageBuffer.h b/src/Storages/StorageBuffer.h index 8f089a4d580..db3cde93be5 100644 --- a/src/Storages/StorageBuffer.h +++ b/src/Storages/StorageBuffer.h @@ -92,7 +92,7 @@ public: void startup() override; /// Flush all buffers into the subordinate table and stop background thread. - void flush() override; + void flushAndPrepareForShutdown() override; bool optimize( const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index c46192ab43b..608f65cfeff 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -1427,7 +1427,7 @@ ActionLock StorageDistributed::getActionLock(StorageActionBlockType type) return {}; } -void StorageDistributed::flush() +void StorageDistributed::flushAndPrepareForShutdown() { try { diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index f45286341cf..547f61a012b 100644 --- a/src/Storages/StorageDistributed.h +++ b/src/Storages/StorageDistributed.h @@ -135,7 +135,7 @@ public: void initializeFromDisk(); void shutdown() override; - void flush() override; + void flushAndPrepareForShutdown() override; void drop() override; bool storesDataOnDisk() const override { return data_volume != nullptr; } diff --git a/src/Storages/StorageProxy.h b/src/Storages/StorageProxy.h index 14b7fc15af2..b243225adb3 100644 --- a/src/Storages/StorageProxy.h +++ b/src/Storages/StorageProxy.h @@ -139,7 +139,7 @@ public: void startup() override { getNested()->startup(); } void shutdown() override { getNested()->shutdown(); } - void flush() override { getNested()->flush(); } + void flushAndPrepareForShutdown() override { getNested()->flushAndPrepareForShutdown(); } ActionLock getActionLock(StorageActionBlockType action_type) override { return getNested()->getActionLock(action_type); } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 7f282b6c0e6..94727a5495c 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -3933,6 +3934,7 @@ void StorageReplicatedMergeTree::addLastSentPart(const MergeTreePartInfo & info) { std::lock_guard lock(last_sent_parts_mutex); last_sent_parts.emplace_back(info); + static constexpr size_t LAST_SENT_PARS_WINDOW_SIZE = 1000; while 
(last_sent_parts.size() > LAST_SENT_PARS_WINDOW_SIZE) last_sent_parts.pop_front(); } @@ -3950,24 +3952,32 @@ void StorageReplicatedMergeTree::waitForUniquePartsToBeFetchedByOtherReplicas(si auto zookeeper = getZooKeeper(); - auto unique_parts_set = findReplicaUniqueParts(replica_name, zookeeper_path, format_version, zookeeper); + auto unique_parts_set = findReplicaUniqueParts(replica_name, zookeeper_path, format_version, zookeeper, log); if (unique_parts_set.empty()) { LOG_INFO(log, "Will not wait for unique parts to be fetched because we don't have any unique parts"); return; } + else + { + LOG_INFO(log, "Will wait for {} unique parts to be fetched", unique_parts_set.size()); + } - auto wait_predicate = [&] () -> void + auto wait_predicate = [&] () -> bool { bool all_fetched = true; - for (const auto & part : unique_parts_set) + for (auto it = unique_parts_set.begin(); it != unique_parts_set.end();) { + const auto & part = *it; + bool found = false; - for (const auto & sent_part : last_sent_parts) + for (const auto & sent_part : last_sent_parts | std::views::reverse) { if (sent_part.contains(part)) { + LOG_TRACE(log, "Part {} was fetched by some replica", part.getPartNameForLogs()); found = true; + it = unique_parts_set.erase(it); break; } } @@ -3981,14 +3991,19 @@ void StorageReplicatedMergeTree::waitForUniquePartsToBeFetchedByOtherReplicas(si }; std::unique_lock lock(last_sent_parts_mutex); - if (!last_sent_parts_cv.wait_for(last_sent_parts_cv, std::chrono::duration_cast(wait_ms), wait_predicate)) - LOG_WARNING(log, "Failed to wait for unqiue parts to be fetched in {} ms, {} parts can be left on this replica", wait_ms, unqiue_parts_set.size()); + if (!last_sent_parts_cv.wait_for(lock, std::chrono::milliseconds(wait_ms), wait_predicate)) + LOG_WARNING(log, "Failed to wait for unqiue parts to be fetched in {} ms, {} parts can be left on this replica", wait_ms, unique_parts_set.size()); + else + LOG_INFO(log, "Successfuly waited all the parts"); } -std::vector StorageReplicatedMergeTree::findReplicaUniqueParts(const String & replica_name_, const String & zookeeper_path_, MergeTreeDataFormatVersion format_version_, zkutil::ZooKeeper::Ptr zookeeper_) +std::vector StorageReplicatedMergeTree::findReplicaUniqueParts(const String & replica_name_, const String & zookeeper_path_, MergeTreeDataFormatVersion format_version_, zkutil::ZooKeeper::Ptr zookeeper_, Poco::Logger * log_) { - if (zookeeper_->exists(fs::path(zookeeper_path_) / "replicas" / replica_name_ / "is_active")) + if (!zookeeper_->exists(fs::path(zookeeper_path_) / "replicas" / replica_name_ / "is_active")) + { + LOG_INFO(log_, "Our replica is not active, nobody will try to fetch anything"); return {}; + } Strings replicas = zookeeper_->getChildren(fs::path(zookeeper_path_) / "replicas"); Strings our_parts; @@ -3996,40 +4011,54 @@ std::vector StorageReplicatedMergeTree::findReplicaUniquePart for (const String & replica : replicas) { if (!zookeeper_->exists(fs::path(zookeeper_path_) / "replicas" / replica / "is_active")) + { + LOG_TRACE(log_, "Replica {} is not active, skipping", replica); continue; + } Strings parts = zookeeper_->getChildren(fs::path(zookeeper_path_) / "replicas" / replica / "parts"); if (replica == replica_name_) { + LOG_TRACE(log_, "Our replica parts collected {}", replica); our_parts = parts; } else { + LOG_TRACE(log_, "Fetching parts for replica {}", replica); data_parts_on_replicas.emplace_back(format_version_); for (const auto & part : parts) { - if (!data_parts_on_replicas.back().getContainingPart(part).empty()) 
+ if (data_parts_on_replicas.back().getContainingPart(part).empty()) data_parts_on_replicas.back().add(part); } } } - NameSet our_unique_parts; + std::vector our_unique_parts; for (const auto & part : our_parts) { + LOG_TRACE(log_, "Looking for part {}", part); bool found = false; for (const auto & active_parts_set : data_parts_on_replicas) { if (!active_parts_set.getContainingPart(part).empty()) { + LOG_TRACE(log_, "Part {} found", part); found = true; break; } } + if (!found) - our_unique_parts.insert(MergeTreePartInfo::fromPartName(part, format_version)); + { + LOG_TRACE(log_, "Part not {} found", part); + our_unique_parts.emplace_back(MergeTreePartInfo::fromPartName(part, format_version_)); + } } + if (!our_parts.empty() && our_unique_parts.empty()) + LOG_TRACE(log_, "All parts found on replica"); + return our_unique_parts; } @@ -4799,39 +4828,9 @@ void StorageReplicatedMergeTree::startupImpl(bool from_attach_thread) } -void StorageReplicatedMergeTree::partialShutdown() +void StorageReplicatedMergeTree::flushAndPrepareForShutdown() { - ProfileEvents::increment(ProfileEvents::ReplicaPartialShutdown); - - partial_shutdown_called = true; - partial_shutdown_event.set(); - queue.notifySubscribersOnPartialShutdown(); - replica_is_active_node = nullptr; - - LOG_TRACE(log, "Waiting for threads to finish"); - merge_selecting_task->deactivate(); - queue_updating_task->deactivate(); - mutations_updating_task->deactivate(); - mutations_finalizing_task->deactivate(); - - cleanup_thread.stop(); - async_block_ids_cache.stop(); - part_check_thread.stop(); - - /// Stop queue processing - { - auto fetch_lock = fetcher.blocker.cancel(); - auto merge_lock = merger_mutator.merges_blocker.cancel(); - auto move_lock = parts_mover.moves_blocker.cancel(); - background_operations_assignee.finish(); - } - - LOG_TRACE(log, "Threads finished"); -} - -void StorageReplicatedMergeTree::shutdown() -{ - if (shutdown_called.exchange(true)) + if (shutdown_prepared_called.exchange(true)) return; session_expired_callback_handler.reset(); @@ -4860,6 +4859,58 @@ void StorageReplicatedMergeTree::shutdown() } background_moves_assignee.finish(); +} + +void StorageReplicatedMergeTree::partialShutdown(bool part_of_full_shutdown) +{ + ProfileEvents::increment(ProfileEvents::ReplicaPartialShutdown); + + partial_shutdown_called = true; + partial_shutdown_event.set(); + queue.notifySubscribersOnPartialShutdown(); + if (!part_of_full_shutdown) + { + LOG_DEBUG(log, "Reset active node, replica will be inactive"); + replica_is_active_node = nullptr; + } + else + LOG_DEBUG(log, "Will not reset active node, it will be reset completely during full shutdown"); + + LOG_TRACE(log, "Waiting for threads to finish"); + merge_selecting_task->deactivate(); + queue_updating_task->deactivate(); + mutations_updating_task->deactivate(); + mutations_finalizing_task->deactivate(); + + cleanup_thread.stop(); + async_block_ids_cache.stop(); + part_check_thread.stop(); + + /// Stop queue processing + { + auto fetch_lock = fetcher.blocker.cancel(); + auto merge_lock = merger_mutator.merges_blocker.cancel(); + auto move_lock = parts_mover.moves_blocker.cancel(); + background_operations_assignee.finish(); + } + + LOG_TRACE(log, "Threads finished"); +} + +void StorageReplicatedMergeTree::shutdown() +{ + if (shutdown_called.exchange(true)) + return; + + if (!shutdown_prepared_called.load()) + flushAndPrepareForShutdown(); + + auto settings_ptr = getSettings(); + LOG_DEBUG(log, "Data parts exchange still exists {}", data_parts_exchange_endpoint != nullptr); + 
waitForUniquePartsToBeFetchedByOtherReplicas(settings_ptr->wait_for_unique_parts_send_before_shutdown_ms.totalMilliseconds()); + + replica_is_active_node = nullptr; + auto data_parts_exchange_ptr = std::atomic_exchange(&data_parts_exchange_endpoint, InterserverIOEndpointPtr{}); if (data_parts_exchange_ptr) { diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 4661f0a56da..104062def4b 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -113,7 +113,10 @@ public: void startup() override; void shutdown() override; - void partialShutdown(); + + void flushAndPrepareForShutdown() override; + + void partialShutdown(bool part_of_full_shutdown); ~StorageReplicatedMergeTree() override; static String getDefaultZooKeeperPath(const Poco::Util::AbstractConfiguration & config); @@ -453,9 +456,9 @@ private: Poco::Event partial_shutdown_event {false}; /// Poco::Event::EVENT_MANUALRESET std::atomic shutdown_called {false}; + std::atomic shutdown_prepared_called {false}; - static constexpr size_t LAST_SENT_PARS_WINDOW_SIZE = 1000; - std::mutex last_sent_parts_mutex; + mutable std::mutex last_sent_parts_mutex; std::condition_variable last_sent_parts_cv; std::deque last_sent_parts; @@ -711,7 +714,7 @@ private: */ String findReplicaHavingCoveringPart(LogEntry & entry, bool active); String findReplicaHavingCoveringPart(const String & part_name, bool active, String & found_part_name); - static std::vector findReplicaUniqueParts(const String & replica_name_, const String & zookeeper_path_, MergeTreeDataFormatVersion format_version_, zkutil::ZooKeeper::Ptr zookeeper_); + static std::vector findReplicaUniqueParts(const String & replica_name_, const String & zookeeper_path_, MergeTreeDataFormatVersion format_version_, zkutil::ZooKeeper::Ptr zookeeper_, Poco::Logger * log_); /** Download the specified part from the specified replica. * If `to_detached`, the part is placed in the `detached` directory. 
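The shutdown path added above boils down to a small concurrency pattern: every part sent to a peer is appended to a bounded deque under a mutex and a condition variable is notified, while shutdown performs a timed wait until each part that exists only on this replica has shown up in that deque. The sketch below is a self-contained reduction of that pattern, assuming plain string part names and exact matches in place of MergeTreePartInfo covering-part checks; all identifiers are illustrative, not the real StorageReplicatedMergeTree members.

#include <chrono>
#include <condition_variable>
#include <deque>
#include <iostream>
#include <mutex>
#include <set>
#include <string>
#include <thread>

// Sketch only: bounded log of sent parts plus a timed wait until all parts
// unique to this replica have been sent. Names are hypothetical.
class SentPartsTracker
{
public:
    void addLastSentPart(const std::string & name)
    {
        {
            std::lock_guard<std::mutex> lock(mutex);
            last_sent.push_back(name);
            while (last_sent.size() > window_size) // keep the log bounded
                last_sent.pop_front();
        }
        cv.notify_all();
    }

    // Returns true if every part in `unique_parts` was observed before the deadline.
    bool waitForUniqueParts(std::set<std::string> unique_parts, std::chrono::milliseconds timeout)
    {
        std::unique_lock<std::mutex> lock(mutex);
        return cv.wait_for(lock, timeout, [&]
        {
            for (const auto & sent : last_sent)
                unique_parts.erase(sent); // drop a part as soon as it was sent
            return unique_parts.empty();
        });
    }

private:
    static constexpr size_t window_size = 1000;
    std::mutex mutex;
    std::condition_variable cv;
    std::deque<std::string> last_sent;
};

int main()
{
    SentPartsTracker tracker;
    std::thread sender([&]
    {
        std::this_thread::sleep_for(std::chrono::milliseconds(50));
        tracker.addLastSentPart("all_0_0_0"); // another replica fetched it
    });
    bool ok = tracker.waitForUniqueParts({"all_0_0_0"}, std::chrono::seconds(1));
    std::cout << (ok ? "all unique parts fetched\n" : "timed out\n");
    sender.join();
}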
diff --git a/src/Storages/StorageTableFunction.h b/src/Storages/StorageTableFunction.h index 26cbe1f0233..5df050d1d0d 100644 --- a/src/Storages/StorageTableFunction.h +++ b/src/Storages/StorageTableFunction.h @@ -79,11 +79,11 @@ public: nested->shutdown(); } - void flush() override + void flushAndPrepareForShutdown() override { std::lock_guard lock{nested_mutex}; if (nested) - nested->flush(); + nested->flushAndPrepareForShutdown(); } void drop() override diff --git a/tests/integration/test_replicated_merge_tree_wait_on_shutdown/__init__.py b/tests/integration/test_replicated_merge_tree_wait_on_shutdown/__init__.py new file mode 100644 index 00000000000..e5a0d9b4834 --- /dev/null +++ b/tests/integration/test_replicated_merge_tree_wait_on_shutdown/__init__.py @@ -0,0 +1 @@ +#!/usr/bin/env python3 diff --git a/tests/integration/test_replicated_merge_tree_wait_on_shutdown/config/merge_tree_conf.xml b/tests/integration/test_replicated_merge_tree_wait_on_shutdown/config/merge_tree_conf.xml new file mode 100644 index 00000000000..8ff3bdf9a2f --- /dev/null +++ b/tests/integration/test_replicated_merge_tree_wait_on_shutdown/config/merge_tree_conf.xml @@ -0,0 +1,5 @@ + + + 30000 + + diff --git a/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py b/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py new file mode 100644 index 00000000000..75f0921646e --- /dev/null +++ b/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python3 + +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.network import PartitionManager +from multiprocessing.dummy import Pool +import time + +cluster = ClickHouseCluster(__file__) + +node1 = cluster.add_instance( + "node1", main_configs=["config/merge_tree_conf.xml"], with_zookeeper=True, stay_alive=True +) + +node2 = cluster.add_instance( + "node2", main_configs=["config/merge_tree_conf.xml"], with_zookeeper=True, stay_alive=True +) + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + + yield cluster + finally: + cluster.shutdown() + + +def test_shutdown_and_wait(start_cluster): + + for i, node in enumerate([node1, node2]): + node.query(f"CREATE TABLE test_table (value UInt64) ENGINE=ReplicatedMergeTree('/test/table', 'r{i}') ORDER BY tuple()") + + node1.query("INSERT INTO test_table VALUES (0)") + node2.query("SYSTEM SYNC REPLICA test_table") + + assert node1.query("SELECT * FROM test_table") == "0\n" + assert node2.query("SELECT * FROM test_table") == "0\n" + + def soft_shutdown(node): + node.stop_clickhouse(kill=False, stop_wait_sec=60) + + p = Pool(50) + pm = PartitionManager() + + pm.partition_instances(node1, node2) + + def insert(value): + node1.query(f"INSERT INTO test_table VALUES ({value})") + + p.map(insert, range(1, 50)) + + # Start shutdown async + waiter = p.apply_async(soft_shutdown, (node1,)) + # to be sure that shutdown started + time.sleep(5) + + # node 2 partitioned and don't see any data + assert node2.query("SELECT * FROM test_table") == "0\n" + + # Restore network + pm.heal_all() + # wait for shutdown to finish + waiter.get() + + node2.query("SYSTEM SYNC REPLICA test_table", timeout=5) + + # check second replica has all data + assert node2.query("SELECT sum(value) FROM test_table") == "1225\n" + # and nothing in queue + assert node2.query("SELECT count() FROM system.replication_queue") == "0\n" + + # It can happend that the second replica is superfast + assert node1.contains_in_log("Successfuly waited all the 
parts") or node1.contains_in_log("All parts found on replica") From 085576efc9c256d996dacf0f43185146c46eb194 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 5 Jul 2023 16:22:58 +0000 Subject: [PATCH 1238/1997] Automatic style fix --- .../test.py | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py b/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py index 75f0921646e..e3a2e7a0271 100644 --- a/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py +++ b/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py @@ -9,13 +9,20 @@ import time cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( - "node1", main_configs=["config/merge_tree_conf.xml"], with_zookeeper=True, stay_alive=True + "node1", + main_configs=["config/merge_tree_conf.xml"], + with_zookeeper=True, + stay_alive=True, ) node2 = cluster.add_instance( - "node2", main_configs=["config/merge_tree_conf.xml"], with_zookeeper=True, stay_alive=True + "node2", + main_configs=["config/merge_tree_conf.xml"], + with_zookeeper=True, + stay_alive=True, ) + @pytest.fixture(scope="module") def start_cluster(): try: @@ -27,9 +34,10 @@ def start_cluster(): def test_shutdown_and_wait(start_cluster): - for i, node in enumerate([node1, node2]): - node.query(f"CREATE TABLE test_table (value UInt64) ENGINE=ReplicatedMergeTree('/test/table', 'r{i}') ORDER BY tuple()") + node.query( + f"CREATE TABLE test_table (value UInt64) ENGINE=ReplicatedMergeTree('/test/table', 'r{i}') ORDER BY tuple()" + ) node1.query("INSERT INTO test_table VALUES (0)") node2.query("SYSTEM SYNC REPLICA test_table") @@ -71,4 +79,6 @@ def test_shutdown_and_wait(start_cluster): assert node2.query("SELECT count() FROM system.replication_queue") == "0\n" # It can happend that the second replica is superfast - assert node1.contains_in_log("Successfuly waited all the parts") or node1.contains_in_log("All parts found on replica") + assert node1.contains_in_log( + "Successfuly waited all the parts" + ) or node1.contains_in_log("All parts found on replica") From 88d3e1723a8a53270c0da62e581217442383cc5c Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 5 Jul 2023 18:31:47 +0200 Subject: [PATCH 1239/1997] Fixes and comments --- programs/server/Server.cpp | 16 ++++++++--- src/Storages/StorageReplicatedMergeTree.cpp | 5 ++++ src/Storages/StorageReplicatedMergeTree.h | 30 ++++++++++++++++++--- 3 files changed, 44 insertions(+), 7 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 0a311fa4737..58cf3e5d210 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1390,6 +1390,10 @@ try { std::lock_guard lock(servers_lock); + /// We should start interserver communications before (and, more importantly, shut down after) tables. + /// Because the server can wait for long-running queries (for example in tcp_handler) after the interserver handler was already shut down. + /// In this case we will have replicated tables which are unable to send any parts to other replicas, but still can + /// communicate with zookeeper, execute merges, etc.
createInterserverServers(config(), interserver_listen_hosts, listen_try, server_pool, async_metrics, servers_to_start_before_tables, /* start_servers= */ false); for (auto & server : servers_to_start_before_tables) @@ -1516,10 +1520,13 @@ try { LOG_DEBUG(log, "Waiting for current connections to servers for tables to finish."); size_t current_connections = 0; - for (auto & server : servers_to_start_before_tables) { - server.stop(); - current_connections += server.currentConnections(); + std::lock_guard lock(servers_lock); + for (auto & server : servers_to_start_before_tables) + { + server.stop(); + current_connections += server.currentConnections(); + } } if (current_connections) @@ -2345,9 +2352,10 @@ void Server::updateServers( } createServers(config, listen_hosts, listen_try, server_pool, async_metrics, servers, /* start_servers= */ true); - createInterserverServers(config, interserver_listen_hosts, listen_try, server_pool, async_metrics, servers, /* start_servers= */ true); + createInterserverServers(config, interserver_listen_hosts, listen_try, server_pool, async_metrics, servers_to_start_before_tables, /* start_servers= */ true); std::erase_if(servers, std::bind_front(check_server, "")); + std::erase_if(servers_to_start_before_tables, std::bind_front(check_server, "")); } } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 94727a5495c..2f165a056a3 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -4870,6 +4870,11 @@ void StorageReplicatedMergeTree::partialShutdown(bool part_of_full_shutdown) queue.notifySubscribersOnPartialShutdown(); if (!part_of_full_shutdown) { + /// If we are going to completely shutdown table we allow other + /// replicas to fetch parts which are unique for our replica. + /// + /// Replicas try to fetch part only in case the source replica is active, + /// so don't reset handler here. LOG_DEBUG(log, "Reset active node, replica will be inactive"); replica_is_active_node = nullptr; } diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 104062def4b..baa5af824b4 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -112,11 +112,35 @@ public: bool need_check_structure); void startup() override; + + /// Too many shutdown methods.... + /// + /// Partial shutdown is called if we lose connection to zookeeper. + /// Table can also recover after partial shutdown and continue + /// to work. This method can be called regularly. + void partialShutdown(bool part_of_full_shutdown); + + /// These two methods are called during final table shutdown (DROP/DETACH/overall server shutdown). + /// The shutdown process is splitted into two methods to make it more soft and fast. In database shutdown() + /// looks like: + /// for (table : tables) + /// table->flushAndPrepareForShutdown() + /// + /// for (table : tables) + /// table->shutdown() + /// + /// So we stop producting all the parts first for all tables (fast operation). And after we can wait in shutdown() + /// for other replicas to download parts. + /// + /// In flushAndPrepareForShutdown we cancel all part-producing operations: + /// merges, fetches, moves and so on. If it wasn't called before shutdown() -- shutdown() will + /// call it (defensive programming). + void flushAndPrepareForShutdown() override; + /// In shutdown we completly terminate table -- remove + /// is_active node and interserver handler.
Also optionally + /// wait until other replicas will download some parts from our replica. void shutdown() override; - void flushAndPrepareForShutdown() override; - - void partialShutdown(bool part_of_full_shutdown); ~StorageReplicatedMergeTree() override; static String getDefaultZooKeeperPath(const Poco::Util::AbstractConfiguration & config); From 2fa45117edfa4b541ad96f056a58e040c0569e4f Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 5 Jul 2023 18:38:04 +0200 Subject: [PATCH 1240/1997] Better --- src/Storages/StorageReplicatedMergeTree.cpp | 5 ++++- src/Storages/StorageReplicatedMergeTree.h | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 2f165a056a3..e5abf63a72d 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -3944,13 +3944,16 @@ void StorageReplicatedMergeTree::addLastSentPart(const MergeTreePartInfo & info) void StorageReplicatedMergeTree::waitForUniquePartsToBeFetchedByOtherReplicas(size_t wait_ms) { + if (!shutdown_called.load()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Called waitForUniquePartsToBeFetchedByOtherReplicas before shutdown, it's a bug"); + if (wait_ms == 0) { LOG_INFO(log, "Will not wait for unique parts to be fetched by other replicas because wait time is zero"); return; } - auto zookeeper = getZooKeeper(); + auto zookeeper = getZooKeeperIfTableShutDown(); auto unique_parts_set = findReplicaUniqueParts(replica_name, zookeeper_path, format_version, zookeeper, log); if (unique_parts_set.empty()) diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index baa5af824b4..a1a0717ca64 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -374,6 +374,8 @@ public: return last_sent_parts; } + /// Wait the required amount of milliseconds to give other replicas a chance to + /// download unique parts from our replica void waitForUniquePartsToBeFetchedByOtherReplicas(size_t wait_ms); private: From f2d106ffb783b9410cecdfe0e332d660b9fa73fa Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 5 Jul 2023 18:54:33 +0200 Subject: [PATCH 1241/1997] Fix typos --- src/Storages/StorageReplicatedMergeTree.cpp | 4 ++-- src/Storages/StorageReplicatedMergeTree.h | 6 +++--- .../test_replicated_merge_tree_wait_on_shutdown/test.py | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index e5abf63a72d..114465df496 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -3995,9 +3995,9 @@ void StorageReplicatedMergeTree::waitForUniquePartsToBeFetchedByOtherReplicas(si std::unique_lock lock(last_sent_parts_mutex); if (!last_sent_parts_cv.wait_for(lock, std::chrono::milliseconds(wait_ms), wait_predicate)) - LOG_WARNING(log, "Failed to wait for unqiue parts to be fetched in {} ms, {} parts can be left on this replica", wait_ms, unique_parts_set.size()); + LOG_WARNING(log, "Failed to wait for unique parts to be fetched in {} ms, {} parts can be left on this replica", wait_ms, unique_parts_set.size()); else - LOG_INFO(log, "Successfuly waited all the parts"); + LOG_INFO(log, "Successfully waited all the parts"); } std::vector StorageReplicatedMergeTree::findReplicaUniqueParts(const String & replica_name_, const String & zookeeper_path_, MergeTreeDataFormatVersion
format_version_, zkutil::ZooKeeper::Ptr zookeeper_, Poco::Logger * log_) diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index a1a0717ca64..656e8df6ccb 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -121,7 +121,7 @@ public: void partialShutdown(bool part_of_full_shutdown); /// These two methods are called during final table shutdown (DROP/DETACH/overall server shutdown). - /// The shutdown process is splitted into two methods to make it more soft and fast. In database shutdown() + /// The shutdown process is split into two methods to make it more soft and fast. In database shutdown() /// looks like: /// for (table : tables) /// table->flushAndPrepareForShutdown() @@ -129,14 +129,14 @@ public: /// for (table : tables) /// table->shutdown() /// - /// So we stop producting all the parts first for all tables (fast operation). And after we can wait in shutdown() + /// So we stop producing all the parts first for all tables (fast operation). And after we can wait in shutdown() /// for other replicas to download parts. /// /// In flushAndPrepareForShutdown we cancel all part-producing operations: /// merges, fetches, moves and so on. If it wasn't called before shutdown() -- shutdown() will /// call it (defensive programming). void flushAndPrepareForShutdown() override; - /// In shutdown we completly terminate table -- remove + /// In shutdown we completely terminate table -- remove /// is_active node and interserver handler. Also optionally /// wait until other replicas will download some parts from our replica. void shutdown() override; diff --git a/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py b/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py index e3a2e7a0271..a2a4ec92cf7 100644 --- a/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py +++ b/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py @@ -80,5 +80,5 @@ def test_shutdown_and_wait(start_cluster): # It can happend that the second replica is superfast assert node1.contains_in_log( - "Successfuly waited all the parts" + "Successfully waited all the parts" ) or node1.contains_in_log("All parts found on replica") From bf190381f5b6fa068948330f54ae9ee583c1ea80 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Wed, 5 Jul 2023 17:03:18 +0000 Subject: [PATCH 1242/1997] addJoinedBlock -> addBlockToJoin --- src/Interpreters/ConcurrentHashJoin.cpp | 4 ++-- src/Interpreters/ConcurrentHashJoin.h | 6 +++--- src/Interpreters/DirectJoin.cpp | 2 +- src/Interpreters/DirectJoin.h | 4 ++-- src/Interpreters/FullSortingMergeJoin.h | 4 ++-- src/Interpreters/GraceHashJoin.cpp | 12 ++++++------ src/Interpreters/GraceHashJoin.h | 8 ++++---- src/Interpreters/HashJoin.cpp | 8 ++++---- src/Interpreters/HashJoin.h | 6 +++--- src/Interpreters/IJoin.h | 6 +++--- src/Interpreters/JoinSwitcher.cpp | 8 ++++---- src/Interpreters/JoinSwitcher.h | 2 +- src/Interpreters/MergeJoin.cpp | 2 +- src/Interpreters/MergeJoin.h | 2 +- src/Processors/Transforms/JoiningTransform.cpp | 2 +- src/Storages/StorageJoin.cpp | 4 ++-- 16 files changed, 40 insertions(+), 40 deletions(-) diff --git a/src/Interpreters/ConcurrentHashJoin.cpp b/src/Interpreters/ConcurrentHashJoin.cpp index fc24f0ae029..1a8e0ad96fa 100644 --- a/src/Interpreters/ConcurrentHashJoin.cpp +++ b/src/Interpreters/ConcurrentHashJoin.cpp @@ -49,7 +49,7 @@ ConcurrentHashJoin::ConcurrentHashJoin(ContextPtr context_, 
std::shared_ptrgetOnlyClause().key_names_right, right_block); @@ -77,7 +77,7 @@ bool ConcurrentHashJoin::addJoinedBlock(const Block & right_block, bool check_li if (!lock.owns_lock()) continue; - bool limit_exceeded = !hash_join->data->addJoinedBlock(dispatched_block, check_limits); + bool limit_exceeded = !hash_join->data->addBlockToJoin(dispatched_block, check_limits); dispatched_block = {}; blocks_left--; diff --git a/src/Interpreters/ConcurrentHashJoin.h b/src/Interpreters/ConcurrentHashJoin.h index 5e53f9845aa..1283879971d 100644 --- a/src/Interpreters/ConcurrentHashJoin.h +++ b/src/Interpreters/ConcurrentHashJoin.h @@ -16,13 +16,13 @@ namespace DB { /** - * Can run addJoinedBlock() parallelly to speedup the join process. On test, it almose linear speedup by + * Can run addBlockToJoin() parallelly to speedup the join process. On test, it almose linear speedup by * the degree of parallelism. * * The default HashJoin is not thread safe for inserting right table's rows and run it in a single thread. When * the right table is large, the join process is too slow. * - * We create multiple HashJoin instances here. In addJoinedBlock(), one input block is split into multiple blocks + * We create multiple HashJoin instances here. In addBlockToJoin(), one input block is split into multiple blocks * corresponding to the HashJoin instances by hashing every row on the join keys. And make a guarantee that every HashJoin * instance is written by only one thread. * @@ -37,7 +37,7 @@ public: ~ConcurrentHashJoin() override = default; const TableJoin & getTableJoin() const override { return *table_join; } - bool addJoinedBlock(const Block & block, bool check_limits) override; + bool addBlockToJoin(const Block & block, bool check_limits) override; void checkTypesOfKeys(const Block & block) const override; void joinBlock(Block & block, std::shared_ptr & not_processed) override; void setTotals(const Block & block) override; diff --git a/src/Interpreters/DirectJoin.cpp b/src/Interpreters/DirectJoin.cpp index cfefd7c5a91..431f216436d 100644 --- a/src/Interpreters/DirectJoin.cpp +++ b/src/Interpreters/DirectJoin.cpp @@ -103,7 +103,7 @@ DirectKeyValueJoin::DirectKeyValueJoin( right_sample_block_with_storage_column_names = right_sample_block_with_storage_column_names_; } -bool DirectKeyValueJoin::addJoinedBlock(const Block &, bool) +bool DirectKeyValueJoin::addBlockToJoin(const Block &, bool) { throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Unreachable code reached"); } diff --git a/src/Interpreters/DirectJoin.h b/src/Interpreters/DirectJoin.h index 644b66a9d99..e55ac278705 100644 --- a/src/Interpreters/DirectJoin.h +++ b/src/Interpreters/DirectJoin.h @@ -32,10 +32,10 @@ public: virtual const TableJoin & getTableJoin() const override { return *table_join; } - virtual bool addJoinedBlock(const Block &, bool) override; + virtual bool addBlockToJoin(const Block &, bool) override; virtual void checkTypesOfKeys(const Block &) const override; - /// Join the block with data from left hand of JOIN to the right hand data (that was previously built by calls to addJoinedBlock). + /// Join the block with data from left hand of JOIN to the right hand data (that was previously built by calls to addBlockToJoin). /// Could be called from different threads in parallel. 
virtual void joinBlock(Block & block, std::shared_ptr &) override; diff --git a/src/Interpreters/FullSortingMergeJoin.h b/src/Interpreters/FullSortingMergeJoin.h index 7318d1d24a1..a6b53a51c04 100644 --- a/src/Interpreters/FullSortingMergeJoin.h +++ b/src/Interpreters/FullSortingMergeJoin.h @@ -30,9 +30,9 @@ public: const TableJoin & getTableJoin() const override { return *table_join; } - bool addJoinedBlock(const Block & /* block */, bool /* check_limits */) override + bool addBlockToJoin(const Block & /* block */, bool /* check_limits */) override { - throw Exception(ErrorCodes::LOGICAL_ERROR, "FullSortingMergeJoin::addJoinedBlock should not be called"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "FullSortingMergeJoin::addBlockToJoin should not be called"); } static bool isSupported(const std::shared_ptr & table_join) diff --git a/src/Interpreters/GraceHashJoin.cpp b/src/Interpreters/GraceHashJoin.cpp index 4218a8ea4e1..f455622c4c8 100644 --- a/src/Interpreters/GraceHashJoin.cpp +++ b/src/Interpreters/GraceHashJoin.cpp @@ -310,13 +310,13 @@ bool GraceHashJoin::isSupported(const std::shared_ptr & table_join) GraceHashJoin::~GraceHashJoin() = default; -bool GraceHashJoin::addJoinedBlock(const Block & block, bool /*check_limits*/) +bool GraceHashJoin::addBlockToJoin(const Block & block, bool /*check_limits*/) { if (current_bucket == nullptr) throw Exception(ErrorCodes::LOGICAL_ERROR, "GraceHashJoin is not initialized"); Block materialized = materializeBlock(block); - addJoinedBlockImpl(std::move(materialized)); + addBlockToJoinImpl(std::move(materialized)); return true; } @@ -596,7 +596,7 @@ IBlocksStreamPtr GraceHashJoin::getDelayedBlocks() while (Block block = right_reader.read()) { num_rows += block.rows(); - addJoinedBlockImpl(std::move(block)); + addBlockToJoinImpl(std::move(block)); } LOG_TRACE(log, "Loaded bucket {} with {}(/{}) rows", @@ -621,7 +621,7 @@ Block GraceHashJoin::prepareRightBlock(const Block & block) return HashJoin::prepareRightBlock(block, hash_join_sample_block); } -void GraceHashJoin::addJoinedBlockImpl(Block block) +void GraceHashJoin::addBlockToJoinImpl(Block block) { block = prepareRightBlock(block); Buckets buckets_snapshot = getCurrentBuckets(); @@ -646,7 +646,7 @@ void GraceHashJoin::addJoinedBlockImpl(Block block) if (!hash_join) hash_join = makeInMemoryJoin(); - hash_join->addJoinedBlock(current_block, /* check_limits = */ false); + hash_join->addBlockToJoin(current_block, /* check_limits = */ false); if (!hasMemoryOverflow(hash_join)) return; @@ -677,7 +677,7 @@ void GraceHashJoin::addJoinedBlockImpl(Block block) hash_join = makeInMemoryJoin(); if (current_block.rows() > 0) - hash_join->addJoinedBlock(current_block, /* check_limits = */ false); + hash_join->addBlockToJoin(current_block, /* check_limits = */ false); } } diff --git a/src/Interpreters/GraceHashJoin.h b/src/Interpreters/GraceHashJoin.h index b8d83f4cad0..8224f1f1a4a 100644 --- a/src/Interpreters/GraceHashJoin.h +++ b/src/Interpreters/GraceHashJoin.h @@ -23,11 +23,11 @@ class HashJoin; * * The joining algorithm consists of three stages: * - * 1) During the first stage we accumulate blocks of the right table via @addJoinedBlock. + * 1) During the first stage we accumulate blocks of the right table via @addBlockToJoin. * Each input block is split into multiple buckets based on the hash of the row join keys. * The first bucket is added to the in-memory HashJoin, and the remaining buckets are written to disk for further processing. 
* When the size of HashJoin exceeds the limits, we double the number of buckets. - * There can be multiple threads calling addJoinedBlock, just like @ConcurrentHashJoin. + * There can be multiple threads calling addBlockToJoin, just like @ConcurrentHashJoin. * * 2) At the second stage we process left table blocks via @joinBlock. * Again, each input block is split into multiple buckets by hash. @@ -65,7 +65,7 @@ public: void initialize(const Block & sample_block) override; - bool addJoinedBlock(const Block & block, bool check_limits) override; + bool addBlockToJoin(const Block & block, bool check_limits) override; void checkTypesOfKeys(const Block & block) const override; void joinBlock(Block & block, std::shared_ptr & not_processed) override; @@ -94,7 +94,7 @@ private: InMemoryJoinPtr makeInMemoryJoin(); /// Add right table block to the @join. Calls @rehash on overflow. - void addJoinedBlockImpl(Block block); + void addBlockToJoinImpl(Block block); /// Check that join satisfies limits on rows/bytes in table_join. bool hasMemoryOverflow(size_t total_rows, size_t total_bytes) const; diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 6fe2b8464f5..548039f257a 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -79,8 +79,8 @@ namespace JoinStuff { assert(flags[nullptr].size() <= size); need_flags = true; - // For one disjunct clause case, we don't need to reinit each time we call addJoinedBlock. - // and there is no value inserted in this JoinUsedFlags before addJoinedBlock finish. + // For one disjunct clause case, we don't need to reinit each time we call addBlockToJoin. + // and there is no value inserted in this JoinUsedFlags before addBlockToJoin finish. // So we reinit only when the hash table is rehashed to a larger size. if (flags.empty() || flags[nullptr].size() < size) [[unlikely]] { @@ -729,7 +729,7 @@ Block HashJoin::prepareRightBlock(const Block & block) const return prepareRightBlock(block, savedBlockSample()); } -bool HashJoin::addJoinedBlock(const Block & source_block_, bool check_limits) +bool HashJoin::addBlockToJoin(const Block & source_block_, bool check_limits) { if (!data) throw Exception(ErrorCodes::LOGICAL_ERROR, "Join data was released"); @@ -781,7 +781,7 @@ bool HashJoin::addJoinedBlock(const Block & source_block_, bool check_limits) size_t total_bytes = 0; { if (storage_join_lock) - throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "addJoinedBlock called when HashJoin locked to prevent updates"); + throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "addBlockToJoin called when HashJoin locked to prevent updates"); data->blocks_allocated_size += block_to_save.allocatedBytes(); data->blocks.emplace_back(std::move(block_to_save)); diff --git a/src/Interpreters/HashJoin.h b/src/Interpreters/HashJoin.h index 50eda4482bd..f30bbc3a46c 100644 --- a/src/Interpreters/HashJoin.h +++ b/src/Interpreters/HashJoin.h @@ -155,11 +155,11 @@ public: /** Add block of data from right hand of JOIN to the map. * Returns false, if some limit was exceeded and you should not insert more data. */ - bool addJoinedBlock(const Block & source_block_, bool check_limits) override; + bool addBlockToJoin(const Block & source_block_, bool check_limits) override; void checkTypesOfKeys(const Block & block) const override; - /** Join data from the map (that was previously built by calls to addJoinedBlock) to the block with data from "left" table. 
+ /** Join data from the map (that was previously built by calls to addBlockToJoin) to the block with data from "left" table. * Could be called from different threads in parallel. */ void joinBlock(Block & block, ExtraBlockPtr & not_processed) override; @@ -406,7 +406,7 @@ private: Poco::Logger * log; /// Should be set via setLock to protect hash table from modification from StorageJoin - /// If set HashJoin instance is not available for modification (addJoinedBlock) + /// If set HashJoin instance is not available for modification (addBlockToJoin) TableLockHolder storage_join_lock = nullptr; void dataMapInit(MapsVariant &); diff --git a/src/Interpreters/IJoin.h b/src/Interpreters/IJoin.h index 83067b0eab7..97b119bd795 100644 --- a/src/Interpreters/IJoin.h +++ b/src/Interpreters/IJoin.h @@ -52,7 +52,7 @@ public: /// Add block of data from right hand of JOIN. /// @returns false, if some limit was exceeded and you should not insert more data. - virtual bool addJoinedBlock(const Block & block, bool check_limits = true) = 0; /// NOLINT + virtual bool addBlockToJoin(const Block & block, bool check_limits = true) = 0; /// NOLINT /* Some initialization may be required before joinBlock() call. * It's better to done in in constructor, but left block exact structure is not known at that moment. @@ -62,7 +62,7 @@ public: virtual void checkTypesOfKeys(const Block & block) const = 0; - /// Join the block with data from left hand of JOIN to the right hand data (that was previously built by calls to addJoinedBlock). + /// Join the block with data from left hand of JOIN to the right hand data (that was previously built by calls to addBlockToJoin). /// Could be called from different threads in parallel. virtual void joinBlock(Block & block, std::shared_ptr & not_processed) = 0; @@ -79,7 +79,7 @@ public: /// Returns true if no data to join with. virtual bool alwaysReturnsEmptySet() const = 0; - /// StorageJoin/Dictionary is already filled. No need to call addJoinedBlock. + /// StorageJoin/Dictionary is already filled. No need to call addBlockToJoin. /// Different query plan is used for such joins. 
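The class comments above share one idea: split every incoming right-table block into buckets by a hash of the join keys, so that each bucket is owned by a single hash table (ConcurrentHashJoin) or can be spilled to disk and replayed later (GraceHashJoin). A stripped-down sketch of that dispatch step follows, assuming integer keys and std::hash in place of ClickHouse's key hashing; all names are illustrative only.

#include <cstddef>
#include <functional>
#include <iostream>
#include <vector>

// Sketch only: dispatch right-table rows into N buckets by a hash of the
// join key, the way addBlockToJoin splits work across internal joins.
// Rows are reduced to bare integer keys; names are hypothetical.
std::vector<std::vector<int>> dispatchBlock(const std::vector<int> & right_block, size_t num_buckets)
{
    std::vector<std::vector<int>> buckets(num_buckets);
    for (int key : right_block)
        buckets[std::hash<int>{}(key) % num_buckets].push_back(key);
    return buckets;
}

int main()
{
    // Each bucket could now feed its own single-writer hash table, or all
    // but the first could be spilled to disk as GraceHashJoin does.
    auto buckets = dispatchBlock({1, 2, 3, 4, 5, 6, 7, 8}, 4);
    for (size_t i = 0; i < buckets.size(); ++i)
        std::cout << "bucket " << i << " holds " << buckets[i].size() << " rows\n";
}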
virtual bool isFilled() const { return pipelineType() == JoinPipelineType::FilledRight; } virtual JoinPipelineType pipelineType() const { return JoinPipelineType::FillRightFirst; } diff --git a/src/Interpreters/JoinSwitcher.cpp b/src/Interpreters/JoinSwitcher.cpp index 15702784d74..5ea347549c1 100644 --- a/src/Interpreters/JoinSwitcher.cpp +++ b/src/Interpreters/JoinSwitcher.cpp @@ -19,16 +19,16 @@ JoinSwitcher::JoinSwitcher(std::shared_ptr table_join_, const Block & limits.max_bytes = table_join->defaultMaxBytes(); } -bool JoinSwitcher::addJoinedBlock(const Block & block, bool) +bool JoinSwitcher::addBlockToJoin(const Block & block, bool) { std::lock_guard lock(switch_mutex); if (switched) - return join->addJoinedBlock(block); + return join->addBlockToJoin(block); /// HashJoin with external limits check - join->addJoinedBlock(block, false); + join->addBlockToJoin(block, false); size_t rows = join->getTotalRowCount(); size_t bytes = join->getTotalByteCount(); @@ -48,7 +48,7 @@ bool JoinSwitcher::switchJoin() bool success = true; for (const Block & saved_block : right_blocks) - success = success && join->addJoinedBlock(saved_block); + success = success && join->addBlockToJoin(saved_block); switched = true; return success; diff --git a/src/Interpreters/JoinSwitcher.h b/src/Interpreters/JoinSwitcher.h index eec4787037d..fb5066b2d04 100644 --- a/src/Interpreters/JoinSwitcher.h +++ b/src/Interpreters/JoinSwitcher.h @@ -23,7 +23,7 @@ public: /// Add block of data from right hand of JOIN into current join object. /// If join-in-memory memory limit exceeded switches to join-on-disk and continue with it. /// @returns false, if join-on-disk disk limit exceeded - bool addJoinedBlock(const Block & block, bool check_limits) override; + bool addBlockToJoin(const Block & block, bool check_limits) override; void checkTypesOfKeys(const Block & block) const override { diff --git a/src/Interpreters/MergeJoin.cpp b/src/Interpreters/MergeJoin.cpp index d31510c2fb5..ceef1371f16 100644 --- a/src/Interpreters/MergeJoin.cpp +++ b/src/Interpreters/MergeJoin.cpp @@ -669,7 +669,7 @@ Block MergeJoin::modifyRightBlock(const Block & src_block) const return block; } -bool MergeJoin::addJoinedBlock(const Block & src_block, bool) +bool MergeJoin::addBlockToJoin(const Block & src_block, bool) { Block block = modifyRightBlock(src_block); diff --git a/src/Interpreters/MergeJoin.h b/src/Interpreters/MergeJoin.h index 8b5d884a0e6..03a661c5b8a 100644 --- a/src/Interpreters/MergeJoin.h +++ b/src/Interpreters/MergeJoin.h @@ -23,7 +23,7 @@ public: MergeJoin(std::shared_ptr table_join_, const Block & right_sample_block); const TableJoin & getTableJoin() const override { return *table_join; } - bool addJoinedBlock(const Block & block, bool check_limits) override; + bool addBlockToJoin(const Block & block, bool check_limits) override; void checkTypesOfKeys(const Block & block) const override; void joinBlock(Block &, ExtraBlockPtr & not_processed) override; diff --git a/src/Processors/Transforms/JoiningTransform.cpp b/src/Processors/Transforms/JoiningTransform.cpp index bba8ec6fa16..49b90d04b81 100644 --- a/src/Processors/Transforms/JoiningTransform.cpp +++ b/src/Processors/Transforms/JoiningTransform.cpp @@ -305,7 +305,7 @@ void FillingRightJoinSideTransform::work() if (for_totals) join->setTotals(block); else - stop_reading = !join->addJoinedBlock(block); + stop_reading = !join->addBlockToJoin(block); set_totals = for_totals; } diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index a238e9ef26c..640706aae17 
100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -146,7 +146,7 @@ void StorageJoin::mutate(const MutationCommands & commands, ContextPtr context) Block block; while (executor.pull(block)) { - new_data->addJoinedBlock(block, true); + new_data->addBlockToJoin(block, true); if (persistent) backup_stream.write(block); } @@ -257,7 +257,7 @@ void StorageJoin::insertBlock(const Block & block, ContextPtr context) if (!holder) throw Exception(ErrorCodes::DEADLOCK_AVOIDED, "StorageJoin: cannot insert data because current query tries to read from this storage"); - join->addJoinedBlock(block_to_insert, true); + join->addBlockToJoin(block_to_insert, true); } size_t StorageJoin::getSize(ContextPtr context) const From f7640ff5733822a9c6f4e119f6ff2ed7027a885d Mon Sep 17 00:00:00 2001 From: Feng Kaiyu Date: Thu, 6 Jul 2023 01:27:20 +0800 Subject: [PATCH 1243/1997] fix: correct exception message on policies comparison --- src/Disks/StoragePolicy.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/Disks/StoragePolicy.cpp b/src/Disks/StoragePolicy.cpp index f4be8b8fe86..71922e297df 100644 --- a/src/Disks/StoragePolicy.cpp +++ b/src/Disks/StoragePolicy.cpp @@ -302,7 +302,11 @@ void StoragePolicy::checkCompatibleWith(const StoragePolicyPtr & new_storage_pol for (const auto & volume : getVolumes()) { if (!new_volume_names.contains(volume->getName())) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "New storage policy {} shall contain volumes of old one", backQuote(name)); + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "New storage policy {} shall contain volumes of old one ({})", + backQuote(new_storage_policy->getName()), + backQuote(name)); std::unordered_set new_disk_names; for (const auto & disk : new_storage_policy->getVolumeByName(volume->getName())->getDisks()) @@ -310,7 +314,11 @@ void StoragePolicy::checkCompatibleWith(const StoragePolicyPtr & new_storage_pol for (const auto & disk : volume->getDisks()) if (!new_disk_names.contains(disk->getName())) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "New storage policy {} shall contain disks of old one", backQuote(name)); + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "New storage policy {} shall contain disks of old one ({})", + backQuote(new_storage_policy->getName()), + backQuote(name)); } } From b60a1c53d638b5c10727d3e5c0e6d5b5b8d5725a Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 5 Jul 2023 17:43:09 +0000 Subject: [PATCH 1244/1997] Fix oldest part fetching --- tests/integration/test_multiple_disks/test.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_multiple_disks/test.py b/tests/integration/test_multiple_disks/test.py index c0fbe39196d..fa79a9baa90 100644 --- a/tests/integration/test_multiple_disks/test.py +++ b/tests/integration/test_multiple_disks/test.py @@ -717,9 +717,10 @@ def test_background_move(start_cluster, name, engine): node1.query(f"SYSTEM STOP MERGES {name}") + first_part = None for i in range(5): data = [] # 5MB in total - for i in range(5): + for _ in range(5): data.append(get_random_string(1024 * 1024)) # 1MB row # small jbod size is 40MB, so lets insert 5MB batch 5 times node1.query_with_retry( @@ -728,7 +729,11 @@ def test_background_move(start_cluster, name, engine): ) ) - first_part = get_oldest_part(node1, name) + # we are doing moves in parallel so we need to fetch the name of first part before we add new parts + if i == 0: + first_part = get_oldest_part(node1, name) + + assert first_part is not 
None used_disks = get_used_disks_for_table(node1, name) From a94498cf1fc0bb097927d0b4a8fca2caa545b958 Mon Sep 17 00:00:00 2001 From: Vitaliy Pashkov <60093578+pashkov-v@users.noreply.github.com> Date: Wed, 5 Jul 2023 20:59:28 +0300 Subject: [PATCH 1245/1997] Minor fixes to odbc.md --- docs/en/engines/table-engines/integrations/odbc.md | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/docs/en/engines/table-engines/integrations/odbc.md b/docs/en/engines/table-engines/integrations/odbc.md index e29e56c10b2..71085feb626 100644 --- a/docs/en/engines/table-engines/integrations/odbc.md +++ b/docs/en/engines/table-engines/integrations/odbc.md @@ -54,7 +54,7 @@ $ sudo mysql ``` sql mysql> CREATE USER 'clickhouse'@'localhost' IDENTIFIED BY 'clickhouse'; -mysql> GRANT ALL PRIVILEGES ON *.* TO 'clickhouse'@'clickhouse' WITH GRANT OPTION; +mysql> GRANT ALL PRIVILEGES ON *.* TO 'clickhouse'@'localhost' WITH GRANT OPTION; ``` Then configure the connection in `/etc/odbc.ini`. @@ -66,7 +66,7 @@ DRIVER = /usr/local/lib/libmyodbc5w.so SERVER = 127.0.0.1 PORT = 3306 DATABASE = test -USERNAME = clickhouse +USER = clickhouse PASSWORD = clickhouse ``` @@ -83,6 +83,9 @@ $ isql -v mysqlconn Table in MySQL: ``` text +mysql> CREATE DATABASE test; +Query OK, 1 row affected (0,01 sec) + mysql> CREATE TABLE `test`.`test` ( -> `int_id` INT NOT NULL AUTO_INCREMENT, -> `int_nullable` INT NULL DEFAULT NULL, @@ -91,10 +94,10 @@ mysql> CREATE TABLE `test`.`test` ( -> PRIMARY KEY (`int_id`)); Query OK, 0 rows affected (0,09 sec) -mysql> insert into test (`int_id`, `float`) VALUES (1,2); +mysql> insert into test.test (`int_id`, `float`) VALUES (1,2); Query OK, 1 row affected (0,00 sec) -mysql> select * from test; +mysql> select * from test.test; +------+----------+-----+----------+ | int_id | int_nullable | float | float_nullable | +------+----------+-----+----------+ From ce8b0cae822f7e049eba7e8967122890510a82c5 Mon Sep 17 00:00:00 2001 From: lcjh <120989324@qq.com> Date: Thu, 6 Jul 2023 02:14:48 +0800 Subject: [PATCH 1246/1997] remove duplicate condition --- src/Functions/FunctionUnixTimestamp64.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Functions/FunctionUnixTimestamp64.h b/src/Functions/FunctionUnixTimestamp64.h index 58a23f7266e..a2065465501 100644 --- a/src/Functions/FunctionUnixTimestamp64.h +++ b/src/Functions/FunctionUnixTimestamp64.h @@ -155,7 +155,6 @@ public: if (!((executeType(result_column, arguments, input_rows_count)) || (executeType(result_column, arguments, input_rows_count)) || (executeType(result_column, arguments, input_rows_count)) - || (executeType(result_column, arguments, input_rows_count)) || (executeType(result_column, arguments, input_rows_count)) || (executeType(result_column, arguments, input_rows_count)) || (executeType(result_column, arguments, input_rows_count)) From 44791af7102079b8a3db6a5a2fbe5fbaa8eae3bf Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 5 Jul 2023 22:54:22 +0200 Subject: [PATCH 1247/1997] stop merges properly for replicated tables --- tests/integration/test_multiple_disks/test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_multiple_disks/test.py b/tests/integration/test_multiple_disks/test.py index fa79a9baa90..4a934447345 100644 --- a/tests/integration/test_multiple_disks/test.py +++ b/tests/integration/test_multiple_disks/test.py @@ -711,7 +711,7 @@ def test_background_move(start_cluster, name, engine): s1 String ) ENGINE = {engine} ORDER BY tuple() - SETTINGS 
storage_policy='moving_jbod_with_external'
+        SETTINGS storage_policy='moving_jbod_with_external', max_replicated_merges_in_queue=0
     """
     )
@@ -784,7 +784,7 @@ def test_start_stop_moves(start_cluster, name, engine):
             s1 String
         ) ENGINE = {engine}
         ORDER BY tuple()
-        SETTINGS storage_policy='moving_jbod_with_external'
+        SETTINGS storage_policy='moving_jbod_with_external', max_replicated_merges_in_queue=0
     """
     )

From 2ab6c599a234d31c2f59e1aaa35298c1274390b1 Mon Sep 17 00:00:00 2001
From: kssenii
Date: Wed, 5 Jul 2023 23:31:44 +0200
Subject: [PATCH 1248/1997] Fix tests

---
 .../0_stateless/02240_filesystem_query_cache.sql | 15 +++++++++++++++
 .../0_stateless/02286_drop_filesystem_cache.sh   |  5 -----
 .../0_stateless/02344_describe_cache.reference   |  2 +-
 3 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/tests/queries/0_stateless/02240_filesystem_query_cache.sql b/tests/queries/0_stateless/02240_filesystem_query_cache.sql
index 760ec1baa87..02cf54b0caa 100644
--- a/tests/queries/0_stateless/02240_filesystem_query_cache.sql
+++ b/tests/queries/0_stateless/02240_filesystem_query_cache.sql
@@ -9,6 +9,21 @@ SET filesystem_cache_max_download_size=128;
 DROP TABLE IF EXISTS test;
 SYSTEM DROP FILESYSTEM CACHE;
+CREATE TABLE test (key UInt32, value String)
+Engine=MergeTree()
+ORDER BY key
+SETTINGS min_bytes_for_wide_part = 10485760,
+         compress_marks=false,
+         compress_primary_key=false,
+         disk = disk(
+             type = cache,
+             max_size = '128Mi',
+             path = '/var/lib/clickhouse/${CLICKHOUSE_TEST_UNIQUE_NAME}_cache',
+             cache_on_write_operations= 1,
+             enable_filesystem_query_cache_limit = 1,
+             do_not_evict_index_and_mark_files = 0,
+             delayed_cleanup_interval_ms = 100,
+             disk = 's3_disk');
 INSERT INTO test SELECT number, toString(number) FROM numbers(100);
 SELECT * FROM test FORMAT Null;
 SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size;
diff --git a/tests/queries/0_stateless/02286_drop_filesystem_cache.sh b/tests/queries/0_stateless/02286_drop_filesystem_cache.sh
index 091bca10bcf..1e1841862e9 100755
--- a/tests/queries/0_stateless/02286_drop_filesystem_cache.sh
+++ b/tests/queries/0_stateless/02286_drop_filesystem_cache.sh
@@ -67,9 +67,4 @@ for STORAGE_POLICY in 's3_cache' 'local_cache'; do
                 ON data_paths.cache_path = caches.cache_path"

     $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test_022862"
-
-    $CLICKHOUSE_CLIENT -n --query "CREATE TABLE test_022862 (key UInt32, value String)
-    Engine=MergeTree()
-    ORDER BY key
-    SETTINGS storage_policy='${STORAGE_POLICY}_2', min_bytes_for_wide_part = 10485760"
 done
diff --git a/tests/queries/0_stateless/02344_describe_cache.reference b/tests/queries/0_stateless/02344_describe_cache.reference
index 5c3d47d87f6..da84cdabf79 100644
--- a/tests/queries/0_stateless/02344_describe_cache.reference
+++ b/tests/queries/0_stateless/02344_describe_cache.reference
@@ -1 +1 @@
-134217728 10000000 8388608 1 0 0 0 /var/lib/clickhouse/caches/s3_cache/ 100 2 0
+134217728 10000000 33554432 4194304 1 0 0 0 /var/lib/clickhouse/caches/s3_cache/ 100 2 0
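A note on the idiom in the rewritten cache test above: it no longer depends on a preconfigured storage policy, but declares the cache disk inline in the table's SETTINGS clause. A minimal sketch of that pattern, with a hypothetical table name, cache size, and cache path (only 's3_disk' and the disk(type = cache, ...) syntax come from the patch itself):

    CREATE TABLE cache_demo (n UInt64)
    ENGINE = MergeTree
    ORDER BY n
    SETTINGS disk = disk(
        type = cache,                               -- layer a filesystem cache over another disk
        max_size = '1Gi',                           -- illustrative cache size
        path = '/var/lib/clickhouse/cache_demo/',   -- illustrative cache path
        disk = 's3_disk');                          -- underlying disk, assumed to exist in the server config

This keeps each test's cache isolated, which is why the follow-up hunk can drop the shared storage-policy table from 02286_drop_filesystem_cache.sh.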
"package_type": "binary", "static_binary_name": "riscv64", - "bundled": "bundled", - "libraries": "static", "tidy": "disable", "with_coverage": False, + "comment": "", }, }, "builds_report_config": { From 9cadcb16a397f984a4b5bbe6695dd8156098a198 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 6 Jul 2023 01:46:30 +0300 Subject: [PATCH 1250/1997] Update 02811_parallel_replicas_prewhere_count.sql --- .../0_stateless/02811_parallel_replicas_prewhere_count.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/02811_parallel_replicas_prewhere_count.sql b/tests/queries/0_stateless/02811_parallel_replicas_prewhere_count.sql index 0cf53158646..141ae947e5e 100644 --- a/tests/queries/0_stateless/02811_parallel_replicas_prewhere_count.sql +++ b/tests/queries/0_stateless/02811_parallel_replicas_prewhere_count.sql @@ -1,14 +1,13 @@ +DROP TABLE IF EXISTS users; CREATE TABLE users (uid Int16, name String, age Int16) ENGINE=MergeTree() ORDER BY uid; INSERT INTO users VALUES (111, 'JFK', 33); INSERT INTO users VALUES (6666, 'KLM', 48); INSERT INTO users VALUES (88888, 'AMS', 50); - SELECT '-- count() ------------------------------'; SELECT count() FROM users PREWHERE uid > 2000; - -- enable parallel replicas but with high granules threshold SET skip_unavailable_shards=1, @@ -19,6 +18,7 @@ cluster_for_parallel_replicas='parallel_replicas', parallel_replicas_for_non_replicated_merge_tree=1, parallel_replicas_min_number_of_granules_to_enable=1000; - SELECT '-- count() with parallel replicas -------'; SELECT count() FROM users PREWHERE uid > 2000; + +DROP TABLE users; From 98da25f1d3f4014fd13d1f53aa3ccee6da21d9f2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 6 Jul 2023 01:17:41 +0200 Subject: [PATCH 1251/1997] Fix build --- cmake/target.cmake | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cmake/target.cmake b/cmake/target.cmake index ea4c206fc4f..0791da87bf0 100644 --- a/cmake/target.cmake +++ b/cmake/target.cmake @@ -44,6 +44,8 @@ if (CMAKE_CROSSCOMPILING) set (ENABLE_GRPC OFF CACHE INTERNAL "") set (ENABLE_HDFS OFF CACHE INTERNAL "") set (ENABLE_MYSQL OFF CACHE INTERNAL "") + # It might be ok, but we need to update 'sysroot' + set (ENABLE_RUST OFF CACHE INTERNAL "") elseif (ARCH_S390X) set (ENABLE_GRPC OFF CACHE INTERNAL "") set (ENABLE_SENTRY OFF CACHE INTERNAL "") From 698c49cd51f406d0a9e619b4c7d971857f1fb59b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 6 Jul 2023 02:19:03 +0300 Subject: [PATCH 1252/1997] Update 02811_invalid_embedded_rocksdb_create.sql --- .../0_stateless/02811_invalid_embedded_rocksdb_create.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02811_invalid_embedded_rocksdb_create.sql b/tests/queries/0_stateless/02811_invalid_embedded_rocksdb_create.sql index bfe4ee0622e..a87ac5e0de0 100644 --- a/tests/queries/0_stateless/02811_invalid_embedded_rocksdb_create.sql +++ b/tests/queries/0_stateless/02811_invalid_embedded_rocksdb_create.sql @@ -1 +1,2 @@ +-- Tags: no-fasttest CREATE TABLE dict (`k` String, `v` String) ENGINE = EmbeddedRocksDB(k) PRIMARY KEY k; -- {serverError 36} From 75d051dd5554022ee7d9c215543c5ffad5c3df63 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 6 Jul 2023 01:49:53 +0200 Subject: [PATCH 1253/1997] Remove useless packages --- docker/test/sqllogic/run.sh | 4 ++-- docker/test/stateless/Dockerfile | 1 - docker/test/stress/Dockerfile | 3 --- docker/test/upgrade/Dockerfile | 3 --- docker/test/util/Dockerfile | 1 - docs/zh/development/build.md | 7 
 .../0_stateless/02439_merge_selecting_partitions.sql | 1 -
 7 files changed, 2 insertions(+), 18 deletions(-)

diff --git a/docker/test/sqllogic/run.sh b/docker/test/sqllogic/run.sh
index 8d0252e3c98..444252837a3 100755
--- a/docker/test/sqllogic/run.sh
+++ b/docker/test/sqllogic/run.sh
@@ -92,8 +92,8 @@ sudo clickhouse stop ||:
 for _ in $(seq 1 60); do if [[ $(wget --timeout=1 -q 'localhost:8123' -O-) == 'Ok.' ]]; then sleep 1 ; else break; fi ; done
-grep -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server.log ||:
-pigz < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.gz &
+rg -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server.log ||:
+zstd < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.zst &
 # Compressed (FIXME: remove once only github actions will be left)
 rm /var/log/clickhouse-server/clickhouse-server.log
diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile
index 32996140521..e1e84c427ba 100644
--- a/docker/test/stateless/Dockerfile
+++ b/docker/test/stateless/Dockerfile
@@ -33,7 +33,6 @@ RUN apt-get update -y \
     qemu-user-static \
     sqlite3 \
     sudo \
-    telnet \
     tree \
     unixodbc \
    wget \
diff --git a/docker/test/stress/Dockerfile b/docker/test/stress/Dockerfile
index e9712f430fd..eddeb04758b 100644
--- a/docker/test/stress/Dockerfile
+++ b/docker/test/stress/Dockerfile
@@ -8,8 +8,6 @@ RUN apt-get update -y \
     apt-get install --yes --no-install-recommends \
     bash \
     tzdata \
-    fakeroot \
-    debhelper \
     parallel \
     expect \
     python3 \
@@ -20,7 +18,6 @@ RUN apt-get update -y \
     sudo \
     openssl \
     netcat-openbsd \
-    telnet \
     brotli \
     && apt-get clean
diff --git a/docker/test/upgrade/Dockerfile b/docker/test/upgrade/Dockerfile
index 8e5890b81a0..9152230af1c 100644
--- a/docker/test/upgrade/Dockerfile
+++ b/docker/test/upgrade/Dockerfile
@@ -8,8 +8,6 @@ RUN apt-get update -y \
     apt-get install --yes --no-install-recommends \
     bash \
     tzdata \
-    fakeroot \
-    debhelper \
     parallel \
     expect \
     python3 \
@@ -20,7 +18,6 @@ RUN apt-get update -y \
     sudo \
     openssl \
     netcat-openbsd \
-    telnet \
     brotli \
     && apt-get clean
diff --git a/docker/test/util/Dockerfile b/docker/test/util/Dockerfile
index 85e888f1df7..6a4c6aa3057 100644
--- a/docker/test/util/Dockerfile
+++ b/docker/test/util/Dockerfile
@@ -44,7 +44,6 @@ RUN apt-get update \
     clang-${LLVM_VERSION} \
     clang-tidy-${LLVM_VERSION} \
     cmake \
-    fakeroot \
     gdb \
     git \
     gperf \
diff --git a/docs/zh/development/build.md b/docs/zh/development/build.md
index d76f4b1577c..bb25755a615 100644
--- a/docs/zh/development/build.md
+++ b/docs/zh/development/build.md
@@ -3,13 +3,6 @@ slug: /zh/development/build
 ---
 # 如何构建 ClickHouse 发布包 {#ru-he-gou-jian-clickhouse-fa-bu-bao}
-## 安装 Git 和 Pbuilder {#an-zhuang-git-he-pbuilder}
-
-``` bash
-sudo apt-get update
-sudo apt-get install git pbuilder debhelper lsb-release fakeroot sudo debian-archive-keyring debian-keyring
-```
-
 ## 拉取 ClickHouse 源码 {#la-qu-clickhouse-yuan-ma}

 ``` bash
diff --git a/tests/queries/0_stateless/02439_merge_selecting_partitions.sql b/tests/queries/0_stateless/02439_merge_selecting_partitions.sql
index 88ce2834d6b..bcfcaa2acd3 100644
--- a/tests/queries/0_stateless/02439_merge_selecting_partitions.sql
+++ b/tests/queries/0_stateless/02439_merge_selecting_partitions.sql
@@ -1,4 +1,3 @@
-
 drop table if exists rmt;
 create table rmt (n int, m int) engine=ReplicatedMergeTree('/test/02439/{shard}/{database}', '{replica}') partition by n order by n;
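Since the sqllogic log artifact above is now compressed with zstd rather than gzip-via-pigz, anything that inspects it needs the matching decompressor. An illustrative one-liner for scanning the compressed log for fatal errors (the artifact path is the one from the diff; zstdcat ships with the zstd package, and rg -Fa is the same fixed-string, binary-as-text search used in run.sh):

    zstdcat /test_output/clickhouse-server.log.zst | rg -Fa "Fatal"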
From db14b2c54fbd42d1c8123a15d87382fe00938a6a Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Thu, 6 Jul 2023 02:16:38 +0200
Subject: [PATCH 1254/1997] Remove useless logs

---
 src/Interpreters/executeQuery.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp
index c52dab722c9..694226af6b0 100644
--- a/src/Interpreters/executeQuery.cpp
+++ b/src/Interpreters/executeQuery.cpp
@@ -322,8 +322,8 @@ static std::tuple executeQueryImpl(
     /// This does not have impact on the final span logs, because these internal queries are issued by external queries,
     /// we still have enough span logs for the execution of external queries.
     std::shared_ptr query_span = internal ? nullptr : std::make_shared("query");
-    if (query_span)
-        LOG_DEBUG(&Poco::Logger::get("executeQuery"), "Query span trace_id for opentelemetry log: {}", query_span->trace_id);
+    if (query_span && query_span->trace_id != UUID{})
+        LOG_TRACE(&Poco::Logger::get("executeQuery"), "Query span trace_id for opentelemetry log: {}", query_span->trace_id);

     auto query_start_time = std::chrono::system_clock::now();

From 45db928e4e31aae6a6d7e8e6b35e0a5a3768375c Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Thu, 6 Jul 2023 02:52:55 +0200
Subject: [PATCH 1255/1997] Fix style

---
 src/Functions/fromModifiedJulianDay.cpp | 1 -
 src/Functions/toModifiedJulianDay.cpp   | 2 --
 2 files changed, 3 deletions(-)

diff --git a/src/Functions/fromModifiedJulianDay.cpp b/src/Functions/fromModifiedJulianDay.cpp
index 8736b1fce7f..695d1b7d63c 100644
--- a/src/Functions/fromModifiedJulianDay.cpp
+++ b/src/Functions/fromModifiedJulianDay.cpp
@@ -19,7 +19,6 @@ namespace DB

 namespace ErrorCodes
 {
-    extern const int CANNOT_FORMAT_DATETIME;
     extern const int ILLEGAL_TYPE_OF_ARGUMENT;
 }

diff --git a/src/Functions/toModifiedJulianDay.cpp b/src/Functions/toModifiedJulianDay.cpp
index 5b4cd34141c..907c7570ce2 100644
--- a/src/Functions/toModifiedJulianDay.cpp
+++ b/src/Functions/toModifiedJulianDay.cpp
@@ -17,8 +17,6 @@ namespace DB
 {
     extern const int ILLEGAL_COLUMN;
     extern const int ILLEGAL_TYPE_OF_ARGUMENT;
-    extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED;
-    extern const int CANNOT_PARSE_DATE;
 }

 template

From 5416b7b6df8104440d9d74cbdc68fd0505012654 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Thu, 6 Jul 2023 03:04:58 +0200
Subject: [PATCH 1256/1997] Fix incorrect log level = warning

---
 programs/server/Server.cpp | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index d2d8a0d07fb..686c3b90dd6 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -1146,7 +1146,16 @@ try
     size_t merges_mutations_memory_usage_soft_limit = server_settings_.merges_mutations_memory_usage_soft_limit;

     size_t default_merges_mutations_server_memory_usage = static_cast(memory_amount * server_settings_.merges_mutations_memory_usage_to_ram_ratio);
-    if (merges_mutations_memory_usage_soft_limit == 0 || merges_mutations_memory_usage_soft_limit > default_merges_mutations_server_memory_usage)
+    if (merges_mutations_memory_usage_soft_limit == 0)
+    {
+        merges_mutations_memory_usage_soft_limit = default_merges_mutations_server_memory_usage;
+        LOG_INFO(log, "Setting merges_mutations_memory_usage_soft_limit was set to {}"
+            " ({} available * {:.2f} merges_mutations_memory_usage_to_ram_ratio)",
+            formatReadableSizeWithBinarySuffix(merges_mutations_memory_usage_soft_limit),
+            formatReadableSizeWithBinarySuffix(memory_amount),
server_settings_.merges_mutations_memory_usage_to_ram_ratio); + } + else if (merges_mutations_memory_usage_soft_limit > default_merges_mutations_server_memory_usage) { merges_mutations_memory_usage_soft_limit = default_merges_mutations_server_memory_usage; LOG_WARNING(log, "Setting merges_mutations_memory_usage_soft_limit was set to {}" From 64d5a85f6e731d9e8baba170aa7441555c030545 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 6 Jul 2023 03:16:06 +0200 Subject: [PATCH 1257/1997] Fix test_replicated_table_attach --- tests/integration/test_replicated_table_attach/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_replicated_table_attach/test.py b/tests/integration/test_replicated_table_attach/test.py index 2d209ddaf79..dee2be3fcf7 100644 --- a/tests/integration/test_replicated_table_attach/test.py +++ b/tests/integration/test_replicated_table_attach/test.py @@ -54,7 +54,7 @@ def test_startup_with_small_bg_pool_partitioned(started_cluster): assert_values() with PartitionManager() as pm: pm.drop_instance_zk_connections(node) - node.restart_clickhouse(stop_start_wait_sec=20) + node.restart_clickhouse(stop_start_wait_sec=300) assert_values() # check that we activate it in the end From e2c9f86f39e83b128d0fc82628bdae2ab0b8080b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 6 Jul 2023 03:31:10 +0200 Subject: [PATCH 1258/1997] Better usability of a test --- tests/queries/0_stateless/02125_many_mutations.sh | 2 ++ tests/queries/0_stateless/02125_many_mutations_2.sh | 2 ++ 2 files changed, 4 insertions(+) diff --git a/tests/queries/0_stateless/02125_many_mutations.sh b/tests/queries/0_stateless/02125_many_mutations.sh index b42d5bb15d3..54948fa1048 100755 --- a/tests/queries/0_stateless/02125_many_mutations.sh +++ b/tests/queries/0_stateless/02125_many_mutations.sh @@ -7,6 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # "max_parts_to_merge_at_once = 1" prevents merges to start in background before our own OPTIMIZE FINAL +$CLICKHOUSE_CLIENT -q "drop table if exists many_mutations" $CLICKHOUSE_CLIENT -q "create table many_mutations (x UInt32, y UInt32) engine = MergeTree order by x settings number_of_mutations_to_delay = 0, number_of_mutations_to_throw = 0, max_parts_to_merge_at_once = 1" $CLICKHOUSE_CLIENT -q "insert into many_mutations values (0, 0), (1, 1)" $CLICKHOUSE_CLIENT -q "system stop merges many_mutations" @@ -49,3 +50,4 @@ $CLICKHOUSE_CLIENT -q "system start merges many_mutations" $CLICKHOUSE_CLIENT -q "optimize table many_mutations final" --optimize_throw_if_noop 1 $CLICKHOUSE_CLIENT -q "select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done" $CLICKHOUSE_CLIENT -q "select x, y from many_mutations order by x" +$CLICKHOUSE_CLIENT -q "drop table many_mutations" diff --git a/tests/queries/0_stateless/02125_many_mutations_2.sh b/tests/queries/0_stateless/02125_many_mutations_2.sh index e5e3070a944..0351538b210 100755 --- a/tests/queries/0_stateless/02125_many_mutations_2.sh +++ b/tests/queries/0_stateless/02125_many_mutations_2.sh @@ -7,6 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # "max_parts_to_merge_at_once = 1" prevents merges to start in background before our own OPTIMIZE FINAL +$CLICKHOUSE_CLIENT -q "drop table if exists many_mutations" $CLICKHOUSE_CLIENT -q "create table many_mutations (x UInt32, y UInt32) engine = MergeTree order by x settings number_of_mutations_to_delay = 0, number_of_mutations_to_throw = 0, 
max_parts_to_merge_at_once = 1" $CLICKHOUSE_CLIENT -q "insert into many_mutations select number, number + 1 from numbers(2000)" $CLICKHOUSE_CLIENT -q "system stop merges many_mutations" @@ -51,3 +52,4 @@ $CLICKHOUSE_CLIENT -q "system flush logs" $CLICKHOUSE_CLIENT -q "select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done" $CLICKHOUSE_CLIENT -q "select count() from many_mutations" $CLICKHOUSE_CLIENT -q "select * from system.part_log where database = currentDatabase() and table == 'many_mutations' and peak_memory_usage > 1e9" +$CLICKHOUSE_CLIENT -q "drop table many_mutations" From 38c163b0662249b4da83e8b812662bf5b6d1a27a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 6 Jul 2023 03:43:59 +0200 Subject: [PATCH 1259/1997] Improve test --- .../0_stateless/02125_many_mutations.sh | 32 +++++++++---------- .../0_stateless/02125_many_mutations_2.sh | 32 +++++++++++-------- 2 files changed, 34 insertions(+), 30 deletions(-) diff --git a/tests/queries/0_stateless/02125_many_mutations.sh b/tests/queries/0_stateless/02125_many_mutations.sh index 54948fa1048..5a139e8b01d 100755 --- a/tests/queries/0_stateless/02125_many_mutations.sh +++ b/tests/queries/0_stateless/02125_many_mutations.sh @@ -7,19 +7,17 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # "max_parts_to_merge_at_once = 1" prevents merges to start in background before our own OPTIMIZE FINAL -$CLICKHOUSE_CLIENT -q "drop table if exists many_mutations" -$CLICKHOUSE_CLIENT -q "create table many_mutations (x UInt32, y UInt32) engine = MergeTree order by x settings number_of_mutations_to_delay = 0, number_of_mutations_to_throw = 0, max_parts_to_merge_at_once = 1" -$CLICKHOUSE_CLIENT -q "insert into many_mutations values (0, 0), (1, 1)" -$CLICKHOUSE_CLIENT -q "system stop merges many_mutations" - -$CLICKHOUSE_CLIENT -q "select x, y from many_mutations order by x" +$CLICKHOUSE_CLIENT --multiquery -q " +drop table if exists many_mutations; +create table many_mutations (x UInt32, y UInt32) engine = MergeTree order by x settings number_of_mutations_to_delay = 0, number_of_mutations_to_throw = 0, max_parts_to_merge_at_once = 1; +insert into many_mutations values (0, 0), (1, 1); +system stop merges many_mutations; +select x, y from many_mutations order by x; +" job() { - for _ in {1..1000} - do - $CLICKHOUSE_CLIENT -q "alter table many_mutations update y = y + 1 where 1" - done + yes "alter table many_mutations update y = y + 1 where 1;" | head -n 1000 | $CLICKHOUSE_CLIENT --multiquery } job & @@ -45,9 +43,11 @@ job & wait -$CLICKHOUSE_CLIENT -q "select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done" -$CLICKHOUSE_CLIENT -q "system start merges many_mutations" -$CLICKHOUSE_CLIENT -q "optimize table many_mutations final" --optimize_throw_if_noop 1 -$CLICKHOUSE_CLIENT -q "select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done" -$CLICKHOUSE_CLIENT -q "select x, y from many_mutations order by x" -$CLICKHOUSE_CLIENT -q "drop table many_mutations" +$CLICKHOUSE_CLIENT --multiquery -q " +select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done; +system start merges many_mutations; +optimize table many_mutations final SETTINGS optimize_throw_if_noop = 1; +select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done; +select x, y from 
many_mutations order by x; +drop table many_mutations; +" diff --git a/tests/queries/0_stateless/02125_many_mutations_2.sh b/tests/queries/0_stateless/02125_many_mutations_2.sh index 0351538b210..5b779c1b276 100755 --- a/tests/queries/0_stateless/02125_many_mutations_2.sh +++ b/tests/queries/0_stateless/02125_many_mutations_2.sh @@ -7,10 +7,12 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # "max_parts_to_merge_at_once = 1" prevents merges to start in background before our own OPTIMIZE FINAL -$CLICKHOUSE_CLIENT -q "drop table if exists many_mutations" -$CLICKHOUSE_CLIENT -q "create table many_mutations (x UInt32, y UInt32) engine = MergeTree order by x settings number_of_mutations_to_delay = 0, number_of_mutations_to_throw = 0, max_parts_to_merge_at_once = 1" -$CLICKHOUSE_CLIENT -q "insert into many_mutations select number, number + 1 from numbers(2000)" -$CLICKHOUSE_CLIENT -q "system stop merges many_mutations" +$CLICKHOUSE_CLIENT --multiquery -q " +drop table if exists many_mutations; +create table many_mutations (x UInt32, y UInt32) engine = MergeTree order by x settings number_of_mutations_to_delay = 0, number_of_mutations_to_throw = 0, max_parts_to_merge_at_once = 1; +insert into many_mutations select number, number + 1 from numbers(2000); +system stop merges many_mutations; +" $CLICKHOUSE_CLIENT -q "select count() from many_mutations" @@ -18,8 +20,8 @@ job() { for i in {1..1000} do - $CLICKHOUSE_CLIENT -q "alter table many_mutations delete where y = ${i} * 2 settings mutations_sync=0" - done + echo "alter table many_mutations delete where y = ${i} * 2 settings mutations_sync = 0;" + done | $CLICKHOUSE_CLIENT --multiquery } job & @@ -45,11 +47,13 @@ job & wait -$CLICKHOUSE_CLIENT -q "select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done" -$CLICKHOUSE_CLIENT -q "system start merges many_mutations" -$CLICKHOUSE_CLIENT -q "optimize table many_mutations final" --optimize_throw_if_noop 1 -$CLICKHOUSE_CLIENT -q "system flush logs" -$CLICKHOUSE_CLIENT -q "select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done" -$CLICKHOUSE_CLIENT -q "select count() from many_mutations" -$CLICKHOUSE_CLIENT -q "select * from system.part_log where database = currentDatabase() and table == 'many_mutations' and peak_memory_usage > 1e9" -$CLICKHOUSE_CLIENT -q "drop table many_mutations" +$CLICKHOUSE_CLIENT --multiquery -q " +select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done; +system start merges many_mutations; +optimize table many_mutations final SETTINGS optimize_throw_if_noop = 1; +system flush logs; +select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done; +select count() from many_mutations; +select * from system.part_log where database = currentDatabase() and table == 'many_mutations' and peak_memory_usage > 1e9; +drop table many_mutations; +" From c178a362c573f7212c8f9986f78e78b209713bee Mon Sep 17 00:00:00 2001 From: flynn Date: Thu, 6 Jul 2023 02:30:37 +0000 Subject: [PATCH 1260/1997] Fix for new analyzer --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 163092f1b7f..34286c266c9 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -6238,7 
+6238,11 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node, const auto & insertion_table = scope_context->getInsertionTable(); if (!insertion_table.empty()) { - const auto & insert_structure = DatabaseCatalog::instance().getTable(insertion_table, scope_context)->getInMemoryMetadataPtr()->getColumns(); + const auto & insert_structure = DatabaseCatalog::instance() + .getTable(insertion_table, scope_context) + ->getInMemoryMetadataPtr() + ->getColumns() + .getInsertable(); DB::ColumnsDescription structure_hint; bool use_columns_from_insert_query = true; From d59f68b6009467e891b96e0725ec308aad236c63 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 6 Jul 2023 04:55:46 +0200 Subject: [PATCH 1261/1997] Remove useless code --- src/Access/Common/AccessType.h | 1 - src/Common/SymbolIndex.cpp | 1 - src/Interpreters/InterpreterSystemQuery.cpp | 15 --------------- src/Parsers/ASTSystemQuery.h | 1 - .../0_stateless/01271_show_privileges.reference | 1 - .../02117_show_create_table_system.reference | 6 +++--- 6 files changed, 3 insertions(+), 22 deletions(-) diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index f65a77c1d6a..c06bceb87e3 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -157,7 +157,6 @@ enum class AccessType M(SYSTEM_DROP_CACHE, "DROP CACHE", GROUP, SYSTEM) \ M(SYSTEM_RELOAD_CONFIG, "RELOAD CONFIG", GLOBAL, SYSTEM_RELOAD) \ M(SYSTEM_RELOAD_USERS, "RELOAD USERS", GLOBAL, SYSTEM_RELOAD) \ - M(SYSTEM_RELOAD_SYMBOLS, "RELOAD SYMBOLS", GLOBAL, SYSTEM_RELOAD) \ M(SYSTEM_RELOAD_DICTIONARY, "SYSTEM RELOAD DICTIONARIES, RELOAD DICTIONARY, RELOAD DICTIONARIES", GLOBAL, SYSTEM_RELOAD) \ M(SYSTEM_RELOAD_MODEL, "SYSTEM RELOAD MODELS, RELOAD MODEL, RELOAD MODELS", GLOBAL, SYSTEM_RELOAD) \ M(SYSTEM_RELOAD_FUNCTION, "SYSTEM RELOAD FUNCTIONS, RELOAD FUNCTION, RELOAD FUNCTIONS", GLOBAL, SYSTEM_RELOAD) \ diff --git a/src/Common/SymbolIndex.cpp b/src/Common/SymbolIndex.cpp index f1cace5017c..b4ae16670d8 100644 --- a/src/Common/SymbolIndex.cpp +++ b/src/Common/SymbolIndex.cpp @@ -9,7 +9,6 @@ #include -//#include #include #include diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index e1ff8676bc7..c74ff062471 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -470,16 +470,6 @@ BlockIO InterpreterSystemQuery::execute() getContext()->checkAccess(AccessType::SYSTEM_RELOAD_USERS); system_context->getAccessControl().reload(AccessControl::ReloadMode::ALL); break; - case Type::RELOAD_SYMBOLS: - { -#if defined(__ELF__) && !defined(OS_FREEBSD) - getContext()->checkAccess(AccessType::SYSTEM_RELOAD_SYMBOLS); - SymbolIndex::reload(); - break; -#else - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "SYSTEM RELOAD SYMBOLS is not supported on current platform"); -#endif - } case Type::STOP_MERGES: startStopAction(ActionLocks::PartsMerge, false); break; @@ -1056,11 +1046,6 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() required_access.emplace_back(AccessType::SYSTEM_RELOAD_USERS); break; } - case Type::RELOAD_SYMBOLS: - { - required_access.emplace_back(AccessType::SYSTEM_RELOAD_SYMBOLS); - break; - } case Type::STOP_MERGES: case Type::START_MERGES: { diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index ebc3e9cd430..528fbdce2c2 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -56,7 +56,6 @@ public: 
RELOAD_EMBEDDED_DICTIONARIES, RELOAD_CONFIG, RELOAD_USERS, - RELOAD_SYMBOLS, RESTART_DISK, STOP_MERGES, START_MERGES, diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index 9e6249bfcb3..f3c07cf11a7 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -108,7 +108,6 @@ SYSTEM DROP S3 CLIENT CACHE ['SYSTEM DROP S3 CLIENT','DROP S3 CLIENT CACHE'] GLO SYSTEM DROP CACHE ['DROP CACHE'] \N SYSTEM SYSTEM RELOAD CONFIG ['RELOAD CONFIG'] GLOBAL SYSTEM RELOAD SYSTEM RELOAD USERS ['RELOAD USERS'] GLOBAL SYSTEM RELOAD -SYSTEM RELOAD SYMBOLS ['RELOAD SYMBOLS'] GLOBAL SYSTEM RELOAD SYSTEM RELOAD DICTIONARY ['SYSTEM RELOAD DICTIONARIES','RELOAD DICTIONARY','RELOAD DICTIONARIES'] GLOBAL SYSTEM RELOAD SYSTEM RELOAD MODEL ['SYSTEM RELOAD MODELS','RELOAD MODEL','RELOAD MODELS'] GLOBAL SYSTEM RELOAD SYSTEM RELOAD FUNCTION ['SYSTEM RELOAD FUNCTIONS','RELOAD FUNCTION','RELOAD FUNCTIONS'] GLOBAL SYSTEM RELOAD diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 3834b05601f..c7aded81ac6 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -297,7 +297,7 @@ CREATE TABLE system.grants ( `user_name` Nullable(String), `role_name` Nullable(String), - `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE 
SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD SYMBOLS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH' = 135, 'SYSTEM THREAD FUZZER' = 136, 'SYSTEM UNFREEZE' = 137, 'SYSTEM FAILPOINT' = 138, 'SYSTEM' = 139, 'dictGet' = 140, 'displaySecretsInShowAndSelect' = 141, 'addressToLine' = 142, 'addressToLineWithInlines' = 143, 'addressToSymbol' = 144, 'demangle' = 145, 'INTROSPECTION' = 146, 'FILE' = 147, 'URL' = 148, 'REMOTE' = 149, 'MONGO' = 150, 'REDIS' = 151, 'MEILISEARCH' = 152, 'MYSQL' = 153, 'POSTGRES' = 154, 'SQLITE' = 155, 'ODBC' = 156, 'JDBC' = 157, 'HDFS' = 158, 'S3' = 159, 'HIVE' = 160, 'AZURE' = 161, 'SOURCES' = 162, 'CLUSTER' = 163, 'ALL' = 164, 'NONE' = 165), + `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 
54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD DICTIONARY' = 110, 'SYSTEM RELOAD MODEL' = 111, 'SYSTEM RELOAD FUNCTION' = 112, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 113, 'SYSTEM RELOAD' = 114, 'SYSTEM RESTART DISK' = 115, 'SYSTEM MERGES' = 116, 'SYSTEM TTL MERGES' = 117, 'SYSTEM FETCHES' = 118, 'SYSTEM MOVES' = 119, 'SYSTEM DISTRIBUTED SENDS' = 120, 'SYSTEM REPLICATED SENDS' = 121, 'SYSTEM SENDS' = 122, 'SYSTEM REPLICATION QUEUES' = 123, 'SYSTEM DROP REPLICA' = 124, 'SYSTEM SYNC REPLICA' = 125, 'SYSTEM RESTART REPLICA' = 126, 'SYSTEM RESTORE REPLICA' = 127, 'SYSTEM WAIT LOADING PARTS' = 128, 'SYSTEM SYNC DATABASE REPLICA' = 129, 'SYSTEM SYNC TRANSACTION LOG' = 130, 'SYSTEM SYNC FILE CACHE' = 131, 'SYSTEM FLUSH DISTRIBUTED' = 132, 'SYSTEM FLUSH LOGS' = 133, 'SYSTEM FLUSH' = 134, 'SYSTEM THREAD FUZZER' = 135, 'SYSTEM UNFREEZE' = 136, 'SYSTEM FAILPOINT' = 137, 'SYSTEM' = 138, 'dictGet' = 139, 'displaySecretsInShowAndSelect' = 140, 'addressToLine' = 141, 'addressToLineWithInlines' = 142, 'addressToSymbol' = 143, 'demangle' = 144, 'INTROSPECTION' = 145, 'FILE' = 146, 'URL' = 147, 'REMOTE' = 148, 'MONGO' = 149, 'REDIS' = 150, 'MEILISEARCH' = 151, 'MYSQL' = 152, 'POSTGRES' = 153, 'SQLITE' = 154, 'ODBC' = 155, 'JDBC' = 156, 'HDFS' = 157, 'S3' = 158, 'HIVE' = 159, 'AZURE' = 160, 'SOURCES' = 161, 'CLUSTER' = 162, 'ALL' = 163, 'NONE' = 164), `database` Nullable(String), `table` Nullable(String), `column` Nullable(String), @@ -582,10 +582,10 @@ ENGINE = SystemPartsColumns COMMENT 'SYSTEM TABLE is built on the fly.' 
CREATE TABLE system.privileges ( - `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD SYMBOLS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE 
CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH' = 135, 'SYSTEM THREAD FUZZER' = 136, 'SYSTEM UNFREEZE' = 137, 'SYSTEM FAILPOINT' = 138, 'SYSTEM' = 139, 'dictGet' = 140, 'displaySecretsInShowAndSelect' = 141, 'addressToLine' = 142, 'addressToLineWithInlines' = 143, 'addressToSymbol' = 144, 'demangle' = 145, 'INTROSPECTION' = 146, 'FILE' = 147, 'URL' = 148, 'REMOTE' = 149, 'MONGO' = 150, 'REDIS' = 151, 'MEILISEARCH' = 152, 'MYSQL' = 153, 'POSTGRES' = 154, 'SQLITE' = 155, 'ODBC' = 156, 'JDBC' = 157, 'HDFS' = 158, 'S3' = 159, 'HIVE' = 160, 'AZURE' = 161, 'SOURCES' = 162, 'CLUSTER' = 163, 'ALL' = 164, 'NONE' = 165), + `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD DICTIONARY' = 110, 'SYSTEM RELOAD MODEL' = 111, 'SYSTEM 
RELOAD FUNCTION' = 112, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 113, 'SYSTEM RELOAD' = 114, 'SYSTEM RESTART DISK' = 115, 'SYSTEM MERGES' = 116, 'SYSTEM TTL MERGES' = 117, 'SYSTEM FETCHES' = 118, 'SYSTEM MOVES' = 119, 'SYSTEM DISTRIBUTED SENDS' = 120, 'SYSTEM REPLICATED SENDS' = 121, 'SYSTEM SENDS' = 122, 'SYSTEM REPLICATION QUEUES' = 123, 'SYSTEM DROP REPLICA' = 124, 'SYSTEM SYNC REPLICA' = 125, 'SYSTEM RESTART REPLICA' = 126, 'SYSTEM RESTORE REPLICA' = 127, 'SYSTEM WAIT LOADING PARTS' = 128, 'SYSTEM SYNC DATABASE REPLICA' = 129, 'SYSTEM SYNC TRANSACTION LOG' = 130, 'SYSTEM SYNC FILE CACHE' = 131, 'SYSTEM FLUSH DISTRIBUTED' = 132, 'SYSTEM FLUSH LOGS' = 133, 'SYSTEM FLUSH' = 134, 'SYSTEM THREAD FUZZER' = 135, 'SYSTEM UNFREEZE' = 136, 'SYSTEM FAILPOINT' = 137, 'SYSTEM' = 138, 'dictGet' = 139, 'displaySecretsInShowAndSelect' = 140, 'addressToLine' = 141, 'addressToLineWithInlines' = 142, 'addressToSymbol' = 143, 'demangle' = 144, 'INTROSPECTION' = 145, 'FILE' = 146, 'URL' = 147, 'REMOTE' = 148, 'MONGO' = 149, 'REDIS' = 150, 'MEILISEARCH' = 151, 'MYSQL' = 152, 'POSTGRES' = 153, 'SQLITE' = 154, 'ODBC' = 155, 'JDBC' = 156, 'HDFS' = 157, 'S3' = 158, 'HIVE' = 159, 'AZURE' = 160, 'SOURCES' = 161, 'CLUSTER' = 162, 'ALL' = 163, 'NONE' = 164), `aliases` Array(String), `level` Nullable(Enum8('GLOBAL' = 0, 'DATABASE' = 1, 'TABLE' = 2, 'DICTIONARY' = 3, 'VIEW' = 4, 'COLUMN' = 5, 'NAMED_COLLECTION' = 6)), - `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 
'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD SYMBOLS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH' = 135, 'SYSTEM THREAD FUZZER' = 136, 'SYSTEM UNFREEZE' = 137, 'SYSTEM FAILPOINT' = 138, 'SYSTEM' = 139, 'dictGet' = 140, 'displaySecretsInShowAndSelect' = 141, 'addressToLine' = 142, 'addressToLineWithInlines' = 143, 'addressToSymbol' = 144, 'demangle' = 145, 'INTROSPECTION' = 146, 'FILE' = 147, 'URL' = 148, 'REMOTE' = 149, 'MONGO' = 150, 'REDIS' = 151, 'MEILISEARCH' = 152, 'MYSQL' = 153, 'POSTGRES' = 154, 'SQLITE' = 155, 'ODBC' = 156, 'JDBC' = 157, 'HDFS' = 158, 'S3' = 159, 'HIVE' = 160, 'AZURE' = 161, 'SOURCES' = 162, 'CLUSTER' = 163, 'ALL' = 164, 'NONE' = 165)) + `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 
'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD DICTIONARY' = 110, 'SYSTEM RELOAD MODEL' = 111, 'SYSTEM RELOAD FUNCTION' = 112, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 113, 'SYSTEM RELOAD' = 114, 'SYSTEM RESTART DISK' = 115, 'SYSTEM MERGES' = 116, 'SYSTEM TTL MERGES' = 117, 'SYSTEM FETCHES' = 118, 'SYSTEM MOVES' = 119, 'SYSTEM DISTRIBUTED SENDS' = 120, 'SYSTEM REPLICATED SENDS' = 121, 'SYSTEM SENDS' = 122, 'SYSTEM REPLICATION QUEUES' = 123, 'SYSTEM DROP REPLICA' = 124, 'SYSTEM SYNC REPLICA' = 125, 'SYSTEM RESTART REPLICA' = 126, 'SYSTEM RESTORE REPLICA' = 127, 'SYSTEM WAIT LOADING PARTS' = 128, 'SYSTEM SYNC DATABASE REPLICA' = 129, 'SYSTEM SYNC TRANSACTION LOG' = 130, 'SYSTEM SYNC FILE CACHE' = 131, 'SYSTEM FLUSH DISTRIBUTED' = 132, 'SYSTEM FLUSH LOGS' = 133, 'SYSTEM FLUSH' = 134, 'SYSTEM THREAD FUZZER' = 135, 'SYSTEM UNFREEZE' = 136, 'SYSTEM FAILPOINT' = 137, 'SYSTEM' = 138, 'dictGet' = 139, 'displaySecretsInShowAndSelect' = 140, 'addressToLine' = 141, 'addressToLineWithInlines' = 142, 'addressToSymbol' = 143, 'demangle' = 144, 'INTROSPECTION' = 145, 'FILE' = 146, 'URL' = 147, 'REMOTE' = 148, 'MONGO' = 149, 'REDIS' = 150, 'MEILISEARCH' = 151, 'MYSQL' = 152, 'POSTGRES' = 153, 'SQLITE' = 154, 'ODBC' = 155, 'JDBC' = 156, 'HDFS' = 157, 'S3' = 158, 'HIVE' = 159, 'AZURE' = 160, 'SOURCES' = 161, 'CLUSTER' = 162, 'ALL' = 163, 'NONE' = 164)) ) ENGINE = SystemPrivileges COMMENT 'SYSTEM TABLE is built on the fly.' From e8718e04cb2cfed00365f6e75c2c4e5bf2baa925 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 6 Jul 2023 05:58:05 +0300 Subject: [PATCH 1262/1997] Update --- src/Common/SymbolIndex.cpp | 7 ------- src/Common/SymbolIndex.h | 1 - 2 files changed, 8 deletions(-) diff --git a/src/Common/SymbolIndex.cpp b/src/Common/SymbolIndex.cpp index b4ae16670d8..4c7f3827125 100644 --- a/src/Common/SymbolIndex.cpp +++ b/src/Common/SymbolIndex.cpp @@ -560,13 +560,6 @@ MultiVersion::Version SymbolIndex::instance() return instanceImpl().get(); } -void SymbolIndex::reload() -{ - instanceImpl().set(std::unique_ptr(new SymbolIndex)); - /// Also drop stacktrace cache. 
- StackTrace::dropCache(); -} - } #endif diff --git a/src/Common/SymbolIndex.h b/src/Common/SymbolIndex.h index 47162331946..773f59b7914 100644 --- a/src/Common/SymbolIndex.h +++ b/src/Common/SymbolIndex.h @@ -24,7 +24,6 @@ protected: public: static MultiVersion::Version instance(); - static void reload(); struct Symbol { From d8e87f6c1df5c113fdd6026466caf8fccebd5150 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Fri, 30 Jun 2023 14:48:20 +0800 Subject: [PATCH 1263/1997] Make common macros extendable --- src/Common/CurrentMetrics.cpp | 10 ++++++++-- src/Common/ErrorCodes.cpp | 8 +++++++- src/Common/ProfileEvents.cpp | 7 ++++++- src/Common/StatusInfo.cpp | 7 ++++++- 4 files changed, 27 insertions(+), 5 deletions(-) diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 105a7c0548f..4f0d55a9cb6 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -2,7 +2,7 @@ /// Available metrics. Add something here as you wish. -#define APPLY_FOR_METRICS(M) \ +#define APPLY_FOR_BUILTIN_METRICS(M) \ M(Query, "Number of executing queries") \ M(Merge, "Number of executing background merges") \ M(Move, "Number of currently executing moves") \ @@ -200,7 +200,13 @@ M(MergeTreeReadTaskRequestsSent, "The current number of callback requests in flight from the remote server back to the initiator server to choose the read task (for MergeTree tables). Measured on the remote server side.") \ M(MergeTreeAllRangesAnnouncementsSent, "The current number of announcement being sent in flight from the remote server to the initiator server about the set of data parts (for MergeTree tables). Measured on the remote server side.") \ M(CreatedTimersInQueryProfiler, "Number of Created thread local timers in QueryProfiler") \ - M(ActiveTimersInQueryProfiler, "Number of Active thread local timers in QueryProfiler") + M(ActiveTimersInQueryProfiler, "Number of Active thread local timers in QueryProfiler") \ + +#ifdef APPLY_FOR_EXTERNAL_METRICS + #define APPLY_FOR_METRICS(M) APPLY_FOR_BUILTIN_METRICS(M) APPLY_FOR_EXTERNAL_METRICS(M) +#else + #define APPLY_FOR_METRICS(M) APPLY_FOR_BUILTIN_METRICS(M) +#endif namespace CurrentMetrics { diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 4c08d762df2..87619cdafad 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -13,7 +13,7 @@ * - system.errors table */ -#define APPLY_FOR_ERROR_CODES(M) \ +#define APPLY_FOR_BUILTIN_ERROR_CODES(M) \ M(0, OK) \ M(1, UNSUPPORTED_METHOD) \ M(2, UNSUPPORTED_PARAMETER) \ @@ -589,6 +589,12 @@ M(1002, UNKNOWN_EXCEPTION) \ /* See END */ +#ifdef APPLY_FOR_EXTERNAL_ERROR_CODES + #define APPLY_FOR_ERROR_CODES(M) APPLY_FOR_BUILTIN_ERROR_CODES(M) APPLY_FOR_EXTERNAL_ERROR_CODES(M) +#else + #define APPLY_FOR_ERROR_CODES(M) APPLY_FOR_BUILTIN_ERROR_CODES(M) +#endif + namespace DB { namespace ErrorCodes diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 0838e0366df..ecec1179875 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -4,7 +4,7 @@ /// Available events. Add something here as you wish. -#define APPLY_FOR_EVENTS(M) \ +#define APPLY_FOR_BUILTIN_EVENTS(M) \ M(Query, "Number of queries to be interpreted and potentially executed. Does not include queries that failed to parse or were rejected due to AST size limits, quota limits or limits on the number of simultaneously running queries. May include internal queries initiated by ClickHouse itself. 
Does not count subqueries.") \ M(SelectQuery, "Same as Query, but only for SELECT queries.") \ M(InsertQuery, "Same as Query, but only for INSERT queries.") \ @@ -536,6 +536,11 @@ The server successfully detected this situation and will download merged part fr M(LogError, "Number of log messages with level Error") \ M(LogFatal, "Number of log messages with level Fatal") \ +#ifdef APPLY_FOR_EXTERNAL_EVENTS + #define APPLY_FOR_EVENTS(M) APPLY_FOR_BUILTIN_EVENTS(M) APPLY_FOR_EXTERNAL_EVENTS(M) +#else + #define APPLY_FOR_EVENTS(M) APPLY_FOR_BUILTIN_EVENTS(M) +#endif namespace ProfileEvents { diff --git a/src/Common/StatusInfo.cpp b/src/Common/StatusInfo.cpp index 1f9ddfaf4b9..07828cd0aaf 100644 --- a/src/Common/StatusInfo.cpp +++ b/src/Common/StatusInfo.cpp @@ -2,9 +2,14 @@ #include /// Available status. Add something here as you wish. -#define APPLY_FOR_STATUS(M) \ +#define APPLY_FOR_BUILTIN_STATUS(M) \ M(DictionaryStatus, "Dictionary Status.", DB::getStatusEnumAllPossibleValues()) \ +#ifdef APPLY_FOR_EXTERNAL_STATUS + #define APPLY_FOR_STATUS(M) APPLY_FOR_BUILTIN_STATUS(M) APPLY_FOR_EXTERNAL_STATUS(M) +#else + #define APPLY_FOR_STATUS(M) APPLY_FOR_BUILTIN_STATUS(M) +#endif namespace CurrentStatusInfo { From 06553452ed1135a74f00ba9bb177e7c57954ea77 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Fri, 30 Jun 2023 22:33:26 +0800 Subject: [PATCH 1264/1997] StatusInfo will be deprecated --- src/Common/StatusInfo.cpp | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/Common/StatusInfo.cpp b/src/Common/StatusInfo.cpp index 07828cd0aaf..1f9ddfaf4b9 100644 --- a/src/Common/StatusInfo.cpp +++ b/src/Common/StatusInfo.cpp @@ -2,14 +2,9 @@ #include /// Available status. Add something here as you wish. -#define APPLY_FOR_BUILTIN_STATUS(M) \ +#define APPLY_FOR_STATUS(M) \ M(DictionaryStatus, "Dictionary Status.", DB::getStatusEnumAllPossibleValues()) \ -#ifdef APPLY_FOR_EXTERNAL_STATUS - #define APPLY_FOR_STATUS(M) APPLY_FOR_BUILTIN_STATUS(M) APPLY_FOR_EXTERNAL_STATUS(M) -#else - #define APPLY_FOR_STATUS(M) APPLY_FOR_BUILTIN_STATUS(M) -#endif namespace CurrentStatusInfo { From 5af28315e233561b196a1e05d5bb2d185288c747 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Fri, 30 Jun 2023 22:34:47 +0800 Subject: [PATCH 1265/1997] Try to fix style issues --- src/Common/CurrentMetrics.cpp | 4 ++-- src/Common/ErrorCodes.cpp | 4 ++-- src/Common/ProfileEvents.cpp | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 4f0d55a9cb6..8b88555d78a 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -203,9 +203,9 @@ M(ActiveTimersInQueryProfiler, "Number of Active thread local timers in QueryProfiler") \ #ifdef APPLY_FOR_EXTERNAL_METRICS - #define APPLY_FOR_METRICS(M) APPLY_FOR_BUILTIN_METRICS(M) APPLY_FOR_EXTERNAL_METRICS(M) + #define APPLY_FOR_METRICS(M) APPLY_FOR_BUILTIN_METRICS(M) APPLY_FOR_EXTERNAL_METRICS(M) #else - #define APPLY_FOR_METRICS(M) APPLY_FOR_BUILTIN_METRICS(M) + #define APPLY_FOR_METRICS(M) APPLY_FOR_BUILTIN_METRICS(M) #endif namespace CurrentMetrics diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 87619cdafad..ae8d5f8796d 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -590,9 +590,9 @@ /* See END */ #ifdef APPLY_FOR_EXTERNAL_ERROR_CODES - #define APPLY_FOR_ERROR_CODES(M) APPLY_FOR_BUILTIN_ERROR_CODES(M) APPLY_FOR_EXTERNAL_ERROR_CODES(M) + #define APPLY_FOR_ERROR_CODES(M) APPLY_FOR_BUILTIN_ERROR_CODES(M) 
APPLY_FOR_EXTERNAL_ERROR_CODES(M) #else - #define APPLY_FOR_ERROR_CODES(M) APPLY_FOR_BUILTIN_ERROR_CODES(M) + #define APPLY_FOR_ERROR_CODES(M) APPLY_FOR_BUILTIN_ERROR_CODES(M) #endif namespace DB diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index ecec1179875..c8570b7921b 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -537,9 +537,9 @@ The server successfully detected this situation and will download merged part fr M(LogFatal, "Number of log messages with level Fatal") \ #ifdef APPLY_FOR_EXTERNAL_EVENTS - #define APPLY_FOR_EVENTS(M) APPLY_FOR_BUILTIN_EVENTS(M) APPLY_FOR_EXTERNAL_EVENTS(M) + #define APPLY_FOR_EVENTS(M) APPLY_FOR_BUILTIN_EVENTS(M) APPLY_FOR_EXTERNAL_EVENTS(M) #else - #define APPLY_FOR_EVENTS(M) APPLY_FOR_BUILTIN_EVENTS(M) + #define APPLY_FOR_EVENTS(M) APPLY_FOR_BUILTIN_EVENTS(M) #endif namespace ProfileEvents From 546f12dc85fdbbcf3396767917bd9dbbf8522c41 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Thu, 6 Jul 2023 05:05:27 +0000 Subject: [PATCH 1266/1997] Fix inserts to MongoDB tables --- src/Storages/StorageMongoDB.cpp | 60 ++++++++++++++++++- .../integration/test_storage_mongodb/test.py | 6 ++ 2 files changed, 63 insertions(+), 3 deletions(-) diff --git a/src/Storages/StorageMongoDB.cpp b/src/Storages/StorageMongoDB.cpp index 3287e3272e3..45b8aceb058 100644 --- a/src/Storages/StorageMongoDB.cpp +++ b/src/Storages/StorageMongoDB.cpp @@ -19,6 +19,8 @@ #include #include +#include + namespace DB { @@ -127,9 +129,7 @@ public: for (const auto j : collections::range(0, num_cols)) { - WriteBufferFromOwnString ostr; - data_types[j]->getDefaultSerialization()->serializeText(*columns[j], i, ostr, FormatSettings{}); - document->add(data_names[j], ostr.str()); + insertValueIntoMongoDB(*document, data_names[j], *data_types[j], *columns[j], i); } documents.push_back(std::move(document)); @@ -151,6 +151,60 @@ public: } private: + + void insertValueIntoMongoDB( + Poco::MongoDB::Document & document, + const std::string & name, + const IDataType & data_type, + const IColumn & column, + size_t idx) + { + WhichDataType which(data_type); + + if (which.isArray()) + { + const ColumnArray & column_array = assert_cast<const ColumnArray &>(column); + const ColumnArray::Offsets & offsets = column_array.getOffsets(); + + size_t offset = offsets[idx - 1]; + size_t next_offset = offsets[idx]; + + const IColumn & nested_column = column_array.getData(); + + const auto * array_type = assert_cast<const DataTypeArray *>(&data_type); + const DataTypePtr & nested_type = array_type->getNestedType(); + + Poco::MongoDB::Array::Ptr array = new Poco::MongoDB::Array(); + for (size_t i = 0; i + offset < next_offset; ++i) + { + insertValueIntoMongoDB(*array, Poco::NumberFormatter::format(i), *nested_type, nested_column, i + offset); + } + + document.add(name, array); + return; + } + + /// MongoDB does not support UInt64 type, so just cast it to Int64 + if (which.isNativeUInt()) + document.add(name, static_cast<Int64>(column.getUInt(idx))); + else if (which.isNativeInt()) + document.add(name, static_cast<Int64>(column.getInt(idx))); + else if (which.isFloat32()) + document.add(name, static_cast<Float64>(column.getFloat32(idx))); + else if (which.isFloat64()) + document.add(name, static_cast<Float64>(column.getFloat64(idx))); + else if (which.isDate()) + document.add(name, Poco::Timestamp(DateLUT::instance().fromDayNum(DayNum(column.getUInt(idx))) * 1000000)); + else if (which.isDateTime()) + document.add(name, Poco::Timestamp(column.getUInt(idx) * 1000000)); + else + { + WriteBufferFromOwnString ostr; +
data_type.getDefaultSerialization()->serializeText(column, idx, ostr, FormatSettings{}); + document.add(name, ostr.str()); + } + } + String collection_name; String db_name; StorageMetadataPtr metadata_snapshot; diff --git a/tests/integration/test_storage_mongodb/test.py b/tests/integration/test_storage_mongodb/test.py index 6ce71fb91fa..0abaa7a8214 100644 --- a/tests/integration/test_storage_mongodb/test.py +++ b/tests/integration/test_storage_mongodb/test.py @@ -244,6 +244,12 @@ def test_arrays(started_cluster): == "[]\n" ) + # Test INSERT SELECT + node.query("INSERT INTO arrays_mongo_table SELECT * FROM arrays_mongo_table") + + assert node.query("SELECT COUNT() FROM arrays_mongo_table") == "200\n" + assert node.query("SELECT COUNT(DISTINCT *) FROM arrays_mongo_table") == "100\n" + node.query("DROP TABLE arrays_mongo_table") arrays_mongo_table.drop() From 24b5c9c204dcc0f3c181d13528d46d012dae86c9 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Thu, 6 Jul 2023 06:05:43 +0000 Subject: [PATCH 1267/1997] Use one setting input_format_csv_allow_variable_number_of_colums and code in RowInput --- docs/en/interfaces/formats.md | 3 +- .../operations/settings/settings-formats.md | 10 +--- docs/ru/interfaces/formats.md | 3 +- docs/ru/operations/settings/settings.md | 10 +--- src/Core/Settings.h | 3 +- src/Formats/FormatFactory.cpp | 3 +- src/Formats/FormatSettings.h | 3 +- .../Formats/Impl/CSVRowInputFormat.cpp | 58 ++++++------------- .../Formats/Impl/CSVRowInputFormat.h | 6 +- .../RowInputFormatWithNamesAndTypes.cpp | 23 ++++++++ .../Formats/RowInputFormatWithNamesAndTypes.h | 4 ++ 11 files changed, 58 insertions(+), 68 deletions(-) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 79790cef5b2..34f9abb91d4 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -471,8 +471,7 @@ The CSV format supports the output of totals and extremes the same way as `TabSe - [input_format_csv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_csv_skip_trailing_empty_lines) - skip trailing empty lines at the end of data. Default value - `false`. - [input_format_csv_trim_whitespaces](/docs/en/operations/settings/settings-formats.md/#input_format_csv_trim_whitespaces) - trim spaces and tabs in non-quoted CSV strings. Default value - `true`. - [input_format_csv_allow_whitespace_or_tab_as_delimiter](/docs/en/operations/settings/settings-formats.md/# input_format_csv_allow_whitespace_or_tab_as_delimiter) - Allow to use whitespace or tab as field delimiter in CSV strings. Default value - `false`. -- [input_format_csv_ignore_extra_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_ignore_extra_columns) - ignore extra columns in CSV input (if file has more columns than expected). Default value - `false`. -- [input_format_csv_missing_as_default](/docs/en/operations/settings/settings-formats.md/#input_format_csv_missing_as_default) - treat missing fields in CSV input as default values. Default value - `false`. +- [input_format_csv_allow_variable_number_of_colums](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_variable_number_of_colums) - ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values. Default value - `false`. 
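A minimal sketch of the behavior this setting enables, assuming a hypothetical table `t` with columns `a String, b UInt64, c String DEFAULT 'none'` (the table and input are illustrative, not part of the patch):

```bash
# Row 1 is short: column c gets its default value.
# Row 2 is long: the trailing "extra" field is ignored.
echo '"hello",1
"world",2,"x","extra"' | clickhouse-client \
    --input_format_csv_allow_variable_number_of_colums=1 \
    --input_format_defaults_for_omitted_fields=1 \
    --query="INSERT INTO t FORMAT CSV"
```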
## CSVWithNames {#csvwithnames} diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index 6b05f41666c..43e410ceee8 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -931,15 +931,9 @@ Result ```text " string " ``` -### input_format_csv_ignore_extra_columns {#input_format_csv_ignore_extra_columns} +### input_format_csv_allow_variable_number_of_colums {#input_format_csv_allow_variable_number_of_colums} -Ignore extra columns in CSV input (if file has more columns than expected). - -Disabled by default. - -### input_format_csv_missing_as_default {#input_format_csv_missing_as_default} - -Treat missing fields in CSV input as default values. +ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values. Disabled by default. diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md index 7e3bb3f7d26..e7c57fff749 100644 --- a/docs/ru/interfaces/formats.md +++ b/docs/ru/interfaces/formats.md @@ -402,8 +402,7 @@ $ clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FOR - [input_format_csv_skip_first_lines](../operations/settings/settings.md#input_format_csv_skip_first_lines) - пропустить указанное количество строк в начале данных. Значение по умолчанию - `0`. - [input_format_csv_detect_header](../operations/settings/settings.md#input_format_csv_detect_header) - обнаружить заголовок с именами и типами в формате CSV. Значение по умолчанию - `true`. - [input_format_csv_trim_whitespaces](../operations/settings/settings.md#input_format_csv_trim_whitespaces) - удалить пробелы и символы табуляции из строк без кавычек. Значение по умолчанию - `true`. -- [input_format_csv_ignore_extra_columns](../operations/settings/settings.md/#input_format_csv_ignore_extra_columns) - игнорировать дополнительные столбцы (если файл содержит больше столбцов чем ожидается). Значение по умолчанию - `false`. -- [input_format_csv_missing_as_default](../operations/settings/settings.md/#input_format_csv_missing_as_default) - рассматривать отсутствующие поля в CSV в качестве значений по умолчанию. Значение по умолчанию - `false`. +- [input_format_csv_allow_variable_number_of_colums](../operations/settings/settings.md/#input_format_csv_allow_variable_number_of_colums) - игнорировать дополнительные столбцы (если файл содержит больше столбцов чем ожидается) и рассматривать отсутствующие поля в CSV в качестве значений по умолчанию. Значение по умолчанию - `false`. ## CSVWithNames {#csvwithnames} diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index e679ce6abe1..ddc101c6991 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -1727,15 +1727,9 @@ echo ' string ' | ./clickhouse local -q "select * from table FORMAT CSV" --in " string " ``` -## input_format_csv_ignore_extra_columns {#input_format_csv_ignore_extra_columns} +## input_format_csv_allow_variable_number_of_colums {#input_format_csv_allow_variable_number_of_colums} -Игнорировать дополнительные столбцы (если файл содержит больше столбцов чем ожидается). - -Выключено по умолчанию. - -## input_format_csv_missing_as_default {#input_format_csv_missing_as_default} - -Рассматривать отсутствующие поля в CSV в качестве значений по умолчанию. 
+Игнорировать дополнительные столбцы (если файл содержит больше столбцов чем ожидается) и рассматривать отсутствующие поля в CSV в качестве значений по умолчанию. Выключено по умолчанию. diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 044b3c34dc2..df2a916b7cf 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1009,8 +1009,7 @@ class IColumn; M(Bool, regexp_dict_allow_hyperscan, true, "Allow regexp_tree dictionary using Hyperscan library.", 0) \ \ M(Bool, dictionary_use_async_executor, false, "Execute a pipeline for reading from a dictionary with several threads. It's supported only by DIRECT dictionary with CLICKHOUSE source.", 0) \ - M(Bool, input_format_csv_ignore_extra_columns, false, "Ignore extra columns in CSV input (if file has more columns than expected)", 0) \ - M(Bool, input_format_csv_missing_as_default, false, "Treat missing fields in CSV input as default values", 0) \ + M(Bool, input_format_csv_allow_variable_number_of_colums, false, "Ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values", 0) \ // End of FORMAT_FACTORY_SETTINGS // Please add settings non-related to formats into the COMMON_SETTINGS above. diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 04b095a92d6..af9823dde73 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -72,8 +72,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.csv.skip_trailing_empty_lines = settings.input_format_csv_skip_trailing_empty_lines; format_settings.csv.trim_whitespaces = settings.input_format_csv_trim_whitespaces; format_settings.csv.allow_whitespace_or_tab_as_delimiter = settings.input_format_csv_allow_whitespace_or_tab_as_delimiter; - format_settings.csv.ignore_extra_columns = settings.input_format_csv_ignore_extra_columns; - format_settings.csv.missing_as_default = settings.input_format_csv_missing_as_default; + format_settings.csv.allow_variable_number_of_colums = settings.input_format_csv_allow_variable_number_of_colums; format_settings.hive_text.fields_delimiter = settings.input_format_hive_text_fields_delimiter; format_settings.hive_text.collection_items_delimiter = settings.input_format_hive_text_collection_items_delimiter; format_settings.hive_text.map_keys_delimiter = settings.input_format_hive_text_map_keys_delimiter; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 4bdc9077a0b..653578f8496 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -140,8 +140,7 @@ struct FormatSettings bool skip_trailing_empty_lines = false; bool trim_whitespaces = true; bool allow_whitespace_or_tab_as_delimiter = false; - bool ignore_extra_columns = false; - bool missing_as_default = false; + bool allow_variable_number_of_colums = false; } csv; struct HiveText diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index 9731b4ba465..57e05ae7cd3 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -155,18 +155,7 @@ CSVFormatReader::CSVFormatReader(PeekableReadBuffer & buf_, const FormatSettings void CSVFormatReader::skipFieldDelimiter() { skipWhitespacesAndTabs(*buf, format_settings.csv.allow_whitespace_or_tab_as_delimiter); - - bool res = checkChar(format_settings.csv.delimiter, *buf); - if (res) - return; - - if (!format_settings.csv.missing_as_default) - 
{ - char err[2] = {format_settings.csv.delimiter, '\0'}; - throwAtAssertionFailed(err, *buf); - } - else - current_row_has_missing_fields = true; + assertChar(format_settings.csv.delimiter, *buf); } template @@ -206,7 +195,6 @@ void CSVFormatReader::skipRowEndDelimiter() return; skipEndOfLine(*buf); - current_row_has_missing_fields = false; } void CSVFormatReader::skipHeaderRow() @@ -295,6 +283,11 @@ bool CSVFormatReader::parseRowEndWithDiagnosticInfo(WriteBuffer & out) return true; } +bool CSVFormatReader::allowVariableNumberOfColumns() +{ + return format_settings.csv.allow_variable_number_of_colums; +} + bool CSVFormatReader::readField( IColumn & column, const DataTypePtr & type, @@ -308,8 +301,6 @@ bool CSVFormatReader::readField( const bool at_delimiter = !buf->eof() && *buf->position() == format_settings.csv.delimiter; const bool at_last_column_line_end = is_last_file_column && (buf->eof() || *buf->position() == '\n' || *buf->position() == '\r'); - bool res = false; - /// Note: Tuples are serialized in CSV as separate columns, but with empty_as_default or null_as_default /// only one empty or NULL column will be expected if (format_settings.csv.empty_as_default && (at_delimiter || at_last_column_line_end)) @@ -321,34 +312,18 @@ bool CSVFormatReader::readField( /// they do not contain empty unquoted fields, so this check /// works for tuples as well. column.insertDefault(); - } - else if (current_row_has_missing_fields) - { - column.insertDefault(); - } - else if (format_settings.null_as_default && !isNullableOrLowCardinalityNullable(type)) - { - /// If value is null but type is not nullable then use default value instead. - res = SerializationNullable::deserializeTextCSVImpl(column, *buf, format_settings, serialization); - } - else - { - /// Read the column normally. - serialization->deserializeTextCSV(column, *buf, format_settings); - res = true; + return false; } - if (is_last_file_column && format_settings.csv.ignore_extra_columns) + if (format_settings.null_as_default && !isNullableOrLowCardinalityNullable(type)) { - // Skip all fields to next line. - skipWhitespacesAndTabs(*buf, format_settings.csv.allow_whitespace_or_tab_as_delimiter); - while (checkChar(format_settings.csv.delimiter, *buf)) - { - skipField(); - skipWhitespacesAndTabs(*buf, format_settings.csv.allow_whitespace_or_tab_as_delimiter); - } + /// If value is null but type is not nullable then use default value instead. + return SerializationNullable::deserializeTextCSVImpl(column, *buf, format_settings, serialization); } - return res; + + /// Read the column normally. 
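+ /// The return value reports whether a real value was parsed for this column: + /// false means a default value was inserted instead.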
+ serialization->deserializeTextCSV(column, *buf, format_settings); + return true; } void CSVFormatReader::skipPrefixBeforeHeader() @@ -377,6 +352,11 @@ bool CSVFormatReader::checkForSuffix() return false; } +bool CSVFormatReader::checkForEndOfRow() +{ + return buf->eof() || *buf->position() == '\n' || *buf->position() == '\r'; +} + CSVSchemaReader::CSVSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, const FormatSettings & format_settings_) : FormatWithNamesAndTypesSchemaReader( buf, diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.h b/src/Processors/Formats/Impl/CSVRowInputFormat.h index 82e03c453e7..8ccf04feed3 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.h +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.h @@ -69,6 +69,9 @@ public: void skipRowEndDelimiter() override; void skipPrefixBeforeHeader() override; + bool checkForEndOfRow() override; + bool allowVariableNumberOfColumns() override; + std::vector readNames() override { return readHeaderRow(); } std::vector readTypes() override { return readHeaderRow(); } std::vector readHeaderRow() { return readRowImpl(); } @@ -89,9 +92,6 @@ public: protected: PeekableReadBuffer * buf; - -private: - bool current_row_has_missing_fields = false; }; class CSVSchemaReader : public FormatWithNamesAndTypesSchemaReader diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp index eaedbbb4a1e..fb49779e0af 100644 --- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp +++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp @@ -227,7 +227,30 @@ bool RowInputFormatWithNamesAndTypes::readRow(MutableColumns & columns, RowReadE format_reader->skipField(file_column); if (!is_last_file_column) + { + if (format_reader->allowVariableNumberOfColumns() && format_reader->checkForEndOfRow()) + { + ++file_column; + while (file_column < column_mapping->column_indexes_for_input_fields.size()) + { + const auto & rem_column_index = column_mapping->column_indexes_for_input_fields[file_column]; + columns[*rem_column_index]->insertDefault(); + ++file_column; + } + } + else + format_reader->skipFieldDelimiter(); + } + } + + if (format_reader->allowVariableNumberOfColumns() && !format_reader->checkForEndOfRow()) + { + do + { format_reader->skipFieldDelimiter(); + format_reader->skipField(1); + } + while (!format_reader->checkForEndOfRow()); } format_reader->skipRowEndDelimiter(); diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h index 5648acd392d..b5103d3db39 100644 --- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h +++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h @@ -119,6 +119,10 @@ public: /// Check suffix. 
virtual bool checkForSuffix() { return in->eof(); } + virtual bool checkForEndOfRow() { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method checkForEndOfRow is not implemented"); } + + virtual bool allowVariableNumberOfColumns() { return false; } + const FormatSettings & getFormatSettings() const { return format_settings; } virtual void setReadBuffer(ReadBuffer & in_) { in = &in_; } From d86ceef663cd0d3fcd8532ae63539e85bc4b210b Mon Sep 17 00:00:00 2001 From: Victor Krasnov Date: Thu, 6 Jul 2023 14:14:48 +0800 Subject: [PATCH 1268/1997] Implement log file names rendering --- src/Loggers/Loggers.cpp | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/src/Loggers/Loggers.cpp b/src/Loggers/Loggers.cpp index 0c3a7bd615d..1e169190ca4 100644 --- a/src/Loggers/Loggers.cpp +++ b/src/Loggers/Loggers.cpp @@ -34,6 +34,16 @@ static std::string createDirectory(const std::string & file) return path; } +static std::string renderFileNameTemplate(time_t now, const std::string & file_path) +{ + fs::path path{file_path}; + std::tm buf; + localtime_r(&now, &buf); + std::stringstream ss; + ss << std::put_time(&buf, file_path.c_str()); + return path.replace_filename(ss.str()); +} + #ifndef WITHOUT_TEXT_LOG void Loggers::setTextLog(std::shared_ptr<TextLog> log, int max_priority) { @@ -68,9 +78,12 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log /// The maximum (the most verbose) of those will be used as default for Poco loggers int max_log_level = 0; - const auto log_path = config.getString("logger.log", ""); - if (!log_path.empty()) + time_t now = std::time({}); + + const auto log_path_prop = config.getString("logger.log", ""); + if (!log_path_prop.empty()) { + const auto log_path = renderFileNameTemplate(now, log_path_prop); createDirectory(log_path); std::string ext; @@ -109,9 +122,10 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log split->addChannel(log, "log"); } - const auto errorlog_path = config.getString("logger.errorlog", ""); - if (!errorlog_path.empty()) + const auto errorlog_path_prop = config.getString("logger.errorlog", ""); + if (!errorlog_path_prop.empty()) { + const auto errorlog_path = renderFileNameTemplate(now, errorlog_path_prop); createDirectory(errorlog_path); // NOTE: we don't use notice & critical in the code, so in practice error log collects fatal & error & warning.
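With this change, strftime-style specifiers in the `logger.log` and `logger.errorlog` paths are expanded once at server startup via `std::put_time`. A minimal sketch of a config using it, matching the template exercised by the integration test added later in this series:

```xml
<clickhouse>
    <logger>
        <log>/var/log/clickhouse-server/clickhouse-server-%Y-%m.log</log>
        <errorlog>/var/log/clickhouse-server/clickhouse-server-%Y-%m.err.log</errorlog>
    </logger>
</clickhouse>
```

A server started in July 2023 would then log to `clickhouse-server-2023-07.log` and `clickhouse-server-2023-07.err.log`; paths without `%` specifiers are left unchanged.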
From 479efaa79acd23e72fb06413fd84d4b7091bd019 Mon Sep 17 00:00:00 2001 From: Victor Krasnov Date: Thu, 6 Jul 2023 14:16:18 +0800 Subject: [PATCH 1269/1997] Add clickhouse_log_file and clickhouse_error_log_file args to add_instance() --- tests/integration/helpers/cluster.py | 35 +++++++++++++++++----------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 21398790be3..5b583b865de 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -64,6 +64,13 @@ DEFAULT_ENV_NAME = ".env" SANITIZER_SIGN = "==================" +CLICKHOUSE_START_COMMAND = ( + "clickhouse server --config-file=/etc/clickhouse-server/{main_config_file}" +) + +CLICKHOUSE_LOG_FILE = "/var/log/clickhouse-server/clickhouse-server.log" + +CLICKHOUSE_ERROR_LOG_FILE = "/var/log/clickhouse-server/clickhouse-server.err.log" # to create docker-compose env file def _create_env_file(path, variables): @@ -1497,6 +1504,8 @@ class ClickHouseCluster: with_postgres=False, with_postgres_cluster=False, with_postgresql_java_client=False, + clickhouse_log_file=CLICKHOUSE_LOG_FILE, + clickhouse_error_log_file=CLICKHOUSE_ERROR_LOG_FILE, with_hdfs=False, with_kerberized_hdfs=False, with_mongo=False, @@ -1563,6 +1572,13 @@ class ClickHouseCluster: "LLVM_PROFILE_FILE" ] = "/var/lib/clickhouse/server_%h_%p_%m.profraw" + clickhouse_start_command = CLICKHOUSE_START_COMMAND + if clickhouse_log_file: + clickhouse_start_command += " --log-file=" + clickhouse_log_file + if clickhouse_error_log_file: + clickhouse_start_command += " --errorlog-file=" + clickhouse_error_log_file + logging.debug(f"clickhouse_start_command: {clickhouse_start_command}") + instance = ClickHouseInstance( cluster=self, base_path=self.base_dir, @@ -1592,10 +1608,10 @@ class ClickHouseCluster: with_redis=with_redis, with_minio=with_minio, with_azurite=with_azurite, - with_cassandra=with_cassandra, with_jdbc_bridge=with_jdbc_bridge, with_hive=with_hive, with_coredns=with_coredns, + with_cassandra=with_cassandra, server_bin_path=self.server_bin_path, odbc_bridge_bin_path=self.odbc_bridge_bin_path, library_bridge_bin_path=self.library_bridge_bin_path, @@ -1604,6 +1620,10 @@ class ClickHouseCluster: with_postgres=with_postgres, with_postgres_cluster=with_postgres_cluster, with_postgresql_java_client=with_postgresql_java_client, + clickhouse_start_command=clickhouse_start_command, + main_config_name=main_config_name, + users_config_name=users_config_name, + copy_common_configs=copy_common_configs, hostname=hostname, env_variables=env_variables, image=image, @@ -1612,9 +1632,6 @@ class ClickHouseCluster: ipv4_address=ipv4_address, ipv6_address=ipv6_address, with_installed_binary=with_installed_binary, - main_config_name=main_config_name, - users_config_name=users_config_name, - copy_common_configs=copy_common_configs, external_dirs=external_dirs, tmpfs=tmpfs or [], config_root_name=config_root_name, @@ -3046,16 +3063,6 @@ class ClickHouseCluster: subprocess_check_call(self.base_zookeeper_cmd + ["start", n]) -CLICKHOUSE_START_COMMAND = ( - "clickhouse server --config-file=/etc/clickhouse-server/{main_config_file}" - " --log-file=/var/log/clickhouse-server/clickhouse-server.log " - " --errorlog-file=/var/log/clickhouse-server/clickhouse-server.err.log" -) - -CLICKHOUSE_STAY_ALIVE_COMMAND = "bash -c \"trap 'pkill tail' INT TERM; {} --daemon; coproc tail -f /dev/null; wait $$!\"".format( - CLICKHOUSE_START_COMMAND -) - DOCKER_COMPOSE_TEMPLATE = """ version: 
'2.3' services: From fef71ab0b8759f7a659c4bb8c1be03a89df92f79 Mon Sep 17 00:00:00 2001 From: Victor Krasnov Date: Thu, 6 Jul 2023 14:16:46 +0800 Subject: [PATCH 1270/1997] Add integration test --- .../__init__.py | 58 +++++++++++++++++++ .../configs/config-file-template.xml | 4 ++ .../test.py | 0 3 files changed, 64 insertions(+) create mode 100644 tests/integration/test_render_log_file_name_templates/__init__.py create mode 100644 tests/integration/test_render_log_file_name_templates/configs/config-file-template.xml create mode 100644 tests/integration/test_render_log_file_name_templates/test.py diff --git a/tests/integration/test_render_log_file_name_templates/__init__.py b/tests/integration/test_render_log_file_name_templates/__init__.py new file mode 100644 index 00000000000..9fa87056d2c --- /dev/null +++ b/tests/integration/test_render_log_file_name_templates/__init__.py @@ -0,0 +1,58 @@ +import pytest +import logging +from helpers.cluster import ClickHouseCluster +from datetime import datetime + + +log_dir = "/var/log/clickhouse-server/" +cluster = ClickHouseCluster(__file__) + + +@pytest.fixture(scope="module") +def started_cluster(): + cluster.add_instance( + "file-names-from-config", + main_configs=["configs/config-file-template.xml"], + clickhouse_log_file=None, + clickhouse_error_log_file=None, + ) + cluster.add_instance( + "file-names-from-params", + clickhouse_log_file=log_dir + "clickhouse-server-%Y-%m.log", + clickhouse_error_log_file=log_dir + "clickhouse-server-%Y-%m.err.log", + ) + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_check_file_names(started_cluster): + now = datetime.now() + log_file = ( + log_dir + f"clickhouse-server-{now.strftime('%Y')}-{now.strftime('%m')}.log" + ) + err_log_file = ( + log_dir + f"clickhouse-server-{now.strftime('%Y')}-{now.strftime('%m')}.err.log" + ) + logging.debug(f"log_file {log_file} err_log_file {err_log_file}") + + for name, instance in started_cluster.instances.items(): + files = instance.exec_in_container( + ["bash", "-c", f"ls -lh {log_dir}"], nothrow=True + ) + + logging.debug(f"check instance '{name}': {log_dir} contains: {files}") + + assert ( + instance.exec_in_container(["bash", "-c", f"ls {log_file}"], nothrow=True) + == log_file + "\n" + ) + + assert ( + instance.exec_in_container( + ["bash", "-c", f"ls {err_log_file}"], nothrow=True + ) + == err_log_file + "\n" + ) diff --git a/tests/integration/test_render_log_file_name_templates/configs/config-file-template.xml b/tests/integration/test_render_log_file_name_templates/configs/config-file-template.xml new file mode 100644 index 00000000000..ba408eb9823 --- /dev/null +++ b/tests/integration/test_render_log_file_name_templates/configs/config-file-template.xml @@ -0,0 +1,6 @@ +<clickhouse> + <logger> + <log>/var/log/clickhouse-server/clickhouse-server-%Y-%m.log</log> + <errorlog>/var/log/clickhouse-server/clickhouse-server-%Y-%m.err.log</errorlog> + </logger> +</clickhouse> diff --git a/tests/integration/test_render_log_file_name_templates/test.py b/tests/integration/test_render_log_file_name_templates/test.py new file mode 100644 index 00000000000..e69de29bb2d From af0de3d614c41ce24ad631b1e12328efb4dd444b Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 6 Jul 2023 06:17:11 +0000 Subject: [PATCH 1271/1997] Small fixes --- tests/integration/test_multiple_disks/test.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/tests/integration/test_multiple_disks/test.py b/tests/integration/test_multiple_disks/test.py index 4a934447345..5561d63840b 100644 ---
a/tests/integration/test_multiple_disks/test.py +++ b/tests/integration/test_multiple_disks/test.py @@ -711,7 +711,7 @@ def test_background_move(start_cluster, name, engine): s1 String ) ENGINE = {engine} ORDER BY tuple() - SETTINGS storage_policy='moving_jbod_with_external, max_replicated_merges_in_queue=0' + SETTINGS storage_policy='moving_jbod_with_external', max_replicated_merges_in_queue=0 """ ) @@ -735,8 +735,6 @@ def test_background_move(start_cluster, name, engine): assert first_part is not None - used_disks = get_used_disks_for_table(node1, name) - retry = 20 i = 0 # multiple moves can be assigned in parallel so we can move later parts before the oldest @@ -745,9 +743,6 @@ def test_background_move(start_cluster, name, engine): time.sleep(0.5) i += 1 - used_disks = get_used_disks_for_table(node1, name) - assert sum(1 for x in used_disks if x == "jbod1") <= 2 - # first (oldest) part was moved to external assert get_disk_for_part(node1, name, first_part) == "external" @@ -861,9 +856,6 @@ def test_start_stop_moves(start_cluster, name, engine): # first (oldest) part moved to external assert get_disk_for_part(node1, name, first_part) == "external" - used_disks = get_used_disks_for_table(node1, name) - assert sum(1 for x in used_disks if x == "jbod1") <= 2 - node1.query(f"SYSTEM START MERGES {name}") finally: node1.query_with_retry(f"DROP TABLE IF EXISTS {name} SYNC") From 085f7caccffa20717ac6d96e13a5e8baae84db98 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 6 Jul 2023 06:30:47 +0000 Subject: [PATCH 1272/1997] Move config changes after configure --- docker/test/upgrade/run.sh | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index 82a88272df9..b8061309342 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -67,6 +67,13 @@ start stop mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.initial.log +# Start server from previous release +# Let's enable S3 storage by default +export USE_S3_STORAGE_FOR_MERGE_TREE=1 +# Previous version may not be ready for fault injections +export ZOOKEEPER_FAULT_INJECTION=0 +configure + # force_sync=false doesn't work correctly on some older versions sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \ | sed "s|false|true|" \ @@ -81,13 +88,6 @@ mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/cli sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml -# Start server from previous release -# Let's enable S3 storage by default -export USE_S3_STORAGE_FOR_MERGE_TREE=1 -# Previous version may not be ready for fault injections -export ZOOKEEPER_FAULT_INJECTION=0 -configure - # it contains some new settings, but we can safely remove it rm /etc/clickhouse-server/config.d/merge_tree.xml rm /etc/clickhouse-server/users.d/nonconst_timezone.xml From b9fffacc653fb9175af03cbb8f53766b0272ddbc Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Thu, 6 Jul 2023 06:31:09 +0000 Subject: [PATCH 1273/1997] Fix build --- src/Storages/StorageMongoDB.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/StorageMongoDB.cpp b/src/Storages/StorageMongoDB.cpp index 45b8aceb058..21543541f36 100644 --- a/src/Storages/StorageMongoDB.cpp +++ b/src/Storages/StorageMongoDB.cpp @@ -186,9 +186,9 @@ private: /// MongoDB does not support UInt64 
type, so just cast it to Int64 if (which.isNativeUInt()) - document.add(name, static_cast(column.getUInt(idx))); + document.add(name, static_cast(column.getUInt(idx))); else if (which.isNativeInt()) - document.add(name, static_cast(column.getInt(idx))); + document.add(name, static_cast(column.getInt(idx))); else if (which.isFloat32()) document.add(name, static_cast(column.getFloat32(idx))); else if (which.isFloat64()) From 1336a9ec6770ae709f956a019c778346b8475162 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Thu, 6 Jul 2023 09:09:55 +0200 Subject: [PATCH 1274/1997] Better naming --- .../operations/system-tables/jemalloc_bins.md | 30 +++++++++---------- src/Storages/System/StorageSystemJemalloc.cpp | 4 +-- .../02810_system_jemalloc_bins.sql | 4 +-- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/docs/en/operations/system-tables/jemalloc_bins.md b/docs/en/operations/system-tables/jemalloc_bins.md index dfe2ddb01e2..06d9ba57dfc 100644 --- a/docs/en/operations/system-tables/jemalloc_bins.md +++ b/docs/en/operations/system-tables/jemalloc_bins.md @@ -11,8 +11,8 @@ Columns: - `index` (UInt64) — Index of the bin ordered by size - `large` (Bool) — True for large allocations and False for small - `size` (UInt64) — Size of allocations in this bin -- `nmalloc` (UInt64) — Number of allocations -- `ndalloc` (UInt64) — Number of deallocations +- `allocations` (UInt64) — Number of allocations +- `deallocations` (UInt64) — Number of deallocations **Example** @@ -21,7 +21,7 @@ Find the sizes of allocations that contributed the most to the current overall m ``` sql SELECT *, - nmalloc - ndalloc AS active_allocations, + allocations - deallocations AS active_allocations, size * active_allocations AS allocated_bytes FROM system.jemalloc_bins WHERE allocated_bytes > 0 @@ -30,16 +30,16 @@ LIMIT 10 ``` ``` text -┌─index─┬─large─┬─────size─┬──nmalloc─┬──ndalloc─┬─active_allocations─┬─allocated_bytes─┐ -│ 82 │ 1 │ 50331648 │ 1 │ 0 │ 1 │ 50331648 │ -│ 10 │ 0 │ 192 │ 512336 │ 370710 │ 141626 │ 27192192 │ -│ 69 │ 1 │ 5242880 │ 6 │ 2 │ 4 │ 20971520 │ -│ 3 │ 0 │ 48 │ 16938224 │ 16559484 │ 378740 │ 18179520 │ -│ 28 │ 0 │ 4096 │ 122924 │ 119142 │ 3782 │ 15491072 │ -│ 61 │ 1 │ 1310720 │ 44569 │ 44558 │ 11 │ 14417920 │ -│ 39 │ 1 │ 28672 │ 1285 │ 913 │ 372 │ 10665984 │ -│ 4 │ 0 │ 64 │ 2837225 │ 2680568 │ 156657 │ 10026048 │ -│ 6 │ 0 │ 96 │ 2617803 │ 2531435 │ 86368 │ 8291328 │ -│ 36 │ 1 │ 16384 │ 22431 │ 21970 │ 461 │ 7553024 │ -└───────┴───────┴──────────┴──────────┴──────────┴────────────────────┴─────────────────┘ +┌─index─┬─large─┬─────size─┬─allocactions─┬─deallocations─┬─active_allocations─┬─allocated_bytes─┐ +│ 82 │ 1 │ 50331648 │ 1 │ 0 │ 1 │ 50331648 │ +│ 10 │ 0 │ 192 │ 512336 │ 370710 │ 141626 │ 27192192 │ +│ 69 │ 1 │ 5242880 │ 6 │ 2 │ 4 │ 20971520 │ +│ 3 │ 0 │ 48 │ 16938224 │ 16559484 │ 378740 │ 18179520 │ +│ 28 │ 0 │ 4096 │ 122924 │ 119142 │ 3782 │ 15491072 │ +│ 61 │ 1 │ 1310720 │ 44569 │ 44558 │ 11 │ 14417920 │ +│ 39 │ 1 │ 28672 │ 1285 │ 913 │ 372 │ 10665984 │ +│ 4 │ 0 │ 64 │ 2837225 │ 2680568 │ 156657 │ 10026048 │ +│ 6 │ 0 │ 96 │ 2617803 │ 2531435 │ 86368 │ 8291328 │ +│ 36 │ 1 │ 16384 │ 22431 │ 21970 │ 461 │ 7553024 │ +└───────┴───────┴──────────┴──────────────┴───────────────┴────────────────────┴─────────────────┘ ``` diff --git a/src/Storages/System/StorageSystemJemalloc.cpp b/src/Storages/System/StorageSystemJemalloc.cpp index 2cb666eb5c3..4348349ebbc 100644 --- a/src/Storages/System/StorageSystemJemalloc.cpp +++ 
b/src/Storages/System/StorageSystemJemalloc.cpp @@ -95,8 +95,8 @@ NamesAndTypesList StorageSystemJemallocBins::getNamesAndTypes() { "index", std::make_shared<DataTypeUInt64>() }, { "large", std::make_shared<DataTypeUInt8>() }, { "size", std::make_shared<DataTypeUInt64>() }, - { "nmalloc", std::make_shared<DataTypeUInt64>() }, - { "ndalloc", std::make_shared<DataTypeUInt64>() }, + { "allocations", std::make_shared<DataTypeUInt64>() }, + { "deallocations", std::make_shared<DataTypeUInt64>() }, }; } diff --git a/tests/queries/0_stateless/02810_system_jemalloc_bins.sql b/tests/queries/0_stateless/02810_system_jemalloc_bins.sql index 8ecf47e51b5..03062e70aa3 100644 --- a/tests/queries/0_stateless/02810_system_jemalloc_bins.sql +++ b/tests/queries/0_stateless/02810_system_jemalloc_bins.sql @@ -3,8 +3,8 @@ WITH (SELECT count() FROM system.jemalloc_bins) AS total_bins, (SELECT count() FROM system.jemalloc_bins WHERE large) AS large_bins, (SELECT count() FROM system.jemalloc_bins WHERE NOT large) AS small_bins, - (SELECT sum(size * (nmalloc - ndalloc)) FROM system.jemalloc_bins WHERE large) AS large_allocated_bytes, - (SELECT sum(size * (nmalloc - ndalloc)) FROM system.jemalloc_bins WHERE NOT large) AS small_allocated_bytes + (SELECT sum(size * (allocations - deallocations)) FROM system.jemalloc_bins WHERE large) AS large_allocated_bytes, + (SELECT sum(size * (allocations - deallocations)) FROM system.jemalloc_bins WHERE NOT large) AS small_allocated_bytes SELECT (total_bins > 0) = jemalloc_enabled, (large_bins > 0) = jemalloc_enabled, From 32f5a7830229b53df80f9e788b860066a4a86947 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Thu, 6 Jul 2023 07:32:46 +0000 Subject: [PATCH 1275/1997] Fix setting name --- docs/en/interfaces/formats.md | 2 +- docs/en/operations/settings/settings-formats.md | 2 +- docs/ru/interfaces/formats.md | 2 +- docs/ru/operations/settings/settings.md | 2 +- src/Core/Settings.h | 2 +- src/Formats/FormatFactory.cpp | 2 +- src/Formats/FormatSettings.h | 2 +- src/Processors/Formats/Impl/CSVRowInputFormat.cpp | 2 +- tests/queries/0_stateless/00301_csv.reference | 4 ++-- tests/queries/0_stateless/00301_csv.sh | 8 ++++---- 10 files changed, 14 insertions(+), 14 deletions(-) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 34f9abb91d4..ed2f010a632 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -471,7 +471,7 @@ The CSV format supports the output of totals and extremes the same way as `TabSe - [input_format_csv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_csv_skip_trailing_empty_lines) - skip trailing empty lines at the end of data. Default value - `false`. - [input_format_csv_trim_whitespaces](/docs/en/operations/settings/settings-formats.md/#input_format_csv_trim_whitespaces) - trim spaces and tabs in non-quoted CSV strings. Default value - `true`. - [input_format_csv_allow_whitespace_or_tab_as_delimiter](/docs/en/operations/settings/settings-formats.md/# input_format_csv_allow_whitespace_or_tab_as_delimiter) - Allow to use whitespace or tab as field delimiter in CSV strings. Default value - `false`. -- [input_format_csv_allow_variable_number_of_colums](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_variable_number_of_colums) - ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values. Default value - `false`.
+- [input_format_csv_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_variable_number_of_columns) - ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values. Default value - `false`. ## CSVWithNames {#csvwithnames} diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index 43e410ceee8..3eea5ef4ad9 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -931,7 +931,7 @@ Result ```text " string " ``` -### input_format_csv_allow_variable_number_of_colums {#input_format_csv_allow_variable_number_of_colums} +### input_format_csv_allow_variable_number_of_columns {#input_format_csv_allow_variable_number_of_columns} ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values. diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md index e7c57fff749..e232b63f049 100644 --- a/docs/ru/interfaces/formats.md +++ b/docs/ru/interfaces/formats.md @@ -402,7 +402,7 @@ $ clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FOR - [input_format_csv_skip_first_lines](../operations/settings/settings.md#input_format_csv_skip_first_lines) - пропустить указанное количество строк в начале данных. Значение по умолчанию - `0`. - [input_format_csv_detect_header](../operations/settings/settings.md#input_format_csv_detect_header) - обнаружить заголовок с именами и типами в формате CSV. Значение по умолчанию - `true`. - [input_format_csv_trim_whitespaces](../operations/settings/settings.md#input_format_csv_trim_whitespaces) - удалить пробелы и символы табуляции из строк без кавычек. Значение по умолчанию - `true`. -- [input_format_csv_allow_variable_number_of_colums](../operations/settings/settings.md/#input_format_csv_allow_variable_number_of_colums) - игнорировать дополнительные столбцы (если файл содержит больше столбцов чем ожидается) и рассматривать отсутствующие поля в CSV в качестве значений по умолчанию. Значение по умолчанию - `false`. +- [input_format_csv_allow_variable_number_of_columns](../operations/settings/settings.md/#input_format_csv_allow_variable_number_of_columns) - игнорировать дополнительные столбцы (если файл содержит больше столбцов чем ожидается) и рассматривать отсутствующие поля в CSV в качестве значений по умолчанию. Значение по умолчанию - `false`. ## CSVWithNames {#csvwithnames} diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index ddc101c6991..42e21f6140b 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -1727,7 +1727,7 @@ echo ' string ' | ./clickhouse local -q "select * from table FORMAT CSV" --in " string " ``` -## input_format_csv_allow_variable_number_of_colums {#input_format_csv_allow_variable_number_of_colums} +## input_format_csv_allow_variable_number_of_columns {#input_format_csv_allow_variable_number_of_columns} Игнорировать дополнительные столбцы (если файл содержит больше столбцов чем ожидается) и рассматривать отсутствующие поля в CSV в качестве значений по умолчанию. 
diff --git a/src/Core/Settings.h b/src/Core/Settings.h index df2a916b7cf..7f8a52c69fa 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1009,7 +1009,7 @@ class IColumn; M(Bool, regexp_dict_allow_hyperscan, true, "Allow regexp_tree dictionary using Hyperscan library.", 0) \ \ M(Bool, dictionary_use_async_executor, false, "Execute a pipeline for reading from a dictionary with several threads. It's supported only by DIRECT dictionary with CLICKHOUSE source.", 0) \ - M(Bool, input_format_csv_allow_variable_number_of_colums, false, "Ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values", 0) \ + M(Bool, input_format_csv_allow_variable_number_of_columns, false, "Ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values", 0) \ // End of FORMAT_FACTORY_SETTINGS // Please add settings non-related to formats into the COMMON_SETTINGS above. diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index af9823dde73..182abc84ffe 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -72,7 +72,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.csv.skip_trailing_empty_lines = settings.input_format_csv_skip_trailing_empty_lines; format_settings.csv.trim_whitespaces = settings.input_format_csv_trim_whitespaces; format_settings.csv.allow_whitespace_or_tab_as_delimiter = settings.input_format_csv_allow_whitespace_or_tab_as_delimiter; - format_settings.csv.allow_variable_number_of_colums = settings.input_format_csv_allow_variable_number_of_colums; + format_settings.csv.allow_variable_number_of_columns = settings.input_format_csv_allow_variable_number_of_columns; format_settings.hive_text.fields_delimiter = settings.input_format_hive_text_fields_delimiter; format_settings.hive_text.collection_items_delimiter = settings.input_format_hive_text_collection_items_delimiter; format_settings.hive_text.map_keys_delimiter = settings.input_format_hive_text_map_keys_delimiter; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 653578f8496..dd4608227d0 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -140,7 +140,7 @@ struct FormatSettings bool skip_trailing_empty_lines = false; bool trim_whitespaces = true; bool allow_whitespace_or_tab_as_delimiter = false; - bool allow_variable_number_of_colums = false; + bool allow_variable_number_of_columns = false; } csv; struct HiveText diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index 57e05ae7cd3..60f1cbe1f80 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -285,7 +285,7 @@ bool CSVFormatReader::parseRowEndWithDiagnosticInfo(WriteBuffer & out) bool CSVFormatReader::allowVariableNumberOfColumns() { - return format_settings.csv.allow_variable_number_of_colums; + return format_settings.csv.allow_variable_number_of_columns; } bool CSVFormatReader::readField( diff --git a/tests/queries/0_stateless/00301_csv.reference b/tests/queries/0_stateless/00301_csv.reference index 804ccf0c713..ec8c5f2b371 100644 --- a/tests/queries/0_stateless/00301_csv.reference +++ b/tests/queries/0_stateless/00301_csv.reference @@ -14,14 +14,14 @@ default-eof 1 2019-06-19 2016-01-01 01:02:03 NUL 2016-01-02 01:02:03 Nhello \N \N -=== Test 
input_format_csv_ignore_extra_columns +=== Test ignore extra columns Hello 1 String1 Hello 2 String2 Hello 3 String3 Hello 4 String4 Hello 5 String5 Hello 6 String6 -=== Test input_format_csv_missing_as_default +=== Test missing as default 0 0 33 \N 55 Default 0 0 33 \N 55 Default Hello 0 0 33 \N 55 Default diff --git a/tests/queries/0_stateless/00301_csv.sh b/tests/queries/0_stateless/00301_csv.sh index 7657745e9f7..776bd39fc03 100755 --- a/tests/queries/0_stateless/00301_csv.sh +++ b/tests/queries/0_stateless/00301_csv.sh @@ -41,7 +41,7 @@ $CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY s NULLS LAST"; $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; -echo === Test input_format_csv_ignore_extra_columns +echo === Test ignore extra columns $CLICKHOUSE_CLIENT --query="CREATE TABLE csv (s String, n UInt64 DEFAULT 3, d String DEFAULT 'String4') ENGINE = Memory"; echo '"Hello", 1, "String1" @@ -50,12 +50,12 @@ echo '"Hello", 1, "String1" "Hello", 4, , "2016-01-14" "Hello", 5, "String5", "2016-01-15", "2016-01-16" "Hello", 6, "String6" , "line with a -break"' | $CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=1 --input_format_csv_empty_as_default=1 --input_format_csv_ignore_extra_columns=1 --query="INSERT INTO csv FORMAT CSV"; +break"' | $CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=1 --input_format_csv_empty_as_default=1 --input_format_csv_allow_variable_number_of_columns=1 --query="INSERT INTO csv FORMAT CSV"; $CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY s, n"; $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; -echo === Test input_format_csv_missing_as_default +echo === Test missing as default $CLICKHOUSE_CLIENT --query="CREATE TABLE csv (f1 String, f2 UInt64, f3 UInt256, f4 UInt64 Default 33, f5 Nullable(UInt64), f6 Nullable(UInt64) Default 55, f7 String DEFAULT 'Default') ENGINE = Memory"; echo ' @@ -65,6 +65,6 @@ echo ' "Hello", 1, 3, 2 "Hello",1,4,2,3,4,"String" "Hello", 1, 4, 2, 3, 4, "String" -"Hello", 1, 5, 2, 3, 4, "String",'| $CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=1 --input_format_csv_missing_as_default=1 --query="INSERT INTO csv FORMAT CSV"; +"Hello", 1, 5, 2, 3, 4, "String",'| $CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=1 --input_format_csv_allow_variable_number_of_columns=1 --query="INSERT INTO csv FORMAT CSV"; $CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY f1, f2, f3, f4, f5 NULLS FIRST, f6, f7"; $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; From 24e77083b38fbfdbec0d5a6fa8da65cb6a33a602 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Thu, 6 Jul 2023 09:50:44 +0000 Subject: [PATCH 1276/1997] Commit tests --- src/Parsers/ParserCreateQuery.cpp | 3 +- .../02811_primary_key_in_columns.reference | 0 .../02811_primary_key_in_columns.sql | 50 +++++++++---------- 3 files changed, 27 insertions(+), 26 deletions(-) create mode 100644 tests/queries/0_stateless/02811_primary_key_in_columns.reference diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 1941bafab0d..60e15cb92f4 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -311,7 +311,7 @@ bool ParserTablePropertiesDeclarationList::parseImpl(Pos & pos, ASTPtr & node, E if(!primary_key_from_columns) primary_key_from_columns = makeASTFunction("tuple"); auto column_identifier = std::make_shared(cd->name); - primary_key_from_columns->children.push_back(column_identifier); + 
primary_key_from_columns->children[0]->as()->children.push_back(column_identifier); } columns->children.push_back(elem); } @@ -710,6 +710,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe throw Exception(ErrorCodes::BAD_ARGUMENTS, "Multiple primary keys are not allowed."); query->storage->primary_key = query->columns_list->primary_key; + } if (query->columns_list && (query->columns_list->primary_key_from_columns)) diff --git a/tests/queries/0_stateless/02811_primary_key_in_columns.reference b/tests/queries/0_stateless/02811_primary_key_in_columns.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02811_primary_key_in_columns.sql b/tests/queries/0_stateless/02811_primary_key_in_columns.sql index df25fdd14ab..0519f4c820b 100644 --- a/tests/queries/0_stateless/02811_primary_key_in_columns.sql +++ b/tests/queries/0_stateless/02811_primary_key_in_columns.sql @@ -23,39 +23,39 @@ DROP TABLE IF EXISTS pk_test21; DROP TABLE IF EXISTS pk_test22; DROP TABLE IF EXISTS pk_test23; -SET default_table_engine=MergeTree; +SET default_table_engine='MergeTree'; -CREATE TABLE pk_test1 (String a PRIMARY KEY, String b, String c); -CREATE TABLE pk_test2 (String a PRIMARY KEY, String b PRIMARY KEY, String c); -CREATE TABLE pk_test3 (String a PRIMARY KEY, String b PRIMARY KEY, String c PRIMARY KEY); +CREATE TABLE pk_test1 (a String PRIMARY KEY, b String, c String); +CREATE TABLE pk_test2 (a String PRIMARY KEY, b String PRIMARY KEY, c String); +CREATE TABLE pk_test3 (a String PRIMARY KEY, b String PRIMARY KEY, c String PRIMARY KEY); -CREATE TABLE pk_test4 (String a, String b PRIMARY KEY, String c PRIMARY KEY); -CREATE TABLE pk_test5 (String a, String b PRIMARY KEY, String c); -CREATE TABLE pk_test6 (String a, String b, String c PRIMARY KEY); +CREATE TABLE pk_test4 (a String, b String PRIMARY KEY, c String PRIMARY KEY); +CREATE TABLE pk_test5 (a String, b String PRIMARY KEY, c String); +CREATE TABLE pk_test6 (a String, b String, c String PRIMARY KEY); -CREATE TABLE pk_test7 (String a PRIMARY KEY, String b, String c, PRIMARY KEY (a)); -CREATE TABLE pk_test8 (String a PRIMARY KEY, String b PRIMARY KEY, String c, PRIMARY KEY (a)); -CREATE TABLE pk_test9 (String a PRIMARY KEY, String b PRIMARY KEY, String c PRIMARY KEY, PRIMARY KEY (a)); +CREATE TABLE pk_test7 (a String PRIMARY KEY, b String, c String, PRIMARY KEY (a)); -- { clientError BAD_ARGUMENTS } +CREATE TABLE pk_test8 (a String PRIMARY KEY, b String PRIMARY KEY, c String, PRIMARY KEY (a)); -- { clientError BAD_ARGUMENTS } +CREATE TABLE pk_test9 (a String PRIMARY KEY, b String PRIMARY KEY, c String PRIMARY KEY, PRIMARY KEY (a)); -- { clientError BAD_ARGUMENTS } -CREATE TABLE pk_test10 (String a, String b PRIMARY KEY, String c PRIMARY KEY, PRIMARY KEY (a)); -CREATE TABLE pk_test11 (String a, String b PRIMARY KEY, String c, PRIMARY KEY (a)); -CREATE TABLE pk_test12 (String a, String b, String c PRIMARY KEY, PRIMARY KEY (a)); +CREATE TABLE pk_test10 (a String, b String PRIMARY KEY, c String PRIMARY KEY, PRIMARY KEY (a)); -- { clientError BAD_ARGUMENTS } +CREATE TABLE pk_test11 (a String, b String PRIMARY KEY, c String, PRIMARY KEY (a)); -- { clientError BAD_ARGUMENTS } +CREATE TABLE pk_test12 (a String, b String, c String PRIMARY KEY, PRIMARY KEY (a)); -- { clientError BAD_ARGUMENTS } -CREATE TABLE pk_test12 (String a PRIMARY KEY, String b, String c) PRIMARY KEY (a,b,c); -CREATE TABLE pk_test13 (String a PRIMARY KEY, String b PRIMARY KEY, String c) PRIMARY KEY (a,b,c); -CREATE TABLE 
pk_test14 (String a PRIMARY KEY, String b PRIMARY KEY, String c PRIMARY KEY) PRIMARY KEY (a,b,c); +CREATE TABLE pk_test12 (a String PRIMARY KEY, b String, c String) PRIMARY KEY (a,b,c); -- { clientError BAD_ARGUMENTS } +CREATE TABLE pk_test13 (a String PRIMARY KEY, b String PRIMARY KEY, c String) PRIMARY KEY (a,b,c); -- { clientError BAD_ARGUMENTS } +CREATE TABLE pk_test14 (a String PRIMARY KEY, b String PRIMARY KEY, c String PRIMARY KEY) PRIMARY KEY (a,b,c); -- { clientError BAD_ARGUMENTS } -CREATE TABLE pk_test15 (String a, String b PRIMARY KEY, String c PRIMARY KEY) PRIMARY KEY (a,b,c); -CREATE TABLE pk_test16 (String a, String b PRIMARY KEY, String c) PRIMARY KEY (a,b,c); -CREATE TABLE pk_test17 (String a, String b, String c PRIMARY KEY) PRIMARY KEY (a,b,c); +CREATE TABLE pk_test15 (a String, b String PRIMARY KEY, c String PRIMARY KEY) PRIMARY KEY (a,b,c); -- { clientError BAD_ARGUMENTS } +CREATE TABLE pk_test16 (a String, b String PRIMARY KEY, c String) PRIMARY KEY (a,b,c); -- { clientError BAD_ARGUMENTS } +CREATE TABLE pk_test17 (a String, b String, c String PRIMARY KEY) PRIMARY KEY (a,b,c); -- { clientError BAD_ARGUMENTS } -CREATE TABLE pk_test18 (String a PRIMARY KEY, String b, String c) ORDER BY (a,b,c); -CREATE TABLE pk_test19 (String a PRIMARY KEY, String b PRIMARY KEY, String c) ORDER BY (a,b,c); -CREATE TABLE pk_test20 (String a PRIMARY KEY, String b PRIMARY KEY, String c PRIMARY KEY) ORDER BY (a,b,c); +CREATE TABLE pk_test18 (a String PRIMARY KEY, b String, c String) ORDER BY (a,b,c); +CREATE TABLE pk_test19 (a String PRIMARY KEY, b String PRIMARY KEY, c String) ORDER BY (a,b,c); +CREATE TABLE pk_test20 (a String PRIMARY KEY, b String PRIMARY KEY, c String PRIMARY KEY) ORDER BY (a,b,c); -CREATE TABLE pk_test21 (String a, String b PRIMARY KEY, String c PRIMARY KEY) ORDER BY (a,b,c); -CREATE TABLE pk_test22 (String a, String b PRIMARY KEY, String c) ORDER BY (a,b,c); -CREATE TABLE pk_test23 (String a, String b, String c PRIMARY KEY) ORDER BY (a,b,c); +CREATE TABLE pk_test21 (a String, b String PRIMARY KEY, c String PRIMARY KEY) ORDER BY (a,b,c); -- { serverError BAD_ARGUMENTS } +CREATE TABLE pk_test22 (a String, b String PRIMARY KEY, c String) ORDER BY (a,b,c); -- { serverError BAD_ARGUMENTS } +CREATE TABLE pk_test23 (a String, b String, c String PRIMARY KEY) ORDER BY (a,b,c); -- { serverError BAD_ARGUMENTS } DROP TABLE IF EXISTS pk_test1; DROP TABLE IF EXISTS pk_test2; From e80f2a0acc91d9003880f4054f05b3e23b9a2679 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Thu, 6 Jul 2023 09:55:30 +0000 Subject: [PATCH 1277/1997] Define default_table_engine in sqllogictest --- tests/sqllogic/connection.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/sqllogic/connection.py b/tests/sqllogic/connection.py index a9976a7beca..5e2634787d8 100644 --- a/tests/sqllogic/connection.py +++ b/tests/sqllogic/connection.py @@ -62,6 +62,7 @@ def default_clickhouse_odbc_conn_str(): return str( OdbcConnectingArgs.create_from_kw( dsn="ClickHouse DSN (ANSI)", + Url="http://localhost:8123/query?default_format=ODBCDriver2&default_table_engine=MergeTree" ) ) From 12ebb30781e8427a1e797464c3bd4675787c87e9 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Thu, 6 Jul 2023 10:34:34 +0000 Subject: [PATCH 1278/1997] style --- src/Parsers/ParserCreateQuery.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 
60e15cb92f4..c4c02ab7417 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -306,9 +306,9 @@ bool ParserTablePropertiesDeclarationList::parseImpl(Pos & pos, ASTPtr & node, E { if (auto *cd = elem->as()) { - if(cd->primary_key_specifier) + if (cd->primary_key_specifier) { - if(!primary_key_from_columns) + if (!primary_key_from_columns) primary_key_from_columns = makeASTFunction("tuple"); auto column_identifier = std::make_shared(cd->name); primary_key_from_columns->children[0]->as()->children.push_back(column_identifier); From c7ccf23a24a7fb2bb1245b76fc9169649cd474c3 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Thu, 6 Jul 2023 10:44:06 +0000 Subject: [PATCH 1279/1997] Update CREATE TABLE docs --- .../mergetree-family/mergetree.md | 50 +++++++++---------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 67043ef1062..4f506126682 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -37,8 +37,8 @@ The [Merge](/docs/en/engines/table-engines/special/merge.md/#merge) engine does ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] ( - name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1], - name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2], + name1 [type1] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr1] [TTL expr1] [CODEC(codec1)] [[NOT] NULL|PRIMARY KEY], + name2 [type2] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr2] [TTL expr2] [CODEC(codec2)] [[NOT] NULL|PRIMARY KEY], ... INDEX index_name1 expr1 TYPE type1(...) [GRANULARITY value1], INDEX index_name2 expr2 TYPE type2(...) [GRANULARITY value2], @@ -439,41 +439,41 @@ Syntax: `ngrambf_v1(n, size_of_bloom_filter_in_bytes, number_of_hash_functions, - `number_of_hash_functions` — The number of hash functions used in the Bloom filter. - `random_seed` — The seed for Bloom filter hash functions. -Users can create [UDF](/docs/en/sql-reference/statements/create/function.md) to estimate the parameters set of `ngrambf_v1`. Query statements are as follows: +Users can create [UDF](/docs/en/sql-reference/statements/create/function.md) to estimate the parameters set of `ngrambf_v1`. 
Query statements are as follows: ```sql -CREATE FUNCTION bfEstimateFunctions [ON CLUSTER cluster] -AS -(total_nubmer_of_all_grams, size_of_bloom_filter_in_bits) -> round((size_of_bloom_filter_in_bits / total_nubmer_of_all_grams) * log(2)); - -CREATE FUNCTION bfEstimateBmSize [ON CLUSTER cluster] -AS -(total_nubmer_of_all_grams, probability_of_false_positives) -> ceil((total_nubmer_of_all_grams * log(probability_of_false_positives)) / log(1 / pow(2, log(2)))); - -CREATE FUNCTION bfEstimateFalsePositive [ON CLUSTER cluster] -AS -(total_nubmer_of_all_grams, number_of_hash_functions, size_of_bloom_filter_in_bytes) -> pow(1 - exp(-number_of_hash_functions/ (size_of_bloom_filter_in_bytes / total_nubmer_of_all_grams)), number_of_hash_functions); - -CREATE FUNCTION bfEstimateGramNumber [ON CLUSTER cluster] -AS +CREATE FUNCTION bfEstimateFunctions [ON CLUSTER cluster] +AS +(total_nubmer_of_all_grams, size_of_bloom_filter_in_bits) -> round((size_of_bloom_filter_in_bits / total_nubmer_of_all_grams) * log(2)); + +CREATE FUNCTION bfEstimateBmSize [ON CLUSTER cluster] +AS +(total_nubmer_of_all_grams, probability_of_false_positives) -> ceil((total_nubmer_of_all_grams * log(probability_of_false_positives)) / log(1 / pow(2, log(2)))); + +CREATE FUNCTION bfEstimateFalsePositive [ON CLUSTER cluster] +AS +(total_nubmer_of_all_grams, number_of_hash_functions, size_of_bloom_filter_in_bytes) -> pow(1 - exp(-number_of_hash_functions/ (size_of_bloom_filter_in_bytes / total_nubmer_of_all_grams)), number_of_hash_functions); + +CREATE FUNCTION bfEstimateGramNumber [ON CLUSTER cluster] +AS (number_of_hash_functions, probability_of_false_positives, size_of_bloom_filter_in_bytes) -> ceil(size_of_bloom_filter_in_bytes / (-number_of_hash_functions / log(1 - exp(log(probability_of_false_positives) / number_of_hash_functions)))) -``` +``` To use those functions,we need to specify two parameter at least. -For example, if there 4300 ngrams in the granule and we expect false positives to be less than 0.0001. The other parameters can be estimated by executing following queries: - +For example, if there 4300 ngrams in the granule and we expect false positives to be less than 0.0001. The other parameters can be estimated by executing following queries: + ```sql --- estimate number of bits in the filter -SELECT bfEstimateBmSize(4300, 0.0001) / 8 as size_of_bloom_filter_in_bytes; +SELECT bfEstimateBmSize(4300, 0.0001) / 8 as size_of_bloom_filter_in_bytes; ┌─size_of_bloom_filter_in_bytes─┐ │ 10304 │ └───────────────────────────────┘ - + --- estimate number of hash functions SELECT bfEstimateFunctions(4300, bfEstimateBmSize(4300, 0.0001)) as number_of_hash_functions - + ┌─number_of_hash_functions─┐ │ 13 │ └──────────────────────────┘ @@ -991,7 +991,7 @@ use a local disk to cache data from a table stored at a URL. Neither the cache d nor the web storage is configured in the ClickHouse configuration files; both are configured in the CREATE/ATTACH query settings. -In the settings highlighted below notice that the disk of `type=web` is nested within +In the settings highlighted below notice that the disk of `type=web` is nested within the disk of `type=cache`. ```sql @@ -1308,7 +1308,7 @@ configuration file. 
In this sample configuration: - the disk is of type `web` - the data is hosted at `http://nginx:80/test1/` -- a cache on local storage is used +- a cache on local storage is used ```xml From 86fc70223693db8aac9edfa7c85e7e80286042ec Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Thu, 6 Jul 2023 15:14:18 +0300 Subject: [PATCH 1280/1997] Add skipWhitespacesAndTabs() Co-authored-by: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> --- src/Processors/Formats/Impl/CSVRowInputFormat.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index 60f1cbe1f80..79ce2549b4d 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -354,6 +354,7 @@ bool CSVFormatReader::checkForSuffix() bool CSVFormatReader::checkForEndOfRow() { + skipWhitespacesAndTabs(*buf, format_settings.csv.allow_whitespace_or_tab_as_delimiter); return buf->eof() || *buf->position() == '\n' || *buf->position() == '\r'; } From c23e29d6aa836980337683800c6c2b029cfb7c40 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 20 Jun 2023 20:27:56 +0200 Subject: [PATCH 1281/1997] don't account session's memory in thread/user mem tracker --- src/Common/MemoryTrackerSwitcher.h | 42 ++++++++++++++++++++ src/IO/HTTPCommon.cpp | 4 ++ src/Interpreters/AsynchronousInsertQueue.cpp | 2 +- src/Interpreters/AsynchronousInsertQueue.h | 35 +++------------- src/Server/InterserverIOHTTPHandler.cpp | 1 + 5 files changed, 54 insertions(+), 30 deletions(-) create mode 100644 src/Common/MemoryTrackerSwitcher.h diff --git a/src/Common/MemoryTrackerSwitcher.h b/src/Common/MemoryTrackerSwitcher.h new file mode 100644 index 00000000000..0fefcbb280a --- /dev/null +++ b/src/Common/MemoryTrackerSwitcher.h @@ -0,0 +1,42 @@ +#pragma once + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +struct MemoryTrackerSwitcher +{ + explicit MemoryTrackerSwitcher(MemoryTracker * new_tracker) + { + if (!current_thread) + throw Exception(ErrorCodes::LOGICAL_ERROR, "current_thread is not initialized"); + + auto * thread_tracker = CurrentThread::getMemoryTracker(); + prev_untracked_memory = current_thread->untracked_memory; + prev_memory_tracker_parent = thread_tracker->getParent(); + + current_thread->untracked_memory = 0; + thread_tracker->setParent(new_tracker); + } + + ~MemoryTrackerSwitcher() + { + CurrentThread::flushUntrackedMemory(); + auto * thread_tracker = CurrentThread::getMemoryTracker(); + + current_thread->untracked_memory = prev_untracked_memory; + thread_tracker->setParent(prev_memory_tracker_parent); + } + + MemoryTracker * prev_memory_tracker_parent = nullptr; + Int64 prev_untracked_memory = 0; +}; + +} diff --git a/src/IO/HTTPCommon.cpp b/src/IO/HTTPCommon.cpp index f3e2064c8bf..1731b4022ea 100644 --- a/src/IO/HTTPCommon.cpp +++ b/src/IO/HTTPCommon.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -107,6 +108,9 @@ namespace ObjectPtr allocObject() override { + /// Pool is global, we shouldn't attribute this memory to query/user. 
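/// (Editor's note: an illustrative sketch, not part of the patch.) MemoryTrackerSwitcher,
/// added in this commit, is an RAII helper: its constructor re-parents the current
/// thread's MemoryTracker onto the given tracker and zeroes untracked memory, and its
/// destructor flushes untracked memory and restores the previous parent. A minimal
/// usage sketch, assuming the thread has an initialized ThreadStatus:
///
///     {
///         MemoryTrackerSwitcher switcher(&total_memory_tracker);
///         std::string scratch(1 << 20, '\0'); // this allocation is charged to the global tracker
///     } // previous parent restored, pending untracked memory flushed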
+ MemoryTrackerSwitcher switcher{&total_memory_tracker}; + auto session = makeHTTPSessionImpl(host, port, https, true, resolve_host); if (!proxy_host.empty()) { diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index dc2310cfebf..e6417de53b4 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -125,7 +125,7 @@ void AsynchronousInsertQueue::InsertData::Entry::finish(std::exception_ptr excep // Entries data must be destroyed in context of user who runs async insert. // Each entry in the list may correspond to a different user, // so we need to switch current thread's MemoryTracker. - UserMemoryTrackerSwitcher switcher(user_memory_tracker); + MemoryTrackerSwitcher switcher(user_memory_tracker); bytes = ""; } diff --git a/src/Interpreters/AsynchronousInsertQueue.h b/src/Interpreters/AsynchronousInsertQueue.h index bc60c86d067..f18db69a7bb 100644 --- a/src/Interpreters/AsynchronousInsertQueue.h +++ b/src/Interpreters/AsynchronousInsertQueue.h @@ -1,10 +1,12 @@ #pragma once -#include -#include -#include #include +#include #include +#include +#include +#include + #include namespace DB @@ -60,31 +62,6 @@ private: UInt128 calculateHash() const; }; - struct UserMemoryTrackerSwitcher - { - explicit UserMemoryTrackerSwitcher(MemoryTracker * new_tracker) - { - auto * thread_tracker = CurrentThread::getMemoryTracker(); - prev_untracked_memory = current_thread->untracked_memory; - prev_memory_tracker_parent = thread_tracker->getParent(); - - current_thread->untracked_memory = 0; - thread_tracker->setParent(new_tracker); - } - - ~UserMemoryTrackerSwitcher() - { - CurrentThread::flushUntrackedMemory(); - auto * thread_tracker = CurrentThread::getMemoryTracker(); - - current_thread->untracked_memory = prev_untracked_memory; - thread_tracker->setParent(prev_memory_tracker_parent); - } - - MemoryTracker * prev_memory_tracker_parent; - Int64 prev_untracked_memory; - }; - struct InsertData { struct Entry @@ -114,7 +91,7 @@ private: // so we need to switch current thread's MemoryTracker parent on each iteration. while (it != entries.end()) { - UserMemoryTrackerSwitcher switcher((*it)->user_memory_tracker); + MemoryTrackerSwitcher switcher((*it)->user_memory_tracker); it = entries.erase(it); } } diff --git a/src/Server/InterserverIOHTTPHandler.cpp b/src/Server/InterserverIOHTTPHandler.cpp index ea71d954cc0..9741592868a 100644 --- a/src/Server/InterserverIOHTTPHandler.cpp +++ b/src/Server/InterserverIOHTTPHandler.cpp @@ -80,6 +80,7 @@ void InterserverIOHTTPHandler::processQuery(HTTPServerRequest & request, HTTPSer void InterserverIOHTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) { setThreadName("IntersrvHandler"); + ThreadStatus thread_status; /// In order to work keep-alive. 
if (request.getVersion() == HTTPServerRequest::HTTP_1_1) From aec720563612e3d7faa09bcb2c4b2cc4e5e8935c Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 3 Jul 2023 23:11:32 +0200 Subject: [PATCH 1282/1997] rework pool usage --- src/IO/HTTPCommon.cpp | 44 ++++++++----- src/IO/HTTPCommon.h | 12 ++++ src/IO/ReadBufferFromS3.cpp | 29 ++++++--- src/IO/ReadWriteBufferFromHTTP.cpp | 65 ++++++++----------- .../Formats/Impl/AvroRowInputFormat.cpp | 22 +++---- 5 files changed, 95 insertions(+), 77 deletions(-) diff --git a/src/IO/HTTPCommon.cpp b/src/IO/HTTPCommon.cpp index 1731b4022ea..2f5e0a172a0 100644 --- a/src/IO/HTTPCommon.cpp +++ b/src/IO/HTTPCommon.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include @@ -41,6 +42,7 @@ namespace ErrorCodes extern const int RECEIVED_ERROR_TOO_MANY_REQUESTS; extern const int FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME; extern const int UNSUPPORTED_URI_SCHEME; + extern const int LOGICAL_ERROR; } @@ -271,27 +273,17 @@ namespace auto retry_timeout = timeouts.connection_timeout.totalMicroseconds(); auto session = pool_ptr->second->get(retry_timeout); - /// We store exception messages in session data. - /// Poco HTTPSession also stores exception, but it can be removed at any time. const auto & session_data = session->sessionData(); - if (!session_data.empty()) + if (session_data.empty() || !Poco::AnyCast(&session_data)) { - auto msg = Poco::AnyCast(session_data); - if (!msg.empty()) - { - LOG_TRACE((&Poco::Logger::get("HTTPCommon")), "Failed communicating with {} with error '{}' will try to reconnect session", host, msg); + session->reset(); - if (resolve_host) - { - updateHostIfIpChanged(session, DNSResolver::instance().resolveHost(host).toString()); - } - } - /// Reset the message, once it has been printed, - /// otherwise you will get report for failed parts on and on, - /// even for different tables (since they uses the same session). - session->attachSessionData({}); + if (resolve_host) + updateHostIfIpChanged(session, DNSResolver::instance().resolveHost(host).toString()); } + session->attachSessionData({}); + setTimeouts(*session, timeouts); return session; @@ -388,4 +380,24 @@ Exception HTTPException::makeExceptionMessage( uri, static_cast(http_status), reason, body); } +void markSessionForReuse(Poco::Net::HTTPSession & session) +{ + const auto & session_data = session.sessionData(); + if (!session_data.empty() && !Poco::AnyCast(&session_data)) + throw Exception( + ErrorCodes::LOGICAL_ERROR, "Data of an unexpected type ({}) is attached to the session", session_data.type().name()); + + session.attachSessionData(HTTPSessionReuseTag{}); +} + +void markSessionForReuse(HTTPSessionPtr session) +{ + markSessionForReuse(*session); +} + +void markSessionForReuse(PooledHTTPSessionPtr session) +{ + markSessionForReuse(static_cast(*session)); +} + } diff --git a/src/IO/HTTPCommon.h b/src/IO/HTTPCommon.h index db8fc2a2a40..4733f366c8a 100644 --- a/src/IO/HTTPCommon.h +++ b/src/IO/HTTPCommon.h @@ -55,6 +55,18 @@ private: using PooledHTTPSessionPtr = PoolBase::Entry; // SingleEndpointHTTPSessionPool::Entry using HTTPSessionPtr = std::shared_ptr; +/// If a session have this tag attached, it will be reused without calling `reset()` on it. +/// All pooled sessions don't have this tag attached after being taken from a pool. +/// If the request and the response were fully written/read, the client code should add this tag +/// explicitly by calling `markSessionForReuse()`. 
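/// (Editor's note: an illustrative sketch, not part of the patch.) Under these rules
/// the intended calling pattern looks roughly like this, using the helpers that
/// appear elsewhere in this commit:
///
///     auto session = makePooledHTTPSession(uri, timeouts, 1);
///     session->sendRequest(request);                          // write the full request
///     auto * body = receiveResponse(*session, request, response, false);
///     // ... read `body` to EOF ...
///     markSessionForReuse(session);                           // safe to reuse: attach the tag
///
/// A session returned to the pool without the tag is reset() before being handed out again.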
+struct HTTPSessionReuseTag +{ +}; + +void markSessionForReuse(HTTPSessionPtr session); +void markSessionForReuse(PooledHTTPSessionPtr session); + + void setResponseDefaultHeaders(HTTPServerResponse & response, size_t keep_alive_timeout); /// Create session object to perform requests and set required parameters. diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp index fdbe1a4ba57..5c562d32fbc 100644 --- a/src/IO/ReadBufferFromS3.cpp +++ b/src/IO/ReadBufferFromS3.cpp @@ -1,3 +1,4 @@ +#include #include #include "config.h" @@ -35,31 +36,41 @@ namespace ProfileEvents namespace { -void resetSession(Aws::S3::Model::GetObjectResult & read_result) +DB::PooledHTTPSessionPtr getSession(Aws::S3::Model::GetObjectResult & read_result) { if (auto * session_aware_stream = dynamic_cast *>(&read_result.GetBody())) - { - auto & session - = static_cast(*static_cast(session_aware_stream->getSession())); - session.reset(); - } + return static_cast(session_aware_stream->getSession()); else if (!dynamic_cast *>(&read_result.GetBody())) - { throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Session of unexpected type encountered"); + return {}; +} + +void resetSession(Aws::S3::Model::GetObjectResult & read_result) +{ + if (auto session = getSession(read_result); !session.isNull()) + { + auto & http_session = static_cast(*session); + http_session.reset(); } } void resetSessionIfNeeded(bool read_all_range_successfully, std::optional & read_result) { - if (!read_all_range_successfully && read_result) + if (!read_result) + return; + + if (!read_all_range_successfully) { /// When we abandon a session with an ongoing GetObject request and there is another one trying to delete the same object this delete /// operation will hang until GetObject's session idle timeout expires. So we have to call `reset()` on GetObject's session immediately.
resetSession(*read_result); ProfileEvents::increment(ProfileEvents::ReadBufferFromS3ResetSessions); } - else + else if (auto session = getSession(*read_result); !session.isNull()) + { + DB::markSessionForReuse(session); ProfileEvents::increment(ProfileEvents::ReadBufferFromS3PreservedSessions); + } } } diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp index cf1159bfb4b..b834c17ab6c 100644 --- a/src/IO/ReadWriteBufferFromHTTP.cpp +++ b/src/IO/ReadWriteBufferFromHTTP.cpp @@ -1,5 +1,7 @@ #include "ReadWriteBufferFromHTTP.h" +#include + namespace ProfileEvents { extern const Event ReadBufferSeekCancelConnection; @@ -146,30 +148,20 @@ std::istream * ReadWriteBufferFromHTTPBase::callImpl( LOG_TRACE(log, "Sending request to {}", uri_.toString()); auto sess = current_session->getSession(); - try - { - auto & stream_out = sess->sendRequest(request); + auto & stream_out = sess->sendRequest(request); - if (out_stream_callback) - out_stream_callback(stream_out); + if (out_stream_callback) + out_stream_callback(stream_out); - auto result_istr = receiveResponse(*sess, request, response, true); - response.getCookies(cookies); + auto result_istr = receiveResponse(*sess, request, response, true); + response.getCookies(cookies); - /// we can fetch object info while the request is being processed - /// and we don't want to override any context used by it - if (!for_object_info) - content_encoding = response.get("Content-Encoding", ""); + /// we can fetch object info while the request is being processed + /// and we don't want to override any context used by it + if (!for_object_info) + content_encoding = response.get("Content-Encoding", ""); - return result_istr; - } - catch (const Poco::Exception & e) - { - /// We use session data storage as storage for exception text - /// Depend on it we can deduce to reconnect session or reresolve session host - sess->attachSessionData(e.message()); - throw; - } + return result_istr; } template @@ -429,23 +421,10 @@ void ReadWriteBufferFromHTTPBase::initialize() if (!read_range.end && response.hasContentLength()) file_info = parseFileInfo(response, withPartialContent(read_range) ? getOffset() : 0); - try - { - impl = std::make_unique(*istr, buffer_size); + impl = std::make_unique(*istr, buffer_size); - if (use_external_buffer) - { - setupExternalBuffer(); - } - } - catch (const Poco::Exception & e) - { - /// We use session data storage as storage for exception text - /// Depend on it we can deduce to reconnect session or reresolve session host - auto sess = session->getSession(); - sess->attachSessionData(e.message()); - throw; - } + if (use_external_buffer) + setupExternalBuffer(); } template @@ -460,7 +439,11 @@ bool ReadWriteBufferFromHTTPBase::nextImpl() if ((read_range.end && getOffset() > read_range.end.value()) || (file_info && file_info->file_size && getOffset() >= file_info->file_size.value())) + { + /// Response was fully read. + markSessionForReuse(session->getSession()); return false; + } if (impl) { @@ -582,7 +565,11 @@ bool ReadWriteBufferFromHTTPBase::nextImpl() std::rethrow_exception(exception); if (!result) + { + /// Eof is reached, i.e response was fully read. 
+ markSessionForReuse(session->getSession()); return false; + } internal_buffer = impl->buffer(); working_buffer = internal_buffer; @@ -635,12 +622,14 @@ size_t ReadWriteBufferFromHTTPBase::readBigAt(char * to, si bool cancelled; size_t r = copyFromIStreamWithProgressCallback(*result_istr, to, n, progress_callback, &cancelled); + if (!cancelled) + /// Response was fully read. + markSessionForReuse(sess); + return r; } catch (const Poco::Exception & e) { - sess->attachSessionData(e.message()); - LOG_ERROR( log, "HTTP request (positioned) to `{}` with range [{}, {}) failed at try {}/{}: {}", diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index 1ec7491658e..4cd73cb23b5 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -935,23 +935,17 @@ private: request.setHost(url.getHost()); auto session = makePooledHTTPSession(url, timeouts, 1); - std::istream * response_body{}; - try - { - session->sendRequest(request); + session->sendRequest(request); + + Poco::Net::HTTPResponse response; + std::istream * response_body = receiveResponse(*session, request, response, false); - Poco::Net::HTTPResponse response; - response_body = receiveResponse(*session, request, response, false); - } - catch (const Poco::Exception & e) - { - /// We use session data storage as storage for exception text - /// Depend on it we can deduce to reconnect session or reresolve session host - session->attachSessionData(e.message()); - throw; - } Poco::JSON::Parser parser; auto json_body = parser.parse(*response_body).extract(); + + /// Response was fully read. + markSessionForReuse(session); + auto schema = json_body->getValue("schema"); LOG_TRACE((&Poco::Logger::get("AvroConfluentRowInputFormat")), "Successfully fetched schema id = {}\n{}", id, schema); return avro::compileJsonSchemaFromString(schema); From 5a6957d95e46861f39bdb1c39e442951b1e26d47 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 6 Jul 2023 13:02:03 +0000 Subject: [PATCH 1283/1997] Disable ThinLTO on non-Linux MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cross-compiling on Linux for Mac failed with CMake parameters -DCMAKE_BUILD_TYPE=None -DENABLE_CLICKHOUSE_SELF_EXTRACTING=1 -DENABLE_TESTS=0 (see below). This happened e.g. in #51243. The problem was that whether ThinLTO is enabled or disabled depends on ENABLE_TESTS (see the top-level CMakeLists.txt): if ENABLE_TESTS=0, then ThinLTO is activated. On Linux, building/linking works with or without ThinLTO, but on Mac, building/linking the self-extracting compressor binary doesn’t work if ThinLTO is on. This is quite weird; as a workaround, restrict ThinLTO to Linux. ------- [185/187] Linking CXX static library base/glibc-compatibility/libglibc-compatibility.a [186/187] Linking CXX static library contrib/zstd-cmake/lib_zstd.a [187/187] Linking CXX executable utils/self-extracting-executable/pre_compressor -- Configuring done -- Generating done -- Build files have been written to: /home/ubuntu/repo/ch4/build [0/2] Re-checking globbed directories...
[108/108] Linking CXX executable utils/self-extracting-executable/pre_compressor FAILED: utils/self-extracting-executable/pre_compressor : && /usr/bin/clang++-16 --target=x86_64-apple-darwin -std=c++20 -fdiagnostics-color=always -Xclang -fuse-ctor-homing -Wno-enum-constexpr-conversion -fsized-deallocation -gdwarf-aranges -pipe -mssse3 -msse4.1 -msse4.2 -mpclmul -mpopcnt -fasynchronous-unwind-tables -ffile-prefix-map=/home/ubuntu/repo/ch4=. -falign-functions=32 -mbranches-within-32B-boundaries -stdlib=libc++ -fdiagnostics-absolute-paths -fstrict-vtable-pointers -Wall -Wextra -Wframe-larger-than=65536 -Weverything -Wpedantic -Wno-zero-length-array -Wno-c++98-compat-pedantic -Wno-c++98-compat -Wno-c++20-compat -Wno-sign-conversion -Wno-implicit-int-conversion -Wno-implicit-int-float-conversion -Wno-ctad-maybe-unsupported -Wno-disabled-macro-expansion -Wno-documentation-unknown-command -Wno-double-promotion -Wno-exit-time-destructors -Wno-float-equal -Wno-global-constructors -Wno-missing-prototypes -Wno-missing-variable-declarations -Wno-padded -Wno-switch-enum -Wno-undefined-func-template -Wno-unused-template -Wno-vla -Wno-weak-template-vtables -Wno-weak-vtables -Wno-thread-safety-negative -Wno-enum-constexpr-conversion -Wno-unsafe-buffer-usage -O2 -g -DNDEBUG -O3 -g -gdwarf-4 -flto=thin -fwhole-program-vtables -isysroot /home/ubuntu/repo/ch4/cmake/darwin/../toolchain/darwin-x86_64 -mmacosx-version-min=10.15 -Wl,-headerpad_max_install_names --ld-path=/home/ubuntu/cctools/bin/x86_64-apple-darwin-ld -rdynamic -Wl,-U,_inside_main -flto=thin -fwhole-program-vtables utils/self-extracting-executable/CMakeFiles/pre_compressor.dir/compressor.cpp.o -o utils/self-extracting-executable/pre_compressor contrib/zstd-cmake/lib_zstd.a contrib/libcxx-cmake/libcxx.a contrib/libcxxabi-cmake/libcxxabi.a -nodefaultlibs -lc -lm -lpthread -ldl && : clang: warning: argument unused during compilation: '-stdlib=libc++' [-Wunused-command-line-argument] ld: warning: ignoring file utils/self-extracting-executable/CMakeFiles/pre_compressor.dir/compressor.cpp.o, building for macOS-x86_64 but attempting to link with file built for unknown-unsupported file format ( 0xDE 0xC0 0x17 0x0B 0x00 0x00 0x00 0x00 0x14 0x00 0x00 0x00 0x88 0x3E 0x03 0x00 ) ld: warning: ignoring file contrib/zstd-cmake/lib_zstd.a, building for macOS-x86_64 but attempting to link with file built for macOS-x86_64 ld: warning: ignoring file contrib/libcxxabi-cmake/libcxxabi.a, building for macOS-x86_64 but attempting to link with file built for unknown-unsupported file format ( 0x21 0x3C 0x61 0x72 0x63 0x68 0x3E 0x0A 0x23 0x31 0x2F 0x31 0x32 0x20 0x20 0x20 ) ld: warning: ignoring file contrib/libcxx-cmake/libcxx.a, building for macOS-x86_64 but attempting to link with file built for unknown-unsupported file format ( 0x21 0x3C 0x61 0x72 0x63 0x68 0x3E 0x0A 0x23 0x31 0x2F 0x31 0x32 0x20 0x20 0x20 ) Undefined symbols for architecture x86_64: "_main", referenced from: implicit entry/start for main executable ld: symbol(s) not found for architecture x86_64 clang: error: linker command failed with exit code 1 (use -v to see invocation) ninja: build stopped: subcommand failed.
--- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5d6ed75bb29..06ee98b5ee1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -344,9 +344,9 @@ if (COMPILER_CLANG) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-absolute-paths") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fdiagnostics-absolute-paths") - if (NOT ENABLE_TESTS AND NOT SANITIZE) + if (NOT ENABLE_TESTS AND NOT SANITIZE AND OS_LINUX) # https://clang.llvm.org/docs/ThinLTO.html - # Applies to clang only. + # Applies to clang and linux only. # Disabled when building with tests or sanitizers. option(ENABLE_THINLTO "Clang-specific link time optimization" ON) endif() From 28332076054cc77660a4dbc3e13dcea1999a6342 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Thu, 6 Jul 2023 13:09:49 +0000 Subject: [PATCH 1284/1997] Edit tests to test last commit --- tests/queries/0_stateless/00301_csv.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00301_csv.sh b/tests/queries/0_stateless/00301_csv.sh index 776bd39fc03..80053c99a17 100755 --- a/tests/queries/0_stateless/00301_csv.sh +++ b/tests/queries/0_stateless/00301_csv.sh @@ -44,7 +44,7 @@ $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; echo === Test ignore extra columns $CLICKHOUSE_CLIENT --query="CREATE TABLE csv (s String, n UInt64 DEFAULT 3, d String DEFAULT 'String4') ENGINE = Memory"; -echo '"Hello", 1, "String1" +echo '"Hello", 1, "String1" "Hello", 2, "String2", "Hello", 3, "String3", "2016-01-13" "Hello", 4, , "2016-01-14" From dee71d2e2f8cdd6be4a82f26e7af9b8a75453091 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Thu, 6 Jul 2023 13:16:31 +0000 Subject: [PATCH 1285/1997] Add first version of hasSubsequence() --- src/Functions/HasSubsequenceImpl.h | 131 ++++++++++++++++++ src/Functions/hasSubsequence.cpp | 29 ++++ .../hasSubsequenceCaseInsensitive.cpp | 28 ++++ src/Functions/like.cpp | 1 - .../02809_has_subsequence.reference | 16 +++ .../0_stateless/02809_has_subsequence.sql | 19 +++ 6 files changed, 223 insertions(+), 1 deletion(-) create mode 100644 src/Functions/HasSubsequenceImpl.h create mode 100644 src/Functions/hasSubsequence.cpp create mode 100644 src/Functions/hasSubsequenceCaseInsensitive.cpp create mode 100644 tests/queries/0_stateless/02809_has_subsequence.reference create mode 100644 tests/queries/0_stateless/02809_has_subsequence.sql diff --git a/src/Functions/HasSubsequenceImpl.h b/src/Functions/HasSubsequenceImpl.h new file mode 100644 index 00000000000..3a29ef68b0b --- /dev/null +++ b/src/Functions/HasSubsequenceImpl.h @@ -0,0 +1,131 @@ +#pragma once + + +namespace DB +{ +namespace +{ + +template +struct HasSubsequenceImpl +{ + using ResultType = UInt8; + + static constexpr bool use_default_implementation_for_constants = false; + static constexpr bool supports_start_pos = false; + static constexpr auto name = Name::name; + + static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {};} + + /// Find one substring in many strings. + static void vectorConstant( + const ColumnString::Chars & /*haystack_data*/, + const ColumnString::Offsets & /*haystack_offsets*/, + const std::string & /*needle*/, + const ColumnPtr & /*start_pos*/, + PaddedPODArray & res, + [[maybe_unused]] ColumnUInt8 * /*res_null*/) + { + size_t size = res.size(); + for (size_t i = 0; i < size; ++i) + { + res[i] = 0; + } + } + + /// Search each time for a different single substring inside each time different string. 
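/// (Editor's note: illustrative only.) "Subsequence" here means the characters of the
/// needle occur in the haystack in the same order, but not necessarily contiguously;
/// the greedy single-pass scan in impl() below is sufficient to decide this:
///
///     impl("garbage", 7, "arg", 3) == 1   // matches 'a' (pos 1), 'r' (pos 2), 'g' (pos 5)
///     impl("garbage", 7, "ea", 2) == 0    // 'e' only matches at the end, no 'a' after it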
+ static void vectorVector( + const ColumnString::Chars & haystack_data, + const ColumnString::Offsets & haystack_offsets, + const ColumnString::Chars & needle_data, + const ColumnString::Offsets & needle_offsets, + const ColumnPtr & /*start_pos*/, + PaddedPODArray & res, + ColumnUInt8 * /*res_null*/) + { + ColumnString::Offset prev_haystack_offset = 0; + ColumnString::Offset prev_needle_offset = 0; + + size_t size = haystack_offsets.size(); + + for (size_t i = 0; i < size; ++i) + { + size_t needle_size = needle_offsets[i] - prev_needle_offset - 1; + size_t haystack_size = haystack_offsets[i] - prev_haystack_offset - 1; + + if (0 == needle_size) + { + res[i] = 1; + } + else + { + const char * needle = reinterpret_cast(&needle_data[prev_needle_offset]); + const char * haystack = reinterpret_cast(&haystack_data[prev_haystack_offset]); + res[i] = impl(haystack, haystack_size, needle, needle_size); + } + + prev_haystack_offset = haystack_offsets[i]; + prev_needle_offset = needle_offsets[i]; + } + } + + /// Find many substrings in single string. + static void constantVector( + const String & /*haystack*/, + const ColumnString::Chars & /*needle_data*/, + const ColumnString::Offsets & needle_offsets, + const ColumnPtr & /*start_pos*/, + PaddedPODArray & res, + ColumnUInt8 * /*res_null*/) + { + size_t size = needle_offsets.size(); + + for (size_t i = 0; i < size; ++i) + { + res[i] = 0; + } + } + + static UInt8 impl(const char * haystack, size_t haystack_size, const char * needle, size_t needle_size) + { + size_t j = 0; + for (size_t i = 0; (i < haystack_size) && (j < needle_size); i++) + if (needle[j] == haystack[i]) + ++j; + return j == needle_size; + } + + static void constantConstant( + std::string haystack, + std::string needle, + const ColumnPtr & /*start_pos*/, + PaddedPODArray & res, + ColumnUInt8 * /*res_null*/) + { + size_t size = res.size(); + Impl::toLowerIfNeed(haystack); + Impl::toLowerIfNeed(needle); + + UInt8 result = impl(haystack.c_str(), haystack.size(), needle.c_str(), needle.size()); + + for (size_t i = 0; i < size; ++i) + { + res[i] = result; + } + } + template + static void vectorFixedConstant(Args &&...) + { + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Function '{}' doesn't support FixedString haystack argument", name); + } + + template + static void vectorFixedVector(Args &&...) 
+ { + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Function '{}' doesn't support FixedString haystack argument", name); + } +}; + +} + +} diff --git a/src/Functions/hasSubsequence.cpp b/src/Functions/hasSubsequence.cpp new file mode 100644 index 00000000000..da2aaddcf50 --- /dev/null +++ b/src/Functions/hasSubsequence.cpp @@ -0,0 +1,29 @@ +#include +#include +#include + + +namespace DB +{ +namespace +{ + +struct HasSubsequenceCaseSensitiveASCII +{ + static void toLowerIfNeed(std::string & /*s*/) { } +}; + +struct NameHasSubsequence +{ + static constexpr auto name = "hasSubsequence"; +}; + +using FunctionHasSubsequence = FunctionsStringSearch>; +} + +REGISTER_FUNCTION(hasSubsequence) +{ + factory.registerFunction({}, FunctionFactory::CaseInsensitive); +} + +} diff --git a/src/Functions/hasSubsequenceCaseInsensitive.cpp b/src/Functions/hasSubsequenceCaseInsensitive.cpp new file mode 100644 index 00000000000..f5c13a7cf8c --- /dev/null +++ b/src/Functions/hasSubsequenceCaseInsensitive.cpp @@ -0,0 +1,28 @@ +#include +#include +#include + +namespace DB +{ +namespace +{ + +struct HasSubsequenceCaseInsensitiveASCII +{ + static void toLowerIfNeed(std::string & s) { std::transform(std::begin(s), std::end(s), std::begin(s), tolower); } +}; + +struct NameHasSubsequenceCaseInsensitive +{ + static constexpr auto name = "hasSubsequenceCaseInsensitive"; +}; + +using FunctionHasSubsequenceCaseInsensitive = FunctionsStringSearch>; +} + +REGISTER_FUNCTION(hasSubsequenceCaseInsensitive) +{ + factory.registerFunction({}, FunctionFactory::CaseInsensitive); +} + +} diff --git a/src/Functions/like.cpp b/src/Functions/like.cpp index 3a3345051d4..5a86e37a92d 100644 --- a/src/Functions/like.cpp +++ b/src/Functions/like.cpp @@ -1,4 +1,3 @@ -#include "FunctionsStringSearch.h" #include "FunctionFactory.h" #include "like.h" diff --git a/tests/queries/0_stateless/02809_has_subsequence.reference b/tests/queries/0_stateless/02809_has_subsequence.reference new file mode 100644 index 00000000000..827caa105d0 --- /dev/null +++ b/tests/queries/0_stateless/02809_has_subsequence.reference @@ -0,0 +1,16 @@ +1 +1 +1 +1 +1 +1 +1 +1 +1 +0 +0 +0 +1 +1 +1 +0 \ No newline at end of file diff --git a/tests/queries/0_stateless/02809_has_subsequence.sql b/tests/queries/0_stateless/02809_has_subsequence.sql new file mode 100644 index 00000000000..63ffb49dc54 --- /dev/null +++ b/tests/queries/0_stateless/02809_has_subsequence.sql @@ -0,0 +1,19 @@ +select hasSubsequence('garbage', ''); +select hasSubsequence('garbage', 'g'); +select hasSubsequence('garbage', 'a'); +select hasSubsequence('garbage', 'e'); +select hasSubsequence('garbage', 'gr'); +select hasSubsequence('garbage', 'ab'); +select hasSubsequence('garbage', 'be'); +select hasSubsequence('garbage', 'arg'); +select hasSubsequence('garbage', 'garbage'); + +select hasSubsequence('garbage', 'garbage1'); +select hasSubsequence('garbage', 'arbw'); +select hasSubsequence('garbage', 'ARG'); + +select hasSubsequenceCaseInsensitive('garbage', 'ARG'); + +select hasSubsequence(materialize('garbage'), materialize('')); +select hasSubsequence(materialize('garbage'), materialize('arg')); +select hasSubsequence(materialize('garbage'), materialize('garbage1')); \ No newline at end of file From 810d1ee0694cc769170f4b08c58aa4c2c5b0807a Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 6 Jul 2023 13:48:57 +0000 Subject: [PATCH 1286/1997] Fix tests --- src/Processors/Formats/IRowInputFormat.h | 2 +- .../Formats/Impl/ArrowBlockInputFormat.h | 2 +- .../Impl/JSONColumnsBlockInputFormatBase.h | 2 +- 
src/Processors/Formats/Impl/NativeFormat.cpp | 2 +- .../Formats/Impl/ORCBlockInputFormat.h | 2 +- .../Formats/Impl/ParallelParsingInputFormat.h | 2 +- .../Formats/Impl/ParquetBlockInputFormat.h | 2 +- .../Formats/Impl/ValuesBlockInputFormat.h | 2 +- src/Storages/HDFS/StorageHDFS.cpp | 22 +++++++++++++------ 9 files changed, 23 insertions(+), 15 deletions(-) diff --git a/src/Processors/Formats/IRowInputFormat.h b/src/Processors/Formats/IRowInputFormat.h index b7b1b0b29a6..00888cfa5e9 100644 --- a/src/Processors/Formats/IRowInputFormat.h +++ b/src/Processors/Formats/IRowInputFormat.h @@ -85,7 +85,7 @@ private: size_t num_errors = 0; BlockMissingValues block_missing_values; - size_t approx_bytes_read_for_chunk; + size_t approx_bytes_read_for_chunk = 0; }; } diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.h b/src/Processors/Formats/Impl/ArrowBlockInputFormat.h index df77994c3d5..2db8bd6c59c 100644 --- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.h @@ -50,7 +50,7 @@ private: int record_batch_current = 0; BlockMissingValues block_missing_values; - size_t approx_bytes_read_for_chunk; + size_t approx_bytes_read_for_chunk = 0; const FormatSettings format_settings; diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h index 5ab20c796ea..bb52e2aa516 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h +++ b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h @@ -67,7 +67,7 @@ protected: Serializations serializations; std::unique_ptr reader; BlockMissingValues block_missing_values; - size_t approx_bytes_read_for_chunk; + size_t approx_bytes_read_for_chunk = 0; }; diff --git a/src/Processors/Formats/Impl/NativeFormat.cpp b/src/Processors/Formats/Impl/NativeFormat.cpp index f8c9a39eedf..65ea87479a3 100644 --- a/src/Processors/Formats/Impl/NativeFormat.cpp +++ b/src/Processors/Formats/Impl/NativeFormat.cpp @@ -66,7 +66,7 @@ private: std::unique_ptr reader; Block header; BlockMissingValues block_missing_values; - size_t approx_bytes_read_for_chunk; + size_t approx_bytes_read_for_chunk = 0; }; class NativeOutputFormat final : public IOutputFormat diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.h b/src/Processors/Formats/Impl/ORCBlockInputFormat.h index 98561e72e61..7097ea3ac08 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.h @@ -52,7 +52,7 @@ private: std::vector include_indices; BlockMissingValues block_missing_values; - size_t approx_bytes_read_for_chunk; + size_t approx_bytes_read_for_chunk = 0; const FormatSettings format_settings; const std::unordered_set & skip_stripes; diff --git a/src/Processors/Formats/Impl/ParallelParsingInputFormat.h b/src/Processors/Formats/Impl/ParallelParsingInputFormat.h index 4495680f5b2..f61dc3fbc78 100644 --- a/src/Processors/Formats/Impl/ParallelParsingInputFormat.h +++ b/src/Processors/Formats/Impl/ParallelParsingInputFormat.h @@ -202,7 +202,7 @@ private: const size_t max_block_size; BlockMissingValues last_block_missing_values; - size_t last_approx_bytes_read_for_chunk; + size_t last_approx_bytes_read_for_chunk = 0; /// Non-atomic because it is used in one thread. 
std::optional next_block_in_current_unit; diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h index a14c51f8b9f..dc14edf2099 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h @@ -273,7 +273,7 @@ private: std::unique_ptr pool; BlockMissingValues previous_block_missing_values; - size_t previous_approx_bytes_read_for_chunk; + size_t previous_approx_bytes_read_for_chunk = 0; std::exception_ptr background_exception = nullptr; std::atomic is_stopped{0}; diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.h b/src/Processors/Formats/Impl/ValuesBlockInputFormat.h index d540a24fa70..8f8d44ec088 100644 --- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.h @@ -96,7 +96,7 @@ private: Serializations serializations; BlockMissingValues block_missing_values; - size_t approx_bytes_read_for_chunk; + size_t approx_bytes_read_for_chunk = 0; }; class ValuesSchemaReader : public IRowSchemaReader diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index b9be01cf2ae..e583d2e30b7 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -301,18 +301,26 @@ public: StorageHDFS::PathWithInfo next() { - size_t current_index = index.fetch_add(1); - if (current_index >= uris.size()) - return {"", {}}; + String uri; + hdfsFileInfo * hdfs_info; + do + { + size_t current_index = index.fetch_add(1); + if (current_index >= uris.size()) + return {"", {}}; + + uri = uris[current_index]; + auto path_and_uri = getPathFromUriAndUriWithoutPath(uri); + hdfs_info = hdfsGetPathInfo(fs.get(), path_and_uri.first.c_str()); + } + /// Skip non-existed files. + while (String(hdfsGetLastError()).find("FileNotFoundException") != std::string::npos); - auto uri = uris[current_index]; - auto path_and_uri = getPathFromUriAndUriWithoutPath(uri); - auto * hdfs_info = hdfsGetPathInfo(fs.get(), path_and_uri.first.c_str()); std::optional info; if (hdfs_info) { info = StorageHDFS::PathInfo{hdfs_info->mLastMod, static_cast(hdfs_info->mSize)}; - if (file_progress_callback && hdfs_info) + if (file_progress_callback) file_progress_callback(FileProgress(0, hdfs_info->mSize)); } From 6bbaade4a63524c4c1c4376e18d8fa1f3e3914a9 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 6 Jul 2023 13:15:38 +0200 Subject: [PATCH 1287/1997] Update sccache, do not fail on connection error --- docker/test/util/Dockerfile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docker/test/util/Dockerfile b/docker/test/util/Dockerfile index 85e888f1df7..b255a2cc23d 100644 --- a/docker/test/util/Dockerfile +++ b/docker/test/util/Dockerfile @@ -94,7 +94,10 @@ RUN mkdir /tmp/ccache \ && rm -rf /tmp/ccache ARG TARGETARCH -ARG SCCACHE_VERSION=v0.4.1 +ARG SCCACHE_VERSION=v0.5.4 +ENV SCCACHE_IGNORE_SERVER_IO_ERROR=1 +# sccache requires a value for the region. 
So by default we use The Default Region +ENV SCCACHE_REGION=us-east-1 RUN arch=${TARGETARCH:-amd64} \ && case $arch in \ amd64) rarch=x86_64 ;; \ From 67e2dee7e2ea926d6a0a6ab35b31b2515f518426 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Thu, 6 Jul 2023 14:29:58 +0000 Subject: [PATCH 1288/1997] Allow SETTINGS before FORMAT in DESCRIBE TABLE query --- src/Parsers/ParserDescribeTableQuery.cpp | 20 +++++++++++++++---- src/Parsers/ParserQueryWithOutput.cpp | 2 +- src/Parsers/ParserTablePropertiesQuery.cpp | 2 -- src/Storages/StorageDistributed.cpp | 1 - src/Storages/getStructureOfRemoteTable.cpp | 1 - .../02789_describe_table_settings.reference | 10 ++++++++++ .../02789_describe_table_settings.sql | 3 +++ 7 files changed, 30 insertions(+), 9 deletions(-) create mode 100644 tests/queries/0_stateless/02789_describe_table_settings.reference create mode 100644 tests/queries/0_stateless/02789_describe_table_settings.sql diff --git a/src/Parsers/ParserDescribeTableQuery.cpp b/src/Parsers/ParserDescribeTableQuery.cpp index ad6d2c5bcc6..fcfc4799dbe 100644 --- a/src/Parsers/ParserDescribeTableQuery.cpp +++ b/src/Parsers/ParserDescribeTableQuery.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include @@ -16,8 +17,10 @@ bool ParserDescribeTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & ex ParserKeyword s_describe("DESCRIBE"); ParserKeyword s_desc("DESC"); ParserKeyword s_table("TABLE"); + ParserKeyword s_settings("SETTINGS"); ParserToken s_dot(TokenType::Dot); ParserIdentifier name_p; + ParserSetQuery parser_settings(true); ASTPtr database; ASTPtr table; @@ -29,12 +32,21 @@ bool ParserDescribeTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & ex s_table.ignore(pos, expected); - ASTPtr table_expression; - if (!ParserTableExpression().parse(pos, table_expression, expected)) + if (!ParserTableExpression().parse(pos, query->table_expression, expected)) return false; - query->children.push_back(std::move(table_expression)); - query->table_expression = query->children.back(); + /// For compatibility with SELECTs, where SETTINGS can be in front of FORMAT + ASTPtr settings; + if (s_settings.ignore(pos, expected)) + { + if (!parser_settings.parse(pos, query->settings_ast, expected)) + return false; + } + + query->children.push_back(query->table_expression); + + if (query->settings_ast) + query->children.push_back(query->settings_ast); node = query; diff --git a/src/Parsers/ParserQueryWithOutput.cpp b/src/Parsers/ParserQueryWithOutput.cpp index 6796f4528c4..5dc713ca8c6 100644 --- a/src/Parsers/ParserQueryWithOutput.cpp +++ b/src/Parsers/ParserQueryWithOutput.cpp @@ -150,7 +150,7 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec // SETTINGS key1 = value1, key2 = value2, ... 
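/// (Editor's note: illustrative only.) Together with the ParserDescribeTableQuery
/// change above, both clause orders now parse for DESCRIBE, while a duplicated
/// SETTINGS clause stays a syntax error. Hypothetical examples:
///
///     DESC tbl SETTINGS max_threads = 1 FORMAT CSV;                          -- newly accepted
///     DESC tbl FORMAT CSV SETTINGS max_threads = 1;                          -- accepted as before
///     DESC tbl SETTINGS max_threads = 1 FORMAT CSV SETTINGS max_threads = 0; -- rejected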
ParserKeyword s_settings("SETTINGS"); - if (s_settings.ignore(pos, expected)) + if (!query_with_output.settings_ast && s_settings.ignore(pos, expected)) { ParserSetQuery parser_settings(true); if (!parser_settings.parse(pos, query_with_output.settings_ast, expected)) diff --git a/src/Parsers/ParserTablePropertiesQuery.cpp b/src/Parsers/ParserTablePropertiesQuery.cpp index b73ce8de359..94f264fcc89 100644 --- a/src/Parsers/ParserTablePropertiesQuery.cpp +++ b/src/Parsers/ParserTablePropertiesQuery.cpp @@ -14,8 +14,6 @@ bool ParserTablePropertiesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & { ParserKeyword s_exists("EXISTS"); ParserKeyword s_temporary("TEMPORARY"); - ParserKeyword s_describe("DESCRIBE"); - ParserKeyword s_desc("DESC"); ParserKeyword s_show("SHOW"); ParserKeyword s_create("CREATE"); ParserKeyword s_database("DATABASE"); diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index b91ad0b963a..b6359bbb251 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -60,7 +60,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Storages/getStructureOfRemoteTable.cpp b/src/Storages/getStructureOfRemoteTable.cpp index e5fc01be9f4..ec8f27feeda 100644 --- a/src/Storages/getStructureOfRemoteTable.cpp +++ b/src/Storages/getStructureOfRemoteTable.cpp @@ -2,7 +2,6 @@ #include #include #include -#include #include #include #include diff --git a/tests/queries/0_stateless/02789_describe_table_settings.reference b/tests/queries/0_stateless/02789_describe_table_settings.reference new file mode 100644 index 00000000000..c2bf9219f4d --- /dev/null +++ b/tests/queries/0_stateless/02789_describe_table_settings.reference @@ -0,0 +1,10 @@ +"id","Nullable(Int64)","","","","","" +"age","LowCardinality(UInt8)","","","","","" +"name","Nullable(String)","","","","","" +"status","Nullable(String)","","","","","" +"hobbies","Array(Nullable(String))","","","","","" +"id","Nullable(Int64)","","","","","" +"age","LowCardinality(UInt8)","","","","","" +"name","Nullable(String)","","","","","" +"status","Nullable(String)","","","","","" +"hobbies","Array(Nullable(String))","","","","","" diff --git a/tests/queries/0_stateless/02789_describe_table_settings.sql b/tests/queries/0_stateless/02789_describe_table_settings.sql new file mode 100644 index 00000000000..64b5b21fea8 --- /dev/null +++ b/tests/queries/0_stateless/02789_describe_table_settings.sql @@ -0,0 +1,3 @@ +DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]}') SETTINGS schema_inference_hints = 'age LowCardinality(UInt8), status Nullable(String)', allow_suspicious_low_cardinality_types=1 FORMAT CSV; +DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]}') FORMAT CSV SETTINGS schema_inference_hints = 'age LowCardinality(UInt8), status Nullable(String)', allow_suspicious_low_cardinality_types=1; +DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]}') FORMAT CSV SETTINGS schema_inference_hints = 'age LowCardinality(UInt8), status Nullable(String)', allow_suspicious_low_cardinality_types=1 SETTINGS max_threads=0; -- { clientError SYNTAX_ERROR } From 7644f0b37c88cd924f20ecec4acc599e50491423 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 6 Jul 2023 14:44:06 +0000 Subject: [PATCH 1289/1997] Cosmetics: move code around --- src/IO/VarInt.h | 282 
+++++++++++++++++++++++------------------------- 1 file changed, 132 insertions(+), 150 deletions(-) diff --git a/src/IO/VarInt.h b/src/IO/VarInt.h index f6441391c8f..a88347d68eb 100644 --- a/src/IO/VarInt.h +++ b/src/IO/VarInt.h @@ -12,24 +12,77 @@ namespace DB /// Variable-Length Quantity (VLQ) Base-128 compression, also known as Variable Byte (VB) or Varint encoding. -/// Write UInt64 in variable length format (base128) -void writeVarUInt(UInt64 x, std::ostream & ostr); -void writeVarUInt(UInt64 x, WriteBuffer & ostr); -char * writeVarUInt(UInt64 x, char * ostr); - -/// Read UInt64, written in variable length format (base128) -void readVarUInt(UInt64 & x, std::istream & istr); -void readVarUInt(UInt64 & x, ReadBuffer & istr); -const char * readVarUInt(UInt64 & x, const char * istr, size_t size); - -/// Get the length of an variable-length-encoded integer -size_t getLengthOfVarUInt(UInt64 x); -size_t getLengthOfVarInt(Int64 x); - [[noreturn]] void throwReadAfterEOF(); [[noreturn]] void throwValueTooLargeForVarIntEncoding(UInt64 x); -/// Write Int64 in variable length format (base128) + +/// NOTE: Due to historical reasons, only values up to 1<<63-1 can be safely encoded/decoded (bigger values are not idempotent under +/// encoding/decoding). This cannot be changed without breaking backward compatibility (some drivers, e.g. clickhouse-rs (Rust), have the +/// same limitation, others support the full 1<<64 range, e.g. clickhouse-driver (Python)) +constexpr UInt64 VAR_UINT_MAX = (1ULL<<63) - 1; + +inline void writeVarUInt(UInt64 x, WriteBuffer & ostr) +{ + if (x > VAR_UINT_MAX) [[unlikely]] + throwValueTooLargeForVarIntEncoding(x); + + for (size_t i = 0; i < 9; ++i) + { + uint8_t byte = x & 0x7F; + if (x > 0x7F) + byte |= 0x80; + + ostr.nextIfAtEnd(); + *ostr.position() = byte; + ++ostr.position(); + + x >>= 7; + if (!x) + return; + } +} + +inline void writeVarUInt(UInt64 x, std::ostream & ostr) +{ + if (x > VAR_UINT_MAX) [[unlikely]] + throwValueTooLargeForVarIntEncoding(x); + + for (size_t i = 0; i < 9; ++i) + { + uint8_t byte = x & 0x7F; + if (x > 0x7F) + byte |= 0x80; + + ostr.put(byte); + + x >>= 7; + if (!x) + return; + } +} + +inline char * writeVarUInt(UInt64 x, char * ostr) +{ + if (x > VAR_UINT_MAX) [[unlikely]] + throwValueTooLargeForVarIntEncoding(x); + + for (size_t i = 0; i < 9; ++i) + { + uint8_t byte = x & 0x7F; + if (x > 0x7F) + byte |= 0x80; + + *ostr = byte; + ++ostr; + + x >>= 7; + if (!x) + return ostr; + } + + return ostr; +} + template inline void writeVarInt(Int64 x, Out & ostr) { @@ -41,8 +94,71 @@ inline char * writeVarInt(Int64 x, char * ostr) return writeVarUInt(static_cast((x << 1) ^ (x >> 63)), ostr); } +namespace impl +{ + +template +inline void readVarUInt(UInt64 & x, ReadBuffer & istr) +{ + x = 0; + for (size_t i = 0; i < 9; ++i) + { + if constexpr (!fast) + if (istr.eof()) [[unlikely]] + throwReadAfterEOF(); + + UInt64 byte = *istr.position(); + ++istr.position(); + x |= (byte & 0x7F) << (7 * i); + + if (!(byte & 0x80)) + return; + } +} + +} + +inline void readVarUInt(UInt64 & x, ReadBuffer & istr) +{ + if (istr.buffer().end() - istr.position() >= 9) + return impl::readVarUInt(x, istr); + return impl::readVarUInt(x, istr); +} + +inline void readVarUInt(UInt64 & x, std::istream & istr) +{ + x = 0; + for (size_t i = 0; i < 9; ++i) + { + UInt64 byte = istr.get(); + x |= (byte & 0x7F) << (7 * i); + + if (!(byte & 0x80)) + return; + } +} + +inline const char * readVarUInt(UInt64 & x, const char * istr, size_t size) +{ + const char * end = istr + size; + + x = 0; 
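/// (Editor's note: illustrative only.) A worked trace of the base-128 decode loop
/// below, for the byte sequence 0xAC 0x02:
///     i = 0: byte = 0xAC -> x |= 0x2C << 0   (continuation bit 0x80 set, keep reading)
///     i = 1: byte = 0x02 -> x |= 0x02 << 7   -> x = 0x12C = 300, top bit clear, stop
/// writeVarUInt above is the mirror image: emit 7 bits per byte, low bits first,
/// setting 0x80 on every byte except the last.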
+ for (size_t i = 0; i < 9; ++i) + { + if (istr == end) [[unlikely]] + throwReadAfterEOF(); + + UInt64 byte = *istr; + ++istr; + x |= (byte & 0x7F) << (7 * i); + + if (!(byte & 0x80)) + return istr; + } + + return istr; +} -/// Read Int64, written in variable length format (base128) template inline void readVarInt(Int64 & x, In & istr) { @@ -57,9 +173,6 @@ inline const char * readVarInt(Int64 & x, const char * istr, size_t size) return res; } - -/// For [U]Int32, [U]Int16, size_t. - inline void readVarUInt(UInt32 & x, ReadBuffer & istr) { UInt64 tmp; @@ -97,137 +210,6 @@ inline void readVarUInt(T & x, ReadBuffer & istr) x = tmp; } -template -inline void readVarUIntImpl(UInt64 & x, ReadBuffer & istr) -{ - x = 0; - for (size_t i = 0; i < 9; ++i) - { - if constexpr (!fast) - if (istr.eof()) [[unlikely]] - throwReadAfterEOF(); - - UInt64 byte = *istr.position(); - ++istr.position(); - x |= (byte & 0x7F) << (7 * i); - - if (!(byte & 0x80)) - return; - } -} - -inline void readVarUInt(UInt64 & x, ReadBuffer & istr) -{ - if (istr.buffer().end() - istr.position() >= 9) - return readVarUIntImpl(x, istr); - return readVarUIntImpl(x, istr); -} - - -inline void readVarUInt(UInt64 & x, std::istream & istr) -{ - x = 0; - for (size_t i = 0; i < 9; ++i) - { - UInt64 byte = istr.get(); - x |= (byte & 0x7F) << (7 * i); - - if (!(byte & 0x80)) - return; - } -} - -inline const char * readVarUInt(UInt64 & x, const char * istr, size_t size) -{ - const char * end = istr + size; - - x = 0; - for (size_t i = 0; i < 9; ++i) - { - if (istr == end) [[unlikely]] - throwReadAfterEOF(); - - UInt64 byte = *istr; - ++istr; - x |= (byte & 0x7F) << (7 * i); - - if (!(byte & 0x80)) - return istr; - } - - return istr; -} - -/// NOTE: Due to historical reasons, only values up to 1<<63-1 can be safely encoded/decoded (bigger values are not idempotent under -/// encoding/decoding). This cannot be changed without breaking backward compatibility (some drivers, e.g. clickhouse-rs (Rust), have the -/// same limitation, others support the full 1<<64 range, e.g. clickhouse-driver (Python)) -constexpr UInt64 VAR_UINT_MAX = (1ULL<<63) - 1; - -inline void writeVarUInt(UInt64 x, WriteBuffer & ostr) -{ - if (x > VAR_UINT_MAX) [[unlikely]] - throwValueTooLargeForVarIntEncoding(x); - - for (size_t i = 0; i < 9; ++i) - { - uint8_t byte = x & 0x7F; - if (x > 0x7F) - byte |= 0x80; - - ostr.nextIfAtEnd(); - *ostr.position() = byte; - ++ostr.position(); - - x >>= 7; - if (!x) - return; - } -} - - -inline void writeVarUInt(UInt64 x, std::ostream & ostr) -{ - if (x > VAR_UINT_MAX) [[unlikely]] - throwValueTooLargeForVarIntEncoding(x); - - for (size_t i = 0; i < 9; ++i) - { - uint8_t byte = x & 0x7F; - if (x > 0x7F) - byte |= 0x80; - - ostr.put(byte); - - x >>= 7; - if (!x) - return; - } -} - - -inline char * writeVarUInt(UInt64 x, char * ostr) -{ - if (x > VAR_UINT_MAX) [[unlikely]] - throwValueTooLargeForVarIntEncoding(x); - - for (size_t i = 0; i < 9; ++i) - { - uint8_t byte = x & 0x7F; - if (x > 0x7F) - byte |= 0x80; - - *ostr = byte; - ++ostr; - - x >>= 7; - if (!x) - return ostr; - } - - return ostr; -} - - inline size_t getLengthOfVarUInt(UInt64 x) { return x < (1ULL << 7) ? 
From 3f744c1e14ba7350c2dab4a8ccf145c26762f0c3 Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Thu, 6 Jul 2023 14:47:40 +0000
Subject: [PATCH 1290/1997] Cosmetics: rename template parameter

---
 src/IO/VarInt.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/IO/VarInt.h b/src/IO/VarInt.h
index a88347d68eb..9099b5e7f6a 100644
--- a/src/IO/VarInt.h
+++ b/src/IO/VarInt.h
@@ -97,13 +97,13 @@ inline char * writeVarInt(Int64 x, char * ostr)
 namespace impl
 {

-template <bool fast>
+template <bool check_eof>
 inline void readVarUInt(UInt64 & x, ReadBuffer & istr)
 {
     x = 0;
     for (size_t i = 0; i < 9; ++i)
     {
-        if constexpr (!fast)
+        if constexpr (check_eof)
             if (istr.eof()) [[unlikely]]
                 throwReadAfterEOF();

@@ -121,8 +121,8 @@ inline void readVarUInt(UInt64 & x, ReadBuffer & istr)
 inline void readVarUInt(UInt64 & x, ReadBuffer & istr)
 {
     if (istr.buffer().end() - istr.position() >= 9)
-        return impl::readVarUInt<true>(x, istr);
-    return impl::readVarUInt<false>(x, istr);
+        return impl::readVarUInt<false>(x, istr);
+    return impl::readVarUInt<true>(x, istr);
 }

 inline void readVarUInt(UInt64 & x, std::istream & istr)

From 9a295eca46fea2c88d1c1767fc4625b31c999572 Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Thu, 6 Jul 2023 14:28:50 +0000
Subject: [PATCH 1291/1997] Incorporate review feedback

---
 docs/en/sql-reference/statements/show.md | 24 ++---
 .../InterpreterShowIndexesQuery.cpp | 4 +-
 .../0_stateless/02724_show_indexes.reference | 88 +++++++++----------
 3 files changed, 58 insertions(+), 58 deletions(-)

diff --git a/docs/en/sql-reference/statements/show.md b/docs/en/sql-reference/statements/show.md
index 1a1e4dbd2c7..1c399d2072b 100644
--- a/docs/en/sql-reference/statements/show.md
+++ b/docs/en/sql-reference/statements/show.md
@@ -289,18 +289,18 @@ The statement produces a result table with the following structure:
 - table - The name of the table. (String)
 - non_unique - Always `1` as ClickHouse does not support uniqueness constraints. (UInt8)
 - key_name - The name of the index, `PRIMARY` if the index is a primary key index. (String)
-- column_name - For a primary key index, the name of the column. For a data skipping index: '' (empty string), see field "expression". (String)
 - seq_in_index - For a primary key index, the position of the column starting from `1`. For a data skipping index: always `1`. (UInt8)
+- column_name - For a primary key index, the name of the column. For a data skipping index: `''` (empty string), see field "expression". (String)
 - collation - The sorting of the column in the index: `A` if ascending, `D` if descending, `NULL` if unsorted. (Nullable(String))
 - cardinality - An estimation of the index cardinality (number of unique values in the index). Currently always 0. (UInt64)
 - sub_part - Always `NULL` because ClickHouse does not support index prefixes like MySQL. (Nullable(String))
 - packed - Always `NULL` because ClickHouse does not support packed indexes (like MySQL). (Nullable(String))
 - null - Currently unused
 - index_type - The index type, e.g. `PRIMARY`, `MINMAX`, `BLOOM_FILTER` etc. (String)
-- comment - Additional information about the index, currently always `` (empty string). (String)
-- index_comment - `` (empty string) because indexes in ClickHouse cannot have a `COMMENT` field (like in MySQL). (String)
+- comment - Additional information about the index, currently always `''` (empty string). (String)
+- index_comment - `''` (empty string) because indexes in ClickHouse cannot have a `COMMENT` field (like in MySQL).
(String) - visible - If the index is visible to the optimizer, always `YES`. (String) -- expression - For a data skipping index, the index expression. For a primary key index: '' (empty string). (String) +- expression - For a data skipping index, the index expression. For a primary key index: `''` (empty string). (String) **Examples** @@ -313,14 +313,14 @@ SHOW INDEX FROM 'tbl' Result: ``` text -┌─table─┬─non_unique─┬─key_name─┬─column_name─┬─seq_in_index─┬─collation─┬─cardinality─┬─sub_part─┬─packed─┬─null─┬─index_type───┬─comment─┬─index_comment─┬─visible─┬─expression─┐ -│ tbl │ 1 │ blf_idx │ 1 │ 1 │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ BLOOM_FILTER │ │ │ YES │ d, b │ -│ tbl │ 1 │ mm1_idx │ 1 │ 1 │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ MINMAX │ │ │ YES │ a, c, d │ -│ tbl │ 1 │ mm2_idx │ 1 │ 1 │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ MINMAX │ │ │ YES │ c, d, e │ -│ tbl │ 1 │ PRIMARY │ c │ 1 │ A │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ PRIMARY │ │ │ YES │ │ -│ tbl │ 1 │ PRIMARY │ a │ 2 │ A │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ PRIMARY │ │ │ YES │ │ -│ tbl │ 1 │ set_idx │ 1 │ 1 │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ SET │ │ │ YES │ e │ -└───────┴────────────┴──────────┴─────────────┴──────────────┴───────────┴─────────────┴──────────┴────────┴──────┴──────────────┴─────────┴───────────────┴─────────┴────────────┘ +┌─table─┬─non_unique─┬─key_name─┬─seq_in_index─┬─column_name─┬─collation─┬─cardinality─┬─sub_part─┬─packed─┬─null─┬─index_type───┬─comment─┬─index_comment─┬─visible─┬─expression─┐ +│ tbl │ 1 │ blf_idx │ 1 │ 1 │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ BLOOM_FILTER │ │ │ YES │ d, b │ +│ tbl │ 1 │ mm1_idx │ 1 │ 1 │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ MINMAX │ │ │ YES │ a, c, d │ +│ tbl │ 1 │ mm2_idx │ 1 │ 1 │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ MINMAX │ │ │ YES │ c, d, e │ +│ tbl │ 1 │ PRIMARY │ 1 │ c │ A │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ PRIMARY │ │ │ YES │ │ +│ tbl │ 1 │ PRIMARY │ 2 │ a │ A │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ PRIMARY │ │ │ YES │ │ +│ tbl │ 1 │ set_idx │ 1 │ 1 │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ SET │ │ │ YES │ e │ +└───────┴────────────┴──────────┴──────────────┴─────────────┴───────────┴─────────────┴──────────┴────────┴──────┴──────────────┴─────────┴───────────────┴─────────┴────────────┘ ``` **See also** diff --git a/src/Interpreters/InterpreterShowIndexesQuery.cpp b/src/Interpreters/InterpreterShowIndexesQuery.cpp index 35f32a79310..149420006fb 100644 --- a/src/Interpreters/InterpreterShowIndexesQuery.cpp +++ b/src/Interpreters/InterpreterShowIndexesQuery.cpp @@ -42,8 +42,8 @@ FROM ( name AS table, 1 AS non_unique, 'PRIMARY' AS key_name, - arrayJoin(splitByString(', ', primary_key)) AS column_name, row_number() over (order by column_name) AS seq_in_index, + arrayJoin(splitByString(', ', primary_key)) AS column_name, 'A' AS collation, 0 AS cardinality, NULL AS sub_part, @@ -63,8 +63,8 @@ FROM ( table AS table, 1 AS non_unique, name AS key_name, - '' AS column_name, 1 AS seq_in_index, + '' AS column_name, NULL AS collation, 0 AS cardinality, NULL AS sub_part, diff --git a/tests/queries/0_stateless/02724_show_indexes.reference b/tests/queries/0_stateless/02724_show_indexes.reference index cee0598d625..e41f2521f5c 100644 --- a/tests/queries/0_stateless/02724_show_indexes.reference +++ b/tests/queries/0_stateless/02724_show_indexes.reference @@ -1,51 +1,51 @@ --- Aliases of SHOW INDEX -tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b -tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d -tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e -tbl 1 PRIMARY a 1 A 0 \N \N \N PRIMARY YES -tbl 1 PRIMARY c 2 A 0 \N \N \N PRIMARY YES -tbl 1 set_idx 1 \N 0 \N 
\N \N SET YES e -tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b -tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d -tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e -tbl 1 PRIMARY a 1 A 0 \N \N \N PRIMARY YES -tbl 1 PRIMARY c 2 A 0 \N \N \N PRIMARY YES -tbl 1 set_idx 1 \N 0 \N \N \N SET YES e -tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b -tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d -tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e -tbl 1 PRIMARY a 1 A 0 \N \N \N PRIMARY YES -tbl 1 PRIMARY c 2 A 0 \N \N \N PRIMARY YES -tbl 1 set_idx 1 \N 0 \N \N \N SET YES e -tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b -tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d -tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e -tbl 1 PRIMARY a 1 A 0 \N \N \N PRIMARY YES -tbl 1 PRIMARY c 2 A 0 \N \N \N PRIMARY YES -tbl 1 set_idx 1 \N 0 \N \N \N SET YES e +tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b +tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d +tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e +tbl 1 PRIMARY 1 a A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 2 c A 0 \N \N \N PRIMARY YES +tbl 1 set_idx 1 \N 0 \N \N \N SET YES e +tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b +tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d +tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e +tbl 1 PRIMARY 1 a A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 2 c A 0 \N \N \N PRIMARY YES +tbl 1 set_idx 1 \N 0 \N \N \N SET YES e +tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b +tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d +tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e +tbl 1 PRIMARY 1 a A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 2 c A 0 \N \N \N PRIMARY YES +tbl 1 set_idx 1 \N 0 \N \N \N SET YES e +tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b +tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d +tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e +tbl 1 PRIMARY 1 a A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 2 c A 0 \N \N \N PRIMARY YES +tbl 1 set_idx 1 \N 0 \N \N \N SET YES e --- EXTENDED -tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b -tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d -tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e -tbl 1 PRIMARY a 1 A 0 \N \N \N PRIMARY YES -tbl 1 PRIMARY c 2 A 0 \N \N \N PRIMARY YES -tbl 1 set_idx 1 \N 0 \N \N \N SET YES e +tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b +tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d +tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e +tbl 1 PRIMARY 1 a A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 2 c A 0 \N \N \N PRIMARY YES +tbl 1 set_idx 1 \N 0 \N \N \N SET YES e --- WHERE --- Check with weird table names -$4@^7 1 PRIMARY c 1 A 0 \N \N \N PRIMARY YES -NULL 1 PRIMARY c 1 A 0 \N \N \N PRIMARY YES -\' 1 PRIMARY c 1 A 0 \N \N \N PRIMARY YES -\' 1 PRIMARY c 1 A 0 \N \N \N PRIMARY YES +$4@^7 1 PRIMARY 1 c A 0 \N \N \N PRIMARY YES +NULL 1 PRIMARY 1 c A 0 \N \N \N PRIMARY YES +\' 1 PRIMARY 1 c A 0 \N \N \N PRIMARY YES +\' 1 PRIMARY 1 c A 0 \N \N \N PRIMARY YES --- Original table -tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b -tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d -tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e -tbl 1 PRIMARY a 1 A 0 \N \N \N PRIMARY YES -tbl 1 PRIMARY c 2 A 0 \N \N \N PRIMARY YES -tbl 1 set_idx 1 \N 0 \N \N \N SET YES e +tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b +tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d +tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e +tbl 1 PRIMARY 1 a A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 2 c A 0 \N \N \N PRIMARY YES +tbl 1 set_idx 1 \N 0 \N \N \N SET YES e --- 
Equally named table in other database -tbl 1 mmi_idx 1 \N 0 \N \N \N MINMAX YES b -tbl 1 PRIMARY a 1 A 0 \N \N \N PRIMARY YES +tbl 1 mmi_idx 1 \N 0 \N \N \N MINMAX YES b +tbl 1 PRIMARY 1 a A 0 \N \N \N PRIMARY YES --- Short form -tbl 1 mmi_idx 1 \N 0 \N \N \N MINMAX YES b -tbl 1 PRIMARY a 1 A 0 \N \N \N PRIMARY YES +tbl 1 mmi_idx 1 \N 0 \N \N \N MINMAX YES b +tbl 1 PRIMARY 1 a A 0 \N \N \N PRIMARY YES From 9ae0dc730c586a37f8fdbbd880267ec11c2c8e51 Mon Sep 17 00:00:00 2001 From: Alexander Sapin Date: Thu, 6 Jul 2023 17:15:26 +0200 Subject: [PATCH 1292/1997] Review fixes + enable in ci --- docker/test/upgrade/run.sh | 2 + .../ReplicatedMergeTreeRestartingThread.cpp | 7 +- .../ReplicatedMergeTreeRestartingThread.h | 1 + src/Storages/StorageReplicatedMergeTree.cpp | 99 +++++++++---------- src/Storages/StorageReplicatedMergeTree.h | 14 ++- ...le_wait_for_shutdown_replicated_tables.xml | 5 + tests/config/install.sh | 1 + 7 files changed, 66 insertions(+), 63 deletions(-) create mode 100644 tests/config/config.d/enable_wait_for_shutdown_replicated_tables.xml diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index 82a88272df9..07e6e7dd0ec 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -61,6 +61,7 @@ configure # it contains some new settings, but we can safely remove it rm /etc/clickhouse-server/config.d/merge_tree.xml +rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml rm /etc/clickhouse-server/users.d/nonconst_timezone.xml start @@ -90,6 +91,7 @@ configure # it contains some new settings, but we can safely remove it rm /etc/clickhouse-server/config.d/merge_tree.xml +rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml rm /etc/clickhouse-server/users.d/nonconst_timezone.xml start diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp index 0e381654db0..e43cc879e93 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp @@ -329,8 +329,8 @@ void ReplicatedMergeTreeRestartingThread::activateReplica() void ReplicatedMergeTreeRestartingThread::partialShutdown(bool part_of_full_shutdown) { - setReadonly(part_of_full_shutdown); - storage.partialShutdown(part_of_full_shutdown); + setReadonly(/* on_shutdown = */ part_of_full_shutdown); + storage.partialShutdown(); } @@ -341,8 +341,7 @@ void ReplicatedMergeTreeRestartingThread::shutdown(bool part_of_full_shutdown) task->deactivate(); LOG_TRACE(log, "Restarting thread finished"); - /// Stop other tasks. 
-    partialShutdown(part_of_full_shutdown);
+    setReadonly(part_of_full_shutdown);
 }

 void ReplicatedMergeTreeRestartingThread::setReadonly(bool on_shutdown)
diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h
index 9e99baab4c3..01a877a07e5 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h
+++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h
@@ -36,6 +36,7 @@ public:
     void shutdown(bool part_of_full_shutdown);

     void run();
+
 private:
     StorageReplicatedMergeTree & storage;
     String log_name;
diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index 114465df496..88bd788b9ef 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -3942,17 +3942,26 @@ void StorageReplicatedMergeTree::addLastSentPart(const MergeTreePartInfo & info)
     last_sent_parts_cv.notify_all();
 }

-void StorageReplicatedMergeTree::waitForUniquePartsToBeFetchedByOtherReplicas(size_t wait_ms)
+void StorageReplicatedMergeTree::waitForUniquePartsToBeFetchedByOtherReplicas(StorageReplicatedMergeTree::ShutdownDeadline shutdown_deadline_)
 {
     if (!shutdown_called.load())
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Called waitForUniquePartsToBeFetchedByOtherReplicas before shutdown, it's a bug");

+    auto settings_ptr = getSettings();
+
+    auto wait_ms = settings_ptr->wait_for_unique_parts_send_before_shutdown_ms.totalMilliseconds();
     if (wait_ms == 0)
     {
         LOG_INFO(log, "Will not wait for unique parts to be fetched by other replicas because wait time is zero");
         return;
     }

+    if (shutdown_deadline_ <= std::chrono::system_clock::now())
+    {
+        LOG_INFO(log, "Will not wait for unique parts to be fetched by other replicas because shutdown_deadline already passed");
+        return;
+    }
+
     auto zookeeper = getZooKeeperIfTableShutDown();

     auto unique_parts_set = findReplicaUniqueParts(replica_name, zookeeper_path, format_version, zookeeper, log);
@@ -3968,7 +3977,6 @@ void StorageReplicatedMergeTree::waitForUniquePartsToBeFetchedByOtherReplicas(si

     auto wait_predicate = [&] () -> bool
     {
-        bool all_fetched = true;
         for (auto it = unique_parts_set.begin(); it != unique_parts_set.end();)
         {
             const auto & part = *it;
@@ -3985,22 +3993,19 @@ void StorageReplicatedMergeTree::waitForUniquePartsToBeFetchedByOtherReplicas(si
                 }
             }
             if (!found)
-            {
-                all_fetched = false;
                 break;
-            }
         }
-        return all_fetched;
+        return unique_parts_set.empty();
     };

     std::unique_lock lock(last_sent_parts_mutex);
-    if (!last_sent_parts_cv.wait_for(lock, std::chrono::milliseconds(wait_ms), wait_predicate))
+    if (!last_sent_parts_cv.wait_until(lock, shutdown_deadline_, wait_predicate))
         LOG_WARNING(log, "Failed to wait for unique parts to be fetched in {} ms, {} parts can be left on this replica", wait_ms, unique_parts_set.size());
     else
         LOG_INFO(log, "Successfully waited all the parts");
 }

-std::vector<MergeTreePartInfo> StorageReplicatedMergeTree::findReplicaUniqueParts(const String & replica_name_, const String & zookeeper_path_, MergeTreeDataFormatVersion format_version_, zkutil::ZooKeeper::Ptr zookeeper_, Poco::Logger * log_)
+std::set<MergeTreePartInfo> StorageReplicatedMergeTree::findReplicaUniqueParts(const String & replica_name_, const String & zookeeper_path_, MergeTreeDataFormatVersion format_version_, zkutil::ZooKeeper::Ptr zookeeper_, Poco::Logger * log_)
 {
     if (!zookeeper_->exists(fs::path(zookeeper_path_) / "replicas" / replica_name_ / "is_active"))
     {
@@ -4027,26 +4032,25 @@ std::vector<MergeTreePartInfo> StorageReplicatedMergeTree::findReplicaUniqueParts
         }
         else
         {
-            LOG_TRACE(log_, "Fetching parts for replica {}", replica);
-            data_parts_on_replicas.emplace_back(format_version_);
-            for (const auto & part : parts)
-            {
-                if (data_parts_on_replicas.back().getContainingPart(part).empty())
-                    data_parts_on_replicas.back().add(part);
-            }
+            LOG_TRACE(log_, "Fetching parts for replica {}: [{}]", replica, fmt::join(parts, ", "));
+            data_parts_on_replicas.emplace_back(format_version_, parts);
         }
     }

-    std::vector<MergeTreePartInfo> our_unique_parts;
+    if (data_parts_on_replicas.empty())
+    {
+        LOG_TRACE(log_, "Has no active replicas, will not try to wait for fetch");
+        return {};
+    }
+
+    std::set<MergeTreePartInfo> our_unique_parts;
     for (const auto & part : our_parts)
     {
-        LOG_TRACE(log_, "Looking for part {}", part);
         bool found = false;
         for (const auto & active_parts_set : data_parts_on_replicas)
         {
             if (!active_parts_set.getContainingPart(part).empty())
             {
-                LOG_TRACE(log_, "Part {} found", part);
                 found = true;
                 break;
             }
@@ -4054,8 +4058,8 @@ std::vector<MergeTreePartInfo> StorageReplicatedMergeTree::findReplicaUniqueParts
         if (!found)
         {
-            LOG_TRACE(log_, "Part not {} found", part);
-            our_unique_parts.emplace_back(MergeTreePartInfo::fromPartName(part, format_version_));
+            LOG_TRACE(log_, "Part {} not found on other replicas", part);
+            our_unique_parts.emplace(MergeTreePartInfo::fromPartName(part, format_version_));
         }
     }

@@ -4836,9 +4840,7 @@ void StorageReplicatedMergeTree::flushAndPrepareForShutdown()
     if (shutdown_prepared_called.exchange(true))
         return;

-    session_expired_callback_handler.reset();
-    stopOutdatedDataPartsLoadingTask();
-
+    auto settings_ptr = getSettings();
     /// Cancel fetches, merges and mutations to force the queue_task to finish ASAP.
     fetcher.blocker.cancelForever();
     merger_mutator.merges_blocker.cancelForever();
@@ -4850,39 +4852,17 @@ void StorageReplicatedMergeTree::flushAndPrepareForShutdown()
         attach_thread->shutdown();

     restarting_thread.shutdown(/* part_of_full_shutdown */true);
-    background_operations_assignee.finish();
-    part_moves_between_shards_orchestrator.shutdown();
-
-    {
-        auto lock = queue.lockQueue();
-        /// Cancel logs pulling after background task were cancelled. It's still
-        /// required because we can trigger pullLogsToQueue during manual OPTIMIZE,
-        /// MUTATE, etc. query.
-        queue.pull_log_blocker.cancelForever();
-    }
-    background_moves_assignee.finish();
-
+    shutdown_deadline.emplace(std::chrono::system_clock::now() + std::chrono::milliseconds(settings_ptr->wait_for_unique_parts_send_before_shutdown_ms.totalMilliseconds()));
 }

-void StorageReplicatedMergeTree::partialShutdown(bool part_of_full_shutdown)
+void StorageReplicatedMergeTree::partialShutdown()
 {
     ProfileEvents::increment(ProfileEvents::ReplicaPartialShutdown);

     partial_shutdown_called = true;
     partial_shutdown_event.set();
     queue.notifySubscribersOnPartialShutdown();
-    if (!part_of_full_shutdown)
-    {
-        /// If we are going to completely shutdown table we allow other
-        /// replicas to fetch parts which are unique for our replica.
-        ///
-        /// Replicas try to fetch part only in case the source replica is active,
-        /// so don't reset handler here.
- LOG_DEBUG(log, "Reset active node, replica will be inactive"); - replica_is_active_node = nullptr; - } - else - LOG_DEBUG(log, "Will not reset active node, it will be reset completely during full shutdown"); + replica_is_active_node = nullptr; LOG_TRACE(log, "Waiting for threads to finish"); merge_selecting_task->deactivate(); @@ -4914,10 +4894,27 @@ void StorageReplicatedMergeTree::shutdown() flushAndPrepareForShutdown(); auto settings_ptr = getSettings(); - LOG_DEBUG(log, "Data parts exchange still exists {}", data_parts_exchange_endpoint != nullptr); - waitForUniquePartsToBeFetchedByOtherReplicas(settings_ptr->wait_for_unique_parts_send_before_shutdown_ms.totalMilliseconds()); + if (!shutdown_deadline.has_value()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Shutdown deadline is not set in shutdown"); - replica_is_active_node = nullptr; + waitForUniquePartsToBeFetchedByOtherReplicas(*shutdown_deadline); + + session_expired_callback_handler.reset(); + stopOutdatedDataPartsLoadingTask(); + + partialShutdown(); + + part_moves_between_shards_orchestrator.shutdown(); + background_operations_assignee.finish(); + + { + auto lock = queue.lockQueue(); + /// Cancel logs pulling after background task were cancelled. It's still + /// required because we can trigger pullLogsToQueue during manual OPTIMIZE, + /// MUTATE, etc. query. + queue.pull_log_blocker.cancelForever(); + } + background_moves_assignee.finish(); auto data_parts_exchange_ptr = std::atomic_exchange(&data_parts_exchange_endpoint, InterserverIOEndpointPtr{}); if (data_parts_exchange_ptr) diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 656e8df6ccb..811a8524064 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -118,7 +118,7 @@ public: /// Partial shutdown called if we loose connection to zookeeper. /// Table can also recover after partial shutdown and continue /// to work. This method can be called regularly. - void partialShutdown(bool part_of_full_shutdown); + void partialShutdown(); /// These two methods are called during final table shutdown (DROP/DETACH/overall server shutdown). /// The shutdown process is split into two methods to make it more soft and fast. 
In database shutdown()
@@ -368,15 +368,11 @@ public:
     ReplicatedMergeTreeQuorumAddedParts::PartitionIdToMaxBlock getMaxAddedBlocks() const;

     void addLastSentPart(const MergeTreePartInfo & info);
-    std::deque<MergeTreePartInfo> getLastSentParts() const
-    {
-        std::lock_guard lock(last_sent_parts_mutex);
-        return last_sent_parts;
-    }

     /// Wait required amount of milliseconds to give other replicas a chance to
     /// download unique parts from our replica
-    void waitForUniquePartsToBeFetchedByOtherReplicas(size_t wait_ms);
+    using ShutdownDeadline = std::chrono::time_point<std::chrono::system_clock>;
+    void waitForUniquePartsToBeFetchedByOtherReplicas(ShutdownDeadline shutdown_deadline);

 private:
     std::atomic_bool are_restoring_replica {false};
@@ -483,6 +479,8 @@ private:
     std::atomic<bool> shutdown_called {false};
     std::atomic<bool> shutdown_prepared_called {false};

+    std::optional<ShutdownDeadline> shutdown_deadline;
+
     mutable std::mutex last_sent_parts_mutex;
     std::condition_variable last_sent_parts_cv;

@@ -740,7 +738,7 @@ private:
      */
     String findReplicaHavingCoveringPart(LogEntry & entry, bool active);
     String findReplicaHavingCoveringPart(const String & part_name, bool active, String & found_part_name);
-    static std::vector<MergeTreePartInfo> findReplicaUniqueParts(const String & replica_name_, const String & zookeeper_path_, MergeTreeDataFormatVersion format_version_, zkutil::ZooKeeper::Ptr zookeeper_, Poco::Logger * log_);
+    static std::set<MergeTreePartInfo> findReplicaUniqueParts(const String & replica_name_, const String & zookeeper_path_, MergeTreeDataFormatVersion format_version_, zkutil::ZooKeeper::Ptr zookeeper_, Poco::Logger * log_);

     /** Download the specified part from the specified replica.
       * If `to_detached`, the part is placed in the `detached` directory.
diff --git a/tests/config/config.d/enable_wait_for_shutdown_replicated_tables.xml b/tests/config/config.d/enable_wait_for_shutdown_replicated_tables.xml
new file mode 100644
index 00000000000..b23dbdc2607
--- /dev/null
+++ b/tests/config/config.d/enable_wait_for_shutdown_replicated_tables.xml
@@ -0,0 +1,5 @@
+<clickhouse>
+    <merge_tree>
+        <wait_for_unique_parts_send_before_shutdown_ms>1000</wait_for_unique_parts_send_before_shutdown_ms>
+    </merge_tree>
+</clickhouse>
diff --git a/tests/config/install.sh b/tests/config/install.sh
index 50f2627d37c..33d5c99202e 100755
--- a/tests/config/install.sh
+++ b/tests/config/install.sh
@@ -57,6 +57,7 @@ ln -sf $SRC_PATH/config.d/display_name.xml $DEST_SERVER_PATH/config.d/
 ln -sf $SRC_PATH/config.d/reverse_dns_query_function.xml $DEST_SERVER_PATH/config.d/
 ln -sf $SRC_PATH/config.d/compressed_marks_and_index.xml $DEST_SERVER_PATH/config.d/
 ln -sf $SRC_PATH/config.d/disable_s3_env_credentials.xml $DEST_SERVER_PATH/config.d/
+ln -sf $SRC_PATH/config.d/enable_wait_for_shutdown_replicated_tables.xml $DEST_SERVER_PATH/config.d/
 ln -sf $SRC_PATH/config.d/backups.xml $DEST_SERVER_PATH/config.d/

 # Not supported with fasttest.
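The interplay of `shutdown_deadline`, `last_sent_parts` and the condition variable in this patch is easier to see outside the diff. Below is a hedged, self-contained sketch of the waiting scheme only — `UniquePartsWaiter` is an invented name, part names are plain strings here, and the real code tracks `MergeTreePartInfo` objects and builds its unique-parts set from ZooKeeper — so treat it as a model, not the implementation.

```cpp
#include <chrono>
#include <condition_variable>
#include <mutex>
#include <set>
#include <string>

// Model of the shutdown wait: the replica blocks until every part that only it
// owns has been fetched by some other replica, or until the deadline passes.
struct UniquePartsWaiter
{
    std::mutex mutex;
    std::condition_variable cv;
    std::set<std::string> unique_parts;     // parts only this replica has
    std::set<std::string> last_sent_parts;  // parts other replicas fetched from us

    void onPartSent(const std::string & name)  // analogous to addLastSentPart()
    {
        {
            std::lock_guard lock(mutex);
            last_sent_parts.insert(name);
        }
        cv.notify_all();
    }

    // Returns true if every unique part was fetched before the deadline.
    bool waitUntil(std::chrono::system_clock::time_point deadline)
    {
        std::unique_lock lock(mutex);
        return cv.wait_until(lock, deadline, [&]
        {
            // drop everything that has been fetched in the meantime
            std::erase_if(unique_parts, [&](const std::string & p)
            {
                return last_sent_parts.contains(p);
            });
            return unique_parts.empty();  // success condition
        });
    }
};

int main()
{
    UniquePartsWaiter w;
    w.unique_parts = {"all_0_0_0", "all_1_1_0"};
    w.onPartSent("all_0_0_0");
    w.onPartSent("all_1_1_0");
    // The predicate is evaluated before blocking, so this returns immediately.
    return w.waitUntil(std::chrono::system_clock::now() + std::chrono::seconds(1)) ? 0 : 1;
}
```

A design note: computing one `shutdown_deadline` in `flushAndPrepareForShutdown()` and passing it to the wait keeps the total time budget fixed even when the two shutdown phases are separated in time, which is why the patch switches the condition variable from `wait_for` to `wait_until`.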
From da61a8c509e2d43275f0c8f06dfd065f2874f79f Mon Sep 17 00:00:00 2001
From: Alexander Sapin
Date: Thu, 6 Jul 2023 17:16:00 +0200
Subject: [PATCH 1293/1997] Fix

---
 .../config.d/enable_wait_for_shutdown_replicated_tables.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/config/config.d/enable_wait_for_shutdown_replicated_tables.xml b/tests/config/config.d/enable_wait_for_shutdown_replicated_tables.xml
index b23dbdc2607..504841296a8 100644
--- a/tests/config/config.d/enable_wait_for_shutdown_replicated_tables.xml
+++ b/tests/config/config.d/enable_wait_for_shutdown_replicated_tables.xml
@@ -1,5 +1,5 @@
 <clickhouse>
     <merge_tree>
-        <wait_for_unique_parts_send_before_shutdown_ms>1000</wait_for_unique_parts_send_before_shutdown_ms>
+        <wait_for_unique_parts_send_before_shutdown_ms>3000</wait_for_unique_parts_send_before_shutdown_ms>
     </merge_tree>
 </clickhouse>

From abf36065b7bbddeba2b80f76ad966a9167852089 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov
Date: Thu, 6 Jul 2023 17:24:06 +0200
Subject: [PATCH 1294/1997] fix

---
 .../ReplicatedMergeTreePartCheckThread.cpp | 89 ++++++++++---------
 .../ReplicatedMergeTreePartCheckThread.h | 4 +-
 src/Storages/StorageReplicatedMergeTree.cpp | 17 +++-
 .../test_attach_table_normalizer/__init__.py | 0
 .../configs/testkeeper.xml | 6 --
 .../test.py | 65 --------------
 .../02254_projection_broken_part.reference | 6 ++
 .../02254_projection_broken_part.sh | 44 +++++++++
 8 files changed, 115 insertions(+), 116 deletions(-)
 delete mode 100644 tests/integration/test_projection_report_broken_part/__init__.py
 delete mode 100644 tests/integration/test_projection_report_broken_part/configs/testkeeper.xml
 delete mode 100644 tests/integration/test_projection_report_broken_part/test.py
 create mode 100644 tests/queries/0_stateless/02254_projection_broken_part.reference
 create mode 100755 tests/queries/0_stateless/02254_projection_broken_part.sh

diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp
index 1cc3736bd2e..ffe3f883f80 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp
+++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp
@@ -63,6 +63,7 @@ void ReplicatedMergeTreePartCheckThread::enqueuePart(const String & name, time_t
     if (parts_set.contains(name))
         return;

+    LOG_TRACE(log, "Enqueueing {} for check after {}s", name, delay_to_check_seconds);
     parts_queue.emplace_back(name, time(nullptr) + delay_to_check_seconds);
     parts_set.insert(name);
     task->schedule();
@@ -423,7 +424,7 @@ ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const St
 }

-CheckResult ReplicatedMergeTreePartCheckThread::checkPartAndFix(const String & part_name)
+CheckResult ReplicatedMergeTreePartCheckThread::checkPartAndFix(const String & part_name, std::optional<time_t> * recheck_after)
 {
     LOG_INFO(log, "Checking part {}", part_name);
     ProfileEvents::increment(ProfileEvents::ReplicatedPartChecks);
@@ -438,7 +439,11 @@ CheckResult ReplicatedMergeTreePartCheckThread::checkPartAndFix(const String & p
             break;

         case ReplicatedCheckResult::RecheckLater:
-            enqueuePart(part_name, result.recheck_after);
+            /// NOTE We cannot enqueue it from the check thread itself
+            if (recheck_after)
+                *recheck_after = result.recheck_after;
+            else
+                enqueuePart(part_name, result.recheck_after);
             break;

         case ReplicatedCheckResult::DetachUnexpected:
@@ -471,10 +476,22 @@ CheckResult ReplicatedMergeTreePartCheckThread::checkPartAndFix(const String & p

             /// Part is not in ZooKeeper and not on disk (so there's nothing to detach or remove from ZooKeeper).
             /// Probably we cannot execute some entry from the replication queue (so don't need to enqueue another one).
-            /// Either all replicas having the part are not active, or the part is lost forever.
+            /// Either all replicas having the part are not active...
             bool found_something = searchForMissingPartOnOtherReplicas(part_name);
-            if (!found_something)
-                onPartIsLostForever(part_name);
+            if (found_something)
+                break;
+
+            /// ... or the part is lost forever
+            bool handled_lost_part = onPartIsLostForever(part_name);
+            if (handled_lost_part)
+                break;
+
+            /// We failed to create empty part, need retry
+            constexpr time_t retry_after_seconds = 30;
+            if (recheck_after)
+                *recheck_after = retry_after_seconds;
+            else
+                enqueuePart(part_name, retry_after_seconds);

             break;
         }
@@ -483,7 +500,7 @@ CheckResult ReplicatedMergeTreePartCheckThread::checkPartAndFix(const String & p
     return result.status;
 }

-void ReplicatedMergeTreePartCheckThread::onPartIsLostForever(const String & part_name)
+bool ReplicatedMergeTreePartCheckThread::onPartIsLostForever(const String & part_name)
 {
     auto lost_part_info = MergeTreePartInfo::fromPartName(part_name, storage.format_version);
     if (lost_part_info.level != 0 || lost_part_info.mutation != 0)
@@ -499,7 +516,7 @@ void ReplicatedMergeTreePartCheckThread::onPartIsLostForever(const String & part
             for (const String & source_part_name : source_parts)
                 enqueuePart(source_part_name);

-            return;
+            return true;
         }
     }

@@ -512,13 +529,11 @@ void ReplicatedMergeTreePartCheckThread::onPartIsLostForever(const String & part
           */
         LOG_ERROR(log, "Part {} is lost forever.", part_name);
         ProfileEvents::increment(ProfileEvents::ReplicatedDataLoss);
+        return true;
     }
-    else
-    {
-        LOG_WARNING(log, "Cannot create empty part {} instead of lost. Will retry later", part_name);
-        constexpr time_t retry_after_seconds = 30;
-        enqueuePart(part_name, retry_after_seconds);
-    }
+
+    LOG_WARNING(log, "Cannot create empty part {} instead of lost. Will retry later", part_name);
+    return false;
 }

@@ -533,42 +548,29 @@ void ReplicatedMergeTreePartCheckThread::run()

     /// Take part from the queue for verification.
     PartsToCheckQueue::iterator selected = parts_queue.end();    /// end from std::list is not get invalidated
-    time_t min_check_time = std::numeric_limits<time_t>::max();

     {
         std::lock_guard lock(parts_mutex);

-        if (parts_queue.empty())
+        if (parts_queue.empty() && !parts_set.empty())
         {
-            if (!parts_set.empty())
-            {
-                parts_set.clear();
-                throw Exception(ErrorCodes::LOGICAL_ERROR, "Non-empty parts_set with empty parts_queue. This is a bug.");
-            }
+            parts_set.clear();
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "Non-empty parts_set with empty parts_queue. This is a bug.");
         }
-        else
-        {
-            for (auto it = parts_queue.begin(); it != parts_queue.end(); ++it)
-            {
-                if (it->second <= current_time)
-                {
-                    selected = it;
-                    break;
-                }

-                if (it->second < min_check_time)
-                {
-                    min_check_time = it->second;
-                    selected = it;
-                }
-            }
-        }
+        selected = std::find_if(parts_queue.begin(), parts_queue.end(), [current_time](const auto & elem)
+        {
+            return elem.second <= current_time;
+        });
+        if (selected == parts_queue.end())
+            return;
+
+        /// Move selected part to the end of the queue
+        parts_queue.splice(parts_queue.end(), parts_queue, selected);
     }

-    if (selected == parts_queue.end())
-        return;
-
-    checkPartAndFix(selected->first);
+    std::optional<time_t> recheck_after;
+    checkPartAndFix(selected->first, &recheck_after);

     if (need_stop)
         return;
@@ -581,6 +583,11 @@ void ReplicatedMergeTreePartCheckThread::run()
        {
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Someone erased checking part from parts_queue. This is a bug.");
This is a bug."); } + else if (recheck_after.has_value()) + { + LOG_TRACE(log, "Will recheck part {} after after {}s", selected->first, *recheck_after); + selected->second = time(nullptr) + *recheck_after; + } else { parts_set.erase(selected->first); @@ -596,7 +603,7 @@ void ReplicatedMergeTreePartCheckThread::run() { tryLogCurrentException(log, __PRETTY_FUNCTION__); - if (e.code == Coordination::Error::ZSESSIONEXPIRED) + if (Coordination::isHardwareError(e.code)) return; task->scheduleAfter(PART_CHECK_ERROR_SLEEP_MS); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h index 0a8fbc75c05..fc76cbad4ed 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h @@ -65,7 +65,7 @@ public: size_t size() const; /// Check part by name - CheckResult checkPartAndFix(const String & part_name); + CheckResult checkPartAndFix(const String & part_name, std::optional * recheck_after = nullptr); ReplicatedCheckResult checkPartImpl(const String & part_name); @@ -77,7 +77,7 @@ public: private: void run(); - void onPartIsLostForever(const String & part_name); + bool onPartIsLostForever(const String & part_name); std::pair findLocalPart(const String & part_name); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index ff319e47946..e8176ac1d5f 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -3366,6 +3366,10 @@ bool StorageReplicatedMergeTree::canExecuteFetch(const ReplicatedMergeTreeLogEnt { disable_reason = fmt::format("Not executing fetch of part {} because we still have broken part with that name. 
+
+            constexpr time_t min_interval_to_wakeup_cleanup_s = 30;
+            if (entry.last_postpone_time + min_interval_to_wakeup_cleanup_s < time(nullptr))
+                const_cast<StorageReplicatedMergeTree *>(this)->cleanup_thread.wakeup();
             return false;
         }
     }
@@ -3753,11 +3757,13 @@ void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_n
     DataPartPtr broken_part;
     auto outdate_broken_part = [this, &broken_part]()
     {
-        if (broken_part)
+        if (!broken_part)
             return;
         DataPartsLock lock = lockParts();
         if (broken_part->getState() == DataPartState::Active)
             removePartsFromWorkingSet(NO_TRANSACTION_RAW, {broken_part}, true, &lock);
+        broken_part.reset();
+        cleanup_thread.wakeup();
     };

     /// We don't know exactly what happened to broken part
@@ -3767,6 +3773,7 @@ void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_n

     auto partition_range = getDataPartsVectorInPartitionForInternalUsage({MergeTreeDataPartState::Active, MergeTreeDataPartState::Outdated},
                                                                          broken_part_info.partition_id);
+    Strings detached_parts;
     for (const auto & part : partition_range)
     {
         if (!broken_part_info.contains(part->info))
@@ -3784,7 +3791,9 @@ void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_n
         {
             part->makeCloneInDetached("covered-by-broken", getInMemoryMetadataPtr());
         }
+        detached_parts.push_back(part->name);
     }
+    LOG_WARNING(log, "Detached {} parts covered by broken part {}: {}", detached_parts.size(), part_name, fmt::join(detached_parts, ", "));

     ThreadFuzzer::maybeInjectSleep();
     ThreadFuzzer::maybeInjectMemoryLimitException();
@@ -3873,10 +3882,14 @@ void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_n

             zkutil::KeeperMultiException::check(rc, ops, results);

+            String path_created = dynamic_cast<const Coordination::CreateResponse &>(*results.back()).path_created;
+            log_entry->znode_name = path_created.substr(path_created.find_last_of('/') + 1);
+            LOG_DEBUG(log, "Created entry {} to fetch missing part {}", log_entry->znode_name, part_name);
+            queue.insert(zookeeper, log_entry);
+
+            /// Make the part outdated after creating the log entry.
/// Otherwise, if we failed to create the entry, cleanup thread could remove the part from ZooKeeper (leading to diverged replicas) outdate_broken_part(); - queue_updating_task->schedule(); return; } } diff --git a/tests/integration/test_projection_report_broken_part/__init__.py b/tests/integration/test_projection_report_broken_part/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/integration/test_projection_report_broken_part/configs/testkeeper.xml b/tests/integration/test_projection_report_broken_part/configs/testkeeper.xml deleted file mode 100644 index 617371b13fa..00000000000 --- a/tests/integration/test_projection_report_broken_part/configs/testkeeper.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - testkeeper - - diff --git a/tests/integration/test_projection_report_broken_part/test.py b/tests/integration/test_projection_report_broken_part/test.py deleted file mode 100644 index f376adf4f1a..00000000000 --- a/tests/integration/test_projection_report_broken_part/test.py +++ /dev/null @@ -1,65 +0,0 @@ -# pylint: disable=unused-argument -# pylint: disable=redefined-outer-name -# pylint: disable=line-too-long - -import pytest -import time - -from helpers.client import QueryRuntimeException -from helpers.cluster import ClickHouseCluster - -cluster = ClickHouseCluster(__file__) -node = cluster.add_instance( - "node", - main_configs=[ - "configs/testkeeper.xml", - ], -) - - -@pytest.fixture(scope="module", autouse=True) -def start_cluster(): - try: - cluster.start() - yield cluster - finally: - cluster.shutdown() - - -def test_projection_broken_part(): - node.query( - """ - create table test_projection_broken_parts_1 (a int, b int, projection ab (select a, sum(b) group by a)) - engine = ReplicatedMergeTree('/clickhouse-tables/test_projection_broken_parts', 'r1') - order by a settings index_granularity = 1; - - create table test_projection_broken_parts_2 (a int, b int, projection ab (select a, sum(b) group by a)) - engine ReplicatedMergeTree('/clickhouse-tables/test_projection_broken_parts', 'r2') - order by a settings index_granularity = 1; - - insert into test_projection_broken_parts_1 values (1, 1), (1, 2), (1, 3); - - system sync replica test_projection_broken_parts_2; - """ - ) - - # break projection part - node.exec_in_container( - [ - "bash", - "-c", - "rm /var/lib/clickhouse/data/default/test_projection_broken_parts_1/all_0_0_0/ab.proj/data.bin", - ] - ) - - expected_error = "No such file or directory" - assert expected_error in node.query_and_get_error( - "select sum(b) from test_projection_broken_parts_1 group by a" - ) - - time.sleep(2) - - assert ( - int(node.query("select sum(b) from test_projection_broken_parts_1 group by a")) - == 6 - ) diff --git a/tests/queries/0_stateless/02254_projection_broken_part.reference b/tests/queries/0_stateless/02254_projection_broken_part.reference new file mode 100644 index 00000000000..68538fd31ea --- /dev/null +++ b/tests/queries/0_stateless/02254_projection_broken_part.reference @@ -0,0 +1,6 @@ +1 1 1 all_0_0_0 +1 1 2 all_0_0_0 +1 1 3 all_0_0_0 +2 6 +0 +5 6 diff --git a/tests/queries/0_stateless/02254_projection_broken_part.sh b/tests/queries/0_stateless/02254_projection_broken_part.sh new file mode 100755 index 00000000000..d276c67f8de --- /dev/null +++ b/tests/queries/0_stateless/02254_projection_broken_part.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# Tags: long, zookeeper + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "drop table if exists projection_broken_parts_1 sync;" +$CLICKHOUSE_CLIENT -q "drop table if exists projection_broken_parts_1 sync;" + +$CLICKHOUSE_CLIENT -q "create table projection_broken_parts_1 (a int, b int, projection ab (select a, sum(b) group by a)) + engine = ReplicatedMergeTree('/test/02369/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/rmt', 'r1') + order by a settings index_granularity = 1;" + +$CLICKHOUSE_CLIENT -q "create table projection_broken_parts_2 (a int, b int, projection ab (select a, sum(b) group by a)) + engine = ReplicatedMergeTree('/test/02369/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/rmt', 'r2') + order by a settings index_granularity = 1;" + +$CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 -q "insert into projection_broken_parts_1 values (1, 1), (1, 2), (1, 3);" +$CLICKHOUSE_CLIENT -q "system sync replica projection_broken_parts_2;" +$CLICKHOUSE_CLIENT -q "select 1, *, _part from projection_broken_parts_2 order by b;" +$CLICKHOUSE_CLIENT -q "select 2, sum(b) from projection_broken_parts_2 group by a;" + +path=$($CLICKHOUSE_CLIENT -q "select path from system.parts where database='$CLICKHOUSE_DATABASE' and table='projection_broken_parts_1' and name='all_0_0_0'") +# ensure that path is absolute before removing +$CLICKHOUSE_CLIENT -q "select throwIf(substring('$path', 1, 1) != '/', 'Path is relative: $path')" || exit +rm -f "$path/ab.proj/data.bin" + +$CLICKHOUSE_CLIENT -q "select 3, sum(b) from projection_broken_parts_1 group by a;" 2>/dev/null + +num_tries=0 +while ! $CLICKHOUSE_CLIENT -q "select 4, sum(b) from projection_broken_parts_1 group by a format Null;" 2>/dev/null; do + sleep 1; + num_tries=$((num_tries+1)) + if [ $num_tries -eq 60 ]; then + break + fi +done + +$CLICKHOUSE_CLIENT -q "system sync replica projection_broken_parts_1;" +$CLICKHOUSE_CLIENT -q "select 5, sum(b) from projection_broken_parts_1 group by a;" + +$CLICKHOUSE_CLIENT -q "drop table if exists projection_broken_parts_1 sync;" +$CLICKHOUSE_CLIENT -q "drop table if exists projection_broken_parts_1 sync;" From 8cdb181c3909802ae3bdd48fd118358b50ef027d Mon Sep 17 00:00:00 2001 From: Alexander Sapin Date: Thu, 6 Jul 2023 17:52:31 +0200 Subject: [PATCH 1295/1997] Reduce logging level --- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 88bd788b9ef..0f5a52b275c 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -4000,7 +4000,7 @@ void StorageReplicatedMergeTree::waitForUniquePartsToBeFetchedByOtherReplicas(St std::unique_lock lock(last_sent_parts_mutex); if (!last_sent_parts_cv.wait_until(lock, shutdown_deadline_, wait_predicate)) - LOG_WARNING(log, "Failed to wait for unique parts to be fetched in {} ms, {} parts can be left on this replica", wait_ms, unique_parts_set.size()); + LOG_INFO(log, "Failed to wait for unique parts to be fetched in {} ms, {} parts can be left on this replica", wait_ms, unique_parts_set.size()); else LOG_INFO(log, "Successfully waited all the parts"); } From 63b9c1ac0670947b49a916b5b6e47cab1dd1d3d0 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Thu, 6 Jul 2023 18:58:13 +0200 Subject: [PATCH 1296/1997] add test --- src/Common/ProfileEvents.cpp | 2 ++ src/IO/ReadWriteBufferFromHTTP.cpp | 6 ++++ ...ing_from_s3_with_connection_pool.reference | 1 + ...89_reading_from_s3_with_connection_pool.sh | 32 
++++++++++++++++++- 4 files changed, 40 insertions(+), 1 deletion(-) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index c9030070bf2..3bee12731aa 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -371,6 +371,8 @@ The server successfully detected this situation and will download merged part fr M(ReadBufferFromS3ResetSessions, "Number of HTTP sessions that were reset in ReadBufferFromS3.") \ M(ReadBufferFromS3PreservedSessions, "Number of HTTP sessions that were preserved in ReadBufferFromS3.") \ \ + M(ReadWriteBufferFromHTTPPreservedSessions, "Number of HTTP sessions that were preserved in ReadWriteBufferFromHTTP.") \ + \ M(WriteBufferFromS3Microseconds, "Time spent on writing to S3.") \ M(WriteBufferFromS3Bytes, "Bytes written to S3.") \ M(WriteBufferFromS3RequestsErrors, "Number of exceptions while writing to S3.") \ diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp index b834c17ab6c..6d1c0f7aafa 100644 --- a/src/IO/ReadWriteBufferFromHTTP.cpp +++ b/src/IO/ReadWriteBufferFromHTTP.cpp @@ -5,6 +5,7 @@ namespace ProfileEvents { extern const Event ReadBufferSeekCancelConnection; +extern const Event ReadWriteBufferFromHTTPPreservedSessions; } namespace DB @@ -442,6 +443,7 @@ bool ReadWriteBufferFromHTTPBase::nextImpl() { /// Response was fully read. markSessionForReuse(session->getSession()); + ProfileEvents::increment(ProfileEvents::ReadWriteBufferFromHTTPPreservedSessions); return false; } @@ -568,6 +570,7 @@ bool ReadWriteBufferFromHTTPBase::nextImpl() { /// Eof is reached, i.e response was fully read. markSessionForReuse(session->getSession()); + ProfileEvents::increment(ProfileEvents::ReadWriteBufferFromHTTPPreservedSessions); return false; } @@ -623,8 +626,11 @@ size_t ReadWriteBufferFromHTTPBase::readBigAt(char * to, si size_t r = copyFromIStreamWithProgressCallback(*result_istr, to, n, progress_callback, &cancelled); if (!cancelled) + { /// Response was fully read. 
markSessionForReuse(sess); + ProfileEvents::increment(ProfileEvents::ReadWriteBufferFromHTTPPreservedSessions); + } return r; } diff --git a/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.reference b/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.reference index d00491fd7e5..6ed281c757a 100644 --- a/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.reference +++ b/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.reference @@ -1 +1,2 @@ 1 +1 diff --git a/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.sh b/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.sh index 7a8b94a10a8..ce90157d004 100755 --- a/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.sh +++ b/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-random-settings +# Tags: no-fasttest, no-random-settings, no-replicated-database CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -27,3 +27,33 @@ WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND query_id='$query_id'; " + + +# Test connection pool in ReadWriteBufferFromHTTP + +query_id=$(${CLICKHOUSE_CLIENT} -nq " +create table mut (n int, m int, k int) engine=ReplicatedMergeTree('/test/02441/{database}/mut', '1') order by n; +set insert_keeper_fault_injection_probability=0; +insert into mut values (1, 2, 3), (10, 20, 30); + +system stop merges mut; +alter table mut delete where n = 10; + +select queryID() from( + -- a funny way to wait for a MUTATE_PART to be assigned + select sleepEachRow(2) from url('http://localhost:8123/?param_tries={1..10}&query=' || encodeURLComponent( + 'select 1 where ''MUTATE_PART'' not in (select type from system.replication_queue where database=''' || currentDatabase() || ''' and table=''mut'')' + ), 'LineAsString', 's String') + -- queryID() will be returned for each row, since the query above doesn't return anything we need to return a fake row + union all + select 1 +) limit 1 settings max_threads=1; +" 2>&1) +${CLICKHOUSE_CLIENT} --query "SYSTEM FLUSH LOGS" +${CLICKHOUSE_CLIENT} -nm --query " +SELECT ProfileEvents['ReadWriteBufferFromHTTPPreservedSessions'] > 0 +FROM system.query_log +WHERE type = 'QueryFinish' + AND current_database = currentDatabase() + AND query_id='$query_id'; +" From 45579417642c44956ebe329b5412bcbb48809d72 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Thu, 6 Jul 2023 17:03:43 +0000 Subject: [PATCH 1297/1997] black --- tests/sqllogic/connection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/sqllogic/connection.py b/tests/sqllogic/connection.py index 5e2634787d8..d71cc005d09 100644 --- a/tests/sqllogic/connection.py +++ b/tests/sqllogic/connection.py @@ -62,7 +62,7 @@ def default_clickhouse_odbc_conn_str(): return str( OdbcConnectingArgs.create_from_kw( dsn="ClickHouse DSN (ANSI)", - Url="http://localhost:8123/query?default_format=ODBCDriver2&default_table_engine=MergeTree" + Url="http://localhost:8123/query?default_format=ODBCDriver2&default_table_engine=MergeTree", ) ) From 58793816a73b7b17eb72c35f0266276bc40507b4 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 6 Jul 2023 19:04:34 +0200 Subject: [PATCH 1298/1997] fix paranoid check --- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 3 ++ 
src/Storages/StorageReplicatedMergeTree.cpp | 15 ++++--- src/Storages/StorageReplicatedMergeTree.h | 2 + .../02254_projection_broken_part.sh | 4 +- ...2255_broken_parts_chain_on_start.reference | 8 ++++ .../02255_broken_parts_chain_on_start.sh | 43 +++++++++++++++++++ 7 files changed, 68 insertions(+), 9 deletions(-) create mode 100644 tests/queries/0_stateless/02255_broken_parts_chain_on_start.reference create mode 100755 tests/queries/0_stateless/02255_broken_parts_chain_on_start.sh diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 4dc3583c706..b7fde55880e 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -2651,7 +2651,7 @@ size_t MergeTreeData::clearOldBrokenPartsFromDetachedDirectory() for (auto & [old_name, new_name, disk] : renamed_parts.old_and_new_names) { removeDetachedPart(disk, fs::path(relative_data_path) / "detached" / new_name / "", old_name); - LOG_DEBUG(log, "Removed broken detached part {} due to a timeout for broken detached parts", old_name); + LOG_WARNING(log, "Removed broken detached part {} due to a timeout for broken detached parts", old_name); old_name.clear(); } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 792843cbe18..07f46c07466 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -218,6 +218,9 @@ void ReplicatedMergeTreeQueue::createLogEntriesToFetchBrokenParts() for (const auto & broken_part_name : broken_parts) storage.removePartAndEnqueueFetch(broken_part_name, /* storage_init = */true); + Strings parts_in_zk = storage.getZooKeeper()->getChildren(replica_path + "/parts"); + storage.paranoidCheckForCoveredPartsInZooKeeperOnStart(parts_in_zk, {}); + std::lock_guard lock(state_mutex); /// broken_parts_to_enqueue_fetches_on_loading can be assigned only once on table startup, /// so actually no race conditions are possible diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index e8176ac1d5f..2da18f69baf 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1260,8 +1260,7 @@ static time_t tryGetPartCreateTime(zkutil::ZooKeeperPtr & zookeeper, const Strin return res; } -static void paranoidCheckForCoveredPartsInZooKeeperOnStart(const StorageReplicatedMergeTree * storage, const Strings & parts_in_zk, - MergeTreeDataFormatVersion format_version, Poco::Logger * log) +void StorageReplicatedMergeTree::paranoidCheckForCoveredPartsInZooKeeperOnStart(const Strings & parts_in_zk, const Strings & parts_to_fetch) const { #ifdef ABORT_ON_LOGICAL_ERROR constexpr bool paranoid_check_for_covered_parts_default = true; @@ -1275,15 +1274,15 @@ static void paranoidCheckForCoveredPartsInZooKeeperOnStart(const StorageReplicat return; /// FIXME https://github.com/ClickHouse/ClickHouse/issues/51182 - if (storage->getSettings()->use_metadata_cache) + if (getSettings()->use_metadata_cache) return; ActiveDataPartSet active_set(format_version); for (const auto & part_name : parts_in_zk) active_set.add(part_name); - const auto disks = storage->getStoragePolicy()->getDisks(); - auto path = storage->getRelativeDataPath(); + const auto disks = getStoragePolicy()->getDisks(); + auto path = getRelativeDataPath(); for (const auto & part_name : parts_in_zk) { @@ -1296,6 +1295,9 @@ static void 
paranoidCheckForCoveredPartsInZooKeeperOnStart(const StorageReplicat if (disk->exists(fs::path(path) / part_name)) found = true; + if (!found) + found = std::find(parts_to_fetch.begin(), parts_to_fetch.end(), part_name) != parts_to_fetch.end(); + if (!found) { LOG_WARNING(log, "Part {} exists in ZooKeeper and covered by another part in ZooKeeper ({}), but doesn't exist on any disk. " @@ -1310,7 +1312,6 @@ void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks) auto zookeeper = getZooKeeper(); Strings expected_parts_vec = zookeeper->getChildren(fs::path(replica_path) / "parts"); - paranoidCheckForCoveredPartsInZooKeeperOnStart(this, expected_parts_vec, format_version, log); /// Parts in ZK. NameSet expected_parts(expected_parts_vec.begin(), expected_parts_vec.end()); @@ -1345,6 +1346,8 @@ void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks) if (!getActiveContainingPart(missing_name)) parts_to_fetch.push_back(missing_name); + paranoidCheckForCoveredPartsInZooKeeperOnStart(expected_parts_vec, parts_to_fetch); + /** To check the adequacy, for the parts that are in the FS, but not in ZK, we will only consider not the most recent parts. * Because unexpected new parts usually arise only because they did not have time to enroll in ZK with a rough restart of the server. * It also occurs from deduplicated parts that did not have time to retire. diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index bdd3f0da5bf..72a022fce26 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -584,6 +584,8 @@ private: void forcefullyRemoveBrokenOutdatedPartFromZooKeeperBeforeDetaching(const String & part_name) override; + void paranoidCheckForCoveredPartsInZooKeeperOnStart(const Strings & parts_in_zk, const Strings & parts_to_fetch) const; + /// Removes a part from ZooKeeper and adds a task to the queue to download it. It is supposed to do this with broken parts. 
void removePartAndEnqueueFetch(const String & part_name, bool storage_init); diff --git a/tests/queries/0_stateless/02254_projection_broken_part.sh b/tests/queries/0_stateless/02254_projection_broken_part.sh index d276c67f8de..6ba5093f234 100755 --- a/tests/queries/0_stateless/02254_projection_broken_part.sh +++ b/tests/queries/0_stateless/02254_projection_broken_part.sh @@ -9,11 +9,11 @@ $CLICKHOUSE_CLIENT -q "drop table if exists projection_broken_parts_1 sync;" $CLICKHOUSE_CLIENT -q "drop table if exists projection_broken_parts_1 sync;" $CLICKHOUSE_CLIENT -q "create table projection_broken_parts_1 (a int, b int, projection ab (select a, sum(b) group by a)) - engine = ReplicatedMergeTree('/test/02369/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/rmt', 'r1') + engine = ReplicatedMergeTree('/test/02254/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/rmt', 'r1') order by a settings index_granularity = 1;" $CLICKHOUSE_CLIENT -q "create table projection_broken_parts_2 (a int, b int, projection ab (select a, sum(b) group by a)) - engine = ReplicatedMergeTree('/test/02369/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/rmt', 'r2') + engine = ReplicatedMergeTree('/test/02254/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/rmt', 'r2') order by a settings index_granularity = 1;" $CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 -q "insert into projection_broken_parts_1 values (1, 1), (1, 2), (1, 3);" diff --git a/tests/queries/0_stateless/02255_broken_parts_chain_on_start.reference b/tests/queries/0_stateless/02255_broken_parts_chain_on_start.reference new file mode 100644 index 00000000000..d55cb5baf93 --- /dev/null +++ b/tests/queries/0_stateless/02255_broken_parts_chain_on_start.reference @@ -0,0 +1,8 @@ +1 1 10 all_0_0_0_1 +1 1 20 all_0_0_0_1 +1 1 30 all_0_0_0_1 +0 +0 +1 1 10 all_0_0_0_1 +1 1 20 all_0_0_0_1 +1 1 30 all_0_0_0_1 diff --git a/tests/queries/0_stateless/02255_broken_parts_chain_on_start.sh b/tests/queries/0_stateless/02255_broken_parts_chain_on_start.sh new file mode 100755 index 00000000000..de260937b9c --- /dev/null +++ b/tests/queries/0_stateless/02255_broken_parts_chain_on_start.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash +# Tags: long, zookeeper + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "drop table if exists rmt1 sync;" +$CLICKHOUSE_CLIENT -q "drop table if exists rmt2 sync;" + +$CLICKHOUSE_CLIENT -q "create table rmt1 (a int, b int) + engine = ReplicatedMergeTree('/test/02255/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/rmt', 'r1') order by a settings old_parts_lifetime=100500;" + +$CLICKHOUSE_CLIENT -q "create table rmt2 (a int, b int) + engine = ReplicatedMergeTree('/test/02255/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/rmt', 'r2') order by a settings old_parts_lifetime=100500;" + +$CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 -q "insert into rmt1 values (1, 1), (1, 2), (1, 3);" +$CLICKHOUSE_CLIENT -q "alter table rmt1 update b = b*10 where 1 settings mutations_sync=1" +$CLICKHOUSE_CLIENT -q "system sync replica rmt2;" +$CLICKHOUSE_CLIENT -q "select 1, *, _part from rmt2 order by b;" + +path=$($CLICKHOUSE_CLIENT -q "select path from system.parts where database='$CLICKHOUSE_DATABASE' and table='rmt1' and name='all_0_0_0'") +# ensure that path is absolute before removing +$CLICKHOUSE_CLIENT -q "select throwIf(substring('$path', 1, 1) != '/', 'Path is relative: $path')" || exit +rm -f "$path/data.bin" + +path=$($CLICKHOUSE_CLIENT -q "select path from system.parts where database='$CLICKHOUSE_DATABASE' and table='rmt1' and name='all_0_0_0_1'") +# ensure that path is absolute before removing +$CLICKHOUSE_CLIENT -q "select throwIf(substring('$path', 1, 1) != '/', 'Path is relative: $path')" || exit +rm -f "$path/data.bin" + +$CLICKHOUSE_CLIENT -q "detach table rmt1 sync" +$CLICKHOUSE_CLIENT -q "attach table rmt1" 2>/dev/null + +$CLICKHOUSE_CLIENT -q "system sync replica rmt1;" +$CLICKHOUSE_CLIENT -q "select 1, *, _part from rmt1 order by b;" + +$CLICKHOUSE_CLIENT -q "truncate table rmt1" + +$CLICKHOUSE_CLIENT -q "SELECT table, lost_part_count FROM system.replicas WHERE database=currentDatabase() AND lost_part_count!=0"; + +$CLICKHOUSE_CLIENT -q "drop table if exists projection_broken_parts_1 sync;" +$CLICKHOUSE_CLIENT -q "drop table if exists projection_broken_parts_1 sync;" From 9c12994d942e48c112e9392738c561582f10bb0a Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 6 Jul 2023 19:49:22 +0200 Subject: [PATCH 1299/1997] initialize SeriesRecords for LogSeriesLimiter lazy --- src/Common/LoggingFormatStringHelpers.cpp | 29 +++++++++++------------ src/Common/LoggingFormatStringHelpers.h | 12 ++++++---- 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/src/Common/LoggingFormatStringHelpers.cpp b/src/Common/LoggingFormatStringHelpers.cpp index ed578018d5f..074c8dd2803 100644 --- a/src/Common/LoggingFormatStringHelpers.cpp +++ b/src/Common/LoggingFormatStringHelpers.cpp @@ -77,9 +77,8 @@ void LogFrequencyLimiterIml::cleanup(time_t too_old_threshold_s) } -std::unordered_map> LogSeriesLimiter::series_settings; -std::unordered_map> LogSeriesLimiter::series_loggers; std::mutex LogSeriesLimiter::mutex; +time_t LogSeriesLimiter::last_cleanup = 0; LogSeriesLimiter::LogSeriesLimiter(Poco::Logger * logger_, size_t allowed_count_, time_t interval_s_) : logger(logger_) @@ -101,33 +100,33 @@ LogSeriesLimiter::LogSeriesLimiter(Poco::Logger * logger_, size_t allowed_count_ std::lock_guard lock(mutex); - if (series_settings.contains(name_hash)) + if (last_cleanup == 0) + last_cleanup = now; + + auto & series_records = getSeriesRecords(); + + static const time_t cleanup_delay_s = 600; + if (last_cleanup + cleanup_delay_s >= now) { - auto & settings = series_settings[name_hash]; - auto & [allowed_count, interval_s] = 
settings;
-        chassert(allowed_count_ == allowed_count);
-        chassert(interval_s_ == interval_s);
-    }
-    else
-    {
-        series_settings[name_hash] = std::make_tuple(allowed_count_, interval_s_);
+        time_t old = now - cleanup_delay_s;
+        std::erase_if(series_records, [old](const auto & elem) { return get<0>(elem.second) < old; });
+        last_cleanup = now;
     }
 
     auto register_as_first = [&] () TSA_REQUIRES(mutex)
     {
         assert(allowed_count_ > 0);
         accepted = true;
-        series_loggers[name_hash] = std::make_tuple(now, 1, 1);
+        series_records[name_hash] = std::make_tuple(now, 1, 1);
     };
-
-    if (!series_loggers.contains(name_hash))
+    if (!series_records.contains(name_hash))
     {
         register_as_first();
         return;
     }
 
-    auto & [last_time, accepted_count, total_count] = series_loggers[name_hash];
+    auto & [last_time, accepted_count, total_count] = series_records[name_hash];
     if (last_time + interval_s_ <= now)
     {
         debug_message = fmt::format(
diff --git a/src/Common/LoggingFormatStringHelpers.h b/src/Common/LoggingFormatStringHelpers.h
index 82c260e52a6..3afa3fb089d 100644
--- a/src/Common/LoggingFormatStringHelpers.h
+++ b/src/Common/LoggingFormatStringHelpers.h
@@ -199,12 +199,16 @@ public:
 class LogSeriesLimiter
 {
     static std::mutex mutex;
-
-    /// Hash(logger_name) -> (allowed_count, interval_s)
-    static std::unordered_map> series_settings TSA_GUARDED_BY(mutex);
+    static time_t last_cleanup;
 
     /// Hash(logger_name) -> (last_logged_time_s, accepted, muted)
-    static std::unordered_map> series_loggers TSA_GUARDED_BY(mutex);
+    using SeriesRecords = std::unordered_map>;
+
+    static SeriesRecords & getSeriesRecords() TSA_REQUIRES(mutex)
+    {
+        static SeriesRecords records;
+        return records;
+    }
 
     Poco::Logger * logger = nullptr;
     bool accepted = false;

From ec5e26a017c39eb4d76a1b07e4083cc53a225a5d Mon Sep 17 00:00:00 2001
From: Azat Khuzhin 
Date: Thu, 6 Jul 2023 21:08:53 +0200
Subject: [PATCH 1300/1997] Pin rust nightly (to make it stable)

Because of using Rust nightly, and without #49601, the Rust toolchain is
very unstable and can frequently fail. So let's pin a particular version.

Also I've looked, and it seems that the Rust archive stores these
toolchains without any TTL, since there is even a version from 2015.

Follow-up for: #50541 Signed-off-by: Azat Khuzhin --- docker/packager/binary/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index e824161a688..897bcd24d04 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -49,8 +49,8 @@ ENV CARGO_HOME=/rust/cargo ENV PATH="/rust/cargo/bin:${PATH}" RUN curl https://sh.rustup.rs -sSf | bash -s -- -y && \ chmod 777 -R /rust && \ - rustup toolchain install nightly && \ - rustup default nightly && \ + rustup toolchain install nightly-2023-07-04 && \ + rustup default nightly-2023-07-04 && \ rustup component add rust-src && \ rustup target add aarch64-unknown-linux-gnu && \ rustup target add x86_64-apple-darwin && \ From 7255c35edcefe03a39ad7bcf460d9dca5670ca3b Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Thu, 6 Jul 2023 19:43:37 +0000 Subject: [PATCH 1301/1997] Add more tests --- .../functions/string-search-functions.md | 50 +++++++++++++ .../functions/string-search-functions.md | 52 +++++++++++++ src/Functions/HasSubsequenceImpl.h | 74 ++++++++++++------- src/Functions/hasSubsequence.cpp | 2 +- .../hasSubsequenceCaseInsensitive.cpp | 2 +- .../hasSubsequenceCaseInsensitiveUTF8.cpp | 28 +++++++ src/Functions/hasSubsequenceUTF8.cpp | 29 ++++++++ .../02809_has_subsequence.reference | 13 +++- .../0_stateless/02809_has_subsequence.sql | 20 ++++- 9 files changed, 237 insertions(+), 33 deletions(-) create mode 100644 src/Functions/hasSubsequenceCaseInsensitiveUTF8.cpp create mode 100644 src/Functions/hasSubsequenceUTF8.cpp diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index 3d8f89f7295..04ad6474310 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -631,3 +631,53 @@ Result: │ 100 │ 200 │ 100-200 │ 100 │ └──────────────────────────────────────────────┴──────────────────────────────────────────────┴──────────────────────────────────────────────┴───────────────────────────────────────────┘ ``` + +## hasSubsequence + +Returns 1 if needle is a subsequence of haystack, or 0 otherwise. +A subsequence of a string is a sequence that can be derived from the given string by deleting zero or more elements without changing the order of the remaining elements. + + +**Syntax** + +``` sql +hasSubsequence(haystack, needle) +``` + +**Arguments** + +- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). +- `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). + +**Returned values** + +- 1, if needle is a subsequence of haystack. +- 0, otherwise. + +Type: `UInt8`. + +**Examples** + +``` sql +SELECT hasSubsequence('garbage', 'arg') ; +``` + +Result: + +``` text +┌─hasSubsequence('garbage', 'arg')─┐ +│ 1 │ +└──────────────────────────────────┘ +``` + +## hasSubsequenceCaseInsensitive + +Like [hasSubsequence](#hasSubsequence) but searches case-insensitively. + +## hasSubsequenceUTF8 + +Like [hasSubsequence](#hasSubsequence) but assumes `haystack` and `needle` are UTF-8 encoded strings. + +## hasSubsequenceCaseInsensitiveUTF8 + +Like [hasSubsequenceUTF8](#hasSubsequenceUTF8) but searches case-insensitively. 
\ No newline at end of file diff --git a/docs/ru/sql-reference/functions/string-search-functions.md b/docs/ru/sql-reference/functions/string-search-functions.md index ea4f90d4f66..21989e882b6 100644 --- a/docs/ru/sql-reference/functions/string-search-functions.md +++ b/docs/ru/sql-reference/functions/string-search-functions.md @@ -801,3 +801,55 @@ SELECT countSubstringsCaseInsensitiveUTF8('аБв__АбВ__абв', 'Абв'); │ 3 │ └────────────────────────────────────────────────────────────┘ ``` + +## hasSubsequence(haystack, needle) {#hasSubsequence} + +Возвращает 1 если needle является подпоследовательностью haystack, иначе 0. + + +**Синтаксис** + +``` sql +hasSubsequence(haystack, needle) +``` + +**Аргументы** + +- `haystack` — строка, по которой выполняется поиск. [Строка](../syntax.md#syntax-string-literal). +- `needle` — подстрока, которую необходимо найти. [Строка](../syntax.md#syntax-string-literal). + +**Возвращаемые значения** + +- 1, если +- 0, если подстрока не найдена. + +Тип: `UInt8`. + +**Примеры** + +Запрос: + +``` sql +SELECT hasSubsequence('garbage', 'arg') ; +``` + +Результат: + +``` text +┌─hasSubsequence('garbage', 'arg')─┐ +│ 1 │ +└──────────────────────────────────┘ +``` + + +## hasSubsequenceCaseInsensitive + +Такая же, как и [hasSubsequence](#hasSubsequence), но работает без учета регистра. + +## hasSubsequenceUTF8 + +Такая же, как и [hasSubsequence](#hasSubsequence) при допущении что `haystack` и `needle` содержат набор кодовых точек, представляющий текст в кодировке UTF-8. + +## hasSubsequenceCaseInsensitiveUTF8 + +Такая же, как и [hasSubsequenceUTF8](#hasSubsequenceUTF8), но работает без учета регистра. diff --git a/src/Functions/HasSubsequenceImpl.h b/src/Functions/HasSubsequenceImpl.h index 3a29ef68b0b..bcb8e8e99e6 100644 --- a/src/Functions/HasSubsequenceImpl.h +++ b/src/Functions/HasSubsequenceImpl.h @@ -1,11 +1,8 @@ #pragma once - - namespace DB { namespace { - template struct HasSubsequenceImpl { @@ -17,23 +14,31 @@ struct HasSubsequenceImpl static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {};} - /// Find one substring in many strings. static void vectorConstant( - const ColumnString::Chars & /*haystack_data*/, - const ColumnString::Offsets & /*haystack_offsets*/, - const std::string & /*needle*/, + const ColumnString::Chars & haystack_data, + const ColumnString::Offsets & haystack_offsets, + const String & needle, const ColumnPtr & /*start_pos*/, PaddedPODArray & res, [[maybe_unused]] ColumnUInt8 * /*res_null*/) { - size_t size = res.size(); - for (size_t i = 0; i < size; ++i) + if (needle.empty()) { - res[i] = 0; + for (auto & r : res) + r = 1; + return; + } + + ColumnString::Offset prev_haystack_offset = 0; + for (size_t i = 0; i < haystack_offsets.size(); ++i) + { + size_t haystack_size = haystack_offsets[i] - prev_haystack_offset - 1; + const char * haystack = reinterpret_cast(&haystack_data[prev_haystack_offset]); + res[i] = hasSubsequence(haystack, haystack_size, needle.c_str(), needle.size()); + prev_haystack_offset = haystack_offsets[i]; } } - /// Search each time for a different single substring inside each time different string. 
static void vectorVector( const ColumnString::Chars & haystack_data, const ColumnString::Offsets & haystack_offsets, @@ -61,7 +66,7 @@ struct HasSubsequenceImpl { const char * needle = reinterpret_cast(&needle_data[prev_needle_offset]); const char * haystack = reinterpret_cast(&haystack_data[prev_haystack_offset]); - res[i] = impl(haystack, haystack_size, needle, needle_size); + res[i] = hasSubsequence(haystack, haystack_size, needle, needle_size); } prev_haystack_offset = haystack_offsets[i]; @@ -69,35 +74,38 @@ struct HasSubsequenceImpl } } - /// Find many substrings in single string. static void constantVector( - const String & /*haystack*/, - const ColumnString::Chars & /*needle_data*/, + const String & haystack, + const ColumnString::Chars & needle_data, const ColumnString::Offsets & needle_offsets, const ColumnPtr & /*start_pos*/, PaddedPODArray & res, ColumnUInt8 * /*res_null*/) { + ColumnString::Offset prev_needle_offset = 0; + size_t size = needle_offsets.size(); for (size_t i = 0; i < size; ++i) { - res[i] = 0; + size_t needle_size = needle_offsets[i] - prev_needle_offset - 1; + + if (0 == needle_size) + { + res[i] = 1; + } + else + { + const char * needle = reinterpret_cast(&needle_data[prev_needle_offset]); + res[i] = hasSubsequence(haystack.c_str(), haystack.size(), needle, needle_size); + } + prev_needle_offset = needle_offsets[i]; } } - static UInt8 impl(const char * haystack, size_t haystack_size, const char * needle, size_t needle_size) - { - size_t j = 0; - for (size_t i = 0; (i < haystack_size) && (j < needle_size); i++) - if (needle[j] == haystack[i]) - ++j; - return j == needle_size; - } - static void constantConstant( - std::string haystack, - std::string needle, + String haystack, + String needle, const ColumnPtr & /*start_pos*/, PaddedPODArray & res, ColumnUInt8 * /*res_null*/) @@ -106,13 +114,23 @@ struct HasSubsequenceImpl Impl::toLowerIfNeed(haystack); Impl::toLowerIfNeed(needle); - UInt8 result = impl(haystack.c_str(), haystack.size(), needle.c_str(), needle.size()); + UInt8 result = hasSubsequence(haystack.c_str(), haystack.size(), needle.c_str(), needle.size()); for (size_t i = 0; i < size; ++i) { res[i] = result; } } + + static UInt8 hasSubsequence(const char * haystack, size_t haystack_size, const char * needle, size_t needle_size) + { + size_t j = 0; + for (size_t i = 0; (i < haystack_size) && (j < needle_size); i++) + if (needle[j] == haystack[i]) + ++j; + return j == needle_size; + } + template static void vectorFixedConstant(Args &&...) 
{ diff --git a/src/Functions/hasSubsequence.cpp b/src/Functions/hasSubsequence.cpp index da2aaddcf50..bb1f295cee4 100644 --- a/src/Functions/hasSubsequence.cpp +++ b/src/Functions/hasSubsequence.cpp @@ -10,7 +10,7 @@ namespace struct HasSubsequenceCaseSensitiveASCII { - static void toLowerIfNeed(std::string & /*s*/) { } + static void toLowerIfNeed(String & /*s*/) { } }; struct NameHasSubsequence diff --git a/src/Functions/hasSubsequenceCaseInsensitive.cpp b/src/Functions/hasSubsequenceCaseInsensitive.cpp index f5c13a7cf8c..fe50ada9be9 100644 --- a/src/Functions/hasSubsequenceCaseInsensitive.cpp +++ b/src/Functions/hasSubsequenceCaseInsensitive.cpp @@ -9,7 +9,7 @@ namespace struct HasSubsequenceCaseInsensitiveASCII { - static void toLowerIfNeed(std::string & s) { std::transform(std::begin(s), std::end(s), std::begin(s), tolower); } + static void toLowerIfNeed(String & s) { std::transform(std::begin(s), std::end(s), std::begin(s), tolower); } }; struct NameHasSubsequenceCaseInsensitive diff --git a/src/Functions/hasSubsequenceCaseInsensitiveUTF8.cpp b/src/Functions/hasSubsequenceCaseInsensitiveUTF8.cpp new file mode 100644 index 00000000000..2908c284a25 --- /dev/null +++ b/src/Functions/hasSubsequenceCaseInsensitiveUTF8.cpp @@ -0,0 +1,28 @@ +#include +#include +#include + +namespace DB +{ +namespace +{ + +struct HasSubsequenceCaseInsensitiveUTF8 +{ + static void toLowerIfNeed(String & s) { std::transform(std::begin(s), std::end(s), std::begin(s), tolower); } +}; + +struct NameHasSubsequenceCaseInsensitiveUTF8 +{ + static constexpr auto name = "hasSubsequenceCaseInsensitiveUTF8"; +}; + +using FunctionHasSubsequenceCaseInsensitiveUTF8 = FunctionsStringSearch>; +} + +REGISTER_FUNCTION(hasSubsequenceCaseInsensitiveUTF8) +{ + factory.registerFunction({}, FunctionFactory::CaseInsensitive); +} + +} diff --git a/src/Functions/hasSubsequenceUTF8.cpp b/src/Functions/hasSubsequenceUTF8.cpp new file mode 100644 index 00000000000..c0811de6575 --- /dev/null +++ b/src/Functions/hasSubsequenceUTF8.cpp @@ -0,0 +1,29 @@ +#include +#include +#include + + +namespace DB +{ +namespace +{ + +struct HasSubsequenceCaseSensitiveUTF8 +{ + static void toLowerIfNeed(String & /*s*/) { } +}; + +struct NameHasSubsequenceUTF8 +{ + static constexpr auto name = "hasSubsequenceUTF8"; +}; + +using FunctionHasSubsequenceUTF8 = FunctionsStringSearch>; +} + +REGISTER_FUNCTION(hasSubsequenceUTF8) +{ + factory.registerFunction({}, FunctionFactory::CaseInsensitive); +} + +} diff --git a/tests/queries/0_stateless/02809_has_subsequence.reference b/tests/queries/0_stateless/02809_has_subsequence.reference index 827caa105d0..d12c0ba9fb3 100644 --- a/tests/queries/0_stateless/02809_has_subsequence.reference +++ b/tests/queries/0_stateless/02809_has_subsequence.reference @@ -1,3 +1,4 @@ +hasSubsequence / const / const 1 1 1 @@ -10,7 +11,17 @@ 0 0 0 +hasSubsequence / const / string 1 1 +0 +hasSubsequence / string / const +1 +1 +0 +hasSubsequence / string / string +1 +1 +0 +hasSubsequenceCaseInsensitive / const / const 1 -0 \ No newline at end of file diff --git a/tests/queries/0_stateless/02809_has_subsequence.sql b/tests/queries/0_stateless/02809_has_subsequence.sql index 63ffb49dc54..64f3fd8dc77 100644 --- a/tests/queries/0_stateless/02809_has_subsequence.sql +++ b/tests/queries/0_stateless/02809_has_subsequence.sql @@ -1,3 +1,4 @@ +select 'hasSubsequence / const / const'; select hasSubsequence('garbage', ''); select hasSubsequence('garbage', 'g'); select hasSubsequence('garbage', 'a'); @@ -12,8 +13,23 @@ select hasSubsequence('garbage', 
'garbage1'); select hasSubsequence('garbage', 'arbw'); select hasSubsequence('garbage', 'ARG'); -select hasSubsequenceCaseInsensitive('garbage', 'ARG'); +select 'hasSubsequence / const / string'; +select hasSubsequence('garbage', materialize('')); +select hasSubsequence('garbage', materialize('arg')); +select hasSubsequence('garbage', materialize('arbw')); + +select 'hasSubsequence / string / const'; +select hasSubsequence(materialize('garbage'), ''); +select hasSubsequence(materialize('garbage'), 'arg'); +select hasSubsequence(materialize('garbage'), 'arbw'); + +select 'hasSubsequence / string / string'; select hasSubsequence(materialize('garbage'), materialize('')); select hasSubsequence(materialize('garbage'), materialize('arg')); -select hasSubsequence(materialize('garbage'), materialize('garbage1')); \ No newline at end of file +select hasSubsequence(materialize('garbage'), materialize('garbage1')); + +select 'hasSubsequenceCaseInsensitive / const / const'; + +select hasSubsequenceCaseInsensitive('garbage', 'ARG'); + From c1fa38ea8ed98123a780f2a35c41b8eaf85e2ec0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 6 Jul 2023 22:22:36 +0200 Subject: [PATCH 1302/1997] Add RISC-V 64 to the universal installer --- docs/_includes/install/universal.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/_includes/install/universal.sh b/docs/_includes/install/universal.sh index 1699be138c8..5d4571aed9e 100755 --- a/docs/_includes/install/universal.sh +++ b/docs/_includes/install/universal.sh @@ -33,6 +33,9 @@ then elif [ "${ARCH}" = "powerpc64le" -o "${ARCH}" = "ppc64le" ] then DIR="powerpc64le" + elif [ "${ARCH}" = "riscv64" ] + then + DIR="riscv64" fi elif [ "${OS}" = "FreeBSD" ] then From 271297823ae6abe82908220d1a540fbf0113f4d8 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 6 Jul 2023 14:56:05 +0000 Subject: [PATCH 1303/1997] Allow var-int encoded 64-bit integers with MSB=1 Resolves: #51486 Until now, it was illegal to encode 64-bit (unsigned) integers with MSB=1, i.e. values > (1ULL<<63) - 1, as var-int. In more detail, the var-int code used by ClickHouse server and client spent at most 9 bytes per value such that 9 * 7 = 63 bits could be encoded. Some 3rd party clients (e.g. Rust clickhouse-rs) had the same limitation, whereas other clients understand the full range (Python clickhouse-driver). PRs #47608 and #48628 added sanity checks as asserts or exceptions during var-int encoding on the server side. This was considered okay as such huge integers so far occurred only during testing (usually fuzzing) but not in practice. Issue #51486 is a new fuzzing issue where the exception thrown from the sanity check led to a half-baked progress packet and as a result, a logical error / server crash. The only fix which is not another bandaid is to allow the full range in var-int coding. Clients will have to allow the full range too, a note will be added to the changelog. (the alternative was to create another protocol version but as var-int is used all over the place this was considered infeasible) Review note: this is the relevant commit. 
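To make the full-range coding concrete, here is a minimal self-contained sketch of VLQ Base-128 over the whole UInt64 range. It is illustrative only: it uses plain byte vectors rather than ClickHouse's WriteBuffer/ReadBuffer interfaces, and the function names are hypothetical stand-ins for the real writeVarUInt/readVarUInt.

```cpp
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// VLQ Base-128: 7 payload bits per byte, MSB set on every byte except the last.
// Covering all 64 bits therefore needs up to ceil(64 / 7) = 10 bytes, one more
// than the 9-byte limit that previously capped values at (1ULL << 63) - 1.
std::vector<uint8_t> encodeVarUInt(uint64_t x)
{
    std::vector<uint8_t> out;
    while (x > 0x7F)
    {
        out.push_back(0x80 | (x & 0x7F)); // continuation byte
        x >>= 7;
    }
    out.push_back(static_cast<uint8_t>(x)); // final byte, MSB clear
    return out;
}

uint64_t decodeVarUInt(const std::vector<uint8_t> & in)
{
    uint64_t x = 0;
    for (size_t i = 0; i < in.size() && i < 10; ++i) // read at most 10 bytes, not 9
    {
        x |= static_cast<uint64_t>(in[i] & 0x7F) << (7 * i);
        if (!(in[i] & 0x80))
            break;
    }
    return x;
}

int main()
{
    // A value with MSB=1, i.e. > (1ULL << 63) - 1, which the old sanity check rejected.
    const uint64_t big = (1ULL << 63) + 42;
    assert(encodeVarUInt(big).size() == 10);
    assert(decodeVarUInt(encodeVarUInt(big)) == big); // encoding/decoding round-trips
    assert(encodeVarUInt(127).size() == 1);           // small values still take one byte
    return 0;
}
```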
--- src/IO/VarInt.cpp | 9 --- src/IO/VarInt.h | 65 ++++++++----------- src/Server/TCPHandler.cpp | 7 +- .../0_stateless/02812_large_varints.reference | 0 .../0_stateless/02812_large_varints.sql | 4 ++ 5 files changed, 34 insertions(+), 51 deletions(-) create mode 100644 tests/queries/0_stateless/02812_large_varints.reference create mode 100644 tests/queries/0_stateless/02812_large_varints.sql diff --git a/src/IO/VarInt.cpp b/src/IO/VarInt.cpp index ca4b95fcb60..a4b249b01d7 100644 --- a/src/IO/VarInt.cpp +++ b/src/IO/VarInt.cpp @@ -6,7 +6,6 @@ namespace DB namespace ErrorCodes { extern const int ATTEMPT_TO_READ_AFTER_EOF; - extern const int BAD_ARGUMENTS; } void throwReadAfterEOF() @@ -14,12 +13,4 @@ void throwReadAfterEOF() throw Exception(ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF, "Attempt to read after eof"); } -void throwValueTooLargeForVarIntEncoding(UInt64 x) -{ - /// Under practical circumstances, we should virtually never end up here but AST Fuzzer manages to create superlarge input integers - /// which trigger this exception. Intentionally not throwing LOGICAL_ERROR or calling abort() or [ch]assert(false), so AST Fuzzer - /// can swallow the exception and continue to run. - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Value {} is too large for VarInt encoding", x); -} - } diff --git a/src/IO/VarInt.h b/src/IO/VarInt.h index 9099b5e7f6a..2a2743e3407 100644 --- a/src/IO/VarInt.h +++ b/src/IO/VarInt.h @@ -13,73 +13,59 @@ namespace DB /// Variable-Length Quantity (VLQ) Base-128 compression, also known as Variable Byte (VB) or Varint encoding. [[noreturn]] void throwReadAfterEOF(); -[[noreturn]] void throwValueTooLargeForVarIntEncoding(UInt64 x); -/// NOTE: Due to historical reasons, only values up to 1<<63-1 can be safely encoded/decoded (bigger values are not idempotent under -/// encoding/decoding). This cannot be changed without breaking backward compatibility (some drivers, e.g. clickhouse-rs (Rust), have the -/// same limitation, others support the full 1<<64 range, e.g. 
clickhouse-driver (Python)) -constexpr UInt64 VAR_UINT_MAX = (1ULL<<63) - 1; - inline void writeVarUInt(UInt64 x, WriteBuffer & ostr) { - if (x > VAR_UINT_MAX) [[unlikely]] - throwValueTooLargeForVarIntEncoding(x); - - for (size_t i = 0; i < 9; ++i) + while (x > 0x7F) { - uint8_t byte = x & 0x7F; - if (x > 0x7F) - byte |= 0x80; + uint8_t byte = 0x80 | (x & 0x7F); ostr.nextIfAtEnd(); *ostr.position() = byte; ++ostr.position(); x >>= 7; - if (!x) - return; } + + uint8_t final_byte = static_cast(x); + + ostr.nextIfAtEnd(); + *ostr.position() = final_byte; + ++ostr.position(); } inline void writeVarUInt(UInt64 x, std::ostream & ostr) { - if (x > VAR_UINT_MAX) [[unlikely]] - throwValueTooLargeForVarIntEncoding(x); - - for (size_t i = 0; i < 9; ++i) + while (x > 0x7F) { - uint8_t byte = x & 0x7F; - if (x > 0x7F) - byte |= 0x80; - + uint8_t byte = 0x80 | (x & 0x7F); ostr.put(byte); x >>= 7; - if (!x) - return; } + + uint8_t final_byte = static_cast(x); + ostr.put(final_byte); } inline char * writeVarUInt(UInt64 x, char * ostr) { - if (x > VAR_UINT_MAX) [[unlikely]] - throwValueTooLargeForVarIntEncoding(x); - - for (size_t i = 0; i < 9; ++i) + while (x > 0x7F) { - uint8_t byte = x & 0x7F; - if (x > 0x7F) - byte |= 0x80; + uint8_t byte = 0x80 | (x & 0x7F); *ostr = byte; ++ostr; x >>= 7; - if (!x) - return ostr; } + uint8_t final_byte = static_cast(x); + + *ostr = final_byte; + ++ostr; + return ostr; } @@ -101,7 +87,7 @@ template inline void readVarUInt(UInt64 & x, ReadBuffer & istr) { x = 0; - for (size_t i = 0; i < 9; ++i) + for (size_t i = 0; i < 10; ++i) { if constexpr (check_eof) if (istr.eof()) [[unlikely]] @@ -120,7 +106,7 @@ inline void readVarUInt(UInt64 & x, ReadBuffer & istr) inline void readVarUInt(UInt64 & x, ReadBuffer & istr) { - if (istr.buffer().end() - istr.position() >= 9) + if (istr.buffer().end() - istr.position() >= 10) return impl::readVarUInt(x, istr); return impl::readVarUInt(x, istr); } @@ -128,7 +114,7 @@ inline void readVarUInt(UInt64 & x, ReadBuffer & istr) inline void readVarUInt(UInt64 & x, std::istream & istr) { x = 0; - for (size_t i = 0; i < 9; ++i) + for (size_t i = 0; i < 10; ++i) { UInt64 byte = istr.get(); x |= (byte & 0x7F) << (7 * i); @@ -143,7 +129,7 @@ inline const char * readVarUInt(UInt64 & x, const char * istr, size_t size) const char * end = istr + size; x = 0; - for (size_t i = 0; i < 9; ++i) + for (size_t i = 0; i < 10; ++i) { if (istr == end) [[unlikely]] throwReadAfterEOF(); @@ -220,7 +206,8 @@ inline size_t getLengthOfVarUInt(UInt64 x) : (x < (1ULL << 42) ? 6 : (x < (1ULL << 49) ? 7 : (x < (1ULL << 56) ? 8 - : 9))))))); + : (x < (1ULL << 63) ? 
9 + : 10)))))))); } diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 4d9fb47c893..36566832ebc 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1905,17 +1905,18 @@ void TCPHandler::sendData(const Block & block) { initBlockOutput(block); - auto prev_bytes_written_out = out->count(); - auto prev_bytes_written_compressed_out = state.maybe_compressed_out->count(); + size_t prev_bytes_written_out = out->count(); + size_t prev_bytes_written_compressed_out = state.maybe_compressed_out->count(); try { /// For testing hedged requests if (unknown_packet_in_send_data) { + constexpr UInt64 marker = (1ULL<<63) - 1; --unknown_packet_in_send_data; if (unknown_packet_in_send_data == 0) - writeVarUInt(VAR_UINT_MAX, *out); + writeVarUInt(marker, *out); } writeVarUInt(Protocol::Server::Data, *out); diff --git a/tests/queries/0_stateless/02812_large_varints.reference b/tests/queries/0_stateless/02812_large_varints.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02812_large_varints.sql b/tests/queries/0_stateless/02812_large_varints.sql new file mode 100644 index 00000000000..cfbebb7292e --- /dev/null +++ b/tests/queries/0_stateless/02812_large_varints.sql @@ -0,0 +1,4 @@ +-- 64-bit integers with MSB set (i.e. values > (1ULL<<63) - 1) could for historical/compat reasons not be serialized as var-ints (issue #51486). +-- These two queries internally produce such big values, run them to be sure no bad things happen. +SELECT topKWeightedState(65535)(now(), -2) FORMAT Null; +SELECT number FROM numbers(toUInt64(-1)) limit 10 Format Null; From c35294317dbff31b8ff8b48f6256162d6d5dc02e Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 6 Jul 2023 15:06:54 +0000 Subject: [PATCH 1304/1997] Remove parts in order for object storage always --- src/Storages/MergeTree/MergeTreeData.cpp | 34 +++++++++++++++--------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index fa9bfd38a23..0ef71895999 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -2137,20 +2137,20 @@ MergeTreeData::DataPartsVector MergeTreeData::grabOldParts(bool force) /// Please don't use "zero-copy replication" (a non-production feature) in production. /// It is not ready for production usage. Don't use it. - bool need_remove_parts_in_order = supportsReplication() && getSettings()->allow_remote_fs_zero_copy_replication; + /// It also is disabled for any object storage, because it can lead to race conditions on blob removal. + /// (see comment at `clearPartsFromFilesystemImpl`). + bool need_remove_parts_in_order = false; - if (need_remove_parts_in_order) + if (supportsReplication()) { - bool has_zero_copy_disk = false; for (const auto & disk : getDisks()) { - if (disk->supportZeroCopyReplication()) + if (disk->isRemote()) { - has_zero_copy_disk = true; + need_remove_parts_in_order = true; break; } } - need_remove_parts_in_order = has_zero_copy_disk; } std::vector parts_to_delete; @@ -2394,18 +2394,28 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t std::mutex part_names_mutex; auto runner = threadPoolCallbackRunner(getPartsCleaningThreadPool().get(), "PartsCleaning"); - /// This flag disallow straightforward concurrent parts removal. It's required only in case - /// when we have parts on zero-copy disk + at least some of them were mutated. 
+ /** Straightforward concurrent parts removal can be applied for the case + * when we have parts on object storage disk + at least some of them were mutated + * (thus, can contains hardlinks to files in the previous parts). + * If we are deleting parts that contains hardlinks to the same file we may face into race condition + * and delete only local metadata files, but not the blobs on object storage. + * Given that, we remove in parallel only "independent" parts that don't have such hardlinks. + * Note that it also may be applicable for the regular MergeTree, fixed only for Replicated. + * + * To avoid this we need to fix race conditions on parts and blob removal. + */ bool remove_parts_in_order = false; - if (settings->allow_remote_fs_zero_copy_replication && dynamic_cast(this) != nullptr) + if (dynamic_cast(this) != nullptr) { remove_parts_in_order = std::any_of( parts_to_remove.begin(), parts_to_remove.end(), - [] (const auto & data_part) { return data_part->isStoredOnRemoteDiskWithZeroCopySupport() && data_part->info.getMutationVersion() > 0; } + [] (const auto & data_part) + { + return data_part->isStoredOnRemoteDisk() && data_part->info.getMutationVersion() > 0; + } ); } - if (!remove_parts_in_order) { /// NOTE: Under heavy system load you may get "Cannot schedule a task" from ThreadPool. @@ -2441,7 +2451,7 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t /// NOTE: Under heavy system load you may get "Cannot schedule a task" from ThreadPool. LOG_DEBUG( - log, "Removing {} parts from filesystem (concurrently): Parts: [{}]", parts_to_remove.size(), fmt::join(parts_to_remove, ", ")); + log, "Removing {} parts from filesystem (concurrently in order): Parts: [{}]", parts_to_remove.size(), fmt::join(parts_to_remove, ", ")); /// We have "zero copy replication" parts and we are going to remove them in parallel. /// The problem is that all parts in a mutation chain must be removed sequentially to avoid "key does not exits" issues. 
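To restate the rule this commit arrives at, here is a simplified sketch of the ordering decision. It is a model for exposition only: `PartInfo` and its fields are hypothetical stand-ins, not the real IMergeTreeDataPart interface. A part produced by a mutation (mutation version > 0) on a remote disk may hold hardlinks into files of its ancestor parts, so the presence of even one such part forces sequential removal of the whole batch; batches without them can still be removed concurrently.

```cpp
#include <algorithm>
#include <cstdint>
#include <vector>

// Hypothetical stand-in for a data part scheduled for deletion.
struct PartInfo
{
    bool stored_on_remote_disk = false;
    uint64_t mutation_version = 0; // > 0: produced by a mutation, may hardlink into ancestors
};

// Mirrors the std::any_of check in the diff above: one mutated part on object
// storage is enough to fall back to strictly ordered (sequential) removal,
// so a hardlinked blob is never deleted while an older part still references it.
bool mustRemovePartsInOrder(const std::vector<PartInfo> & parts_to_remove)
{
    return std::any_of(parts_to_remove.begin(), parts_to_remove.end(),
        [](const PartInfo & part) { return part.stored_on_remote_disk && part.mutation_version > 0; });
}

int main()
{
    std::vector<PartInfo> batch{{true, 0}, {true, 3}};
    return mustRemovePartsInOrder(batch) ? 0 : 1; // the mutated remote part forces ordered removal
}
```

Falling back to sequential removal for the entire batch is conservative, but it avoids tracking hardlink ownership across parts while races on blob removal remain unfixed.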
From 8b6376005a730b9ae461d3fe93a55e51cd494181 Mon Sep 17 00:00:00 2001 From: Mike Kot Date: Thu, 20 Apr 2023 13:26:02 +0000 Subject: [PATCH 1305/1997] "reconfig" support for CH Keeper --- base/base/find_symbols.h | 4 +- base/base/move_extend.h | 9 + contrib/NuRaft | 2 +- programs/keeper/CMakeLists.txt | 2 + src/Common/ProfileEvents.cpp | 2 + src/Common/ZooKeeper/IKeeper.cpp | 1 + src/Common/ZooKeeper/IKeeper.h | 41 +++- src/Common/ZooKeeper/TestKeeper.cpp | 56 ++++- src/Common/ZooKeeper/TestKeeper.h | 7 + src/Common/ZooKeeper/ZooKeeper.cpp | 32 ++- src/Common/ZooKeeper/ZooKeeperCommon.cpp | 44 +++- src/Common/ZooKeeper/ZooKeeperCommon.h | 29 +++ src/Common/ZooKeeper/ZooKeeperConstants.cpp | 50 +--- src/Common/ZooKeeper/ZooKeeperConstants.h | 2 +- src/Common/ZooKeeper/ZooKeeperImpl.cpp | 28 ++- src/Common/ZooKeeper/ZooKeeperImpl.h | 7 + src/Coordination/KeeperConstants.h | 9 +- src/Coordination/KeeperContext.h | 10 +- src/Coordination/KeeperDispatcher.cpp | 103 +++++--- src/Coordination/KeeperDispatcher.h | 16 +- src/Coordination/KeeperReconfiguration.cpp | 92 +++++++ src/Coordination/KeeperReconfiguration.h | 10 + src/Coordination/KeeperServer.cpp | 232 ++++++++---------- src/Coordination/KeeperServer.h | 23 +- src/Coordination/KeeperStateMachine.cpp | 124 ++++++++-- src/Coordination/KeeperStateMachine.h | 16 +- src/Coordination/KeeperStateManager.cpp | 15 +- src/Coordination/KeeperStateManager.h | 32 +-- src/Coordination/KeeperStorage.cpp | 20 +- src/Coordination/RaftServerConfig.cpp | 96 ++++++++ src/Coordination/RaftServerConfig.h | 78 ++++++ src/Coordination/tests/gtest_coordination.cpp | 51 +++- src/Interpreters/ZooKeeperLog.cpp | 1 + src/Storages/DataLakes/HudiMetadataParser.cpp | 3 +- .../ReplicatedMergeTreeCleanupThread.cpp | 2 +- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 6 +- .../MergeTree/ReplicatedMergeTreeSink.cpp | 8 +- src/Storages/StorageReplicatedMergeTree.cpp | 16 +- tests/integration/helpers/keeper_utils.py | 33 ++- .../test_keeper_nodes_move/test.py | 5 - .../test_keeper_reconfig_add/__init__.py | 0 .../configs/keeper1.xml | 20 ++ .../configs/keeper2.xml | 21 ++ .../configs/keeper3.xml | 22 ++ .../test_keeper_reconfig_add/test.py | 155 ++++++++++++ .../test_keeper_reconfig_remove/__init__.py | 0 .../configs/keeper1.xml | 37 +++ .../configs/keeper2.xml | 37 +++ .../configs/keeper3.xml | 37 +++ .../test_keeper_reconfig_remove/test.py | 145 +++++++++++ .../__init__.py | 0 .../configs/keeper1.xml | 47 ++++ .../configs/keeper2.xml | 47 ++++ .../configs/keeper3.xml | 47 ++++ .../configs/keeper4.xml | 47 ++++ .../configs/keeper5.xml | 47 ++++ .../test_keeper_reconfig_remove_many/test.py | 149 +++++++++++ .../__init__.py | 0 .../configs/keeper1.xml | 35 +++ .../configs/keeper2.xml | 35 +++ .../configs/keeper3.xml | 35 +++ .../configs/keeper4.xml | 21 ++ .../test.py | 127 ++++++++++ .../__init__.py | 0 .../configs/keeper1.xml | 35 +++ .../configs/keeper2.xml | 35 +++ .../configs/keeper3.xml | 35 +++ .../configs/keeper4.xml | 21 ++ .../test.py | 120 +++++++++ utils/keeper-data-dumper/main.cpp | 2 +- 70 files changed, 2309 insertions(+), 367 deletions(-) create mode 100644 base/base/move_extend.h create mode 100644 src/Coordination/KeeperReconfiguration.cpp create mode 100644 src/Coordination/KeeperReconfiguration.h create mode 100644 src/Coordination/RaftServerConfig.cpp create mode 100644 src/Coordination/RaftServerConfig.h create mode 100644 tests/integration/test_keeper_reconfig_add/__init__.py create mode 100644 tests/integration/test_keeper_reconfig_add/configs/keeper1.xml 
create mode 100644 tests/integration/test_keeper_reconfig_add/configs/keeper2.xml create mode 100644 tests/integration/test_keeper_reconfig_add/configs/keeper3.xml create mode 100644 tests/integration/test_keeper_reconfig_add/test.py create mode 100644 tests/integration/test_keeper_reconfig_remove/__init__.py create mode 100644 tests/integration/test_keeper_reconfig_remove/configs/keeper1.xml create mode 100644 tests/integration/test_keeper_reconfig_remove/configs/keeper2.xml create mode 100644 tests/integration/test_keeper_reconfig_remove/configs/keeper3.xml create mode 100644 tests/integration/test_keeper_reconfig_remove/test.py create mode 100644 tests/integration/test_keeper_reconfig_remove_many/__init__.py create mode 100644 tests/integration/test_keeper_reconfig_remove_many/configs/keeper1.xml create mode 100644 tests/integration/test_keeper_reconfig_remove_many/configs/keeper2.xml create mode 100644 tests/integration/test_keeper_reconfig_remove_many/configs/keeper3.xml create mode 100644 tests/integration/test_keeper_reconfig_remove_many/configs/keeper4.xml create mode 100644 tests/integration/test_keeper_reconfig_remove_many/configs/keeper5.xml create mode 100644 tests/integration/test_keeper_reconfig_remove_many/test.py create mode 100644 tests/integration/test_keeper_reconfig_replace_leader/__init__.py create mode 100644 tests/integration/test_keeper_reconfig_replace_leader/configs/keeper1.xml create mode 100644 tests/integration/test_keeper_reconfig_replace_leader/configs/keeper2.xml create mode 100644 tests/integration/test_keeper_reconfig_replace_leader/configs/keeper3.xml create mode 100644 tests/integration/test_keeper_reconfig_replace_leader/configs/keeper4.xml create mode 100644 tests/integration/test_keeper_reconfig_replace_leader/test.py create mode 100644 tests/integration/test_keeper_reconfig_replace_leader_in_one_command/__init__.py create mode 100644 tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper1.xml create mode 100644 tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper2.xml create mode 100644 tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper3.xml create mode 100644 tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper4.xml create mode 100644 tests/integration/test_keeper_reconfig_replace_leader_in_one_command/test.py diff --git a/base/base/find_symbols.h b/base/base/find_symbols.h index 83232669c04..f7d24ccfc11 100644 --- a/base/base/find_symbols.h +++ b/base/base/find_symbols.h @@ -448,7 +448,7 @@ inline char * find_last_not_symbols_or_null(char * begin, char * end) /// See https://github.com/boostorg/algorithm/issues/63 /// And https://bugs.llvm.org/show_bug.cgi?id=41141 template -inline void splitInto(To & to, const std::string & what, bool token_compress = false) +inline To& splitInto(To & to, std::string_view what, bool token_compress = false) { const char * pos = what.data(); const char * end = pos + what.size(); @@ -464,4 +464,6 @@ inline void splitInto(To & to, const std::string & what, bool token_compress = f else pos = delimiter_or_end; } + + return to; } diff --git a/base/base/move_extend.h b/base/base/move_extend.h new file mode 100644 index 00000000000..6e5b16e037c --- /dev/null +++ b/base/base/move_extend.h @@ -0,0 +1,9 @@ +#pragma once + +/// Extend @p to by moving elements from @p from to @p to end +/// @return @p to iterator to first of moved elements. 
+template +typename To::iterator moveExtend(To & to, From && from) +{ + return to.insert(to.end(), std::make_move_iterator(from.begin()), std::make_move_iterator(from.end())); +} diff --git a/contrib/NuRaft b/contrib/NuRaft index 491eaf592d9..eb1572129c7 160000 --- a/contrib/NuRaft +++ b/contrib/NuRaft @@ -1 +1 @@ -Subproject commit 491eaf592d950e0e37accbe8b3f217e068c9fecf +Subproject commit eb1572129c71beb2156dcdaadc3fb136954aed96 diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index 18bdc8f317c..20cab03dec2 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -34,6 +34,8 @@ add_dependencies(clickhouse-keeper-lib clickhouse_keeper_configs) if (BUILD_STANDALONE_KEEPER) # Straight list of all required sources set(CLICKHOUSE_KEEPER_STANDALONE_SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperReconfiguration.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/RaftServerConfig.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/ACLMap.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/Changelog.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/CoordinationSettings.cpp diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 0838e0366df..8e3ec4f9e65 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -125,6 +125,7 @@ M(ZooKeeperMulti, "Number of 'multi' requests to ZooKeeper (compound transactions).") \ M(ZooKeeperCheck, "Number of 'check' requests to ZooKeeper. Usually they don't make sense in isolation, only as part of a complex transaction.") \ M(ZooKeeperSync, "Number of 'sync' requests to ZooKeeper. These requests are rarely needed or usable.") \ + M(ZooKeeperReconfig, "Number of 'reconfig' requests to ZooKeeper.") \ M(ZooKeeperClose, "Number of times connection with ZooKeeper has been closed voluntary.") \ M(ZooKeeperWatchResponse, "Number of times watch notification has been received from ZooKeeper.") \ M(ZooKeeperUserExceptions, "Number of exceptions while working with ZooKeeper related to the data (no node, bad version or similar).") \ @@ -499,6 +500,7 @@ The server successfully detected this situation and will download merged part fr M(KeeperCreateRequest, "Number of create requests")\ M(KeeperRemoveRequest, "Number of remove requests")\ M(KeeperSetRequest, "Number of set requests")\ + M(KeeperReconfigRequest, "Number of reconfig requests")\ M(KeeperCheckRequest, "Number of check requests")\ M(KeeperMultiRequest, "Number of multi requests")\ M(KeeperMultiReadRequest, "Number of multi read requests")\ diff --git a/src/Common/ZooKeeper/IKeeper.cpp b/src/Common/ZooKeeper/IKeeper.cpp index f0a07241735..50160279506 100644 --- a/src/Common/ZooKeeper/IKeeper.cpp +++ b/src/Common/ZooKeeper/IKeeper.cpp @@ -110,6 +110,7 @@ const char * errorMessage(Error code) case Error::ZCLOSING: return "ZooKeeper is closing"; case Error::ZNOTHING: return "(not error) no server responses to process"; case Error::ZSESSIONMOVED: return "Session moved to another server, so operation is ignored"; + case Error::ZRECONFIGINPROGRESS: return "Another reconfiguration is progress"; } UNREACHABLE(); diff --git a/src/Common/ZooKeeper/IKeeper.h b/src/Common/ZooKeeper/IKeeper.h index 2703c1079c0..20ce2a748e6 100644 --- a/src/Common/ZooKeeper/IKeeper.h +++ b/src/Common/ZooKeeper/IKeeper.h @@ -82,6 +82,7 @@ enum class Error : int32_t ZOPERATIONTIMEOUT = -7, /// Operation timeout ZBADARGUMENTS = -8, /// Invalid arguments ZINVALIDSTATE = -9, /// Invalid zhandle state + 
ZRECONFIGINPROGRESS = -14, /// Another reconfig is running /** API errors. * This is never thrown by the server, it shouldn't be used other than @@ -350,6 +351,29 @@ struct SyncResponse : virtual Response size_t bytesSize() const override { return path.size(); } }; +struct ReconfigRequest : virtual Request +{ + String joining; + String leaving; + String new_members; + int32_t version; + + String getPath() const final { return keeper_config_path; } + + size_t bytesSize() const final + { + return joining.size() + leaving.size() + new_members.size() + sizeof(version); + } +}; + +struct ReconfigResponse : virtual Response +{ + String value; + Stat stat; + + size_t bytesSize() const override { return value.size() + sizeof(stat); } +}; + struct MultiRequest : virtual Request { Requests requests; @@ -395,9 +419,9 @@ using SetCallback = std::function; using ListCallback = std::function; using CheckCallback = std::function; using SyncCallback = std::function; +using ReconfigCallback = std::function; using MultiCallback = std::function; - /// For watches. enum State { @@ -526,6 +550,13 @@ public: const String & path, SyncCallback callback) = 0; + virtual void reconfig( + std::string_view joining, + std::string_view leaving, + std::string_view new_members, + int32_t version, + ReconfigCallback callback) = 0; + virtual void multi( const Requests & requests, MultiCallback callback) = 0; @@ -539,3 +570,11 @@ public: }; } + +template <> struct fmt::formatter : fmt::formatter +{ + constexpr auto format(Coordination::Error code, auto& ctx) + { + return formatter::format(Coordination::errorMessage(code), ctx); + } +}; diff --git a/src/Common/ZooKeeper/TestKeeper.cpp b/src/Common/ZooKeeper/TestKeeper.cpp index fe4cb83c78a..87c87c4fc92 100644 --- a/src/Common/ZooKeeper/TestKeeper.cpp +++ b/src/Common/ZooKeeper/TestKeeper.cpp @@ -3,12 +3,8 @@ #include #include #include - -#include -#include #include - namespace Coordination { @@ -147,6 +143,14 @@ struct TestKeeperSyncRequest final : SyncRequest, TestKeeperRequest std::pair process(TestKeeper::Container & container, int64_t zxid) const override; }; +struct TestKeeperReconfigRequest final : ReconfigRequest, TestKeeperRequest +{ + TestKeeperReconfigRequest() = default; + explicit TestKeeperReconfigRequest(const ReconfigRequest & base) : ReconfigRequest(base) {} + ResponsePtr createResponse() const override; + std::pair process(TestKeeper::Container & container, int64_t zxid) const override; +}; + struct TestKeeperMultiRequest final : MultiRequest, TestKeeperRequest { explicit TestKeeperMultiRequest(const Requests & generic_requests) @@ -226,15 +230,7 @@ std::pair TestKeeperCreateRequest::process(TestKeeper::Contai std::string path_created = path; if (is_sequential) - { - auto seq_num = it->second.seq_num; - - std::stringstream seq_num_str; // STYLE_CHECK_ALLOW_STD_STRING_STREAM - seq_num_str.exceptions(std::ios::failbit); - seq_num_str << std::setw(10) << std::setfill('0') << seq_num; - - path_created += seq_num_str.str(); - } + path_created += fmt::format("{:0>10}", it->second.seq_num); /// Increment sequential number even if node is not sequential ++it->second.seq_num; @@ -446,6 +442,17 @@ std::pair TestKeeperSyncRequest::process(TestKeeper::Containe return { std::make_shared(std::move(response)), {} }; } +std::pair TestKeeperReconfigRequest::process(TestKeeper::Container &, int64_t) const +{ + // In TestKeeper we assume data is stored on one server, so this is a dummy implementation to + // satisfy IKeeper interface. 
+ // We can't even check the validity of input data, neither can we create the /keeper/config znode + // as we don't know the id of current "server". + ReconfigResponse response; + response.error = Error::ZOK; + return { std::make_shared(std::move(response)), {} }; +} + std::pair TestKeeperMultiRequest::process(TestKeeper::Container & container, int64_t zxid) const { MultiResponse response; @@ -505,6 +512,7 @@ ResponsePtr TestKeeperSetRequest::createResponse() const { return std::make_shar ResponsePtr TestKeeperListRequest::createResponse() const { return std::make_shared(); } ResponsePtr TestKeeperCheckRequest::createResponse() const { return std::make_shared(); } ResponsePtr TestKeeperSyncRequest::createResponse() const { return std::make_shared(); } +ResponsePtr TestKeeperReconfigRequest::createResponse() const { return std::make_shared(); } ResponsePtr TestKeeperMultiRequest::createResponse() const { return std::make_shared(); } @@ -828,6 +836,28 @@ void TestKeeper::sync( pushRequest(std::move(request_info)); } +void TestKeeper::reconfig( + std::string_view joining, + std::string_view leaving, + std::string_view new_members, + int32_t version, + ReconfigCallback callback) +{ + TestKeeperReconfigRequest req; + req.joining = joining; + req.leaving = leaving; + req.new_members = new_members; + req.version = version; + + pushRequest({ + .request = std::make_shared(std::move(req)), + .callback = [callback](const Response & response) + { + callback(dynamic_cast(response)); + } + }); +} + void TestKeeper::multi( const Requests & requests, MultiCallback callback) diff --git a/src/Common/ZooKeeper/TestKeeper.h b/src/Common/ZooKeeper/TestKeeper.h index 9bbd018cfb1..8615ed0fb77 100644 --- a/src/Common/ZooKeeper/TestKeeper.h +++ b/src/Common/ZooKeeper/TestKeeper.h @@ -87,6 +87,13 @@ public: const String & path, SyncCallback callback) override; + void reconfig( + std::string_view joining, + std::string_view leaving, + std::string_view new_members, + int32_t version, + ReconfigCallback callback) final; + void multi( const Requests & requests, MultiCallback callback) override; diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index 5dd7948276d..12b1d82133e 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -75,13 +75,14 @@ void ZooKeeper::init(ZooKeeperArgs args_) auto & host_string = host.host; try { - bool secure = startsWith(host_string, "secure://"); + const bool secure = startsWith(host_string, "secure://"); if (secure) host_string.erase(0, strlen("secure://")); - LOG_TEST(log, "Adding ZooKeeper host {} ({})", host_string, Poco::Net::SocketAddress{host_string}.toString()); - nodes.emplace_back(Coordination::ZooKeeper::Node{Poco::Net::SocketAddress{host_string}, secure}); + const Poco::Net::SocketAddress host_socket_addr{host_string}; + LOG_TEST(log, "Adding ZooKeeper host {} ({})", host_string, host_socket_addr.toString()); + nodes.emplace_back(Coordination::ZooKeeper::Node{host_socket_addr, secure}); } catch (const Poco::Net::HostNotFoundException & e) { @@ -191,12 +192,7 @@ std::vector ZooKeeper::shuffleHosts() const shuffle_hosts.emplace_back(shuffle_host); } - ::sort( - shuffle_hosts.begin(), shuffle_hosts.end(), - [](const ShuffleHost & lhs, const ShuffleHost & rhs) - { - return ShuffleHost::compare(lhs, rhs); - }); + ::sort(shuffle_hosts.begin(), shuffle_hosts.end(), ShuffleHost::compare); return shuffle_hosts; } @@ -231,7 +227,7 @@ Coordination::Error ZooKeeper::getChildrenImpl(const std::string & path, Strings if 
(future_result.wait_for(std::chrono::milliseconds(args.operation_timeout_ms)) != std::future_status::ready) { - impl->finalize(fmt::format("Operation timeout on {} {}", toString(Coordination::OpNum::List), path)); + impl->finalize(fmt::format("Operation timeout on {} {}", Coordination::OpNum::List, path)); return Coordination::Error::ZOPERATIONTIMEOUT; } else @@ -298,7 +294,7 @@ Coordination::Error ZooKeeper::createImpl(const std::string & path, const std::s if (future_result.wait_for(std::chrono::milliseconds(args.operation_timeout_ms)) != std::future_status::ready) { - impl->finalize(fmt::format("Operation timeout on {} {}", toString(Coordination::OpNum::Create), path)); + impl->finalize(fmt::format("Operation timeout on {} {}", Coordination::OpNum::Create, path)); return Coordination::Error::ZOPERATIONTIMEOUT; } else @@ -393,7 +389,7 @@ Coordination::Error ZooKeeper::removeImpl(const std::string & path, int32_t vers if (future_result.wait_for(std::chrono::milliseconds(args.operation_timeout_ms)) != std::future_status::ready) { - impl->finalize(fmt::format("Operation timeout on {} {}", toString(Coordination::OpNum::Remove), path)); + impl->finalize(fmt::format("Operation timeout on {} {}", Coordination::OpNum::Remove, path)); return Coordination::Error::ZOPERATIONTIMEOUT; } else @@ -425,7 +421,7 @@ Coordination::Error ZooKeeper::existsImpl(const std::string & path, Coordination if (future_result.wait_for(std::chrono::milliseconds(args.operation_timeout_ms)) != std::future_status::ready) { - impl->finalize(fmt::format("Operation timeout on {} {}", toString(Coordination::OpNum::Exists), path)); + impl->finalize(fmt::format("Operation timeout on {} {}", Coordination::OpNum::Exists, path)); return Coordination::Error::ZOPERATIONTIMEOUT; } else @@ -459,7 +455,7 @@ Coordination::Error ZooKeeper::getImpl(const std::string & path, std::string & r if (future_result.wait_for(std::chrono::milliseconds(args.operation_timeout_ms)) != std::future_status::ready) { - impl->finalize(fmt::format("Operation timeout on {} {}", toString(Coordination::OpNum::Get), path)); + impl->finalize(fmt::format("Operation timeout on {} {}", Coordination::OpNum::Get, path)); return Coordination::Error::ZOPERATIONTIMEOUT; } else @@ -531,7 +527,7 @@ Coordination::Error ZooKeeper::setImpl(const std::string & path, const std::stri if (future_result.wait_for(std::chrono::milliseconds(args.operation_timeout_ms)) != std::future_status::ready) { - impl->finalize(fmt::format("Operation timeout on {} {}", toString(Coordination::OpNum::Set), path)); + impl->finalize(fmt::format("Operation timeout on {} {}", Coordination::OpNum::Set, path)); return Coordination::Error::ZOPERATIONTIMEOUT; } else @@ -583,7 +579,7 @@ Coordination::Error ZooKeeper::multiImpl(const Coordination::Requests & requests if (future_result.wait_for(std::chrono::milliseconds(args.operation_timeout_ms)) != std::future_status::ready) { - impl->finalize(fmt::format("Operation timeout on {} {}", toString(Coordination::OpNum::Multi), requests[0]->getPath())); + impl->finalize(fmt::format("Operation timeout on {} {}", Coordination::OpNum::Multi, requests[0]->getPath())); return Coordination::Error::ZOPERATIONTIMEOUT; } else @@ -617,7 +613,7 @@ Coordination::Error ZooKeeper::syncImpl(const std::string & path, std::string & if (future_result.wait_for(std::chrono::milliseconds(args.operation_timeout_ms)) != std::future_status::ready) { - impl->finalize(fmt::format("Operation timeout on {} {}", toString(Coordination::OpNum::Sync), path)); + 
impl->finalize(fmt::format("Operation timeout on {} {}", Coordination::OpNum::Sync, path)); return Coordination::Error::ZOPERATIONTIMEOUT; } else @@ -1229,7 +1225,7 @@ size_t getFailedOpIndex(Coordination::Error exception_code, const Coordination:: if (!Coordination::isUserError(exception_code)) throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "There are no failed OPs because '{}' is not valid response code for that", - std::string(Coordination::errorMessage(exception_code))); + exception_code); throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "There is no failed OpResult"); } diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.cpp b/src/Common/ZooKeeper/ZooKeeperCommon.cpp index 5031af38812..c24eecbafd8 100644 --- a/src/Common/ZooKeeper/ZooKeeperCommon.cpp +++ b/src/Common/ZooKeeper/ZooKeeperCommon.cpp @@ -36,7 +36,7 @@ std::string ZooKeeperRequest::toString() const "OpNum = {}\n" "Additional info:\n{}", xid, - Coordination::toString(getOpNum()), + getOpNum(), toStringImpl()); } @@ -76,6 +76,41 @@ void ZooKeeperSyncResponse::writeImpl(WriteBuffer & out) const Coordination::write(path, out); } +void ZooKeeperReconfigRequest::writeImpl(WriteBuffer & out) const +{ + Coordination::write(joining, out); + Coordination::write(leaving, out); + Coordination::write(new_members, out); + Coordination::write(version, out); +} + +void ZooKeeperReconfigRequest::readImpl(ReadBuffer & in) +{ + Coordination::read(joining, in); + Coordination::read(leaving, in); + Coordination::read(new_members, in); + Coordination::read(version, in); +} + +std::string ZooKeeperReconfigRequest::toStringImpl() const +{ + return fmt::format( + "joining = {}\nleaving = {}\nnew_members = {}\nversion = {}", + joining, leaving, new_members, version); +} + +void ZooKeeperReconfigResponse::readImpl(ReadBuffer & in) +{ + Coordination::read(value, in); + Coordination::read(stat, in); +} + +void ZooKeeperReconfigResponse::writeImpl(WriteBuffer & out) const +{ + Coordination::write(value, out); + Coordination::write(stat, out); +} + void ZooKeeperWatchResponse::readImpl(ReadBuffer & in) { Coordination::read(type, in); @@ -664,6 +699,7 @@ ZooKeeperResponsePtr ZooKeeperRemoveRequest::makeResponse() const { return setTi ZooKeeperResponsePtr ZooKeeperExistsRequest::makeResponse() const { return setTime(std::make_shared()); } ZooKeeperResponsePtr ZooKeeperGetRequest::makeResponse() const { return setTime(std::make_shared()); } ZooKeeperResponsePtr ZooKeeperSetRequest::makeResponse() const { return setTime(std::make_shared()); } +ZooKeeperResponsePtr ZooKeeperReconfigRequest::makeResponse() const { return setTime(std::make_shared()); } ZooKeeperResponsePtr ZooKeeperListRequest::makeResponse() const { return setTime(std::make_shared()); } ZooKeeperResponsePtr ZooKeeperSimpleListRequest::makeResponse() const { return setTime(std::make_shared()); } @@ -861,7 +897,8 @@ void ZooKeeperMultiResponse::fillLogElements(LogElements & elems, size_t idx) co void ZooKeeperRequestFactory::registerRequest(OpNum op_num, Creator creator) { if (!op_num_to_request.try_emplace(op_num, creator).second) - throw Coordination::Exception("Request type " + toString(op_num) + " already registered", Coordination::Error::ZRUNTIMEINCONSISTENCY); + throw Coordination::Exception(Coordination::Error::ZRUNTIMEINCONSISTENCY, + "Request type {} already registered", op_num); } std::shared_ptr ZooKeeperRequest::read(ReadBuffer & in) @@ -916,7 +953,7 @@ ZooKeeperRequestPtr ZooKeeperRequestFactory::get(OpNum op_num) const { auto it = op_num_to_request.find(op_num); if (it == 
op_num_to_request.end()) - throw Exception("Unknown operation type " + toString(op_num), Error::ZBADARGUMENTS); + throw Exception(Error::ZBADARGUMENTS, "Unknown operation type {}", op_num); return it->second(); } @@ -960,6 +997,7 @@ ZooKeeperRequestFactory::ZooKeeperRequestFactory() registerZooKeeperRequest(*this); registerZooKeeperRequest(*this); registerZooKeeperRequest(*this); + registerZooKeeperRequest(*this); registerZooKeeperRequest(*this); registerZooKeeperRequest(*this); registerZooKeeperRequest(*this); diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.h b/src/Common/ZooKeeper/ZooKeeperCommon.h index 5f00698423e..131d19f1ca4 100644 --- a/src/Common/ZooKeeper/ZooKeeperCommon.h +++ b/src/Common/ZooKeeper/ZooKeeperCommon.h @@ -117,6 +117,35 @@ struct ZooKeeperSyncResponse final : SyncResponse, ZooKeeperResponse OpNum getOpNum() const override { return OpNum::Sync; } }; +struct ZooKeeperReconfigRequest final : ZooKeeperRequest +{ + String joining; + String leaving; + String new_members; + int64_t version; // kazoo sends a 64bit integer in this request + + String getPath() const override { return keeper_config_path; } + OpNum getOpNum() const override { return OpNum::Reconfig; } + void writeImpl(WriteBuffer & out) const override; + void readImpl(ReadBuffer & in) override; + std::string toStringImpl() const override; + ZooKeeperResponsePtr makeResponse() const override; + bool isReadRequest() const override { return false; } + + size_t bytesSize() const override + { + return ZooKeeperRequest::bytesSize() + joining.size() + leaving.size() + new_members.size() + + sizeof(version); + } +}; + +struct ZooKeeperReconfigResponse final : ReconfigResponse, ZooKeeperResponse +{ + void readImpl(ReadBuffer & in) override; + void writeImpl(WriteBuffer & out) const override; + OpNum getOpNum() const override { return OpNum::Reconfig; } +}; + struct ZooKeeperHeartbeatResponse final : ZooKeeperResponse { void readImpl(ReadBuffer &) override {} diff --git a/src/Common/ZooKeeper/ZooKeeperConstants.cpp b/src/Common/ZooKeeper/ZooKeeperConstants.cpp index 86f70ea547a..9bb9c7b0488 100644 --- a/src/Common/ZooKeeper/ZooKeeperConstants.cpp +++ b/src/Common/ZooKeeper/ZooKeeperConstants.cpp @@ -19,6 +19,7 @@ static const std::unordered_set VALID_OPERATIONS = static_cast(OpNum::Heartbeat), static_cast(OpNum::List), static_cast(OpNum::Check), + static_cast(OpNum::Reconfig), static_cast(OpNum::Multi), static_cast(OpNum::MultiRead), static_cast(OpNum::Auth), @@ -29,55 +30,6 @@ static const std::unordered_set VALID_OPERATIONS = static_cast(OpNum::CheckNotExists), }; -std::string toString(OpNum op_num) -{ - switch (op_num) - { - case OpNum::Close: - return "Close"; - case OpNum::Error: - return "Error"; - case OpNum::Create: - return "Create"; - case OpNum::Remove: - return "Remove"; - case OpNum::Exists: - return "Exists"; - case OpNum::Get: - return "Get"; - case OpNum::Set: - return "Set"; - case OpNum::SimpleList: - return "SimpleList"; - case OpNum::List: - return "List"; - case OpNum::Check: - return "Check"; - case OpNum::Multi: - return "Multi"; - case OpNum::MultiRead: - return "MultiRead"; - case OpNum::Sync: - return "Sync"; - case OpNum::Heartbeat: - return "Heartbeat"; - case OpNum::Auth: - return "Auth"; - case OpNum::SessionID: - return "SessionID"; - case OpNum::SetACL: - return "SetACL"; - case OpNum::GetACL: - return "GetACL"; - case OpNum::FilteredList: - return "FilteredList"; - case OpNum::CheckNotExists: - return "CheckNotExists"; - } - int32_t raw_op = static_cast(op_num); - throw 
Exception("Operation " + std::to_string(raw_op) + " is unknown", Error::ZUNIMPLEMENTED); -} - OpNum getOpNum(int32_t raw_op_num) { if (!VALID_OPERATIONS.contains(raw_op_num)) diff --git a/src/Common/ZooKeeper/ZooKeeperConstants.h b/src/Common/ZooKeeper/ZooKeeperConstants.h index 6b50c5c5d09..a773fbbab74 100644 --- a/src/Common/ZooKeeper/ZooKeeperConstants.h +++ b/src/Common/ZooKeeper/ZooKeeperConstants.h @@ -31,6 +31,7 @@ enum class OpNum : int32_t List = 12, Check = 13, Multi = 14, + Reconfig = 16, MultiRead = 22, Auth = 100, @@ -41,7 +42,6 @@ enum class OpNum : int32_t SessionID = 997, /// Special internal request }; -std::string toString(OpNum op_num); OpNum getOpNum(int32_t raw_op_num); static constexpr int32_t ZOOKEEPER_PROTOCOL_VERSION = 0; diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index 0f27d078234..5e16a437be3 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -35,6 +35,7 @@ namespace ProfileEvents extern const Event ZooKeeperRemove; extern const Event ZooKeeperExists; extern const Event ZooKeeperMulti; + extern const Event ZooKeeperReconfig; extern const Event ZooKeeperGet; extern const Event ZooKeeperSet; extern const Event ZooKeeperList; @@ -571,7 +572,7 @@ void ZooKeeper::sendAuth(const String & scheme, const String & data) if (err != Error::ZOK) throw Exception(Error::ZMARSHALLINGERROR, "Error received in reply to auth request. Code: {}. Message: {}", - static_cast(err), errorMessage(err)); + static_cast(err), err); } void ZooKeeper::sendThread() @@ -697,7 +698,7 @@ void ZooKeeper::receiveThread() if (earliest_operation) { throw Exception(Error::ZOPERATIONTIMEOUT, "Operation timeout (no response in {} ms) for request {} for path: {}", - args.operation_timeout_ms, toString(earliest_operation->request->getOpNum()), earliest_operation->request->getPath()); + args.operation_timeout_ms, earliest_operation->request->getOpNum(), earliest_operation->request->getPath()); } waited_us += max_wait_us; if (waited_us >= args.session_timeout_ms * 1000) @@ -738,7 +739,7 @@ void ZooKeeper::receiveEvent() if (xid == PING_XID) { if (err != Error::ZOK) - throw Exception(Error::ZRUNTIMEINCONSISTENCY, "Received error in heartbeat response: {}", errorMessage(err)); + throw Exception(Error::ZRUNTIMEINCONSISTENCY, "Received error in heartbeat response: {}", err); response = std::make_shared(); } @@ -1195,7 +1196,6 @@ void ZooKeeper::create( ProfileEvents::increment(ProfileEvents::ZooKeeperCreate); } - void ZooKeeper::remove( const String & path, int32_t version, @@ -1335,6 +1335,26 @@ void ZooKeeper::sync( ProfileEvents::increment(ProfileEvents::ZooKeeperSync); } +void ZooKeeper::reconfig( + std::string_view joining, + std::string_view leaving, + std::string_view new_members, + int32_t version, + ReconfigCallback callback) +{ + ZooKeeperReconfigRequest request; + request.joining = joining; + request.leaving = leaving; + request.new_members = new_members; + request.version = version; + + RequestInfo request_info; + request_info.request = std::make_shared(std::move(request)); + request_info.callback = [callback](const Response & response) { callback(dynamic_cast(response)); }; + + pushRequest(std::move(request_info)); + ProfileEvents::increment(ProfileEvents::ZooKeeperReconfig); +} void ZooKeeper::multi( const Requests & requests, diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.h b/src/Common/ZooKeeper/ZooKeeperImpl.h index 44ea993947e..7e27608d0a1 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.h +++ 
b/src/Common/ZooKeeper/ZooKeeperImpl.h @@ -178,6 +178,13 @@ public: const String & path, SyncCallback callback) override; + void reconfig( + std::string_view joining, + std::string_view leaving, + std::string_view new_members, + int32_t version, + ReconfigCallback callback) final; + void multi( const Requests & requests, MultiCallback callback) override; diff --git a/src/Coordination/KeeperConstants.h b/src/Coordination/KeeperConstants.h index 84cbb0ab7c5..675001d51e0 100644 --- a/src/Coordination/KeeperConstants.h +++ b/src/Coordination/KeeperConstants.h @@ -1,5 +1,4 @@ #pragma once - #include namespace DB @@ -14,8 +13,8 @@ enum class KeeperApiVersion : uint8_t WITH_CHECK_NOT_EXISTS, }; -const std::string keeper_system_path = "/keeper"; -const std::string keeper_api_version_path = keeper_system_path + "/api_version"; -const std::string keeper_api_feature_flags_path = keeper_system_path + "/feature_flags"; - +const String keeper_system_path = "/keeper"; +const String keeper_api_version_path = keeper_system_path + "/api_version"; +const String keeper_api_feature_flags_path = keeper_system_path + "/feature_flags"; +const String keeper_config_path = keeper_system_path + "/config"; } diff --git a/src/Coordination/KeeperContext.h b/src/Coordination/KeeperContext.h index 229dbd51ab2..4fb552f20a3 100644 --- a/src/Coordination/KeeperContext.h +++ b/src/Coordination/KeeperContext.h @@ -1,10 +1,8 @@ #pragma once - -#include - #include -#include #include +#include +#include #include #include @@ -12,6 +10,8 @@ namespace DB { +class KeeperDispatcher; + class KeeperContext { public: @@ -51,6 +51,7 @@ public: const KeeperFeatureFlags & getFeatureFlags() const; void dumpConfiguration(WriteBufferFromOwnString & buf) const; + private: /// local disk defined using path or disk name using Storage = std::variant; @@ -85,6 +86,7 @@ private: std::unordered_map system_nodes_with_data; KeeperFeatureFlags feature_flags; + KeeperDispatcher * dispatcher{nullptr}; }; using KeeperContextPtr = std::shared_ptr; diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index 9d9df5c7f30..178453b2f5b 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -38,6 +38,8 @@ namespace ProfileEvents extern const Event MemoryAllocatorPurgeTimeMicroseconds; } +using namespace std::chrono_literals; + namespace DB { @@ -336,6 +338,7 @@ void KeeperDispatcher::initialize(const Poco::Util::AbstractConfiguration & conf keeper_context = std::make_shared(standalone_keeper); keeper_context->initialize(config); + keeper_context->dispatcher = this; server = std::make_unique( configuration_and_settings, @@ -392,7 +395,10 @@ void KeeperDispatcher::initialize(const Poco::Util::AbstractConfiguration & conf /// Start it after keeper server start session_cleaner_thread = ThreadFromGlobalPool([this] { sessionCleanerTask(); }); - update_configuration_thread = ThreadFromGlobalPool([this] { updateConfigurationThread(); }); + + update_configuration_thread = reconfigEnabled() + ? 
ThreadFromGlobalPool([this] { clusterUpdateThread(); }) + : ThreadFromGlobalPool([this] { clusterUpdateWithReconfigDisabledThread(); }); LOG_DEBUG(log, "Dispatcher initialized"); } @@ -429,7 +435,7 @@ void KeeperDispatcher::shutdown() if (snapshot_thread.joinable()) snapshot_thread.join(); - update_configuration_queue.finish(); + cluster_update_queue.finish(); if (update_configuration_thread.joinable()) update_configuration_thread.join(); } @@ -608,7 +614,7 @@ void KeeperDispatcher::addErrorResponses(const KeeperStorage::RequestsForSession "Could not push error response xid {} zxid {} error message {} to responses queue", response->xid, response->zxid, - errorMessage(error)); + error); } } @@ -653,7 +659,7 @@ int64_t KeeperDispatcher::getSessionID(int64_t session_timeout_ms) { if (response->getOpNum() != Coordination::OpNum::SessionID) promise->set_exception(std::make_exception_ptr(Exception(ErrorCodes::LOGICAL_ERROR, - "Incorrect response of type {} instead of SessionID response", Coordination::toString(response->getOpNum())))); + "Incorrect response of type {} instead of SessionID response", response->getOpNum()))); auto session_id_response = dynamic_cast(*response); if (session_id_response.internal_id != internal_id) @@ -685,17 +691,12 @@ int64_t KeeperDispatcher::getSessionID(int64_t session_timeout_ms) return future.get(); } - -void KeeperDispatcher::updateConfigurationThread() +void KeeperDispatcher::clusterUpdateWithReconfigDisabledThread() { - while (true) + while (!shutdown_called) { - if (shutdown_called) - return; - try { - using namespace std::chrono_literals; if (!server->checkInit()) { LOG_INFO(log, "Server still not initialized, will not apply configuration until initialization finished"); @@ -710,11 +711,10 @@ void KeeperDispatcher::updateConfigurationThread() continue; } - ConfigUpdateAction action; - if (!update_configuration_queue.pop(action)) + ClusterUpdateAction action; + if (!cluster_update_queue.pop(action)) break; - /// We must wait this update from leader or apply it ourself (if we are leader) bool done = false; while (!done) @@ -727,15 +727,13 @@ void KeeperDispatcher::updateConfigurationThread() if (isLeader()) { - server->applyConfigurationUpdate(action); + server->applyConfigUpdateWithReconfigDisabled(action); done = true; } - else - { - done = server->waitConfigurationUpdate(action); - if (!done) - LOG_INFO(log, "Cannot wait for configuration update, maybe we become leader, or maybe update is invalid, will try to wait one more time"); - } + else if (done = server->waitForConfigUpdateWithReconfigDisabled(action); !done) + LOG_INFO(log, + "Cannot wait for configuration update, maybe we became leader " + "or maybe update is invalid, will try to wait one more time"); } } catch (...) @@ -745,6 +743,46 @@ void KeeperDispatcher::updateConfigurationThread() } } +void KeeperDispatcher::clusterUpdateThread() +{ + while (!shutdown_called) + { + ClusterUpdateAction action; + if (!cluster_update_queue.pop(action)) + return; + + if (server->applyConfigUpdate(action)) + LOG_DEBUG(log, "Processing config update {}: accepted", action); + else // TODO (myrrc) sleep a random amount? sleep less? 
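The branch that follows is the heart of the new update loop: a declined action is pushed back to the front of the queue, so pending updates keep their original order, and the thread backs off for a constant 50ms before retrying. A minimal standalone sketch of that retry shape, with hypothetical Action and Queue types standing in for ClusterUpdateAction and ConcurrentBoundedQueue:

    #include <chrono>
    #include <deque>
    #include <mutex>
    #include <thread>

    // Hypothetical stand-ins; only the retry shape matters here.
    struct Action { int id = 0; };

    class Queue
    {
        std::deque<Action> items;
        std::mutex mutex;
    public:
        bool pop(Action & out)
        {
            std::lock_guard lock(mutex);
            if (items.empty())
                return false;
            out = items.front();
            items.pop_front();
            return true;
        }
        void pushFront(Action action)
        {
            std::lock_guard lock(mutex);
            items.push_front(action);
        }
    };

    void updateLoop(Queue & queue, bool (*apply)(const Action &))
    {
        using namespace std::chrono_literals;
        for (Action action; queue.pop(action);)
        {
            if (apply(action))
                continue;                      // accepted: take the next pending action
            queue.pushFront(action);           // declined: keep it first in line
            std::this_thread::sleep_for(50ms); // constant backoff, mirroring the code above
        }
    }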
+ { + (void)cluster_update_queue.pushFront(action); + LOG_DEBUG(log, "Processing config update {}: declined, backoff", action); + std::this_thread::sleep_for(50ms); + } + } +} + +void KeeperDispatcher::pushClusterUpdates(ClusterUpdateActions&& actions) +{ + if (shutdown_called) return; + for (auto && action : actions) + { + if (!cluster_update_queue.push(std::move(action))) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot push configuration update"); + LOG_DEBUG(log, "Processing config update {}: pushed", action); + } +} + +bool KeeperDispatcher::clusterUpdateQueueEmpty() const +{ + return cluster_update_queue.empty(); +} + +bool KeeperDispatcher::reconfigEnabled() const +{ + return server->reconfigEnabled(); +} + bool KeeperDispatcher::isServerActive() const { return checkInit() && hasLeader() && !server->isRecovering(); @@ -752,20 +790,25 @@ bool KeeperDispatcher::isServerActive() const void KeeperDispatcher::updateConfiguration(const Poco::Util::AbstractConfiguration & config, const MultiVersion::Version & macros) { - auto diff = server->getConfigurationDiff(config); + auto diff = server->getRaftConfigurationDiff(config); + if (diff.empty()) - LOG_TRACE(log, "Configuration update triggered, but nothing changed for RAFT"); + LOG_TRACE(log, "Configuration update triggered, but nothing changed for Raft"); + else if (reconfigEnabled()) + LOG_WARNING(log, + "Raft configuration changed, but keeper_server.enable_reconfiguration is on. " + "This update will be ignored. Use \"reconfig\" instead"); else if (diff.size() > 1) - LOG_WARNING(log, "Configuration changed for more than one server ({}) from cluster, it's strictly not recommended", diff.size()); + LOG_WARNING(log, + "Configuration changed for more than one server ({}) from cluster, " + "it's strictly not recommended", diff.size()); else LOG_DEBUG(log, "Configuration change size ({})", diff.size()); - for (auto & change : diff) - { - bool push_result = update_configuration_queue.push(change); - if (!push_result) - throw Exception(ErrorCodes::SYSTEM_ERROR, "Cannot push configuration update to queue"); - } + if (!reconfigEnabled()) + for (auto & change : diff) + if (!cluster_update_queue.push(change)) + throw Exception(ErrorCodes::SYSTEM_ERROR, "Cannot push configuration update to queue"); snapshot_s3.updateS3Configuration(config, macros); } diff --git a/src/Coordination/KeeperDispatcher.h b/src/Coordination/KeeperDispatcher.h index 1b44f0f6ced..a9b3d33eb51 100644 --- a/src/Coordination/KeeperDispatcher.h +++ b/src/Coordination/KeeperDispatcher.h @@ -31,7 +31,7 @@ private: using RequestsQueue = ConcurrentBoundedQueue; using SessionToResponseCallback = std::unordered_map; - using UpdateConfigurationQueue = ConcurrentBoundedQueue; + using ClusterUpdateQueue = ConcurrentBoundedQueue; /// Size depends on coordination settings std::unique_ptr requests_queue; @@ -39,7 +39,7 @@ private: SnapshotsQueue snapshots_queue{1}; /// More than 1k updates is definitely misconfiguration. 
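pushClusterUpdates above is what a committed "reconfig" request ultimately feeds; the validation guarding it lives in KeeperStateMachine::processReconfiguration further below. As a sketch, a request that passes those checks looks like this (hypothetical struct mirroring the data members of ZooKeeperReconfigRequest; only incremental changes are accepted):

    #include <cstdint>
    #include <string>

    // Hypothetical mirror of ZooKeeperReconfigRequest's fields, not the real type.
    struct ReconfigRequestFields
    {
        std::string joining;
        std::string leaving;
        std::string new_members;
        int64_t version = -1;
    };

    ReconfigRequestFields exampleIncrementalReconfig()
    {
        return {
            .joining = "server.4=node4:9234;participant;1", // add one participant
            .leaving = "3",                                 // remove server id 3
            .new_members = "",                              // must stay empty: non-incremental reconfig is rejected
            .version = -1,                                  // any other value is answered with ZBADVERSION
        };
    }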
- UpdateConfigurationQueue update_configuration_queue{1000}; + ClusterUpdateQueue cluster_update_queue{1000}; std::atomic shutdown_called{false}; @@ -91,8 +91,10 @@ private: void sessionCleanerTask(); /// Thread create snapshots in the background void snapshotThread(); - /// Thread apply or wait configuration changes from leader - void updateConfigurationThread(); + + // TODO (myrrc) this should be removed once "reconfig" is stabilized + void clusterUpdateWithReconfigDisabledThread(); + void clusterUpdateThread(); void setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response); @@ -132,10 +134,10 @@ public: /// and achieved quorum bool isServerActive() const; - /// Registered in ConfigReloader callback. Add new configuration changes to - /// update_configuration_queue. Keeper Dispatcher apply them asynchronously. - /// 'macros' are used to substitute macros in endpoint of disks void updateConfiguration(const Poco::Util::AbstractConfiguration & config, const MultiVersion::Version & macros); + void pushClusterUpdates(ClusterUpdateActions&& actions); + bool clusterUpdateQueueEmpty() const; + bool reconfigEnabled() const; /// Shutdown internal keeper parts (server, state machine, log storage, etc) void shutdown(); diff --git a/src/Coordination/KeeperReconfiguration.cpp b/src/Coordination/KeeperReconfiguration.cpp new file mode 100644 index 00000000000..dec3e1f155f --- /dev/null +++ b/src/Coordination/KeeperReconfiguration.cpp @@ -0,0 +1,92 @@ +#include "KeeperReconfiguration.h" +#include +#include +#include +#include + +namespace DB +{ +ClusterUpdateActions joiningToClusterUpdates(const ClusterConfigPtr & cfg, std::string_view joining) +{ + ClusterUpdateActions out; + std::unordered_set endpoints; + + for (const auto & server : cfg->get_servers()) + endpoints.emplace(server->get_endpoint()); + + // We can either add new servers or change weight of existing ones. + // It makes no sense having a server in _joining_ which is identical to existing one including + // weight, so such requests are declined. 
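The loop below and leavingToClusterUpdates then turn these strings into typed actions. A worked example, assuming the current config holds participants 1 through 3 at node1:9234 through node3:9234 with priority 1 (illustrative values, not output of the tests added later in this patch):

    // joiningToClusterUpdates(cfg, "server.4=node4:9234;learner;10")
    //     -> { AddRaftServer{4, "node4:9234", /*learner=*/true, /*priority=*/10} }
    // joiningToClusterUpdates(cfg, "server.1=node1:9234;participant;42")
    //     -> { UpdateRaftServerPriority{.id = 1, .priority = 42} } // same endpoint/type, new priority
    // joiningToClusterUpdates(cfg, "server.1=node1:9234;participant;1")
    //     -> {} // identical to the existing server, declined
    //
    // leavingToClusterUpdates(cfg, "3")     -> { RemoveRaftServer{.id = 3} }
    // leavingToClusterUpdates(cfg, "3,3")   -> { RemoveRaftServer{.id = 3} } // duplicate ids are skipped
    // leavingToClusterUpdates(cfg, "1,2,3") -> {} // refuses to remove every server in the cluster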
+ for (const RaftServerConfig & update : parseRaftServers(joining)) + if (auto server_ptr = cfg->get_server(update.id)) + { + if (update.endpoint != server_ptr->get_endpoint() || update.learner != server_ptr->is_learner() + || update.priority == server_ptr->get_priority()) + return {}; // can't change server endpoint/type due to NuRaft API limitations + out.emplace_back(UpdateRaftServerPriority{.id = update.id, .priority = update.priority}); + } + else if (endpoints.contains(update.endpoint)) + return {}; + else + out.emplace_back(AddRaftServer{update}); + + return out; +} + +ClusterUpdateActions leavingToClusterUpdates(const ClusterConfigPtr & cfg, std::string_view leaving) +{ + std::vector leaving_arr; + splitInto<','>(leaving_arr, leaving); + if (leaving_arr.size() >= cfg->get_servers().size()) + return {}; + + std::unordered_set remove_ids; + ClusterUpdateActions out; + + for (std::string_view leaving_server : leaving_arr) + { + int id; + if (std::from_chars(leaving_server.begin(), leaving_server.end(), id).ec != std::error_code{}) + return {}; + + if (remove_ids.contains(id)) + continue; + + if (auto ptr = cfg->get_server(id)) + out.emplace_back(RemoveRaftServer{.id = id}); + else + return {}; + + remove_ids.emplace(id); + } + + return out; +} + +String serializeClusterConfig(const ClusterConfigPtr & cfg, const ClusterUpdateActions & updates) +{ + RaftServers new_config; + std::unordered_set remove_update_ids; + + for (const auto & update : updates) + { + if (const auto * add = std::get_if(&update)) + new_config.emplace_back(*add); + else if (const auto * remove = std::get_if(&update)) + remove_update_ids.insert(remove->id); + else if (const auto * priority = std::get_if(&update)) + { + remove_update_ids.insert(priority->id); + new_config.emplace_back(RaftServerConfig{*cfg->get_server(priority->id)}); + } + else + UNREACHABLE(); + } + + for (const auto & item : cfg->get_servers()) + if (!remove_update_ids.contains(item->get_id())) + new_config.emplace_back(RaftServerConfig{*item}); + + return fmt::format("{}", fmt::join(new_config.begin(), new_config.end(), "\n")); +} +} diff --git a/src/Coordination/KeeperReconfiguration.h b/src/Coordination/KeeperReconfiguration.h new file mode 100644 index 00000000000..71958f2035e --- /dev/null +++ b/src/Coordination/KeeperReconfiguration.h @@ -0,0 +1,10 @@ +#pragma once +#include "Coordination/KeeperSnapshotManager.h" +#include "Coordination/RaftServerConfig.h" + +namespace DB +{ +ClusterUpdateActions joiningToClusterUpdates(const ClusterConfigPtr & cfg, std::string_view joining); +ClusterUpdateActions leavingToClusterUpdates(const ClusterConfigPtr & cfg, std::string_view leaving); +String serializeClusterConfig(const ClusterConfigPtr & cfg, const ClusterUpdateActions & updates = {}); +} diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 82c843287c1..1cde957ef3a 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -27,6 +27,7 @@ #include #include #include +#include namespace DB { @@ -40,6 +41,8 @@ namespace ErrorCodes extern const int INVALID_CONFIG_PARAMETER; } +using namespace std::chrono_literals; + namespace { @@ -118,6 +121,7 @@ KeeperServer::KeeperServer( , is_recovering(config.getBool("keeper_server.force_recovery", false)) , keeper_context{std::move(keeper_context_)} , create_snapshot_on_exit(config.getBool("keeper_server.create_snapshot_on_exit", true)) + , enable_reconfiguration(config.getBool("keeper_server.enable_reconfiguration", false)) { if 
(coordination_settings->quorum_reads) LOG_WARNING(log, "Quorum reads enabled, Keeper will work slower."); @@ -450,7 +454,7 @@ void KeeperServer::shutdownRaftServer() size_t count = 0; while (asio_service->get_active_workers() != 0 && count < timeout * 100) { - std::this_thread::sleep_for(std::chrono::milliseconds(10)); + std::this_thread::sleep_for(10ms); count++; } } @@ -715,10 +719,12 @@ nuraft::cb_func::ReturnCode KeeperServer::callbackFunc(nuraft::cb_func::Type typ if (next_index < last_commited || next_index - last_commited <= 1) commited_store = true; - auto set_initialized = [this]() + auto set_initialized = [this] { - std::lock_guard lock(initialized_mutex); - initialized_flag = true; + { + std::lock_guard lock(initialized_mutex); + initialized_flag = true; + } initialized_cv.notify_all(); }; @@ -783,9 +789,42 @@ std::vector KeeperServer::getDeadSessions() return state_machine->getDeadSessions(); } -ConfigUpdateActions KeeperServer::getConfigurationDiff(const Poco::Util::AbstractConfiguration & config) +bool KeeperServer::applyConfigUpdate(const ClusterUpdateAction& action) { - auto diff = state_manager->getConfigurationDiff(config); + std::lock_guard _{server_write_mutex}; + + if (const auto* add = std::get_if(&action)) + return raft_instance->get_srv_config(add->id) != nullptr + || raft_instance->add_srv(static_cast(*add))->get_accepted(); + else if (const auto* remove = std::get_if(&action)) + { + if (isLeader() && remove->id == state_manager->server_id()) + { + raft_instance->yield_leadership(); + return false; + } + + return raft_instance->get_srv_config(remove->id) == nullptr + || raft_instance->remove_srv(remove->id)->get_accepted(); + } + else if (const auto* update = std::get_if(&action)) + { + if (auto ptr = raft_instance->get_srv_config(update->id); ptr == nullptr) + throw Exception(ErrorCodes::RAFT_ERROR, + "Attempt to apply {} but server is not present in Raft", + action); + else if (ptr->get_priority() == update->priority) + return true; + + raft_instance->set_priority(update->id, update->priority, /*broadcast on live leader*/true); + return true; + } + UNREACHABLE(); +} + +ClusterUpdateActions KeeperServer::getRaftConfigurationDiff(const Poco::Util::AbstractConfiguration & config) +{ + auto diff = state_manager->getRaftConfigurationDiff(config); if (!diff.empty()) { @@ -796,160 +835,103 @@ ConfigUpdateActions KeeperServer::getConfigurationDiff(const Poco::Util::Abstrac return diff; } -void KeeperServer::applyConfigurationUpdate(const ConfigUpdateAction & task) +void KeeperServer::applyConfigUpdateWithReconfigDisabled(const ClusterUpdateAction& action) { - std::lock_guard lock{server_write_mutex}; - if (is_recovering) - return; + std::lock_guard _{server_write_mutex}; + if (is_recovering) return; + constexpr auto sleep_time = 500ms; - size_t sleep_ms = 500; - if (task.action_type == ConfigUpdateActionType::AddServer) + LOG_INFO(log, "Will try to apply {}", action); + + auto applied = [&] { LOG_INFO(log, "Applied {}", action); }; + auto not_leader = [&] { LOG_INFO(log, "Not leader anymore, aborting"); }; + auto backoff_on_refusal = [&](size_t i) + { + LOG_INFO(log, "Update was not accepted (try {}), backing off for {}", i + 1, sleep_time * (i + 1)); + std::this_thread::sleep_for(sleep_time * (i + 1)); + }; + + if (const auto* add = std::get_if(&action)) { - LOG_INFO(log, "Will try to add server with id {}", task.server->get_id()); - bool added = false; for (size_t i = 0; i < coordination_settings->configuration_change_tries_count && !is_recovering; ++i) { - if 
(raft_instance->get_srv_config(task.server->get_id()) != nullptr) - { - LOG_INFO(log, "Server with id {} was successfully added", task.server->get_id()); - added = true; - break; - } - + if (raft_instance->get_srv_config(add->id) != nullptr) + return applied(); if (!isLeader()) - { - LOG_INFO(log, "We are not leader anymore, will not try to add server {}", task.server->get_id()); - break; - } - - auto result = raft_instance->add_srv(*task.server); - if (!result->get_accepted()) - LOG_INFO( - log, - "Command to add server {} was not accepted for the {} time, will sleep for {} ms and retry", - task.server->get_id(), - i + 1, - sleep_ms * (i + 1)); - - std::this_thread::sleep_for(std::chrono::milliseconds(sleep_ms * (i + 1))); + return not_leader(); + if (!raft_instance->add_srv(static_cast(*add))->get_accepted()) + backoff_on_refusal(i); } - if (!added) - throw Exception( - ErrorCodes::RAFT_ERROR, - "Configuration change to add server (id {}) was not accepted by RAFT after all {} retries", - task.server->get_id(), - coordination_settings->configuration_change_tries_count); } - else if (task.action_type == ConfigUpdateActionType::RemoveServer) + else if (const auto* remove = std::get_if(&action)) { - LOG_INFO(log, "Will try to remove server with id {}", task.server->get_id()); - - bool removed = false; - if (task.server->get_id() == state_manager->server_id()) + if (remove->id == state_manager->server_id()) { - LOG_INFO( - log, - "Trying to remove leader node (ourself), so will yield leadership and some other node (new leader) will try remove us. " + LOG_INFO(log, + "Trying to remove leader node (ourself), so will yield leadership and some other node " + "(new leader) will try to remove us. " "Probably you will have to run SYSTEM RELOAD CONFIG on the new leader node"); - - raft_instance->yield_leadership(); - return; + return raft_instance->yield_leadership(); } for (size_t i = 0; i < coordination_settings->configuration_change_tries_count && !is_recovering; ++i) { - if (raft_instance->get_srv_config(task.server->get_id()) == nullptr) - { - LOG_INFO(log, "Server with id {} was successfully removed", task.server->get_id()); - removed = true; - break; - } - + if (raft_instance->get_srv_config(remove->id) == nullptr) + return applied(); if (!isLeader()) - { - LOG_INFO(log, "We are not leader anymore, will not try to remove server {}", task.server->get_id()); - break; - } - - auto result = raft_instance->remove_srv(task.server->get_id()); - if (!result->get_accepted()) - LOG_INFO( - log, - "Command to remove server {} was not accepted for the {} time, will sleep for {} ms and retry", - task.server->get_id(), - i + 1, - sleep_ms * (i + 1)); - - std::this_thread::sleep_for(std::chrono::milliseconds(sleep_ms * (i + 1))); + return not_leader(); + if (!raft_instance->remove_srv(remove->id)->get_accepted()) + backoff_on_refusal(i); } - if (!removed) - throw Exception( - ErrorCodes::RAFT_ERROR, - "Configuration change to remove server (id {}) was not accepted by RAFT after all {} retries", - task.server->get_id(), - coordination_settings->configuration_change_tries_count); } - else if (task.action_type == ConfigUpdateActionType::UpdatePriority) - raft_instance->set_priority(task.server->get_id(), task.server->get_priority()); - else - LOG_WARNING(log, "Unknown configuration update type {}", static_cast(task.action_type)); + else if (const auto* update = std::get_if(&action)) + { + raft_instance->set_priority(update->id, update->priority, /*broadcast on live leader*/true); + return; + } + + throw 
Exception(ErrorCodes::RAFT_ERROR, + "Configuration change {} was not accepted by Raft after {} retries", + action, coordination_settings->configuration_change_tries_count); } - -bool KeeperServer::waitConfigurationUpdate(const ConfigUpdateAction & task) +bool KeeperServer::waitForConfigUpdateWithReconfigDisabled(const ClusterUpdateAction& action) { - if (is_recovering) - return false; + if (is_recovering) return false; + constexpr auto sleep_time = 500ms; - size_t sleep_ms = 500; - if (task.action_type == ConfigUpdateActionType::AddServer) + LOG_INFO(log, "Will try to wait for {}", action); + + auto applied = [&] { LOG_INFO(log, "Applied {}", action); return true; }; + auto became_leader = [&] { LOG_INFO(log, "Became leader, aborting"); return false; }; + auto backoff = [&](size_t i) { std::this_thread::sleep_for(sleep_time * (i + 1)); }; + + if (const auto* add = std::get_if(&action)) { - LOG_INFO(log, "Will try to wait server with id {} to be added", task.server->get_id()); for (size_t i = 0; i < coordination_settings->configuration_change_tries_count && !is_recovering; ++i) { - if (raft_instance->get_srv_config(task.server->get_id()) != nullptr) - { - LOG_INFO(log, "Server with id {} was successfully added by leader", task.server->get_id()); - return true; - } - + if (raft_instance->get_srv_config(add->id) != nullptr) + return applied(); if (isLeader()) - { - LOG_INFO(log, "We are leader now, probably we will have to add server {}", task.server->get_id()); - return false; - } - - std::this_thread::sleep_for(std::chrono::milliseconds(sleep_ms * (i + 1))); + return became_leader(); + backoff(i); } - return false; } - else if (task.action_type == ConfigUpdateActionType::RemoveServer) + else if (const auto* remove = std::get_if(&action)) { - LOG_INFO(log, "Will try to wait remove of server with id {}", task.server->get_id()); - for (size_t i = 0; i < coordination_settings->configuration_change_tries_count && !is_recovering; ++i) { - if (raft_instance->get_srv_config(task.server->get_id()) == nullptr) - { - LOG_INFO(log, "Server with id {} was successfully removed by leader", task.server->get_id()); - return true; - } - + if (raft_instance->get_srv_config(remove->id) == nullptr) + return applied(); if (isLeader()) - { - LOG_INFO(log, "We are leader now, probably we will have to remove server {}", task.server->get_id()); - return false; - } - - std::this_thread::sleep_for(std::chrono::milliseconds(sleep_ms * (i + 1))); + return became_leader(); + backoff(i); } - return false; } - else if (task.action_type == ConfigUpdateActionType::UpdatePriority) + else if (std::get_if(&action) != nullptr) return true; - else - LOG_WARNING(log, "Unknown configuration update type {}", static_cast(task.action_type)); - return true; + + return false; } Keeper4LWInfo KeeperServer::getPartiallyFilled4LWInfo() const diff --git a/src/Coordination/KeeperServer.h b/src/Coordination/KeeperServer.h index 8f416b1f48c..61e29b67bbd 100644 --- a/src/Coordination/KeeperServer.h +++ b/src/Coordination/KeeperServer.h @@ -10,12 +10,15 @@ #include #include #include +#include namespace DB { using RaftAppendResult = nuraft::ptr>>; +class KeeperDispatcher; + class KeeperServer { private: @@ -28,9 +31,10 @@ private: nuraft::ptr state_manager; struct KeeperRaftServer; - nuraft::ptr raft_instance; + nuraft::ptr raft_instance; // TSA_GUARDED_BY(server_write_mutex); nuraft::ptr asio_service; std::vector> asio_listeners; + // because some actions can be applied // when we are sure that there are no requests currently being // processed 
(e.g. recovery) we do all write actions @@ -65,6 +69,7 @@ private: std::shared_ptr keeper_context; const bool create_snapshot_on_exit; + const bool enable_reconfiguration; public: KeeperServer( @@ -84,6 +89,7 @@ public: void putLocalReadRequest(const KeeperStorage::RequestForSession & request); bool isRecovering() const { return is_recovering; } + bool reconfigEnabled() const { return enable_reconfiguration; } /// Put batch of requests into Raft and get result of put. Responses will be set separately into /// responses_queue. @@ -122,17 +128,12 @@ public: int getServerID() const { return server_id; } - /// Get configuration diff between current configuration in RAFT and in XML file - ConfigUpdateActions getConfigurationDiff(const Poco::Util::AbstractConfiguration & config); + bool applyConfigUpdate(const ClusterUpdateAction& action); - /// Apply action for configuration update. Actually call raft_instance->remove_srv or raft_instance->add_srv. - /// Synchronously check for update results with retries. - void applyConfigurationUpdate(const ConfigUpdateAction & task); - - - /// Wait configuration update for action. Used by followers. - /// Return true if update was successfully received. - bool waitConfigurationUpdate(const ConfigUpdateAction & task); + // TODO (myrrc) these functions should be removed once "reconfig" is stabilized + void applyConfigUpdateWithReconfigDisabled(const ClusterUpdateAction& action); + bool waitForConfigUpdateWithReconfigDisabled(const ClusterUpdateAction& action); + ClusterUpdateActions getRaftConfigurationDiff(const Poco::Util::AbstractConfiguration & config); uint64_t createSnapshot(); diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index 5c84f23fc60..3e9850caa40 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -2,17 +2,20 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include #include #include #include #include "Coordination/KeeperStorage.h" +#include "Coordination/KeeperReconfiguration.h" #include @@ -146,7 +149,7 @@ void assertDigest( "Digest for nodes is not matching after {} request of type '{}'.\nExpected digest - {}, actual digest - {} (digest " "{}). Keeper will terminate to avoid inconsistencies.\nExtra information about the request:\n{}", committing ? 
"committing" : "preprocessing", - Coordination::toString(request.getOpNum()), + request.getOpNum(), first.value, second.value, first.version, @@ -261,7 +264,8 @@ std::shared_ptr KeeperStateMachine::parseReque bool KeeperStateMachine::preprocess(const KeeperStorage::RequestForSession & request_for_session) { - if (request_for_session.request->getOpNum() == Coordination::OpNum::SessionID) + const auto op_num = request_for_session.request->getOpNum(); + if (op_num == Coordination::OpNum::SessionID || op_num == Coordination::OpNum::Reconfig) return true; std::lock_guard lock(storage_and_responses_lock); @@ -291,14 +295,89 @@ bool KeeperStateMachine::preprocess(const KeeperStorage::RequestForSession & req return true; } +KeeperStorage::ResponseForSession KeeperStateMachine::processReconfiguration( + const KeeperStorage::RequestForSession& request_for_session) +{ + const auto& request = static_cast(*request_for_session.request); + const int64_t session_id = request_for_session.session_id; + const int64_t zxid = request_for_session.zxid; + + using enum Coordination::Error; + auto bad_request = [&](Coordination::Error code = ZBADARGUMENTS) -> KeeperStorage::ResponseForSession + { + auto res = std::make_shared(); + res->xid = request.xid; + res->zxid = zxid; + res->error = code; + return { session_id, std::move(res) }; + }; + + KeeperDispatcher& dispatcher = *keeper_context->dispatcher; + if (!dispatcher.reconfigEnabled()) + return bad_request(ZUNIMPLEMENTED); + if (!dispatcher.clusterUpdateQueueEmpty()) + return bad_request(ZRECONFIGINPROGRESS); + if (request.version != -1) + return bad_request(ZBADVERSION); + + const bool has_new_members = !request.new_members.empty(); + const bool has_joining = !request.joining.empty(); + const bool has_leaving = !request.leaving.empty(); + const bool incremental_reconfig = (has_joining || has_leaving) && !has_new_members; + if (!incremental_reconfig) + return bad_request(); + + const ClusterConfigPtr config = getClusterConfig(); + if (!config) // Server can be uninitialized yet + return bad_request(); + + ClusterUpdateActions updates; + + if (has_joining) + { + if (auto join_updates = joiningToClusterUpdates(config, request.joining); !join_updates.empty()) + moveExtend(updates, std::move(join_updates)); + else + return bad_request(); + } + + if (has_leaving) + { + if (auto leave_updates = leavingToClusterUpdates(config, request.leaving); !leave_updates.empty()) + moveExtend(updates, std::move(leave_updates)); + else + return bad_request(); + } + + auto response = std::make_shared(); + response->xid = request.xid; + response->zxid = zxid; + response->error = Coordination::Error::ZOK; + response->value = serializeClusterConfig(config, updates); + + dispatcher.pushClusterUpdates(std::move(updates)); + return { session_id, std::move(response) }; +} + nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, nuraft::buffer & data) { auto request_for_session = parseRequest(data, true); if (!request_for_session->zxid) request_for_session->zxid = log_idx; - /// Special processing of session_id request - if (request_for_session->request->getOpNum() == Coordination::OpNum::SessionID) + auto try_push = [this](const KeeperStorage::ResponseForSession& response) + { + if (!responses_queue.push(response)) + { + ProfileEvents::increment(ProfileEvents::KeeperCommitsFailed); + LOG_WARNING(log, + "Failed to push response with session id {} to the queue, probably because of shutdown", + response.session_id); + } + }; + + const auto op_num = 
request_for_session->request->getOpNum(); + if (op_num == Coordination::OpNum::SessionID) { const Coordination::ZooKeeperSessionIDRequest & session_id_request = dynamic_cast(*request_for_session->request); @@ -309,21 +388,24 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, n KeeperStorage::ResponseForSession response_for_session; response_for_session.session_id = -1; response_for_session.response = response; - { - std::lock_guard lock(storage_and_responses_lock); - session_id = storage->getSessionID(session_id_request.session_timeout_ms); - LOG_DEBUG(log, "Session ID response {} with timeout {}", session_id, session_id_request.session_timeout_ms); - response->session_id = session_id; - if (!responses_queue.push(response_for_session)) - { - ProfileEvents::increment(ProfileEvents::KeeperCommitsFailed); - LOG_WARNING(log, "Failed to push response with session id {} to the queue, probably because of shutdown", session_id); - } - } + + std::lock_guard lock(storage_and_responses_lock); + session_id = storage->getSessionID(session_id_request.session_timeout_ms); + LOG_DEBUG(log, "Session ID response {} with timeout {}", session_id, session_id_request.session_timeout_ms); + response->session_id = session_id; + try_push(response_for_session); + } + // Processing reconfig request as an ordinary one (in KeeperStorage) brings multiple inconsistencies + // regarding replays of old reconfigurations in new nodes. Thus the storage is not involved. + // See https://github.com/ClickHouse/ClickHouse/pull/49450 for details + else if (op_num == Coordination::OpNum::Reconfig) + { + std::lock_guard lock(storage_and_responses_lock); + try_push(processReconfiguration(*request_for_session)); } else { - if (request_for_session->request->getOpNum() == Coordination::OpNum::Close) + if (op_num == Coordination::OpNum::Close) { std::lock_guard lock(request_cache_mutex); parsed_request_cache.erase(request_for_session->session_id); @@ -333,14 +415,7 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, n KeeperStorage::ResponsesForSessions responses_for_sessions = storage->processRequest(request_for_session->request, request_for_session->session_id, request_for_session->zxid); for (auto & response_for_session : responses_for_sessions) - if (!responses_queue.push(response_for_session)) - { - ProfileEvents::increment(ProfileEvents::KeeperCommitsFailed); - LOG_WARNING( - log, - "Failed to push response with session id {} to the queue, probably because of shutdown", - response_for_session.session_id); - } + try_push(response_for_session); if (keeper_context->digestEnabled() && request_for_session->digest) assertDigest(*request_for_session->digest, storage->getNodesDigest(true), *request_for_session->request, true); @@ -782,5 +857,4 @@ void KeeperStateMachine::recalculateStorageStats() storage->recalculateStats(); LOG_INFO(log, "Done recalculating storage stats"); } - } diff --git a/src/Coordination/KeeperStateMachine.h b/src/Coordination/KeeperStateMachine.h index b47a9b5cc42..3b239adae45 100644 --- a/src/Coordination/KeeperStateMachine.h +++ b/src/Coordination/KeeperStateMachine.h @@ -12,6 +12,7 @@ namespace DB { +class KeeperDispatcher; using ResponsesQueue = ConcurrentBoundedQueue; using SnapshotsQueue = ConcurrentBoundedQueue; @@ -67,7 +68,9 @@ public: // (can happen in case of exception during preprocessing) void rollbackRequest(const KeeperStorage::RequestForSession & request_for_session, bool allow_missing); - void rollbackRequestNoLock(const KeeperStorage::RequestForSession & 
request_for_session, bool allow_missing); + void rollbackRequestNoLock( + const KeeperStorage::RequestForSession & request_for_session, + bool allow_missing) TSA_NO_THREAD_SAFETY_ANALYSIS; uint64_t last_commit_index() override { return last_committed_idx; } @@ -87,8 +90,10 @@ public: int read_logical_snp_obj( nuraft::snapshot & s, void *& user_snp_ctx, uint64_t obj_id, nuraft::ptr & data_out, bool & is_last_obj) override; - /// just for test - KeeperStorage & getStorage() { return *storage; } + KeeperStorage & getStorageForUnitTests() TSA_NO_THREAD_SAFETY_ANALYSIS + { + return *storage; + } void shutdownStorage(); @@ -122,6 +127,7 @@ public: uint64_t getLatestSnapshotBufSize() const; void recalculateStorageStats(); + private: CommitCallback commit_callback; /// In our state machine we always have a single snapshot which is stored @@ -133,7 +139,7 @@ private: CoordinationSettingsPtr coordination_settings; /// Main state machine logic - KeeperStoragePtr storage; + KeeperStoragePtr storage TSA_PT_GUARDED_BY(storage_and_responses_lock); /// Save/Load and Serialize/Deserialize logic for snapshots. KeeperSnapshotManager snapshot_manager; @@ -178,6 +184,8 @@ private: KeeperContextPtr keeper_context; KeeperSnapshotManagerS3 * snapshot_manager_s3; + + KeeperStorage::ResponseForSession processReconfiguration(const KeeperStorage::RequestForSession& request_for_session); }; } diff --git a/src/Coordination/KeeperStateManager.cpp b/src/Coordination/KeeperStateManager.cpp index 450fd04b61d..cf1bad8c5fa 100644 --- a/src/Coordination/KeeperStateManager.cpp +++ b/src/Coordination/KeeperStateManager.cpp @@ -451,7 +451,7 @@ nuraft::ptr KeeperStateManager::read_state() return nullptr; } -ConfigUpdateActions KeeperStateManager::getConfigurationDiff(const Poco::Util::AbstractConfiguration & config) const +ClusterUpdateActions KeeperStateManager::getRaftConfigurationDiff(const Poco::Util::AbstractConfiguration & config) const { auto new_configuration_wrapper = parseServersConfiguration(config, true); @@ -465,14 +465,14 @@ ConfigUpdateActions KeeperStateManager::getConfigurationDiff(const Poco::Util::A old_ids[old_server->get_id()] = old_server; } - ConfigUpdateActions result; + ClusterUpdateActions result; /// First of all add new servers for (const auto & [new_id, server_config] : new_ids) { auto old_server_it = old_ids.find(new_id); if (old_server_it == old_ids.end()) - result.emplace_back(ConfigUpdateAction{ConfigUpdateActionType::AddServer, server_config}); + result.emplace_back(AddRaftServer{RaftServerConfig{*server_config}}); else { const auto & old_endpoint = old_server_it->second->get_endpoint(); @@ -491,10 +491,8 @@ ConfigUpdateActions KeeperStateManager::getConfigurationDiff(const Poco::Util::A /// After that remove old ones for (auto [old_id, server_config] : old_ids) - { if (!new_ids.contains(old_id)) - result.emplace_back(ConfigUpdateAction{ConfigUpdateActionType::RemoveServer, server_config}); - } + result.emplace_back(RemoveRaftServer{old_id}); { std::lock_guard lock(configuration_wrapper_mutex); @@ -507,7 +505,10 @@ ConfigUpdateActions KeeperStateManager::getConfigurationDiff(const Poco::Util::A { if (old_server->get_priority() != new_server->get_priority()) { - result.emplace_back(ConfigUpdateAction{ConfigUpdateActionType::UpdatePriority, new_server}); + result.emplace_back(UpdateRaftServerPriority{ + .id = new_server->get_id(), + .priority = new_server->get_priority() + }); } break; } diff --git a/src/Coordination/KeeperStateManager.h b/src/Coordination/KeeperStateManager.h index 
f24f0c2b1e5..5abeea604b5 100644 --- a/src/Coordination/KeeperStateManager.h +++ b/src/Coordination/KeeperStateManager.h @@ -7,31 +7,13 @@ #include #include #include "Coordination/KeeperStateMachine.h" +#include "Coordination/RaftServerConfig.h" #include namespace DB { - using KeeperServerConfigPtr = nuraft::ptr; -/// When our configuration changes the following action types -/// can happen -enum class ConfigUpdateActionType -{ - RemoveServer, - AddServer, - UpdatePriority, -}; - -/// Action to update configuration -struct ConfigUpdateAction -{ - ConfigUpdateActionType action_type; - KeeperServerConfigPtr server; -}; - -using ConfigUpdateActions = std::vector; - /// Responsible for managing our and cluster configuration class KeeperStateManager : public nuraft::state_mgr { @@ -74,7 +56,11 @@ public: int32_t server_id() override { return my_server_id; } - nuraft::ptr get_srv_config() const { return configuration_wrapper.config; } /// NOLINT + nuraft::ptr get_srv_config() const + { + std::lock_guard lk(configuration_wrapper_mutex); + return configuration_wrapper.config; + } void system_exit(const int exit_code) override; /// NOLINT @@ -106,8 +92,8 @@ public: /// Read all log entries in log store from the begging and return latest config (with largest log_index) ClusterConfigPtr getLatestConfigFromLogStore() const; - /// Get configuration diff between proposed XML and current state in RAFT - ConfigUpdateActions getConfigurationDiff(const Poco::Util::AbstractConfiguration & config) const; + // TODO (myrrc) This should be removed once "reconfig" is stabilized + ClusterUpdateActions getRaftConfigurationDiff(const Poco::Util::AbstractConfiguration & config) const; private: const String & getOldServerStatePath(); @@ -133,7 +119,7 @@ private: std::string config_prefix; mutable std::mutex configuration_wrapper_mutex; - KeeperConfigurationWrapper configuration_wrapper; + KeeperConfigurationWrapper configuration_wrapper TSA_GUARDED_BY(configuration_wrapper_mutex); nuraft::ptr log_store; diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 884aacc4558..2b245a455b7 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -20,10 +20,10 @@ #include #include +#include #include +#include -#include -#include #include #include #include @@ -53,7 +53,6 @@ namespace ErrorCodes namespace { - String getSHA1(const String & userdata) { Poco::SHA1Engine engine; @@ -1060,7 +1059,8 @@ struct KeeperStorageGetRequestProcessor final : public KeeperStorageRequestProce ProfileEvents::increment(ProfileEvents::KeeperGetRequest); Coordination::ZooKeeperGetRequest & request = dynamic_cast(*zk_request); - if (request.path == Coordination::keeper_api_feature_flags_path) + if (request.path == Coordination::keeper_api_feature_flags_path + || request.path == Coordination::keeper_config_path) return {}; if (!storage.uncommitted_state.getNode(request.path)) @@ -1085,6 +1085,14 @@ struct KeeperStorageGetRequestProcessor final : public KeeperStorageRequestProce } } + if (request.path == Coordination::keeper_config_path) + { + response.data = serializeClusterConfig( + storage.keeper_context->dispatcher->getStateMachine().getClusterConfig()); + response.error = Coordination::Error::ZOK; + return response_ptr; + } + auto & container = storage.container; auto node_it = container.find(request.path); if (node_it == container.end()) @@ -1784,7 +1792,7 @@ struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestPro throw DB::Exception( 
ErrorCodes::BAD_ARGUMENTS, "Illegal command as part of multi ZooKeeper request {}", - Coordination::toString(sub_zk_request->getOpNum())); + sub_zk_request->getOpNum()); } } @@ -1975,7 +1983,7 @@ public: { auto request_it = op_num_to_request.find(zk_request->getOpNum()); if (request_it == op_num_to_request.end()) - throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Unknown operation type {}", toString(zk_request->getOpNum())); + throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Unknown operation type {}", zk_request->getOpNum()); return request_it->second(zk_request); } diff --git a/src/Coordination/RaftServerConfig.cpp b/src/Coordination/RaftServerConfig.cpp new file mode 100644 index 00000000000..42923dd0b29 --- /dev/null +++ b/src/Coordination/RaftServerConfig.cpp @@ -0,0 +1,96 @@ +#include "RaftServerConfig.h" +#include +#include +#include +#include + +namespace DB +{ +RaftServerConfig::RaftServerConfig(const nuraft::srv_config & cfg) noexcept + : id(cfg.get_id()), endpoint(cfg.get_endpoint()), learner(cfg.is_learner()), priority(cfg.get_priority()) +{ +} + +RaftServerConfig::operator nuraft::srv_config() const noexcept +{ + return {id, 0, endpoint, "", learner, priority}; +} + +std::optional RaftServerConfig::parse(std::string_view server) noexcept +{ + std::vector parts; + splitInto<';', '='>(parts, server); + + const bool with_id_endpoint = parts.size() == 2; + const bool with_server_type = parts.size() == 3; + const bool with_priority = parts.size() == 4; + if (!with_id_endpoint && !with_server_type && !with_priority) + return std::nullopt; + + const std::string_view id_str = parts[0]; + if (!id_str.starts_with("server.")) + return std::nullopt; + + int id; + if (std::from_chars(std::next(id_str.begin(), 7), id_str.end(), id).ec != std::error_code{}) + return std::nullopt; + if (id <= 0) + return std::nullopt; + + const std::string_view endpoint = parts[1]; + const size_t port_delimiter = endpoint.find_last_of(':'); + if (port_delimiter == std::string::npos) + return {}; + const std::string_view port = endpoint.substr(port_delimiter + 1); + + uint16_t port_tmp; + if (std::from_chars(port.begin(), port.end(), port_tmp).ec != std::error_code{}) + return std::nullopt; + + RaftServerConfig out{id, endpoint}; + + if (with_id_endpoint) + return out; + + if (parts[2] != "learner" && parts[2] != "participant") + return std::nullopt; + out.learner = parts[2] == "learner"; + if (with_server_type) + return out; + + const std::string_view priority = parts[3]; + if (std::from_chars(priority.begin(), priority.end(), out.priority).ec != std::error_code{}) + return std::nullopt; + if (out.priority < 0) + return std::nullopt; + + return out; +} + +RaftServers parseRaftServers(std::string_view servers) +{ + std::vector server_arr; + std::unordered_set ids; + std::unordered_set endpoints; + RaftServers out; + + for (auto & server : splitInto<','>(server_arr, servers)) + if (auto maybe_server = RaftServerConfig::parse(server)) + { + String endpoint = maybe_server->endpoint; + if (endpoints.contains(endpoint)) + return {}; + const int id = maybe_server->id; + if (ids.contains(id)) + return {}; + + out.emplace_back(std::move(*maybe_server)); + endpoints.emplace(std::move(endpoint)); + ids.emplace(id); + } + else + return {}; + + return out; +} +} diff --git a/src/Coordination/RaftServerConfig.h b/src/Coordination/RaftServerConfig.h new file mode 100644 index 00000000000..451d61a436e --- /dev/null +++ b/src/Coordination/RaftServerConfig.h @@ -0,0 +1,78 @@ +#pragma once +#include +#include +#include +#include 
+ +namespace DB +{ +// default- and copy-constructible version of nuraft::srv_config +struct RaftServerConfig +{ + int id; + String endpoint; + bool learner; + int priority; + + constexpr RaftServerConfig() = default; + constexpr RaftServerConfig(int id_, std::string_view endpoint_, bool learner_ = false, int priority_ = 1) + : id(id_), endpoint(endpoint_), learner(learner_), priority(priority_) + { + } + + constexpr bool operator==(const RaftServerConfig &) const = default; + explicit RaftServerConfig(const nuraft::srv_config & cfg) noexcept; + explicit operator nuraft::srv_config() const noexcept; + + /// Parse server in format "server.id=host:port[;learner][;priority]" + static std::optional<RaftServerConfig> parse(std::string_view server) noexcept; +}; + +using RaftServers = std::vector<RaftServerConfig>; +/// Parse comma-delimited servers. Check for duplicate endpoints and ids. +/// @returns {} on parsing or validation error. +RaftServers parseRaftServers(std::string_view servers); + +struct AddRaftServer : RaftServerConfig +{ +}; + +struct RemoveRaftServer +{ + int id; +}; + +struct UpdateRaftServerPriority +{ + int id; + int priority; +}; + +using ClusterUpdateAction = std::variant<AddRaftServer, RemoveRaftServer, UpdateRaftServerPriority>; +using ClusterUpdateActions = std::vector<ClusterUpdateAction>; +} + +template <> +struct fmt::formatter<DB::RaftServerConfig> : fmt::formatter<string_view> +{ + constexpr auto format(const DB::RaftServerConfig & server, format_context & ctx) + { + return fmt::format_to( + ctx.out(), "server.{}={};{};{}", server.id, server.endpoint, server.learner ? "learner" : "participant", server.priority); + } +}; + +template <> +struct fmt::formatter<DB::ClusterUpdateAction> : fmt::formatter<string_view> +{ + constexpr auto format(const DB::ClusterUpdateAction & action, format_context & ctx) + { + if (const auto * add = std::get_if<DB::AddRaftServer>(&action)) + return fmt::format_to(ctx.out(), "(Add server {})", add->id); + if (const auto * remove = std::get_if<DB::RemoveRaftServer>(&action)) + return fmt::format_to(ctx.out(), "(Remove server {})", remove->id); + if (const auto * update = std::get_if<DB::UpdateRaftServerPriority>(&action)) + return fmt::format_to(ctx.out(), "(Change server {} priority to {})", update->id, update->priority); + UNREACHABLE(); + } +}; diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index 0f60c960b8b..b302f9b13ca 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -84,6 +84,47 @@ protected: } }; +TEST_P(CoordinationTest, RaftServerConfigParse) +{ + auto parse = Coordination::RaftServerConfig::parse; + using Cfg = std::optional<DB::RaftServerConfig>; + + EXPECT_EQ(parse(""), std::nullopt); + EXPECT_EQ(parse("="), std::nullopt); + EXPECT_EQ(parse("=;"), std::nullopt); + EXPECT_EQ(parse("=;;"), std::nullopt); + EXPECT_EQ(parse("=:80"), std::nullopt); + EXPECT_EQ(parse("server."), std::nullopt); + EXPECT_EQ(parse("server.=:80"), std::nullopt); + EXPECT_EQ(parse("server.-5=1:2"), std::nullopt); + EXPECT_EQ(parse("server.1=host;-123"), std::nullopt); + EXPECT_EQ(parse("server.1=host:999"), (Cfg{{1, "host:999"}})); + EXPECT_EQ(parse("server.1=host:999;learner"), (Cfg{{1, "host:999", true}})); + EXPECT_EQ(parse("server.1=host:999;participant"), (Cfg{{1, "host:999", false}})); + EXPECT_EQ(parse("server.1=host:999;learner;25"), (Cfg{{1, "host:999", true, 25}})); + + EXPECT_EQ(parse("server.1=127.0.0.1:80"), (Cfg{{1, "127.0.0.1:80"}})); + EXPECT_EQ( + parse("server.1=2001:0db8:85a3:0000:0000:8a2e:0370:7334:80"), + (Cfg{{1, "2001:0db8:85a3:0000:0000:8a2e:0370:7334:80"}})); +} + +TEST_P(CoordinationTest, RaftServerClusterConfigParse) +{ + auto parse = Coordination::parseRaftServers; + using Cfg =
DB::RaftServerConfig; + using Servers = DB::RaftServers; + + EXPECT_EQ(parse(""), Servers{}); + EXPECT_EQ(parse(","), Servers{}); + EXPECT_EQ(parse("1,2"), Servers{}); + EXPECT_EQ(parse("server.1=host:80,server.1=host2:80"), Servers{}); + EXPECT_EQ(parse("server.1=host:80,server.2=host:80"), Servers{}); + EXPECT_EQ( + parse("server.1=host:80,server.2=host:81"), + (Servers{Cfg{1, "host:80"}, Cfg{2, "host:81"}})); +} + TEST_P(CoordinationTest, BuildTest) { DB::InMemoryLogStore store; @@ -1575,8 +1616,8 @@ void testLogAndStateMachine( restore_machine->commit(i, changelog.entry_at(i)->get_buf()); } - auto & source_storage = state_machine->getStorage(); - auto & restored_storage = restore_machine->getStorage(); + auto & source_storage = state_machine->getStorageForUnitTests(); + auto & restored_storage = restore_machine->getStorageForUnitTests(); EXPECT_EQ(source_storage.container.size(), restored_storage.container.size()); for (size_t i = 1; i < total_logs + 1; ++i) @@ -1678,7 +1719,7 @@ TEST_P(CoordinationTest, TestEphemeralNodeRemove) auto entry_c = getLogEntryFromZKRequest(0, 1, state_machine->getNextZxid(), request_c); state_machine->pre_commit(1, entry_c->get_buf()); state_machine->commit(1, entry_c->get_buf()); - const auto & storage = state_machine->getStorage(); + const auto & storage = state_machine->getStorageForUnitTests(); EXPECT_EQ(storage.ephemerals.size(), 1); std::shared_ptr request_d = std::make_shared(); @@ -1727,7 +1768,7 @@ TEST_P(CoordinationTest, TestCreateNodeWithAuthSchemeForAclWhenAuthIsPrecommitte auto create_entry = getLogEntryFromZKRequest(0, 1, state_machine->getNextZxid(), create_req); state_machine->pre_commit(2, create_entry->get_buf()); - const auto & uncommitted_state = state_machine->getStorage().uncommitted_state; + const auto & uncommitted_state = state_machine->getStorageForUnitTests().uncommitted_state; ASSERT_TRUE(uncommitted_state.nodes.contains(node_path)); // commit log entries @@ -1790,7 +1831,7 @@ TEST_P(CoordinationTest, TestSetACLWithAuthSchemeForAclWhenAuthIsPrecommitted) state_machine->commit(2, create_entry->get_buf()); state_machine->commit(3, set_acl_entry->get_buf()); - const auto & uncommitted_state = state_machine->getStorage().uncommitted_state; + const auto & uncommitted_state = state_machine->getStorageForUnitTests().uncommitted_state; auto node = uncommitted_state.getNode(node_path); ASSERT_NE(node, nullptr); diff --git a/src/Interpreters/ZooKeeperLog.cpp b/src/Interpreters/ZooKeeperLog.cpp index 48f4d510af7..2231a58c6a9 100644 --- a/src/Interpreters/ZooKeeperLog.cpp +++ b/src/Interpreters/ZooKeeperLog.cpp @@ -73,6 +73,7 @@ NamesAndTypesList ZooKeeperLogElement::getNamesAndTypes() {"Create", static_cast(Coordination::OpNum::Create)}, {"Remove", static_cast(Coordination::OpNum::Remove)}, {"Exists", static_cast(Coordination::OpNum::Exists)}, + {"Reconfig", static_cast(Coordination::OpNum::Reconfig)}, {"Get", static_cast(Coordination::OpNum::Get)}, {"Set", static_cast(Coordination::OpNum::Set)}, {"GetACL", static_cast(Coordination::OpNum::GetACL)}, diff --git a/src/Storages/DataLakes/HudiMetadataParser.cpp b/src/Storages/DataLakes/HudiMetadataParser.cpp index a1f35a5ae42..78d69c83989 100644 --- a/src/Storages/DataLakes/HudiMetadataParser.cpp +++ b/src/Storages/DataLakes/HudiMetadataParser.cpp @@ -67,7 +67,8 @@ struct HudiMetadataParser::Impl { auto key_file = std::filesystem::path(key); Strings file_parts; - splitInto<'_'>(file_parts, key_file.stem()); + const String stem = key_file.stem(); + splitInto<'_'>(file_parts, stem); if 
(file_parts.size() != 3) throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected format for file: {}", key); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index bcc4dc749fb..07cfced8362 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -492,7 +492,7 @@ size_t ReplicatedMergeTreeCleanupThread::clearOldBlocks(const String & blocks_di } else { - LOG_WARNING(log, "Error while deleting ZooKeeper path `{}`: {}, ignoring.", path, Coordination::errorMessage(rc)); + LOG_WARNING(log, "Error while deleting ZooKeeper path `{}`: {}, ignoring.", path, rc); } first_outdated_block++; } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 792843cbe18..b08b9de12a3 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -494,7 +494,7 @@ void ReplicatedMergeTreeQueue::updateTimesInZooKeeper( if (code != Coordination::Error::ZOK) LOG_ERROR(log, "Couldn't set value of nodes for insert times " "({}/min_unprocessed_insert_time, max_processed_insert_time): {}. " - "This shouldn't happen often.", replica_path, Coordination::errorMessage(code)); + "This shouldn't happen often.", replica_path, code); } } @@ -551,7 +551,7 @@ void ReplicatedMergeTreeQueue::removeProcessedEntry(zkutil::ZooKeeperPtr zookeep auto code = zookeeper->tryRemove(fs::path(replica_path) / "queue" / entry->znode_name); if (code != Coordination::Error::ZOK) - LOG_ERROR(log, "Couldn't remove {}/queue/{}: {}. This shouldn't happen often.", replica_path, entry->znode_name, Coordination::errorMessage(code)); + LOG_ERROR(log, "Couldn't remove {}/queue/{}: {}. This shouldn't happen often.", replica_path, entry->znode_name, code); updateTimesInZooKeeper(zookeeper, min_unprocessed_insert_time_changed, max_processed_insert_time_changed); } @@ -1144,7 +1144,7 @@ void ReplicatedMergeTreeQueue::removePartProducingOpsInRange( auto code = zookeeper->tryRemove(fs::path(replica_path) / "queue" / znode_name); if (code != Coordination::Error::ZOK) - LOG_INFO(log, "Couldn't remove {}: {}", (fs::path(replica_path) / "queue" / znode_name).string(), Coordination::errorMessage(code)); + LOG_INFO(log, "Couldn't remove {}: {}", (fs::path(replica_path) / "queue" / znode_name).string(), code); updateStateOnQueueEntryRemoval( *it, /* is_successful = */ false, diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 4128654a632..1e033566fed 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -723,7 +723,7 @@ std::pair, bool> ReplicatedMergeTreeSinkImpl:: retries_ctl.setUserError( ErrorCodes::UNEXPECTED_ZOOKEEPER_ERROR, "Insert failed due to zookeeper error. Please retry. Reason: {}", - Coordination::errorMessage(write_part_info_keeper_error)); + write_part_info_keeper_error); } retries_ctl.stopRetries(); @@ -1033,7 +1033,7 @@ std::pair, bool> ReplicatedMergeTreeSinkImpl:: retries_ctl.setUserError( ErrorCodes::UNKNOWN_STATUS_OF_INSERT, "Unknown status, client must retry. 
Reason: {}", - Coordination::errorMessage(multi_code)); + multi_code); return; } else if (Coordination::isUserError(multi_code)) @@ -1109,7 +1109,7 @@ std::pair, bool> ReplicatedMergeTreeSinkImpl:: "Unexpected logical error while adding block {} with ID '{}': {}, path {}", block_number, toString(block_id), - Coordination::errorMessage(multi_code), + multi_code, failed_op_path); } } @@ -1122,7 +1122,7 @@ std::pair, bool> ReplicatedMergeTreeSinkImpl:: "Unexpected ZooKeeper error while adding block {} with ID '{}': {}", block_number, toString(block_id), - Coordination::errorMessage(multi_code)); + multi_code); } }, [&zookeeper]() { zookeeper->cleanupEphemeralNodes(); }); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index dac9e6923a5..06e9d88a954 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1037,7 +1037,7 @@ void StorageReplicatedMergeTree::dropReplica(zkutil::ZooKeeperPtr zookeeper, con code = zookeeper->tryMulti(ops, res); if (code != Coordination::Error::ZOK) LOG_WARNING(logger, "Cannot quickly remove nodes without children: {} (replica: {}). Will remove recursively.", - Coordination::errorMessage(code), remote_replica_path); + code, remote_replica_path); /// And finally remove everything else recursively /// It may left some garbage if replica_path subtree is concurrently modified @@ -1145,7 +1145,7 @@ bool StorageReplicatedMergeTree::removeTableNodesFromZooKeeper(zkutil::ZooKeeper auto code = zookeeper->tryMulti(ops, res); if (code != Coordination::Error::ZOK) LOG_WARNING(logger, "Cannot quickly remove nodes without children: {} (table: {}). Will remove recursively.", - Coordination::errorMessage(code), zookeeper_path); + code, zookeeper_path); Strings children; code = zookeeper->tryGetChildren(zookeeper_path, children); @@ -1893,7 +1893,7 @@ bool StorageReplicatedMergeTree::executeFetch(LogEntry & entry, bool need_to_che else if (code == Coordination::Error::ZBADVERSION || code == Coordination::Error::ZNONODE || code == Coordination::Error::ZNODEEXISTS) { LOG_DEBUG(log, "State was changed or isn't expected when trying to mark quorum for part {} as failed. 
Code: {}", - entry.new_part_name, Coordination::errorMessage(code)); + entry.new_part_name, code); } else throw Coordination::Exception(code); @@ -3098,7 +3098,7 @@ void StorageReplicatedMergeTree::cloneReplicaIfNeeded(zkutil::ZooKeeperPtr zooke if (get_is_lost.error != Coordination::Error::ZOK) { - LOG_INFO(log, "Not cloning {}, cannot get '/is_lost': {}", source_replica_name, Coordination::errorMessage(get_is_lost.error)); + LOG_INFO(log, "Not cloning {}, cannot get '/is_lost': {}", source_replica_name, get_is_lost.error); continue; } else if (get_is_lost.data != "0") @@ -3109,12 +3109,12 @@ void StorageReplicatedMergeTree::cloneReplicaIfNeeded(zkutil::ZooKeeperPtr zooke if (get_log_pointer.error != Coordination::Error::ZOK) { - LOG_INFO(log, "Not cloning {}, cannot get '/log_pointer': {}", source_replica_name, Coordination::errorMessage(get_log_pointer.error)); + LOG_INFO(log, "Not cloning {}, cannot get '/log_pointer': {}", source_replica_name, get_log_pointer.error); continue; } if (get_queue.error != Coordination::Error::ZOK) { - LOG_INFO(log, "Not cloning {}, cannot get '/queue': {}", source_replica_name, Coordination::errorMessage(get_queue.error)); + LOG_INFO(log, "Not cloning {}, cannot get '/queue': {}", source_replica_name, get_queue.error); continue; } @@ -7203,7 +7203,7 @@ void StorageReplicatedMergeTree::clearBlocksInPartition( { for (size_t i = 0; i < delete_requests.size(); ++i) if (delete_responses[i]->error != Coordination::Error::ZOK) - LOG_WARNING(log, "Error while deleting ZooKeeper path `{}`: {}, ignoring.", delete_requests[i]->getPath(), Coordination::errorMessage(delete_responses[i]->error)); + LOG_WARNING(log, "Error while deleting ZooKeeper path `{}`: {}, ignoring.", delete_requests[i]->getPath(), delete_responses[i]->error); } LOG_TRACE(log, "Deleted {} deduplication block IDs in partition ID {}", delete_requests.size(), partition_id); @@ -8717,7 +8717,7 @@ std::pair> getParentLockedBlobs(const ZooKeeperWith zookeeper_ptr->tryGet(fs::path(zero_copy_part_path_prefix) / part_candidate_info_str, files_not_to_remove_str, nullptr, nullptr, &code); if (code != Coordination::Error::ZOK) { - LOG_TRACE(log, "Cannot get parent files from ZooKeeper on path ({}), error {}", (fs::path(zero_copy_part_path_prefix) / part_candidate_info_str).string(), errorMessage(code)); + LOG_TRACE(log, "Cannot get parent files from ZooKeeper on path ({}), error {}", (fs::path(zero_copy_part_path_prefix) / part_candidate_info_str).string(), code); return {true, std::nullopt}; } diff --git a/tests/integration/helpers/keeper_utils.py b/tests/integration/helpers/keeper_utils.py index 3b909194b63..3da1d5bd7b0 100644 --- a/tests/integration/helpers/keeper_utils.py +++ b/tests/integration/helpers/keeper_utils.py @@ -1,5 +1,6 @@ import socket import time +from kazoo.client import KazooClient def get_keeper_socket(cluster, node, port=9181): @@ -26,9 +27,17 @@ def send_4lw_cmd(cluster, node, cmd="ruok", port=9181): NOT_SERVING_REQUESTS_ERROR_MSG = "This instance is not currently serving requests" -def wait_until_connected(cluster, node, port=9181): +def wait_until_connected(cluster, node, port=9181, timeout=30.0): + elapsed = 0.0 + while send_4lw_cmd(cluster, node, "mntr", port) == NOT_SERVING_REQUESTS_ERROR_MSG: time.sleep(0.1) + elapsed += 0.1 + + if elapsed >= timeout: + raise Exception( + f"{timeout}s timeout while waiting for {node.name} to start serving requests" + ) def wait_until_quorum_lost(cluster, node, port=9181): @@ -51,3 +60,25 @@ def get_leader(cluster, nodes): if is_leader(cluster, 
node): return node raise Exception("No leader in Keeper cluster.") + + +def get_fake_zk(cluster, node, timeout: float = 30.0) -> KazooClient: + _fake = KazooClient( + hosts=cluster.get_instance_ip(node.name) + ":9181", timeout=timeout + ) + _fake.start() + return _fake + + +def get_config_str(zk: KazooClient) -> str: + """ + Return decoded contents of /keeper/config node + """ + return zk.get("/keeper/config")[0].decode("utf-8") + + +def configs_equal(left: str, right: str) -> bool: + """ + Check whether /keeper/config nodes are equal + """ + return sorted(left.split("\n")) == sorted(right.split("\n")) diff --git a/tests/integration/test_keeper_nodes_move/test.py b/tests/integration/test_keeper_nodes_move/test.py index 6884ff29607..8ac7bc9b5e2 100644 --- a/tests/integration/test_keeper_nodes_move/test.py +++ b/tests/integration/test_keeper_nodes_move/test.py @@ -1,12 +1,7 @@ #!/usr/bin/env python3 - -#!/usr/bin/env python3 - import pytest from helpers.cluster import ClickHouseCluster -import random -import string import os import time from multiprocessing.dummy import Pool diff --git a/tests/integration/test_keeper_reconfig_add/__init__.py b/tests/integration/test_keeper_reconfig_add/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_keeper_reconfig_add/configs/keeper1.xml b/tests/integration/test_keeper_reconfig_add/configs/keeper1.xml new file mode 100644 index 00000000000..44e2090e9d8 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_add/configs/keeper1.xml @@ -0,0 +1,20 @@ + + + true + + 9181 + 1 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + + 5000 + 10000 + trace + + + + 1 node1 9234 + + + diff --git a/tests/integration/test_keeper_reconfig_add/configs/keeper2.xml b/tests/integration/test_keeper_reconfig_add/configs/keeper2.xml new file mode 100644 index 00000000000..e9249f7091c --- /dev/null +++ b/tests/integration/test_keeper_reconfig_add/configs/keeper2.xml @@ -0,0 +1,21 @@ + + + true + + 9181 + 2 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + + 5000 + 10000 + trace + + + + 1 node1 9234 + 2 node2 9234 + + + diff --git a/tests/integration/test_keeper_reconfig_add/configs/keeper3.xml b/tests/integration/test_keeper_reconfig_add/configs/keeper3.xml new file mode 100644 index 00000000000..a7ff1f6de28 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_add/configs/keeper3.xml @@ -0,0 +1,22 @@ + + + true + + 9181 + 3 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + + 5000 + 10000 + trace + + + + 1 node1 9234 + 2 node2 9234 + 3 node3 9234 + + + diff --git a/tests/integration/test_keeper_reconfig_add/test.py b/tests/integration/test_keeper_reconfig_add/test.py new file mode 100644 index 00000000000..c80279a0727 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_add/test.py @@ -0,0 +1,155 @@ +#!/usr/bin/env python3 + +import pytest +from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as ku +import os +from kazoo.client import KazooClient +from kazoo.exceptions import BadArgumentsException + +cluster = ClickHouseCluster(__file__) +CONFIG_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "configs") + +node1 = cluster.add_instance("node1", main_configs=["configs/keeper1.xml"]) +node2 = cluster.add_instance("node2", stay_alive=True) +node3 = cluster.add_instance("node3", stay_alive=True) + +server_join_msg = "confirms it will join" +part_of_cluster = "now this node 
is the part of cluster" +zk1, zk2, zk3 = None, None, None + + +def get_fake_zk(node): + return ku.get_fake_zk(cluster, node) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + node2.stop_clickhouse() + node2.copy_file_to_container( + os.path.join(CONFIG_DIR, "keeper2.xml"), + "/etc/clickhouse-server/config.d/keeper.xml", + ) + + node3.stop_clickhouse() + node3.copy_file_to_container( + os.path.join(CONFIG_DIR, "keeper3.xml"), + "/etc/clickhouse-server/config.d/keeper.xml", + ) + + yield cluster + + finally: + for conn in [zk1, zk2, zk3]: + if conn: + conn.stop() + conn.close() + + cluster.shutdown() + + +def test_reconfig_add(started_cluster): + """ + Add a node to another node. Then add another node to two. + """ + + zk1 = get_fake_zk(node1) + config = ku.get_config_str(zk1) + print("Initial config", config) + + assert len(config.split("\n")) == 1 + assert "node1" in config + assert "node2" not in config + assert "node3" not in config + + with pytest.raises(BadArgumentsException): + # duplicate id with different endpoint + zk1.reconfig(joining="server.1=localhost:1337", leaving=None, new_members=None) + + with pytest.raises(BadArgumentsException): + # duplicate endpoint + zk1.reconfig(joining="server.8=node1:9234", leaving=None, new_members=None) + + for i in range(100): + zk1.create(f"/test_three_{i}", b"somedata") + + node2.start_clickhouse() + config, _ = zk1.reconfig( + joining="server.2=node2:9234", leaving=None, new_members=None + ) + ku.wait_until_connected(cluster, node2) + + config = config.decode("utf-8") + print("After adding 2", config) + + assert len(config.split("\n")) == 2 + assert "node1" in config + assert "node2" in config + assert "node3" not in config + + zk2 = get_fake_zk(node2) + assert ku.configs_equal(config, ku.get_config_str(zk2)) + + for i in range(100): + assert zk2.exists(f"/test_three_{i}") is not None + zk2.create(f"/test_three_{100 + i}", b"somedata") + + # Why not both? 
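A note on the assertion below: after a reconfig that adds a server, only one of the two nodes is guaranteed to log the join confirmation, so the test accepts either. A minimal sketch of a more tolerant wait helper, assuming only the contains_in_log() helper already used throughout these integration tests (the name wait_any_contains_in_log and its polling interval are hypothetical, not part of the test suite):

import time

def wait_any_contains_in_log(nodes, message, timeout: float = 30.0):
    # Poll all nodes until any one of them has logged `message`.
    # `nodes` are ClickHouseInstance objects from cluster.add_instance().
    elapsed = 0.0
    while not any(node.contains_in_log(message) for node in nodes):
        time.sleep(0.5)
        elapsed += 0.5
        if elapsed >= timeout:
            raise Exception(f"none of the nodes logged '{message}' in {timeout}s")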
+ # One node will process add_srv request, other will pull out updated config, apply + # and return true in config update thread (without calling add_srv again) + assert node1.contains_in_log(server_join_msg) or node2.contains_in_log( + server_join_msg + ) + + assert node2.contains_in_log(part_of_cluster) + + zk1.stop() + zk1.close() + zk1 = get_fake_zk(node1) + zk1.sync("/test_three_0") + + for i in range(200): + assert zk1.exists(f"/test_three_{i}") is not None + + for i in range(100): + zk2.create(f"/test_four_{i}", b"somedata") + + node3.start_clickhouse() + config, _ = zk2.reconfig( + joining="server.3=node3:9234", leaving=None, new_members=None + ) + ku.wait_until_connected(cluster, node3) + + config = config.decode("utf-8") + print("After adding 3", config) + + assert len(config.split("\n")) == 3 + assert "node1" in config + assert "node2" in config + assert "node3" in config + + zk3 = get_fake_zk(node3) + assert ku.configs_equal(config, ku.get_config_str(zk3)) + + for i in range(100): + assert zk3.exists(f"/test_four_{i}") is not None + zk3.create(f"/test_four_{100 + i}", b"somedata") + + zk1.stop() + zk1.close() + zk1 = get_fake_zk(node1) + zk1.sync("/test_four_0") + + zk2.stop() + zk2.close() + zk2 = get_fake_zk(node2) + zk2.sync("/test_four_0") + + for i in range(200): + assert zk1.exists(f"/test_four_{i}") is not None + assert zk2.exists(f"/test_four_{i}") is not None + + assert node3.contains_in_log(part_of_cluster) diff --git a/tests/integration/test_keeper_reconfig_remove/__init__.py b/tests/integration/test_keeper_reconfig_remove/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_keeper_reconfig_remove/configs/keeper1.xml b/tests/integration/test_keeper_reconfig_remove/configs/keeper1.xml new file mode 100644 index 00000000000..bbadc2741af --- /dev/null +++ b/tests/integration/test_keeper_reconfig_remove/configs/keeper1.xml @@ -0,0 +1,37 @@ + + + 9181 + 1 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + 0 + + + 3 + node3 + 9234 + true + 0 + + + + diff --git a/tests/integration/test_keeper_reconfig_remove/configs/keeper2.xml b/tests/integration/test_keeper_reconfig_remove/configs/keeper2.xml new file mode 100644 index 00000000000..0191a522a50 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_remove/configs/keeper2.xml @@ -0,0 +1,37 @@ + + + 9181 + 2 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + 0 + + + 3 + node3 + 9234 + true + 0 + + + + diff --git a/tests/integration/test_keeper_reconfig_remove/configs/keeper3.xml b/tests/integration/test_keeper_reconfig_remove/configs/keeper3.xml new file mode 100644 index 00000000000..345bf402336 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_remove/configs/keeper3.xml @@ -0,0 +1,37 @@ + + + 9181 + 3 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + 0 + + + 3 + node3 + 9234 + true + 0 + + + + diff --git a/tests/integration/test_keeper_reconfig_remove/test.py b/tests/integration/test_keeper_reconfig_remove/test.py new file mode 100644 index 00000000000..7f0b1ee92c6 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_remove/test.py @@ -0,0 +1,145 @@ +#!/usr/bin/env python3 + +import 
pytest +from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as ku +import os +from kazoo.client import KazooClient +from kazoo.exceptions import BadVersionException, BadArgumentsException + +cluster = ClickHouseCluster(__file__) +CONFIG_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "configs") +node1 = cluster.add_instance("node1", main_configs=["configs/keeper1.xml"]) +node2 = cluster.add_instance("node2", main_configs=["configs/keeper2.xml"]) +node3 = cluster.add_instance("node3", main_configs=["configs/keeper3.xml"]) + +log_msg_removed = "has been removed from the cluster" +zk1, zk2, zk3 = None, None, None + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + finally: + for conn in [zk1, zk2, zk3]: + if conn: + conn.stop() + conn.close() + + cluster.shutdown() + + +def get_fake_zk(node): + return ku.get_fake_zk(cluster, node) + + +def test_reconfig_remove_followers_from_3(started_cluster): + """ + Remove 1 follower node from cluster of 3. + Then remove another follower from two left nodes. + Check that remaining node is in standalone mode. + """ + + zk1 = get_fake_zk(node1) + config, _ = zk1.get("/keeper/config") + config = config.decode("utf-8") + print("Initial config", config) + + assert len(config.split("\n")) == 3 + assert "node1" in config + assert "node2" in config + assert "node3" in config + + with pytest.raises(BadVersionException): + zk1.reconfig(joining=None, leaving="1", new_members=None, from_config=20) + with pytest.raises(BadArgumentsException): + zk1.reconfig(joining=None, leaving=None, new_members=None) + with pytest.raises(BadArgumentsException): + # bulk reconfiguration is not supported + zk1.reconfig(joining=None, leaving=None, new_members="3") + with pytest.raises(BadArgumentsException): + zk1.reconfig(joining="1", leaving="1", new_members="3") + with pytest.raises(BadArgumentsException): + # at least one node must be left + zk1.reconfig(joining=None, leaving="1,2,3", new_members=None) + + for i in range(100): + zk1.create(f"/test_two_{i}", b"somedata") + + zk2 = get_fake_zk(node2) + zk2.sync("/test_two_0") + assert ku.configs_equal(config, ku.get_config_str(zk2)) + + zk3 = get_fake_zk(node3) + zk3.sync("/test_two_0") + assert ku.configs_equal(config, ku.get_config_str(zk3)) + + for i in range(100): + assert zk2.exists(f"test_two_{i}") is not None + assert zk3.exists(f"test_two_{i}") is not None + + config, _ = zk1.reconfig(joining=None, leaving="3", new_members=None) + config = config.decode("utf-8") + print("After removing 3", config) + + assert len(config.split("\n")) == 2 + assert "node1" in config + assert "node2" in config + assert "node3" not in config + + zk2.stop() + zk2.close() + zk2 = get_fake_zk(node2) + assert ku.configs_equal(config, ku.get_config_str(zk2)) + + for i in range(100): + assert zk2.exists(f"test_two_{i}") is not None + zk2.create(f"/test_two_{100 + i}", b"otherdata") + + zk1.stop() + zk1.close() + zk1 = get_fake_zk(node1) + zk1.sync("/test_two_0") + + for i in range(200): + assert zk1.exists(f"test_two_{i}") is not None + + with pytest.raises(Exception): + zk3.stop() + zk3.close() + zk3 = get_fake_zk(node3) + zk3.sync("/test_two_0") + + assert node3.contains_in_log(log_msg_removed) + + for i in range(100): + zk2.create(f"/test_two_{200 + i}", b"otherdata") + + config, _ = zk1.reconfig(joining=None, leaving="2", new_members=None) + config = config.decode("utf-8") + + print("After removing 2", config) + assert len(config.split("\n")) == 1 + assert 
"node1" in config + assert "node2" not in config + assert "node3" not in config + + zk1.stop() + zk1.close() + zk1 = get_fake_zk(node1) + zk1.sync("/test_two_0") + + for i in range(300): + assert zk1.exists(f"test_two_{i}") is not None + + with pytest.raises(Exception): + zk2.stop() + zk2.close() + zk2 = get_fake_zk(node2) + zk2.sync("/test_two_0") + + assert not node1.contains_in_log(log_msg_removed) + assert node2.contains_in_log(log_msg_removed) + assert "Mode: standalone" in zk1.command(b"stat") diff --git a/tests/integration/test_keeper_reconfig_remove_many/__init__.py b/tests/integration/test_keeper_reconfig_remove_many/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_keeper_reconfig_remove_many/configs/keeper1.xml b/tests/integration/test_keeper_reconfig_remove_many/configs/keeper1.xml new file mode 100644 index 00000000000..9976169624b --- /dev/null +++ b/tests/integration/test_keeper_reconfig_remove_many/configs/keeper1.xml @@ -0,0 +1,47 @@ + + + 9181 + 1 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + + + 3 + node3 + 9234 + true + + + 4 + node4 + 9234 + true + + + 5 + node5 + 9234 + true + + + + diff --git a/tests/integration/test_keeper_reconfig_remove_many/configs/keeper2.xml b/tests/integration/test_keeper_reconfig_remove_many/configs/keeper2.xml new file mode 100644 index 00000000000..edc43142464 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_remove_many/configs/keeper2.xml @@ -0,0 +1,47 @@ + + + 9181 + 2 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + + + 3 + node3 + 9234 + true + + + 4 + node4 + 9234 + true + + + 5 + node5 + 9234 + true + + + + diff --git a/tests/integration/test_keeper_reconfig_remove_many/configs/keeper3.xml b/tests/integration/test_keeper_reconfig_remove_many/configs/keeper3.xml new file mode 100644 index 00000000000..8cebcbc0808 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_remove_many/configs/keeper3.xml @@ -0,0 +1,47 @@ + + + 9181 + 3 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + + + 3 + node3 + 9234 + true + + + 4 + node4 + 9234 + true + + + 5 + node5 + 9234 + true + + + + diff --git a/tests/integration/test_keeper_reconfig_remove_many/configs/keeper4.xml b/tests/integration/test_keeper_reconfig_remove_many/configs/keeper4.xml new file mode 100644 index 00000000000..99ac7e53f30 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_remove_many/configs/keeper4.xml @@ -0,0 +1,47 @@ + + + 9181 + 4 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + + + 3 + node3 + 9234 + true + + + 4 + node4 + 9234 + true + + + 5 + node5 + 9234 + true + + + + diff --git a/tests/integration/test_keeper_reconfig_remove_many/configs/keeper5.xml b/tests/integration/test_keeper_reconfig_remove_many/configs/keeper5.xml new file mode 100644 index 00000000000..92102ad486b --- /dev/null +++ b/tests/integration/test_keeper_reconfig_remove_many/configs/keeper5.xml @@ -0,0 +1,47 @@ + + + 9181 + 5 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 
10000 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + + + 3 + node3 + 9234 + true + + + 4 + node4 + 9234 + true + + + 5 + node5 + 9234 + true + + + + diff --git a/tests/integration/test_keeper_reconfig_remove_many/test.py b/tests/integration/test_keeper_reconfig_remove_many/test.py new file mode 100644 index 00000000000..6bf477ff9c9 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_remove_many/test.py @@ -0,0 +1,149 @@ +#!/usr/bin/env python3 + +import pytest +from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as ku +import os +from kazoo.client import KazooClient, KazooState +from kazoo.exceptions import BadVersionException, BadArgumentsException + +cluster = ClickHouseCluster(__file__) +CONFIG_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "configs") + +nodes = [ + cluster.add_instance(f"node{i}", main_configs=[f"configs/keeper{i}.xml"]) + for i in range(1, 6) +] +node1, node2, node3, node4, node5 = nodes + +log_msg_removed = "has been removed from the cluster" +zk1, zk2, zk3, zk4, zk5 = None, None, None, None, None + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + finally: + for conn in [zk1, zk2, zk3, zk4, zk5]: + if conn: + conn.stop() + conn.close() + + cluster.shutdown() + + +def get_fake_zk(node): + return ku.get_fake_zk(cluster, node) + + +def test_reconfig_remove_2_and_leader(started_cluster): + """ + Remove 2 followers from a cluster of 5. Remove leader from 3 nodes. + """ + + zk1 = get_fake_zk(node1) + config = ku.get_config_str(zk1) + print("Initial config", config) + + assert len(config.split("\n")) == 5 + + for i in range(100): + zk1.create(f"/test_two_{i}", b"somedata") + + zk4 = get_fake_zk(node4) + zk4.sync("/test_two_0") + assert ku.configs_equal(config, ku.get_config_str(zk4)) + + zk5 = get_fake_zk(node5) + zk5.sync("/test_two_0") + assert ku.configs_equal(config, ku.get_config_str(zk5)) + + for i in range(100): + assert zk4.exists(f"test_two_{i}") is not None + assert zk5.exists(f"test_two_{i}") is not None + + zk4.create(f"/test_two_{100 + i}", b"otherdata") + + zk2 = get_fake_zk(node2) + config, _ = zk2.reconfig(joining=None, leaving="4,5", new_members=None) + config = config.decode("utf-8") + + print("After removing 4,5", config) + assert len(config.split("\n")) == 3 + assert "node1" in config + assert "node2" in config + assert "node3" in config + assert "node4" not in config + assert "node5" not in config + + zk1.stop() + zk1.close() + zk1 = get_fake_zk(node1) + zk1.sync("/test_two_0") + + assert ku.configs_equal(config, ku.get_config_str(zk1)) + + for i in range(200): + assert zk1.exists(f"test_two_{i}") is not None + assert zk2.exists(f"test_two_{i}") is not None + + with pytest.raises(Exception): + zk4.stop() + zk4.close() + zk4 = get_fake_zk(node4) + zk4.sync("/test_two_0") + + with pytest.raises(Exception): + zk5.stop() + zk5.close() + zk5 = get_fake_zk(node5) + zk5.sync("/test_two_0") + + assert not node1.contains_in_log(log_msg_removed) + assert not node2.contains_in_log(log_msg_removed) + assert not node3.contains_in_log(log_msg_removed) + assert node4.contains_in_log(log_msg_removed) + assert node5.contains_in_log(log_msg_removed) + + assert ku.is_leader(cluster, node1) + + for i in range(100): + zk1.create(f"/test_leader_{i}", b"somedata") + + # when a leader gets a remove request, it must yield leadership + config, _ = zk1.reconfig(joining=None, leaving="1", new_members=None) + config = config.decode("utf-8") + print("After removing 
1 (leader)", config) + + assert len(config.split("\n")) == 2 + assert "node1" not in config + assert "node2" in config + assert "node3" in config + assert "node4" not in config + assert "node5" not in config + + zk2.stop() + zk2.close() + zk2 = get_fake_zk(node2) + zk2.sync("/test_leader_0") + assert ku.configs_equal(config, ku.get_config_str(zk2)) + + zk3 = get_fake_zk(node3) + zk3.sync("/test_leader_0") + assert ku.configs_equal(config, ku.get_config_str(zk3)) + + for i in range(100): + assert zk2.exists(f"test_leader_{i}") is not None + assert zk3.exists(f"test_leader_{i}") is not None + + with pytest.raises(Exception): + zk1.stop() + zk1.close() + zk1 = get_fake_zk(node1) + zk1.sync("/test_leader_0") + + assert node1.contains_in_log(log_msg_removed) + assert not node2.contains_in_log(log_msg_removed) + assert not node3.contains_in_log(log_msg_removed) diff --git a/tests/integration/test_keeper_reconfig_replace_leader/__init__.py b/tests/integration/test_keeper_reconfig_replace_leader/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_keeper_reconfig_replace_leader/configs/keeper1.xml b/tests/integration/test_keeper_reconfig_replace_leader/configs/keeper1.xml new file mode 100644 index 00000000000..71f3403aca3 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_replace_leader/configs/keeper1.xml @@ -0,0 +1,35 @@ + + + 9181 + 1 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + + + 3 + node3 + 9234 + true + + + + diff --git a/tests/integration/test_keeper_reconfig_replace_leader/configs/keeper2.xml b/tests/integration/test_keeper_reconfig_replace_leader/configs/keeper2.xml new file mode 100644 index 00000000000..faefb4d1102 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_replace_leader/configs/keeper2.xml @@ -0,0 +1,35 @@ + + + 9181 + 2 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + + + 3 + node3 + 9234 + true + + + + diff --git a/tests/integration/test_keeper_reconfig_replace_leader/configs/keeper3.xml b/tests/integration/test_keeper_reconfig_replace_leader/configs/keeper3.xml new file mode 100644 index 00000000000..80a9caa92c2 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_replace_leader/configs/keeper3.xml @@ -0,0 +1,35 @@ + + + 9181 + 3 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + + + 3 + node3 + 9234 + true + + + + diff --git a/tests/integration/test_keeper_reconfig_replace_leader/configs/keeper4.xml b/tests/integration/test_keeper_reconfig_replace_leader/configs/keeper4.xml new file mode 100644 index 00000000000..9fd88fe5d63 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_replace_leader/configs/keeper4.xml @@ -0,0 +1,21 @@ + + + 9181 + 4 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + 2 node2 9234 + 3 node3 9234 + 4 node4 9234 + + + diff --git a/tests/integration/test_keeper_reconfig_replace_leader/test.py b/tests/integration/test_keeper_reconfig_replace_leader/test.py new file mode 100644 index 00000000000..1b23aa056c6 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_replace_leader/test.py @@ -0,0 +1,127 @@ +#!/usr/bin/env 
python3 + +import pytest +from helpers.cluster import ClickHouseCluster +from os.path import join, dirname, realpath +import time +import helpers.keeper_utils as ku +from kazoo.client import KazooClient, KazooState + +cluster = ClickHouseCluster(__file__) +CONFIG_DIR = join(dirname(realpath(__file__)), "configs") + +node1 = cluster.add_instance("node1", main_configs=["configs/keeper1.xml"]) +node2 = cluster.add_instance("node2", main_configs=["configs/keeper2.xml"]) +node3 = cluster.add_instance("node3", main_configs=["configs/keeper3.xml"]) +node4 = cluster.add_instance("node4", stay_alive=True) +zk1, zk2, zk3, zk4 = None, None, None, None + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + node4.stop_clickhouse() + node4.copy_file_to_container( + join(CONFIG_DIR, "keeper4.xml"), + "/etc/clickhouse-server/config.d/keeper.xml", + ) + + yield cluster + + finally: + for conn in [zk1, zk2, zk3, zk4]: + if conn: + conn.stop() + conn.close() + + cluster.shutdown() + + +def get_fake_zk(node): + return ku.get_fake_zk(cluster, node) + + +def test_reconfig_replace_leader(started_cluster): + """ + Remove leader from a cluster of 3 and add a new node via two commands. + """ + + zk1 = get_fake_zk(node1) + config = ku.get_config_str(zk1) + + assert len(config.split("\n")) == 3 + assert "node1" in config + assert "node2" in config + assert "node3" in config + assert "node4" not in config + + for i in range(100): + zk1.create(f"/test_four_{i}", b"somedata") + + zk2 = get_fake_zk(node2) + zk2.sync("/test_four_0") + assert ku.configs_equal(config, ku.get_config_str(zk2)) + + zk3 = get_fake_zk(node3) + zk3.sync("/test_four_0") + assert ku.configs_equal(config, ku.get_config_str(zk3)) + + for i in range(100): + assert zk2.exists(f"/test_four_{i}") is not None + assert zk3.exists(f"/test_four_{i}") is not None + + assert ku.is_leader(cluster, node1) + config, _ = zk2.reconfig(joining=None, leaving="1", new_members=None) + config = config.decode("utf-8") + + print("After removing 1 (leader)", config) + assert len(config.split("\n")) == 2 + assert "node1" not in config + assert "node2" in config + assert "node3" in config + assert "node4" not in config + + with pytest.raises(Exception): + zk1.stop() + zk1.close() + zk1 = get_fake_zk(node1) + zk1.sync("/test_four_0") + + node4.start_clickhouse() + config, _ = zk2.reconfig( + joining="server.4=node4:9234", leaving=None, new_members=None + ) + config = config.decode("utf-8") + ku.wait_until_connected(cluster, node4) + + print("After adding 4", config) + assert len(config.split("\n")) == 3 + assert "node1" not in config + assert "node2" in config + assert "node3" in config + assert "node4" in config + + zk4 = get_fake_zk(node4) + assert ku.configs_equal(config, ku.get_config_str(zk4)) + + for i in range(100): + assert zk4.exists(f"test_four_{i}") is not None + zk4.create(f"/test_four_{100 + i}", b"somedata") + + zk2.stop() + zk2.close() + zk2 = get_fake_zk(node2) + zk2.sync("/test_four_0") + assert ku.configs_equal(config, ku.get_config_str(zk2)) + + zk3.stop() + zk3.close() + zk3 = get_fake_zk(node3) + zk3.sync("/test_four_0") + assert ku.configs_equal(config, ku.get_config_str(zk3)) + + for i in range(200): + assert zk2.exists(f"test_four_{i}") is not None + assert zk3.exists(f"test_four_{i}") is not None diff --git a/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/__init__.py b/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/__init__.py new file mode 100644 index 
00000000000..e69de29bb2d diff --git a/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper1.xml b/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper1.xml new file mode 100644 index 00000000000..71f3403aca3 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper1.xml @@ -0,0 +1,35 @@ + + + 9181 + 1 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + + + 3 + node3 + 9234 + true + + + + diff --git a/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper2.xml b/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper2.xml new file mode 100644 index 00000000000..faefb4d1102 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper2.xml @@ -0,0 +1,35 @@ + + + 9181 + 2 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + + + 3 + node3 + 9234 + true + + + + diff --git a/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper3.xml b/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper3.xml new file mode 100644 index 00000000000..80a9caa92c2 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper3.xml @@ -0,0 +1,35 @@ + + + 9181 + 3 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + + + 3 + node3 + 9234 + true + + + + diff --git a/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper4.xml b/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper4.xml new file mode 100644 index 00000000000..9fd88fe5d63 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper4.xml @@ -0,0 +1,21 @@ + + + 9181 + 4 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + 2 node2 9234 + 3 node3 9234 + 4 node4 9234 + + + diff --git a/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/test.py b/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/test.py new file mode 100644 index 00000000000..c7aed945097 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/test.py @@ -0,0 +1,120 @@ +#!/usr/bin/env python3 + +import pytest +from helpers.cluster import ClickHouseCluster +from os.path import join, dirname, realpath +import time +import helpers.keeper_utils as ku +from kazoo.client import KazooClient, KazooState + +cluster = ClickHouseCluster(__file__) +CONFIG_DIR = join(dirname(realpath(__file__)), "configs") + +node1 = cluster.add_instance("node1", main_configs=["configs/keeper1.xml"]) +node2 = cluster.add_instance("node2", main_configs=["configs/keeper2.xml"]) +node3 = cluster.add_instance("node3", main_configs=["configs/keeper3.xml"]) +node4 = cluster.add_instance("node4", stay_alive=True) +zk1, zk2, zk3, zk4 = None, None, None, None + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + node4.stop_clickhouse() + node4.copy_file_to_container( + join(CONFIG_DIR, 
"keeper4.xml"), + "/etc/clickhouse-server/config.d/keeper.xml", + ) + + yield cluster + + finally: + for conn in [zk1, zk2, zk3, zk4]: + if conn: + conn.stop() + conn.close() + + cluster.shutdown() + + +def get_fake_zk(node): + return ku.get_fake_zk(cluster, node) + + +def test_reconfig_replace_leader_in_one_command(started_cluster): + """ + Remove leader from a cluster of 3 and add a new node to this cluster in a single command + """ + + zk1 = get_fake_zk(node1) + config = ku.get_config_str(zk1) + + assert len(config.split("\n")) == 3 + assert "node1" in config + assert "node2" in config + assert "node3" in config + assert "node4" not in config + + for i in range(100): + zk1.create(f"/test_four_{i}", b"somedata") + + zk2 = get_fake_zk(node2) + zk2.sync("/test_four_0") + assert ku.configs_equal(config, ku.get_config_str(zk2)) + + zk3 = get_fake_zk(node3) + zk3.sync("/test_four_0") + assert ku.configs_equal(config, ku.get_config_str(zk3)) + + for i in range(100): + assert zk2.exists(f"/test_four_{i}") is not None + assert zk3.exists(f"/test_four_{i}") is not None + + assert ku.is_leader(cluster, node1) + node4.start_clickhouse() + config, _ = zk2.reconfig( + joining="server.4=node4:9234", leaving="1", new_members=None + ) + config = config.decode("utf-8") + + print("After removing 1 and adding 4", config) + assert len(config.split("\n")) == 3 + assert "node1" not in config + assert "node2" in config + assert "node3" in config + assert "node4" in config + + ku.wait_until_connected(cluster, node4) + time.sleep(1) + + zk4 = get_fake_zk(node4) + zk4.sync("/test_four_0") + assert ku.configs_equal(config, ku.get_config_str(zk4)) + + for i in range(100): + assert zk4.exists(f"test_four_{i}") is not None + zk4.create(f"/test_four_{100 + i}", b"somedata") + + with pytest.raises(Exception): + zk1.stop() + zk1.close() + zk1 = get_fake_zk(node1) + zk1.sync("/test_four_0") + + zk2.stop() + zk2.close() + zk2 = get_fake_zk(node2) + zk2.sync("/test_four_0") + assert ku.configs_equal(config, ku.get_config_str(zk2)) + + zk3.stop() + zk3.close() + zk3 = get_fake_zk(node3) + zk3.sync("/test_four_0") + assert ku.configs_equal(config, ku.get_config_str(zk3)) + + for i in range(200): + assert zk2.exists(f"test_four_{i}") is not None + assert zk3.exists(f"test_four_{i}") is not None diff --git a/utils/keeper-data-dumper/main.cpp b/utils/keeper-data-dumper/main.cpp index 5a6fd15d72c..8d685d65d1d 100644 --- a/utils/keeper-data-dumper/main.cpp +++ b/utils/keeper-data-dumper/main.cpp @@ -15,7 +15,7 @@ using namespace DB; void dumpMachine(std::shared_ptr machine) { - auto & storage = machine->getStorage(); + auto & storage = machine->getStorageForUnitTests(); std::queue keys; keys.push("/"); From b4f750ed66a7579580b88deaaedf15ac153785d0 Mon Sep 17 00:00:00 2001 From: Mike Kot Date: Mon, 3 Jul 2023 10:11:44 +0000 Subject: [PATCH 1306/1997] review fixes --- base/base/find_symbols.h | 2 +- src/Common/ZooKeeper/IKeeper.h | 2 +- src/Coordination/KeeperContext.cpp | 3 ++- src/Coordination/KeeperContext.h | 5 +++-- src/Coordination/KeeperDispatcher.cpp | 5 ++--- src/Coordination/KeeperReconfiguration.cpp | 5 ++--- src/Coordination/KeeperReconfiguration.h | 4 ++-- src/Coordination/KeeperServer.cpp | 14 +++++++------- src/Coordination/KeeperServer.h | 2 -- src/Coordination/KeeperStateMachine.cpp | 10 ++++++---- src/Coordination/KeeperStateMachine.h | 2 -- src/Coordination/KeeperStorage.cpp | 2 +- src/Coordination/RaftServerConfig.cpp | 2 +- 13 files changed, 28 insertions(+), 30 deletions(-) diff --git 
a/base/base/find_symbols.h b/base/base/find_symbols.h index f7d24ccfc11..fda94edaa88 100644 --- a/base/base/find_symbols.h +++ b/base/base/find_symbols.h @@ -448,7 +448,7 @@ inline char * find_last_not_symbols_or_null(char * begin, char * end) /// See https://github.com/boostorg/algorithm/issues/63 /// And https://bugs.llvm.org/show_bug.cgi?id=41141 template -inline To& splitInto(To & to, std::string_view what, bool token_compress = false) +inline To & splitInto(To & to, std::string_view what, bool token_compress = false) { const char * pos = what.data(); const char * end = pos + what.size(); diff --git a/src/Common/ZooKeeper/IKeeper.h b/src/Common/ZooKeeper/IKeeper.h index 20ce2a748e6..8567a53699e 100644 --- a/src/Common/ZooKeeper/IKeeper.h +++ b/src/Common/ZooKeeper/IKeeper.h @@ -573,7 +573,7 @@ public: template <> struct fmt::formatter : fmt::formatter { - constexpr auto format(Coordination::Error code, auto& ctx) + constexpr auto format(Coordination::Error code, auto & ctx) { return formatter::format(Coordination::errorMessage(code), ctx); } diff --git a/src/Coordination/KeeperContext.cpp b/src/Coordination/KeeperContext.cpp index 3c3c0500540..0c083971f74 100644 --- a/src/Coordination/KeeperContext.cpp +++ b/src/Coordination/KeeperContext.cpp @@ -32,8 +32,9 @@ KeeperContext::KeeperContext(bool standalone_keeper_) system_nodes_with_data[keeper_api_version_path] = toString(static_cast(KeeperApiVersion::WITH_MULTI_READ)); } -void KeeperContext::initialize(const Poco::Util::AbstractConfiguration & config) +void KeeperContext::initialize(const Poco::Util::AbstractConfiguration & config, KeeperDispatcher * dispatcher_) { + dispatcher = dispatcher_; digest_enabled = config.getBool("keeper_server.digest_enabled", false); ignore_system_path_on_startup = config.getBool("keeper_server.ignore_system_path_on_startup", false); diff --git a/src/Coordination/KeeperContext.h b/src/Coordination/KeeperContext.h index 4fb552f20a3..ba1a81b4423 100644 --- a/src/Coordination/KeeperContext.h +++ b/src/Coordination/KeeperContext.h @@ -24,7 +24,7 @@ public: SHUTDOWN }; - void initialize(const Poco::Util::AbstractConfiguration & config); + void initialize(const Poco::Util::AbstractConfiguration & config, KeeperDispatcher * dispatcher_); Phase getServerState() const; void setServerState(Phase server_state_); @@ -52,6 +52,8 @@ public: void dumpConfiguration(WriteBufferFromOwnString & buf) const; + constexpr KeeperDispatcher * getDispatcher() const { return dispatcher; } + private: /// local disk defined using path or disk name using Storage = std::variant; @@ -90,5 +92,4 @@ private: }; using KeeperContextPtr = std::shared_ptr; - } diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index 178453b2f5b..26be2881780 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -337,8 +337,7 @@ void KeeperDispatcher::initialize(const Poco::Util::AbstractConfiguration & conf snapshot_s3.startup(config, macros); keeper_context = std::make_shared(standalone_keeper); - keeper_context->initialize(config); - keeper_context->dispatcher = this; + keeper_context->initialize(config, this); server = std::make_unique( configuration_and_settings, @@ -762,7 +761,7 @@ void KeeperDispatcher::clusterUpdateThread() } } -void KeeperDispatcher::pushClusterUpdates(ClusterUpdateActions&& actions) +void KeeperDispatcher::pushClusterUpdates(ClusterUpdateActions && actions) { if (shutdown_called) return; for (auto && action : actions) diff --git 
a/src/Coordination/KeeperReconfiguration.cpp b/src/Coordination/KeeperReconfiguration.cpp index dec3e1f155f..f262a07209d 100644 --- a/src/Coordination/KeeperReconfiguration.cpp +++ b/src/Coordination/KeeperReconfiguration.cpp @@ -1,5 +1,4 @@ #include "KeeperReconfiguration.h" -#include #include #include #include @@ -45,8 +44,8 @@ ClusterUpdateActions leavingToClusterUpdates(const ClusterConfigPtr & cfg, std:: for (std::string_view leaving_server : leaving_arr) { - int id; - if (std::from_chars(leaving_server.begin(), leaving_server.end(), id).ec != std::error_code{}) + int32_t id; + if (!tryParse(id, leaving_server)) return {}; if (remove_ids.contains(id)) diff --git a/src/Coordination/KeeperReconfiguration.h b/src/Coordination/KeeperReconfiguration.h index 71958f2035e..fdd81708da2 100644 --- a/src/Coordination/KeeperReconfiguration.h +++ b/src/Coordination/KeeperReconfiguration.h @@ -1,6 +1,6 @@ #pragma once -#include "Coordination/KeeperSnapshotManager.h" -#include "Coordination/RaftServerConfig.h" +#include +#include namespace DB { diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 1cde957ef3a..f6715b0da3f 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -793,10 +793,10 @@ bool KeeperServer::applyConfigUpdate(const ClusterUpdateAction& action) { std::lock_guard _{server_write_mutex}; - if (const auto* add = std::get_if(&action)) + if (const auto * add = std::get_if(&action)) return raft_instance->get_srv_config(add->id) != nullptr || raft_instance->add_srv(static_cast(*add))->get_accepted(); - else if (const auto* remove = std::get_if(&action)) + else if (const auto * remove = std::get_if(&action)) { if (isLeader() && remove->id == state_manager->server_id()) { @@ -807,7 +807,7 @@ bool KeeperServer::applyConfigUpdate(const ClusterUpdateAction& action) return raft_instance->get_srv_config(remove->id) == nullptr || raft_instance->remove_srv(remove->id)->get_accepted(); } - else if (const auto* update = std::get_if(&action)) + else if (const auto * update = std::get_if(&action)) { if (auto ptr = raft_instance->get_srv_config(update->id); ptr == nullptr) throw Exception(ErrorCodes::RAFT_ERROR, @@ -851,7 +851,7 @@ void KeeperServer::applyConfigUpdateWithReconfigDisabled(const ClusterUpdateActi std::this_thread::sleep_for(sleep_time * (i + 1)); }; - if (const auto* add = std::get_if(&action)) + if (const auto * add = std::get_if(&action)) { for (size_t i = 0; i < coordination_settings->configuration_change_tries_count && !is_recovering; ++i) { @@ -863,7 +863,7 @@ void KeeperServer::applyConfigUpdateWithReconfigDisabled(const ClusterUpdateActi backoff_on_refusal(i); } } - else if (const auto* remove = std::get_if(&action)) + else if (const auto * remove = std::get_if(&action)) { if (remove->id == state_manager->server_id()) { @@ -884,7 +884,7 @@ void KeeperServer::applyConfigUpdateWithReconfigDisabled(const ClusterUpdateActi backoff_on_refusal(i); } } - else if (const auto* update = std::get_if(&action)) + else if (const auto * update = std::get_if(&action)) { raft_instance->set_priority(update->id, update->priority, /*broadcast on live leader*/true); return; @@ -928,7 +928,7 @@ bool KeeperServer::waitForConfigUpdateWithReconfigDisabled(const ClusterUpdateAc backoff(i); } } - else if (std::get_if(&action) != nullptr) + else if (std::holds_alternative(action)) return true; return false; diff --git a/src/Coordination/KeeperServer.h b/src/Coordination/KeeperServer.h index 61e29b67bbd..50d229c9e63 100644 --- 
a/src/Coordination/KeeperServer.h +++ b/src/Coordination/KeeperServer.h @@ -17,8 +17,6 @@ namespace DB using RaftAppendResult = nuraft::ptr>>; -class KeeperDispatcher; - class KeeperServer { private: diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index 3e9850caa40..6ec03235a2d 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -3,6 +3,8 @@ #include #include #include +#include +#include #include #include #include @@ -14,15 +16,13 @@ #include #include #include -#include "Coordination/KeeperStorage.h" -#include "Coordination/KeeperReconfiguration.h" - #include namespace ProfileEvents { extern const Event KeeperCommits; + extern const Event KeeperReconfigRequest; extern const Event KeeperCommitsFailed; extern const Event KeeperSnapshotCreations; extern const Event KeeperSnapshotCreationsFailed; @@ -298,6 +298,8 @@ bool KeeperStateMachine::preprocess(const KeeperStorage::RequestForSession & req KeeperStorage::ResponseForSession KeeperStateMachine::processReconfiguration( const KeeperStorage::RequestForSession& request_for_session) { + ProfileEvents::increment(ProfileEvents::KeeperReconfigRequest); + const auto& request = static_cast(*request_for_session.request); const int64_t session_id = request_for_session.session_id; const int64_t zxid = request_for_session.zxid; @@ -312,7 +314,7 @@ KeeperStorage::ResponseForSession KeeperStateMachine::processReconfiguration( return { session_id, std::move(res) }; }; - KeeperDispatcher& dispatcher = *keeper_context->dispatcher; + KeeperDispatcher& dispatcher = *keeper_context->getDispatcher(); if (!dispatcher.reconfigEnabled()) return bad_request(ZUNIMPLEMENTED); if (!dispatcher.clusterUpdateQueueEmpty()) diff --git a/src/Coordination/KeeperStateMachine.h b/src/Coordination/KeeperStateMachine.h index 3b239adae45..4ff46394fcc 100644 --- a/src/Coordination/KeeperStateMachine.h +++ b/src/Coordination/KeeperStateMachine.h @@ -12,8 +12,6 @@ namespace DB { -class KeeperDispatcher; - using ResponsesQueue = ConcurrentBoundedQueue; using SnapshotsQueue = ConcurrentBoundedQueue; diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 2b245a455b7..7fe85857ccb 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -1088,7 +1088,7 @@ struct KeeperStorageGetRequestProcessor final : public KeeperStorageRequestProce if (request.path == Coordination::keeper_config_path) { response.data = serializeClusterConfig( - storage.keeper_context->dispatcher->getStateMachine().getClusterConfig()); + storage.keeper_context->getDispatcher()->getStateMachine().getClusterConfig()); response.error = Coordination::Error::ZOK; return response_ptr; } diff --git a/src/Coordination/RaftServerConfig.cpp b/src/Coordination/RaftServerConfig.cpp index 42923dd0b29..45b6d5d1dad 100644 --- a/src/Coordination/RaftServerConfig.cpp +++ b/src/Coordination/RaftServerConfig.cpp @@ -31,7 +31,7 @@ std::optional RaftServerConfig::parse(std::string_view server) if (!id_str.starts_with("server.")) return std::nullopt; - int id; + Int32 id; if (std::from_chars(std::next(id_str.begin(), 7), id_str.end(), id).ec != std::error_code{}) return std::nullopt; if (id <= 0) From 297d566600c3b36a552b456f8371440c5939b1d7 Mon Sep 17 00:00:00 2001 From: Mike Kot Date: Mon, 3 Jul 2023 14:13:26 +0000 Subject: [PATCH 1307/1997] acl check --- src/Coordination/KeeperStateMachine.cpp | 3 +++ src/Coordination/KeeperStateMachine.h | 5 +++-- 2 files changed, 6 
insertions(+), 2 deletions(-) diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index 6ec03235a2d..e053e481b6b 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -314,6 +314,9 @@ KeeperStorage::ResponseForSession KeeperStateMachine::processReconfiguration( return { session_id, std::move(res) }; }; + if (!storage->checkACL(keeper_config_path, Coordination::ACL::Write, session_id, true)) + return bad_request(ZNOAUTH); + KeeperDispatcher& dispatcher = *keeper_context->getDispatcher(); if (!dispatcher.reconfigEnabled()) return bad_request(ZUNIMPLEMENTED); diff --git a/src/Coordination/KeeperStateMachine.h b/src/Coordination/KeeperStateMachine.h index 4ff46394fcc..997a03a04d5 100644 --- a/src/Coordination/KeeperStateMachine.h +++ b/src/Coordination/KeeperStateMachine.h @@ -183,7 +183,8 @@ private: KeeperSnapshotManagerS3 * snapshot_manager_s3; - KeeperStorage::ResponseForSession processReconfiguration(const KeeperStorage::RequestForSession& request_for_session); + KeeperStorage::ResponseForSession processReconfiguration( + const KeeperStorage::RequestForSession& request_for_session) + TSA_REQUIRES(storage_and_responses_lock); }; - } From 1bef6fc76cf70b6faeb82b25e53e708bbf309bc6 Mon Sep 17 00:00:00 2001 From: Mike Kot Date: Mon, 3 Jul 2023 15:04:31 +0000 Subject: [PATCH 1308/1997] process reconfig in keeper dispatcher --- src/Coordination/KeeperDispatcher.cpp | 6 ++++++ src/Coordination/KeeperStateMachine.cpp | 21 +++++++++++++-------- src/Coordination/KeeperStateMachine.h | 2 ++ 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index 26be2881780..9039b3a6d11 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -90,6 +90,12 @@ void KeeperDispatcher::requestThread() if (shutdown_called) break; + if (request.request->getOpNum() == Coordination::OpNum::Reconfig) + { + server->getKeeperStateMachine()->reconfigure(request); + continue; + } + KeeperStorage::RequestsForSessions current_batch; size_t current_batch_bytes_size = 0; diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index e053e481b6b..c837b93ffdd 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -295,6 +295,19 @@ bool KeeperStateMachine::preprocess(const KeeperStorage::RequestForSession & req return true; } +void KeeperStateMachine::reconfigure(const KeeperStorage::RequestForSession& request_for_session) +{ + std::lock_guard _(storage_and_responses_lock); + KeeperStorage::ResponseForSession response = processReconfiguration(request_for_session); + if (!responses_queue.push(response)) + { + ProfileEvents::increment(ProfileEvents::KeeperCommitsFailed); + LOG_WARNING(log, + "Failed to push response with session id {} to the queue, probably because of shutdown", + response.session_id); + } +} + KeeperStorage::ResponseForSession KeeperStateMachine::processReconfiguration( const KeeperStorage::RequestForSession& request_for_session) { @@ -400,14 +413,6 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, n response->session_id = session_id; try_push(response_for_session); } - // Processing reconfig request as an ordinary one (in KeeperStorage) brings multiple inconsistencies - // regarding replays of old reconfigurations in new nodes. Thus the storage is not involved. 
- // See https://github.com/ClickHouse/ClickHouse/pull/49450 for details - else if (op_num == Coordination::OpNum::Reconfig) - { - std::lock_guard lock(storage_and_responses_lock); - try_push(processReconfiguration(*request_for_session)); - } else { if (op_num == Coordination::OpNum::Close) diff --git a/src/Coordination/KeeperStateMachine.h b/src/Coordination/KeeperStateMachine.h index 997a03a04d5..5762476886c 100644 --- a/src/Coordination/KeeperStateMachine.h +++ b/src/Coordination/KeeperStateMachine.h @@ -126,6 +126,8 @@ public: void recalculateStorageStats(); + void reconfigure(const KeeperStorage::RequestForSession& request_for_session); + private: CommitCallback commit_callback; /// In our state machine we always have a single snapshot which is stored From 4550b15876c7e57533e7aa700b1376682c95de69 Mon Sep 17 00:00:00 2001 From: Mike Kot Date: Mon, 3 Jul 2023 16:38:26 +0000 Subject: [PATCH 1309/1997] try updating tests to wait for cluster configs to come in sync --- tests/integration/helpers/keeper_utils.py | 14 +++++++++++--- tests/integration/test_keeper_reconfig_add/test.py | 4 ++-- .../test_keeper_reconfig_remove/test.py | 6 +++--- .../test_keeper_reconfig_remove_many/test.py | 10 +++++----- .../test_keeper_reconfig_replace_leader/test.py | 10 +++++----- .../test.py | 10 +++++----- 6 files changed, 31 insertions(+), 23 deletions(-) diff --git a/tests/integration/helpers/keeper_utils.py b/tests/integration/helpers/keeper_utils.py index 3da1d5bd7b0..3970aa325ad 100644 --- a/tests/integration/helpers/keeper_utils.py +++ b/tests/integration/helpers/keeper_utils.py @@ -77,8 +77,16 @@ def get_config_str(zk: KazooClient) -> str: return zk.get("/keeper/config")[0].decode("utf-8") -def configs_equal(left: str, right: str) -> bool: +def wait_configs_equal(left_config: str, right_zk: KazooClient, timeout: float = 30.0): """ - Check whether /keeper/config nodes are equal + Check whether get /keeper/config result in left_config is equal + to get /keeper/config on right_zk ZK connection. """ - return sorted(left.split("\n")) == sorted(right.split("\n")) + elapsed: float = 0. + while sorted(left_config.split("\n")) != sorted(get_config_str(right_zk).split("\n")): + time.sleep(1) + elapsed += 1 + if elapsed >= timeout: + raise Exception( + f"timeout while checking nodes configs to get equal. 
" + f"Left: {left_config}, right: {get_config_str(right_zk)}") diff --git a/tests/integration/test_keeper_reconfig_add/test.py b/tests/integration/test_keeper_reconfig_add/test.py index c80279a0727..2c2da7403a1 100644 --- a/tests/integration/test_keeper_reconfig_add/test.py +++ b/tests/integration/test_keeper_reconfig_add/test.py @@ -91,7 +91,7 @@ def test_reconfig_add(started_cluster): assert "node3" not in config zk2 = get_fake_zk(node2) - assert ku.configs_equal(config, ku.get_config_str(zk2)) + ku.wait_configs_equal(config, zk2) for i in range(100): assert zk2.exists(f"/test_three_{i}") is not None @@ -132,7 +132,7 @@ def test_reconfig_add(started_cluster): assert "node3" in config zk3 = get_fake_zk(node3) - assert ku.configs_equal(config, ku.get_config_str(zk3)) + ku.wait_configs_equal(config, zk3) for i in range(100): assert zk3.exists(f"/test_four_{i}") is not None diff --git a/tests/integration/test_keeper_reconfig_remove/test.py b/tests/integration/test_keeper_reconfig_remove/test.py index 7f0b1ee92c6..fb0a9472df3 100644 --- a/tests/integration/test_keeper_reconfig_remove/test.py +++ b/tests/integration/test_keeper_reconfig_remove/test.py @@ -70,11 +70,11 @@ def test_reconfig_remove_followers_from_3(started_cluster): zk2 = get_fake_zk(node2) zk2.sync("/test_two_0") - assert ku.configs_equal(config, ku.get_config_str(zk2)) + ku.wait_configs_equal(config, zk2) zk3 = get_fake_zk(node3) zk3.sync("/test_two_0") - assert ku.configs_equal(config, ku.get_config_str(zk3)) + ku.wait_configs_equal(config, zk3) for i in range(100): assert zk2.exists(f"test_two_{i}") is not None @@ -92,7 +92,7 @@ def test_reconfig_remove_followers_from_3(started_cluster): zk2.stop() zk2.close() zk2 = get_fake_zk(node2) - assert ku.configs_equal(config, ku.get_config_str(zk2)) + ku.wait_configs_equal(config, zk2) for i in range(100): assert zk2.exists(f"test_two_{i}") is not None diff --git a/tests/integration/test_keeper_reconfig_remove_many/test.py b/tests/integration/test_keeper_reconfig_remove_many/test.py index 6bf477ff9c9..ec0d8b95eff 100644 --- a/tests/integration/test_keeper_reconfig_remove_many/test.py +++ b/tests/integration/test_keeper_reconfig_remove_many/test.py @@ -54,11 +54,11 @@ def test_reconfig_remove_2_and_leader(started_cluster): zk4 = get_fake_zk(node4) zk4.sync("/test_two_0") - assert ku.configs_equal(config, ku.get_config_str(zk4)) + ku.wait_configs_equal(config, zk4) zk5 = get_fake_zk(node5) zk5.sync("/test_two_0") - assert ku.configs_equal(config, ku.get_config_str(zk5)) + ku.wait_configs_equal(config, zk5) for i in range(100): assert zk4.exists(f"test_two_{i}") is not None @@ -83,7 +83,7 @@ def test_reconfig_remove_2_and_leader(started_cluster): zk1 = get_fake_zk(node1) zk1.sync("/test_two_0") - assert ku.configs_equal(config, ku.get_config_str(zk1)) + ku.wait_configs_equal(config, zk1) for i in range(200): assert zk1.exists(f"test_two_{i}") is not None @@ -128,11 +128,11 @@ def test_reconfig_remove_2_and_leader(started_cluster): zk2.close() zk2 = get_fake_zk(node2) zk2.sync("/test_leader_0") - assert ku.configs_equal(config, ku.get_config_str(zk2)) + ku.wait_configs_equal(config, zk2) zk3 = get_fake_zk(node3) zk3.sync("/test_leader_0") - assert ku.configs_equal(config, ku.get_config_str(zk3)) + ku.wait_configs_equal(config, zk3) for i in range(100): assert zk2.exists(f"test_leader_{i}") is not None diff --git a/tests/integration/test_keeper_reconfig_replace_leader/test.py b/tests/integration/test_keeper_reconfig_replace_leader/test.py index 1b23aa056c6..ca1ec3a0c92 100644 --- 
a/tests/integration/test_keeper_reconfig_replace_leader/test.py +++ b/tests/integration/test_keeper_reconfig_replace_leader/test.py @@ -62,11 +62,11 @@ def test_reconfig_replace_leader(started_cluster): zk2 = get_fake_zk(node2) zk2.sync("/test_four_0") - assert ku.configs_equal(config, ku.get_config_str(zk2)) + ku.wait_configs_equal(config, zk2) zk3 = get_fake_zk(node3) zk3.sync("/test_four_0") - assert ku.configs_equal(config, ku.get_config_str(zk3)) + ku.wait_configs_equal(config, zk3) for i in range(100): assert zk2.exists(f"/test_four_{i}") is not None @@ -104,7 +104,7 @@ def test_reconfig_replace_leader(started_cluster): assert "node4" in config zk4 = get_fake_zk(node4) - assert ku.configs_equal(config, ku.get_config_str(zk4)) + ku.wait_configs_equal(config, zk4) for i in range(100): assert zk4.exists(f"test_four_{i}") is not None @@ -114,13 +114,13 @@ def test_reconfig_replace_leader(started_cluster): zk2.close() zk2 = get_fake_zk(node2) zk2.sync("/test_four_0") - assert ku.configs_equal(config, ku.get_config_str(zk2)) + ku.wait_configs_equal(config, zk2) zk3.stop() zk3.close() zk3 = get_fake_zk(node3) zk3.sync("/test_four_0") - assert ku.configs_equal(config, ku.get_config_str(zk3)) + ku.wait_configs_equal(config, zk3) for i in range(200): assert zk2.exists(f"test_four_{i}") is not None diff --git a/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/test.py b/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/test.py index c7aed945097..76aed1c7f3a 100644 --- a/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/test.py +++ b/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/test.py @@ -62,11 +62,11 @@ def test_reconfig_replace_leader_in_one_command(started_cluster): zk2 = get_fake_zk(node2) zk2.sync("/test_four_0") - assert ku.configs_equal(config, ku.get_config_str(zk2)) + ku.wait_configs_equal(config, zk2) zk3 = get_fake_zk(node3) zk3.sync("/test_four_0") - assert ku.configs_equal(config, ku.get_config_str(zk3)) + ku.wait_configs_equal(config, zk3) for i in range(100): assert zk2.exists(f"/test_four_{i}") is not None @@ -91,7 +91,7 @@ def test_reconfig_replace_leader_in_one_command(started_cluster): zk4 = get_fake_zk(node4) zk4.sync("/test_four_0") - assert ku.configs_equal(config, ku.get_config_str(zk4)) + ku.wait_configs_equal(config, zk4) for i in range(100): assert zk4.exists(f"test_four_{i}") is not None @@ -107,13 +107,13 @@ def test_reconfig_replace_leader_in_one_command(started_cluster): zk2.close() zk2 = get_fake_zk(node2) zk2.sync("/test_four_0") - assert ku.configs_equal(config, ku.get_config_str(zk2)) + ku.wait_configs_equal(config, zk2) zk3.stop() zk3.close() zk3 = get_fake_zk(node3) zk3.sync("/test_four_0") - assert ku.configs_equal(config, ku.get_config_str(zk3)) + ku.configs_equal(config, zk3) for i in range(200): assert zk2.exists(f"test_four_{i}") is not None From 2f0cd054970015799b394588b7ecf79ca34a6e9a Mon Sep 17 00:00:00 2001 From: Mike Kot Date: Mon, 3 Jul 2023 17:35:25 +0000 Subject: [PATCH 1310/1997] handle leader removal corner cases --- src/Coordination/KeeperServer.cpp | 7 +++++-- .../test.py | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index f6715b0da3f..fd82f220f9b 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -798,9 +798,12 @@ bool KeeperServer::applyConfigUpdate(const ClusterUpdateAction& action) || 
raft_instance->add_srv(static_cast(*add))->get_accepted(); else if (const auto * remove = std::get_if(&action)) { - if (isLeader() && remove->id == state_manager->server_id()) + if (remove->id == raft_instance->get_leader()) { - raft_instance->yield_leadership(); + if (isLeader()) + raft_instance->yield_leadership(); + else + raft_instance->request_leadership(); return false; } diff --git a/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/test.py b/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/test.py index 76aed1c7f3a..e23d0674c12 100644 --- a/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/test.py +++ b/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/test.py @@ -113,7 +113,7 @@ def test_reconfig_replace_leader_in_one_command(started_cluster): zk3.close() zk3 = get_fake_zk(node3) zk3.sync("/test_four_0") - ku.configs_equal(config, zk3) + ku.wait_configs_equal(config, zk3) for i in range(200): assert zk2.exists(f"test_four_{i}") is not None From c2a0607cf890f95e94db2751a3e68b7acc59a5bf Mon Sep 17 00:00:00 2001 From: Mike Kot Date: Mon, 3 Jul 2023 17:52:57 +0000 Subject: [PATCH 1311/1997] fix --- tests/integration/helpers/keeper_utils.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/integration/helpers/keeper_utils.py b/tests/integration/helpers/keeper_utils.py index 3970aa325ad..93ea3fa74b7 100644 --- a/tests/integration/helpers/keeper_utils.py +++ b/tests/integration/helpers/keeper_utils.py @@ -82,11 +82,14 @@ def wait_configs_equal(left_config: str, right_zk: KazooClient, timeout: float = Wait until the /keeper/config value in left_config is equal to the one returned by get /keeper/config on the right_zk connection. """ - elapsed: float = 0. - while sorted(left_config.split("\n")) != sorted(get_config_str(right_zk).split("\n")): + elapsed: float = 0.0 + while sorted(left_config.split("\n")) != sorted( + get_config_str(right_zk).split("\n") + ): time.sleep(1) elapsed += 1 if elapsed >= timeout: raise Exception( f"timed out while waiting for node configs to become equal. 
" - f"Left: {left_config}, right: {get_config_str(right_zk)}") + f"Left: {left_config}, right: {get_config_str(right_zk)}" + ) From c46b125d0a8501241a4a726a32141e1215a2cbf5 Mon Sep 17 00:00:00 2001 From: Mike Kot Date: Wed, 5 Jul 2023 18:23:34 +0000 Subject: [PATCH 1312/1997] review fixes --- src/Coordination/KeeperDispatcher.cpp | 2 ++ src/Coordination/KeeperStateMachine.cpp | 2 +- src/Coordination/KeeperStateMachine.h | 5 ++++- src/Coordination/tests/gtest_coordination.cpp | 10 +++++----- utils/keeper-data-dumper/main.cpp | 2 +- 5 files changed, 13 insertions(+), 8 deletions(-) diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index 9039b3a6d11..90996dfaff7 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -121,6 +121,8 @@ void KeeperDispatcher::requestThread() std::lock_guard lock(read_request_queue_mutex); read_request_queue[last_request.session_id][last_request.request->xid].push_back(request); } + else if (request.request->getOpNum() == Coordination::OpNum::Reconfig) + server->getKeeperStateMachine()->reconfigure(request); else { current_batch_bytes_size += request.request->bytesSize(); diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index c837b93ffdd..45c776e105b 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -313,7 +313,7 @@ KeeperStorage::ResponseForSession KeeperStateMachine::processReconfiguration( { ProfileEvents::increment(ProfileEvents::KeeperReconfigRequest); - const auto& request = static_cast(*request_for_session.request); + const auto & request = static_cast(*request_for_session.request); const int64_t session_id = request_for_session.session_id; const int64_t zxid = request_for_session.zxid; diff --git a/src/Coordination/KeeperStateMachine.h b/src/Coordination/KeeperStateMachine.h index 5762476886c..116fa9257a0 100644 --- a/src/Coordination/KeeperStateMachine.h +++ b/src/Coordination/KeeperStateMachine.h @@ -88,7 +88,10 @@ public: int read_logical_snp_obj( nuraft::snapshot & s, void *& user_snp_ctx, uint64_t obj_id, nuraft::ptr & data_out, bool & is_last_obj) override; - KeeperStorage & getStorageForUnitTests() TSA_NO_THREAD_SAFETY_ANALYSIS + // This should be used only for tests or keeper-data-dumper because it violates + // TSA -- we can't acquire the lock outside of this class or return a storage under lock + // in a reasonable way. 
+ KeeperStorage & getStorageUnsafe() TSA_NO_THREAD_SAFETY_ANALYSIS { return *storage; } diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index b302f9b13ca..03ce23e9233 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -1616,8 +1616,8 @@ void testLogAndStateMachine( restore_machine->commit(i, changelog.entry_at(i)->get_buf()); } - auto & source_storage = state_machine->getStorageForUnitTests(); - auto & restored_storage = restore_machine->getStorageForUnitTests(); + auto & source_storage = state_machine->getStorageUnsafe(); + auto & restored_storage = restore_machine->getStorageUnsafe(); EXPECT_EQ(source_storage.container.size(), restored_storage.container.size()); for (size_t i = 1; i < total_logs + 1; ++i) @@ -1719,7 +1719,7 @@ TEST_P(CoordinationTest, TestEphemeralNodeRemove) auto entry_c = getLogEntryFromZKRequest(0, 1, state_machine->getNextZxid(), request_c); state_machine->pre_commit(1, entry_c->get_buf()); state_machine->commit(1, entry_c->get_buf()); - const auto & storage = state_machine->getStorageForUnitTests(); + const auto & storage = state_machine->getStorageUnsafe(); EXPECT_EQ(storage.ephemerals.size(), 1); std::shared_ptr request_d = std::make_shared(); @@ -1768,7 +1768,7 @@ TEST_P(CoordinationTest, TestCreateNodeWithAuthSchemeForAclWhenAuthIsPrecommitte auto create_entry = getLogEntryFromZKRequest(0, 1, state_machine->getNextZxid(), create_req); state_machine->pre_commit(2, create_entry->get_buf()); - const auto & uncommitted_state = state_machine->getStorageForUnitTests().uncommitted_state; + const auto & uncommitted_state = state_machine->getStorageUnsafe().uncommitted_state; ASSERT_TRUE(uncommitted_state.nodes.contains(node_path)); // commit log entries @@ -1831,7 +1831,7 @@ TEST_P(CoordinationTest, TestSetACLWithAuthSchemeForAclWhenAuthIsPrecommitted) state_machine->commit(2, create_entry->get_buf()); state_machine->commit(3, set_acl_entry->get_buf()); - const auto & uncommitted_state = state_machine->getStorageForUnitTests().uncommitted_state; + const auto & uncommitted_state = state_machine->getStorageUnsafe().uncommitted_state; auto node = uncommitted_state.getNode(node_path); ASSERT_NE(node, nullptr); diff --git a/utils/keeper-data-dumper/main.cpp b/utils/keeper-data-dumper/main.cpp index 8d685d65d1d..22e5f47687a 100644 --- a/utils/keeper-data-dumper/main.cpp +++ b/utils/keeper-data-dumper/main.cpp @@ -15,7 +15,7 @@ using namespace DB; void dumpMachine(std::shared_ptr machine) { - auto & storage = machine->getStorageForUnitTests(); + auto & storage = machine->getStorageUnsafe(); std::queue keys; keys.push("/"); From bafcc3afdc79463915b53b5e441758cbb958b958 Mon Sep 17 00:00:00 2001 From: Mike Kot Date: Thu, 6 Jul 2023 15:18:49 +0000 Subject: [PATCH 1313/1997] remove reconfig in process flag as it's useless --- src/Common/ZooKeeper/IKeeper.h | 1 - src/Coordination/KeeperDispatcher.cpp | 5 ----- src/Coordination/KeeperDispatcher.h | 1 - src/Coordination/KeeperStateMachine.cpp | 4 +--- src/Coordination/RaftServerConfig.cpp | 9 ++++----- 5 files changed, 5 insertions(+), 15 deletions(-) diff --git a/src/Common/ZooKeeper/IKeeper.h b/src/Common/ZooKeeper/IKeeper.h index 8567a53699e..5240acc2616 100644 --- a/src/Common/ZooKeeper/IKeeper.h +++ b/src/Common/ZooKeeper/IKeeper.h @@ -82,7 +82,6 @@ enum class Error : int32_t ZOPERATIONTIMEOUT = -7, /// Operation timeout ZBADARGUMENTS = -8, /// Invalid arguments ZINVALIDSTATE = -9, /// Invalid zhandle state - 
ZRECONFIGINPROGRESS = -14, /// Another reconfig is running /** API errors. * This is never thrown by the server, it shouldn't be used other than diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index 90996dfaff7..b956bba4031 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -780,11 +780,6 @@ void KeeperDispatcher::pushClusterUpdates(ClusterUpdateActions && actions) } } -bool KeeperDispatcher::clusterUpdateQueueEmpty() const -{ - return cluster_update_queue.empty(); -} - bool KeeperDispatcher::reconfigEnabled() const { return server->reconfigEnabled(); diff --git a/src/Coordination/KeeperDispatcher.h b/src/Coordination/KeeperDispatcher.h index a9b3d33eb51..40f1dac1570 100644 --- a/src/Coordination/KeeperDispatcher.h +++ b/src/Coordination/KeeperDispatcher.h @@ -136,7 +136,6 @@ public: void updateConfiguration(const Poco::Util::AbstractConfiguration & config, const MultiVersion::Version & macros); void pushClusterUpdates(ClusterUpdateActions&& actions); - bool clusterUpdateQueueEmpty() const; bool reconfigEnabled() const; /// Shutdown internal keeper parts (server, state machine, log storage, etc) diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index 45c776e105b..b821050cccf 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -309,7 +309,7 @@ void KeeperStateMachine::reconfigure(const KeeperStorage::RequestForSession& req } KeeperStorage::ResponseForSession KeeperStateMachine::processReconfiguration( - const KeeperStorage::RequestForSession& request_for_session) + const KeeperStorage::RequestForSession & request_for_session) { ProfileEvents::increment(ProfileEvents::KeeperReconfigRequest); @@ -333,8 +333,6 @@ KeeperStorage::ResponseForSession KeeperStateMachine::processReconfiguration( KeeperDispatcher& dispatcher = *keeper_context->getDispatcher(); if (!dispatcher.reconfigEnabled()) return bad_request(ZUNIMPLEMENTED); - if (!dispatcher.clusterUpdateQueueEmpty()) - return bad_request(ZRECONFIGINPROGRESS); if (request.version != -1) return bad_request(ZBADVERSION); diff --git a/src/Coordination/RaftServerConfig.cpp b/src/Coordination/RaftServerConfig.cpp index 45b6d5d1dad..9090ed68fb6 100644 --- a/src/Coordination/RaftServerConfig.cpp +++ b/src/Coordination/RaftServerConfig.cpp @@ -1,7 +1,6 @@ #include "RaftServerConfig.h" -#include -#include #include +#include #include namespace DB @@ -32,7 +31,7 @@ std::optional RaftServerConfig::parse(std::string_view server) return std::nullopt; Int32 id; - if (std::from_chars(std::next(id_str.begin(), 7), id_str.end(), id).ec != std::error_code{}) + if (!tryParse(id, std::next(id_str.begin(), 7))) return std::nullopt; if (id <= 0) return std::nullopt; @@ -44,7 +43,7 @@ std::optional RaftServerConfig::parse(std::string_view server) const std::string_view port = endpoint.substr(port_delimiter + 1); uint16_t port_tmp; - if (std::from_chars(port.begin(), port.end(), port_tmp).ec != std::error_code{}) + if (!tryParse(port_tmp, port)) return std::nullopt; RaftServerConfig out{id, endpoint}; @@ -59,7 +58,7 @@ std::optional RaftServerConfig::parse(std::string_view server) return out; const std::string_view priority = parts[3]; - if (std::from_chars(priority.begin(), priority.end(), out.priority).ec != std::error_code{}) + if (!tryParse(out.priority, priority)) return std::nullopt; if (out.priority < 0) return std::nullopt; From 5302b478a4b512d080068563d4b5b983e4b13d77 Mon Sep 17 
00:00:00 2001 From: Mike Kot Date: Thu, 6 Jul 2023 17:12:24 +0000 Subject: [PATCH 1314/1997] proper reconfig batch handling --- src/Common/ZooKeeper/IKeeper.cpp | 1 - src/Coordination/KeeperDispatcher.cpp | 32 +++++++++++++++++---------- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/src/Common/ZooKeeper/IKeeper.cpp b/src/Common/ZooKeeper/IKeeper.cpp index 50160279506..f0a07241735 100644 --- a/src/Common/ZooKeeper/IKeeper.cpp +++ b/src/Common/ZooKeeper/IKeeper.cpp @@ -110,7 +110,6 @@ const char * errorMessage(Error code) case Error::ZCLOSING: return "ZooKeeper is closing"; case Error::ZNOTHING: return "(not error) no server responses to process"; case Error::ZSESSIONMOVED: return "Session moved to another server, so operation is ignored"; - case Error::ZRECONFIGINPROGRESS: return "Another reconfiguration is progress"; } UNREACHABLE(); diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index b956bba4031..daa65de0d89 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -82,6 +82,7 @@ void KeeperDispatcher::requestThread() /// requests into a batch we must check that the new request is not read request. Otherwise we have to /// process all already accumulated write requests, wait them synchronously and only after that process /// read request. So reads are some kind of "separator" for writes. + /// A special reconfig request likewise acts as a separator. try { if (requests_queue->tryPop(request, max_wait)) @@ -90,20 +91,17 @@ void KeeperDispatcher::requestThread() if (shutdown_called) break; - if (request.request->getOpNum() == Coordination::OpNum::Reconfig) - { - server->getKeeperStateMachine()->reconfigure(request); - continue; - } - KeeperStorage::RequestsForSessions current_batch; size_t current_batch_bytes_size = 0; bool has_read_request = false; + bool has_reconfig_request = false; - /// If new request is not read request or we must to process it through quorum. + /// If new request is not read request or reconfig request we must process it through quorum. /// Otherwise we will process it locally.
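// A condensed sketch of the dispatch rule the hunks below implement (illustrative
// comment only, not literal patch content):
//     if (op_num == Coordination::OpNum::Reconfig)
//         has_reconfig_request = true;          // separator: handled via the state machine after the batch
//     else if (coordination_settings->quorum_reads || !request.request->isReadRequest())
//         current_batch.emplace_back(request);  // accumulate write requests
//     else
//         has_read_request = true;              // reads separate batches as well
// Either separator forces the accumulated write batch to be flushed and waited on first.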
- if (coordination_settings->quorum_reads || !request.request->isReadRequest()) + if (request.request->getOpNum() == Coordination::OpNum::Reconfig) + has_reconfig_request = true; + else if (coordination_settings->quorum_reads || !request.request->isReadRequest()) { current_batch_bytes_size += request.request->bytesSize(); current_batch.emplace_back(request); @@ -122,7 +120,10 @@ void KeeperDispatcher::requestThread() read_request_queue[last_request.session_id][last_request.request->xid].push_back(request); } else if (request.request->getOpNum() == Coordination::OpNum::Reconfig) - server->getKeeperStateMachine()->reconfigure(request); + { + has_reconfig_request = true; + return false; + } else { current_batch_bytes_size += request.request->bytesSize(); @@ -138,6 +139,7 @@ void KeeperDispatcher::requestThread() /// TODO: Deprecate max_requests_quick_batch_size and use only max_requests_batch_size and max_requests_batch_bytes_size size_t max_quick_batch_size = coordination_settings->max_requests_quick_batch_size; while (!shutdown_called && !has_read_request && + !has_reconfig_request && current_batch.size() < max_quick_batch_size && current_batch_bytes_size < max_batch_bytes_size && try_get_request()) ; @@ -150,8 +152,10 @@ void KeeperDispatcher::requestThread() }; /// Waiting until previous append will be successful, or batch is big enough - while (!shutdown_called && !has_read_request && !prev_result_done() && - current_batch.size() <= max_batch_size && current_batch_bytes_size < max_batch_bytes_size) + while (!shutdown_called && !has_read_request && + !has_reconfig_request && !prev_result_done() && + current_batch.size() <= max_batch_size + && current_batch_bytes_size < max_batch_bytes_size) { try_get_request(); } @@ -175,7 +179,8 @@ void KeeperDispatcher::requestThread() if (result) { - if (has_read_request) /// If we will execute read request next, than we have to process result now + /// If we will execute read or reconfig next, we have to process result now + if (has_read_request || has_reconfig_request) forceWaitAndProcessResult(result, current_batch); } else @@ -189,6 +194,9 @@ void KeeperDispatcher::requestThread() prev_result = result; } + if (has_reconfig_request) + server->getKeeperStateMachine()->reconfigure(request); + /// Read request always goes after write batch (last request) if (has_read_request) { From fc19e74ba9084e66a7ff43565ef80a78dda65570 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 7 Jul 2023 01:12:17 +0200 Subject: [PATCH 1315/1997] fix deadlock on DatabaseCatalog shutdown --- src/Interpreters/DatabaseCatalog.cpp | 11 ++++++++++- src/Interpreters/DatabaseCatalog.h | 2 ++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 4cb2f6e3b3d..dc1861b3bd8 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -56,6 +56,7 @@ namespace ErrorCodes extern const int DATABASE_ACCESS_DENIED; extern const int LOGICAL_ERROR; extern const int HAVE_DEPENDENT_OBJECTS; + extern const int UNFINISHED; } TemporaryTableHolder::TemporaryTableHolder(ContextPtr context_, const TemporaryTableHolder::Creator & creator, const ASTPtr & query) @@ -196,6 +197,9 @@ void DatabaseCatalog::startupBackgroundCleanup() void DatabaseCatalog::shutdownImpl() { + is_shutting_down = true; + wait_table_finally_dropped.notify_all(); + if (cleanup_task) (*cleanup_task)->deactivate(); @@ -1160,8 +1164,13 @@ void DatabaseCatalog::waitTableFinallyDropped(const UUID & 
uuid) std::unique_lock lock{tables_marked_dropped_mutex}; wait_table_finally_dropped.wait(lock, [&]() TSA_REQUIRES(tables_marked_dropped_mutex) -> bool { - return !tables_marked_dropped_ids.contains(uuid); + return !tables_marked_dropped_ids.contains(uuid) || is_shutting_down; }); + + /// TSA doesn't support unique_lock + if (TSA_SUPPRESS_WARNING_FOR_READ(tables_marked_dropped_ids).contains(uuid)) + throw Exception(ErrorCodes::UNFINISHED, "Did not finish dropping the table with UUID {} because the server is shutting down, " + "will finish after restart", uuid); } void DatabaseCatalog::addDependencies( diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index 258ea2dee7c..d502505027f 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -308,6 +308,8 @@ private: Poco::Logger * log; + std::atomic_bool is_shutting_down = false; + /// Do not allow simultaneous execution of DDL requests on the same table. /// database name -> database guard -> (table name mutex, counter), /// counter: how many threads are running a query on the table at the same time From 3ec617b1840e7a64761c0e45926719a6d41363c1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 7 Jul 2023 01:31:52 +0200 Subject: [PATCH 1316/1997] Fix build --- programs/keeper-converter/KeeperConverter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/keeper-converter/KeeperConverter.cpp b/programs/keeper-converter/KeeperConverter.cpp index a049e6bc2b3..20448aafa2f 100644 --- a/programs/keeper-converter/KeeperConverter.cpp +++ b/programs/keeper-converter/KeeperConverter.cpp @@ -42,7 +42,7 @@ int mainEntryClickHouseKeeperConverter(int argc, char ** argv) { auto keeper_context = std::make_shared(true); keeper_context->setDigestEnabled(true); - keeper_context->setSnapshotDisk(std::make_shared("Keeper-snapshots", options["output-dir"].as(), 0)); + keeper_context->setSnapshotDisk(std::make_shared("Keeper-snapshots", options["output-dir"].as())); DB::KeeperStorage storage(/* tick_time_ms */ 500, /* superdigest */ "", keeper_context, /* initialize_system_nodes */ false); From 48eb30de513f3561eef6cd8be661023438405e0e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 7 Jul 2023 01:41:36 +0200 Subject: [PATCH 1317/1997] Fix build --- src/Coordination/KeeperContext.cpp | 6 +++--- src/Coordination/tests/gtest_coordination.cpp | 10 +++++----- utils/keeper-data-dumper/main.cpp | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/Coordination/KeeperContext.cpp b/src/Coordination/KeeperContext.cpp index 3c3c0500540..408344ee67f 100644 --- a/src/Coordination/KeeperContext.cpp +++ b/src/Coordination/KeeperContext.cpp @@ -220,7 +220,7 @@ KeeperContext::Storage KeeperContext::getLogsPathFromConfig(const Poco::Util::Ab if (!fs::exists(path)) fs::create_directories(path); - return std::make_shared("LocalLogDisk", path, 0); + return std::make_shared("LocalLogDisk", path); }; /// the most specialized path @@ -246,7 +246,7 @@ KeeperContext::Storage KeeperContext::getSnapshotsPathFromConfig(const Poco::Uti if (!fs::exists(path)) fs::create_directories(path); - return std::make_shared("LocalSnapshotDisk", path, 0); + return std::make_shared("LocalSnapshotDisk", path); }; /// the most specialized path @@ -272,7 +272,7 @@ KeeperContext::Storage KeeperContext::getStatePathFromConfig(const Poco::Util::A if (!fs::exists(path)) fs::create_directories(path); - return std::make_shared("LocalStateFileDisk", path, 0); + return 
std::make_shared("LocalStateFileDisk", path); }; if (config.has("keeper_server.state_storage_disk")) diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index 0f60c960b8b..6df149bbfbe 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -71,16 +71,16 @@ protected: DB::KeeperContextPtr keeper_context = std::make_shared(true); Poco::Logger * log{&Poco::Logger::get("CoordinationTest")}; - void setLogDirectory(const std::string & path) { keeper_context->setLogDisk(std::make_shared("LogDisk", path, 0)); } + void setLogDirectory(const std::string & path) { keeper_context->setLogDisk(std::make_shared("LogDisk", path)); } void setSnapshotDirectory(const std::string & path) { - keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", path, 0)); + keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", path)); } void setStateFileDirectory(const std::string & path) { - keeper_context->setStateFileDisk(std::make_shared("StateFile", path, 0)); + keeper_context->setStateFileDisk(std::make_shared("StateFile", path)); } }; @@ -1503,9 +1503,9 @@ void testLogAndStateMachine( using namespace DB; ChangelogDirTest snapshots("./snapshots"); - keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", "./snapshots", 0)); + keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", "./snapshots")); ChangelogDirTest logs("./logs"); - keeper_context->setLogDisk(std::make_shared("LogDisk", "./logs", 0)); + keeper_context->setLogDisk(std::make_shared("LogDisk", "./logs")); ResponsesQueue queue(std::numeric_limits::max()); SnapshotsQueue snapshots_queue{1}; diff --git a/utils/keeper-data-dumper/main.cpp b/utils/keeper-data-dumper/main.cpp index 5a6fd15d72c..51a09b676dc 100644 --- a/utils/keeper-data-dumper/main.cpp +++ b/utils/keeper-data-dumper/main.cpp @@ -64,8 +64,8 @@ int main(int argc, char *argv[]) SnapshotsQueue snapshots_queue{1}; CoordinationSettingsPtr settings = std::make_shared(); KeeperContextPtr keeper_context = std::make_shared(true); - keeper_context->setLogDisk(std::make_shared("LogDisk", argv[2], 0)); - keeper_context->setSnapshotDisk(std::make_shared("LogDisk", argv[1], 0)); + keeper_context->setLogDisk(std::make_shared("LogDisk", argv[2])); + keeper_context->setSnapshotDisk(std::make_shared("LogDisk", argv[1])); auto state_machine = std::make_shared(queue, snapshots_queue, settings, keeper_context, nullptr); state_machine->init(); From c47b32b17a59202f4b21f5cff09898d41d436925 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 7 Jul 2023 01:52:18 +0200 Subject: [PATCH 1318/1997] Fix build --- src/Coordination/KeeperContext.cpp | 6 +++--- src/Coordination/tests/gtest_coordination.cpp | 10 +++++----- utils/keeper-data-dumper/main.cpp | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/Coordination/KeeperContext.cpp b/src/Coordination/KeeperContext.cpp index 408344ee67f..3c3c0500540 100644 --- a/src/Coordination/KeeperContext.cpp +++ b/src/Coordination/KeeperContext.cpp @@ -220,7 +220,7 @@ KeeperContext::Storage KeeperContext::getLogsPathFromConfig(const Poco::Util::Ab if (!fs::exists(path)) fs::create_directories(path); - return std::make_shared("LocalLogDisk", path); + return std::make_shared("LocalLogDisk", path, 0); }; /// the most specialized path @@ -246,7 +246,7 @@ KeeperContext::Storage KeeperContext::getSnapshotsPathFromConfig(const Poco::Uti if (!fs::exists(path)) fs::create_directories(path); - return 
std::make_shared("LocalSnapshotDisk", path); + return std::make_shared("LocalSnapshotDisk", path, 0); }; /// the most specialized path @@ -272,7 +272,7 @@ KeeperContext::Storage KeeperContext::getStatePathFromConfig(const Poco::Util::A if (!fs::exists(path)) fs::create_directories(path); - return std::make_shared("LocalStateFileDisk", path); + return std::make_shared("LocalStateFileDisk", path, 0); }; if (config.has("keeper_server.state_storage_disk")) diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index 6df149bbfbe..0f60c960b8b 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -71,16 +71,16 @@ protected: DB::KeeperContextPtr keeper_context = std::make_shared(true); Poco::Logger * log{&Poco::Logger::get("CoordinationTest")}; - void setLogDirectory(const std::string & path) { keeper_context->setLogDisk(std::make_shared("LogDisk", path)); } + void setLogDirectory(const std::string & path) { keeper_context->setLogDisk(std::make_shared("LogDisk", path, 0)); } void setSnapshotDirectory(const std::string & path) { - keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", path)); + keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", path, 0)); } void setStateFileDirectory(const std::string & path) { - keeper_context->setStateFileDisk(std::make_shared("StateFile", path)); + keeper_context->setStateFileDisk(std::make_shared("StateFile", path, 0)); } }; @@ -1503,9 +1503,9 @@ void testLogAndStateMachine( using namespace DB; ChangelogDirTest snapshots("./snapshots"); - keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", "./snapshots")); + keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", "./snapshots", 0)); ChangelogDirTest logs("./logs"); - keeper_context->setLogDisk(std::make_shared("LogDisk", "./logs")); + keeper_context->setLogDisk(std::make_shared("LogDisk", "./logs", 0)); ResponsesQueue queue(std::numeric_limits::max()); SnapshotsQueue snapshots_queue{1}; diff --git a/utils/keeper-data-dumper/main.cpp b/utils/keeper-data-dumper/main.cpp index 51a09b676dc..5a6fd15d72c 100644 --- a/utils/keeper-data-dumper/main.cpp +++ b/utils/keeper-data-dumper/main.cpp @@ -64,8 +64,8 @@ int main(int argc, char *argv[]) SnapshotsQueue snapshots_queue{1}; CoordinationSettingsPtr settings = std::make_shared(); KeeperContextPtr keeper_context = std::make_shared(true); - keeper_context->setLogDisk(std::make_shared("LogDisk", argv[2])); - keeper_context->setSnapshotDisk(std::make_shared("LogDisk", argv[1])); + keeper_context->setLogDisk(std::make_shared("LogDisk", argv[2], 0)); + keeper_context->setSnapshotDisk(std::make_shared("LogDisk", argv[1], 0)); auto state_machine = std::make_shared(queue, snapshots_queue, settings, keeper_context, nullptr); state_machine->init(); From 685f2949b75fad05bf1959931b626b73cdab55e9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 7 Jul 2023 02:53:13 +0300 Subject: [PATCH 1319/1997] Revert "Fix build" --- src/Coordination/KeeperContext.cpp | 6 +++--- src/Coordination/tests/gtest_coordination.cpp | 10 +++++----- utils/keeper-data-dumper/main.cpp | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/Coordination/KeeperContext.cpp b/src/Coordination/KeeperContext.cpp index 3c3c0500540..408344ee67f 100644 --- a/src/Coordination/KeeperContext.cpp +++ b/src/Coordination/KeeperContext.cpp @@ -220,7 +220,7 @@ KeeperContext::Storage KeeperContext::getLogsPathFromConfig(const Poco::Util::Ab 
if (!fs::exists(path)) fs::create_directories(path); - return std::make_shared("LocalLogDisk", path, 0); + return std::make_shared("LocalLogDisk", path); }; /// the most specialized path @@ -246,7 +246,7 @@ KeeperContext::Storage KeeperContext::getSnapshotsPathFromConfig(const Poco::Uti if (!fs::exists(path)) fs::create_directories(path); - return std::make_shared("LocalSnapshotDisk", path, 0); + return std::make_shared("LocalSnapshotDisk", path); }; /// the most specialized path @@ -272,7 +272,7 @@ KeeperContext::Storage KeeperContext::getStatePathFromConfig(const Poco::Util::A if (!fs::exists(path)) fs::create_directories(path); - return std::make_shared("LocalStateFileDisk", path, 0); + return std::make_shared("LocalStateFileDisk", path); }; if (config.has("keeper_server.state_storage_disk")) diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index 0f60c960b8b..6df149bbfbe 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -71,16 +71,16 @@ protected: DB::KeeperContextPtr keeper_context = std::make_shared(true); Poco::Logger * log{&Poco::Logger::get("CoordinationTest")}; - void setLogDirectory(const std::string & path) { keeper_context->setLogDisk(std::make_shared("LogDisk", path, 0)); } + void setLogDirectory(const std::string & path) { keeper_context->setLogDisk(std::make_shared("LogDisk", path)); } void setSnapshotDirectory(const std::string & path) { - keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", path, 0)); + keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", path)); } void setStateFileDirectory(const std::string & path) { - keeper_context->setStateFileDisk(std::make_shared("StateFile", path, 0)); + keeper_context->setStateFileDisk(std::make_shared("StateFile", path)); } }; @@ -1503,9 +1503,9 @@ void testLogAndStateMachine( using namespace DB; ChangelogDirTest snapshots("./snapshots"); - keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", "./snapshots", 0)); + keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", "./snapshots")); ChangelogDirTest logs("./logs"); - keeper_context->setLogDisk(std::make_shared("LogDisk", "./logs", 0)); + keeper_context->setLogDisk(std::make_shared("LogDisk", "./logs")); ResponsesQueue queue(std::numeric_limits::max()); SnapshotsQueue snapshots_queue{1}; diff --git a/utils/keeper-data-dumper/main.cpp b/utils/keeper-data-dumper/main.cpp index 5a6fd15d72c..51a09b676dc 100644 --- a/utils/keeper-data-dumper/main.cpp +++ b/utils/keeper-data-dumper/main.cpp @@ -64,8 +64,8 @@ int main(int argc, char *argv[]) SnapshotsQueue snapshots_queue{1}; CoordinationSettingsPtr settings = std::make_shared(); KeeperContextPtr keeper_context = std::make_shared(true); - keeper_context->setLogDisk(std::make_shared("LogDisk", argv[2], 0)); - keeper_context->setSnapshotDisk(std::make_shared("LogDisk", argv[1], 0)); + keeper_context->setLogDisk(std::make_shared("LogDisk", argv[2])); + keeper_context->setSnapshotDisk(std::make_shared("LogDisk", argv[1])); auto state_machine = std::make_shared(queue, snapshots_queue, settings, keeper_context, nullptr); state_machine->init(); From d30be39224f94618393c9502961632422b6676f5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 7 Jul 2023 03:28:33 +0200 Subject: [PATCH 1320/1997] Fix flaky test 00175_partition_by_ignore and move it to correct location --- .../00175_partition_by_ignore.reference | 0 .../{1_stateful => 
0_stateless}/00175_partition_by_ignore.sql | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename tests/queries/{1_stateful => 0_stateless}/00175_partition_by_ignore.reference (100%) rename tests/queries/{1_stateful => 0_stateless}/00175_partition_by_ignore.sql (90%) diff --git a/tests/queries/1_stateful/00175_partition_by_ignore.reference b/tests/queries/0_stateless/00175_partition_by_ignore.reference similarity index 100% rename from tests/queries/1_stateful/00175_partition_by_ignore.reference rename to tests/queries/0_stateless/00175_partition_by_ignore.reference diff --git a/tests/queries/1_stateful/00175_partition_by_ignore.sql b/tests/queries/0_stateless/00175_partition_by_ignore.sql similarity index 90% rename from tests/queries/1_stateful/00175_partition_by_ignore.sql rename to tests/queries/0_stateless/00175_partition_by_ignore.sql index 737d1b59fe3..19d63c82a87 100644 --- a/tests/queries/1_stateful/00175_partition_by_ignore.sql +++ b/tests/queries/0_stateless/00175_partition_by_ignore.sql @@ -2,7 +2,7 @@ SELECT '-- check that partition key with ignore works correctly'; DROP TABLE IF EXISTS partition_by_ignore SYNC; -CREATE TABLE partition_by_ignore (ts DateTime, ts_2 DateTime) ENGINE=MergeTree PARTITION BY (toYYYYMM(ts), ignore(ts_2)) ORDER BY tuple(); +CREATE TABLE partition_by_ignore (ts DateTime, ts_2 DateTime) ENGINE=MergeTree PARTITION BY (toYYYYMM(ts), ignore(ts_2)) ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO partition_by_ignore SELECT toDateTime('2022-08-03 00:00:00') + toIntervalDay(number), toDateTime('2022-08-04 00:00:00') + toIntervalDay(number) FROM numbers(60); EXPLAIN ESTIMATE SELECT count() FROM partition_by_ignore WHERE ts BETWEEN toDateTime('2022-08-07 00:00:00') AND toDateTime('2022-08-10 00:00:00') FORMAT CSV; From f8ac899c3fefb1268a5197dc4d85c2ee1eb174ca Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 7 Jul 2023 03:49:50 +0200 Subject: [PATCH 1321/1997] Fix flaky test 02360_send_logs_level_colors: avoid usage of `file` tool --- tests/queries/0_stateless/02360_send_logs_level_colors.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/02360_send_logs_level_colors.sh b/tests/queries/0_stateless/02360_send_logs_level_colors.sh index 0585e779815..a9b7d4dd3c1 100755 --- a/tests/queries/0_stateless/02360_send_logs_level_colors.sh +++ b/tests/queries/0_stateless/02360_send_logs_level_colors.sh @@ -1,11 +1,12 @@ #!/usr/bin/env bash CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=trace + # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh file_name="${CLICKHOUSE_TMP}/res_${CLICKHOUSE_DATABASE}.log" -CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=trace/g') # Run query via expect to make isatty() return true function run() @@ -20,8 +21,7 @@ spawn bash -c "$command" expect 1 EOF - file "$file_name" | grep -o "ASCII text" - file "$file_name" | grep -o "with escape sequences" + rg -F $'\x1b' "$file_name" && cat "$file_name" || echo "ASCII text" } run "$CLICKHOUSE_CLIENT -q 'SELECT 1' 2>$file_name" From ef3551fea00b6eeaa76884880a977e9a0768bb82 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 7 Jul 2023 03:54:10 +0200 Subject: [PATCH 1322/1997] Maybe better tests --- tests/queries/0_stateless/00995_exception_while_insert.sh | 3 +-- tests/queries/0_stateless/01030_limit_by_with_ties_error.sh | 3 +-- tests/queries/0_stateless/01187_set_profile_as_setting.sh | 2 +- tests/queries/0_stateless/01442_merge_detach_attach_long.sh | 3 +-- tests/queries/0_stateless/01515_logtrace_function.sh | 3 +-- .../01583_parallel_parsing_exception_with_offset.sh | 3 +-- tests/queries/0_stateless/02359_send_logs_source_regexp.sh | 2 +- .../0_stateless/02360_rename_table_along_with_log_name.sh | 2 +- 8 files changed, 8 insertions(+), 13 deletions(-) diff --git a/tests/queries/0_stateless/00995_exception_while_insert.sh b/tests/queries/0_stateless/00995_exception_while_insert.sh index e0cd264a2b7..927ac6a54e5 100755 --- a/tests/queries/0_stateless/00995_exception_while_insert.sh +++ b/tests/queries/0_stateless/00995_exception_while_insert.sh @@ -1,11 +1,10 @@ #!/usr/bin/env bash CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=none/g') - $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS check;" $CLICKHOUSE_CLIENT --query="CREATE TABLE check (x UInt64, y UInt64 DEFAULT throwIf(x > 1500000)) ENGINE = Memory;" diff --git a/tests/queries/0_stateless/01030_limit_by_with_ties_error.sh b/tests/queries/0_stateless/01030_limit_by_with_ties_error.sh index 711a015f044..c3414838789 100755 --- a/tests/queries/0_stateless/01030_limit_by_with_ties_error.sh +++ b/tests/queries/0_stateless/01030_limit_by_with_ties_error.sh @@ -1,11 +1,10 @@ #!/usr/bin/env bash CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=none/g') - $CLICKHOUSE_CLIENT --query=""" SELECT * FROM (SELECT number % 5 AS a, count() AS b, c FROM numbers(10) ARRAY JOIN [1,2] AS c GROUP BY a,c) AS table diff --git a/tests/queries/0_stateless/01187_set_profile_as_setting.sh b/tests/queries/0_stateless/01187_set_profile_as_setting.sh index dacb609d790..fccac57aea8 100755 --- a/tests/queries/0_stateless/01187_set_profile_as_setting.sh +++ b/tests/queries/0_stateless/01187_set_profile_as_setting.sh @@ -4,13 +4,13 @@ unset CLICKHOUSE_LOG_COMMENT CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=fatal # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh $CLICKHOUSE_CLIENT -n -m -q "select value, changed from system.settings where name='readonly';" $CLICKHOUSE_CLIENT -n -m -q "set profile='default'; select value, changed from system.settings where name='readonly';" $CLICKHOUSE_CLIENT -n -m -q "set profile='readonly'; select value, changed from system.settings where name='readonly';" 2>&1| grep -Fa "Cannot modify 'send_logs_level' setting in readonly mode" > /dev/null && echo "OK" -CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=fatal/g') $CLICKHOUSE_CLIENT -n -m -q "set profile='readonly'; select value, changed from system.settings where name='readonly';" ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=select+value,changed+from+system.settings+where+name='readonly'" diff --git a/tests/queries/0_stateless/01442_merge_detach_attach_long.sh b/tests/queries/0_stateless/01442_merge_detach_attach_long.sh index c080dded1c8..acb2550d48c 100755 --- a/tests/queries/0_stateless/01442_merge_detach_attach_long.sh +++ b/tests/queries/0_stateless/01442_merge_detach_attach_long.sh @@ -4,11 +4,10 @@ set -e CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=none/g') - ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS t" ${CLICKHOUSE_CLIENT} --query="CREATE TABLE t (x Int8) ENGINE = MergeTree ORDER BY tuple()" diff --git a/tests/queries/0_stateless/01515_logtrace_function.sh b/tests/queries/0_stateless/01515_logtrace_function.sh index 131ec0edb9e..4ebecd0cc18 100755 --- a/tests/queries/0_stateless/01515_logtrace_function.sh +++ b/tests/queries/0_stateless/01515_logtrace_function.sh @@ -2,9 +2,8 @@ # Tags: race CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=debug # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=debug/g') - ${CLICKHOUSE_CLIENT} --query="SELECT logTrace('logTrace Function Test');" 2>&1 | grep -q "logTrace Function Test" && echo "OK" || echo "FAIL" diff --git a/tests/queries/0_stateless/01583_parallel_parsing_exception_with_offset.sh b/tests/queries/0_stateless/01583_parallel_parsing_exception_with_offset.sh index aa3a25096c0..00d22cb8e83 100755 --- a/tests/queries/0_stateless/01583_parallel_parsing_exception_with_offset.sh +++ b/tests/queries/0_stateless/01583_parallel_parsing_exception_with_offset.sh @@ -1,11 +1,10 @@ #!/usr/bin/env bash CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=none/g') - $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS check;" $CLICKHOUSE_CLIENT --query="CREATE TABLE check (x UInt64) ENGINE = Memory;" diff --git a/tests/queries/0_stateless/02359_send_logs_source_regexp.sh b/tests/queries/0_stateless/02359_send_logs_source_regexp.sh index d3b60bc59f4..f287e323ca7 100755 --- a/tests/queries/0_stateless/02359_send_logs_source_regexp.sh +++ b/tests/queries/0_stateless/02359_send_logs_source_regexp.sh @@ -1,11 +1,11 @@ #!/usr/bin/env bash CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=trace # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh [ ! -z "$CLICKHOUSE_CLIENT_REDEFINED" ] && CLICKHOUSE_CLIENT=$CLICKHOUSE_CLIENT_REDEFINED -CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=trace/g') regexp="executeQuery|InterpreterSelectQuery" $CLICKHOUSE_CLIENT --send_logs_source_regexp "$regexp" -q "SELECT 1;" 2> >(grep -v -E "$regexp" 1>&2) diff --git a/tests/queries/0_stateless/02360_rename_table_along_with_log_name.sh b/tests/queries/0_stateless/02360_rename_table_along_with_log_name.sh index e8c7f844b5c..c07dcdd549b 100755 --- a/tests/queries/0_stateless/02360_rename_table_along_with_log_name.sh +++ b/tests/queries/0_stateless/02360_rename_table_along_with_log_name.sh @@ -1,6 +1,7 @@ #!/usr/bin/env bash CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=trace # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh @@ -11,7 +12,6 @@ $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS y;" $CLICKHOUSE_CLIENT -q "CREATE TABLE x(i int) ENGINE MergeTree ORDER BY i;" $CLICKHOUSE_CLIENT -q "RENAME TABLE x TO y;" -CLICKHOUSE_CLIENT_WITH_LOG=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=trace/g') regexp="${CLICKHOUSE_DATABASE}\\.x" # Check if there are still log entries with old table name $CLICKHOUSE_CLIENT_WITH_LOG --send_logs_source_regexp "$regexp" -q "INSERT INTO y VALUES(1);" From f0cc90a7fb0dcf75725e0f4e437828cbb4465143 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 7 Jul 2023 04:07:03 +0200 Subject: [PATCH 1323/1997] Revert "Merge pull request #51822 from kssenii/minor-changes" This reverts commit 5ac85f4fa888b4cca9d433b98505d52777281c6e, reversing changes made to 376c903da9502fb2efce180178d96c14a664f298. 
--- src/Interpreters/FilesystemCacheLog.h | 11 ++++++++++- src/Interpreters/InterpreterSystemQuery.cpp | 8 ++++---- src/Parsers/ASTSystemQuery.cpp | 8 ++++---- src/Parsers/ASTSystemQuery.h | 4 ++-- src/Parsers/ParserSystemQuery.cpp | 4 ++-- 5 files changed, 22 insertions(+), 13 deletions(-) diff --git a/src/Interpreters/FilesystemCacheLog.h b/src/Interpreters/FilesystemCacheLog.h index 0d088a922e0..d6dd00e5463 100644 --- a/src/Interpreters/FilesystemCacheLog.h +++ b/src/Interpreters/FilesystemCacheLog.h @@ -11,7 +11,16 @@ namespace DB { - +/// +/// -------- Column --------- Type ------ +/// | event_date | DateTime | +/// | event_time | UInt64 | +/// | query_id | String | +/// | remote_file_path | String | +/// | segment_range | Tuple | +/// | read_type | String | +/// ------------------------------------- +/// struct FilesystemCacheLogElement { enum class CacheType diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 1c2eb66923e..e1ff8676bc7 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -370,15 +370,15 @@ BlockIO InterpreterSystemQuery::execute() else { auto cache = FileCacheFactory::instance().getByName(query.filesystem_cache_name).cache; - if (query.key_to_drop.empty()) + if (query.delete_key.empty()) { cache->removeAllReleasable(); } else { - auto key = FileCacheKey::fromKeyString(query.key_to_drop); - if (query.offset_to_drop.has_value()) - cache->removeFileSegment(key, query.offset_to_drop.value()); + auto key = FileCacheKey::fromKeyString(query.delete_key); + if (query.delete_offset.has_value()) + cache->removeFileSegment(key, query.delete_offset.value()); else cache->removeKey(key); } diff --git a/src/Parsers/ASTSystemQuery.cpp b/src/Parsers/ASTSystemQuery.cpp index 22244a7075c..9c5e7bff61e 100644 --- a/src/Parsers/ASTSystemQuery.cpp +++ b/src/Parsers/ASTSystemQuery.cpp @@ -212,11 +212,11 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &, if (!filesystem_cache_name.empty()) { settings.ostr << (settings.hilite ? hilite_none : "") << " " << filesystem_cache_name; - if (!key_to_drop.empty()) + if (!delete_key.empty()) { - settings.ostr << (settings.hilite ? hilite_none : "") << " KEY " << key_to_drop; - if (offset_to_drop.has_value()) - settings.ostr << (settings.hilite ? hilite_none : "") << " OFFSET " << offset_to_drop.value(); + settings.ostr << (settings.hilite ? hilite_none : "") << " KEY " << delete_key; + if (delete_offset.has_value()) + settings.ostr << (settings.hilite ? 
hilite_none : "") << " OFFSET " << delete_offset.value(); } } } diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index 6c81162f103..ebc3e9cd430 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -107,8 +107,8 @@ public: UInt64 seconds{}; String filesystem_cache_name; - std::string key_to_drop; - std::optional offset_to_drop; + std::string delete_key; + std::optional delete_offset; String backup_name; diff --git a/src/Parsers/ParserSystemQuery.cpp b/src/Parsers/ParserSystemQuery.cpp index 09c86876b48..ef71e994d56 100644 --- a/src/Parsers/ParserSystemQuery.cpp +++ b/src/Parsers/ParserSystemQuery.cpp @@ -409,9 +409,9 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & res->filesystem_cache_name = ast->as()->value.safeGet(); if (ParserKeyword{"KEY"}.ignore(pos, expected) && ParserIdentifier().parse(pos, ast, expected)) { - res->key_to_drop = ast->as()->name(); + res->delete_key = ast->as()->name(); if (ParserKeyword{"OFFSET"}.ignore(pos, expected) && ParserLiteral().parse(pos, ast, expected)) - res->offset_to_drop = ast->as()->value.safeGet(); + res->delete_offset = ast->as()->value.safeGet(); } } if (!parseQueryWithOnCluster(res, pos, expected)) From 7cece62d26d01621f2cd9e8cc8b6b7a68d808dd9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 7 Jul 2023 04:07:19 +0200 Subject: [PATCH 1324/1997] Revert "Merge pull request #51547 from kssenii/more-flexible-drop-cache" This reverts commit 2ce7bcaa3d5fb36a11ae0211eabd5a89c2a8c5de, reversing changes made to e897207cd5402307295fb3dcf5c8650d5e0a4668. --- .../IO/CachedOnDiskReadBufferFromFile.cpp | 13 ++-- src/Disks/IO/CachedOnDiskReadBufferFromFile.h | 2 +- src/Disks/IO/ReadBufferFromRemoteFSGather.cpp | 2 - src/Interpreters/Cache/FileCache.cpp | 34 +++++----- src/Interpreters/Cache/FileCache.h | 12 +--- src/Interpreters/Cache/FileCacheKey.cpp | 5 -- src/Interpreters/Cache/FileCacheKey.h | 2 - src/Interpreters/Cache/Metadata.cpp | 26 +------ src/Interpreters/Cache/Metadata.h | 8 +-- src/Interpreters/FilesystemCacheLog.cpp | 4 -- src/Interpreters/FilesystemCacheLog.h | 2 - src/Interpreters/InterpreterSystemQuery.cpp | 13 +--- src/Parsers/ASTSystemQuery.cpp | 8 --- src/Parsers/ASTSystemQuery.h | 2 - src/Parsers/ParserSystemQuery.cpp | 8 --- ...2808_filesystem_cache_drop_query.reference | 4 -- .../02808_filesystem_cache_drop_query.sh | 67 ------------------- 17 files changed, 30 insertions(+), 182 deletions(-) delete mode 100644 tests/queries/0_stateless/02808_filesystem_cache_drop_query.reference delete mode 100755 tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp index 15b6a9211de..81aa29639ac 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp @@ -74,22 +74,19 @@ CachedOnDiskReadBufferFromFile::CachedOnDiskReadBufferFromFile( } void CachedOnDiskReadBufferFromFile::appendFilesystemCacheLog( - const FileSegment & file_segment, CachedOnDiskReadBufferFromFile::ReadType type) + const FileSegment::Range & file_segment_range, CachedOnDiskReadBufferFromFile::ReadType type) { if (!cache_log) return; - const auto range = file_segment.range(); FilesystemCacheLogElement elem { .event_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()), .query_id = query_id, .source_file_path = source_file_path, - .file_segment_range = { range.left, range.right }, + 
.file_segment_range = { file_segment_range.left, file_segment_range.right }, .requested_range = { first_offset, read_until_position }, - .file_segment_key = file_segment.key().toString(), - .file_segment_offset = file_segment.offset(), - .file_segment_size = range.size(), + .file_segment_size = file_segment_range.size(), .read_from_cache_attempted = true, .read_buffer_id = current_buffer_id, .profile_counters = std::make_shared( @@ -498,7 +495,7 @@ bool CachedOnDiskReadBufferFromFile::completeFileSegmentAndGetNext() auto completed_range = current_file_segment->range(); if (cache_log) - appendFilesystemCacheLog(*current_file_segment, read_type); + appendFilesystemCacheLog(completed_range, read_type); chassert(file_offset_of_buffer_end > completed_range.right); @@ -521,7 +518,7 @@ CachedOnDiskReadBufferFromFile::~CachedOnDiskReadBufferFromFile() { if (cache_log && file_segments && !file_segments->empty()) { - appendFilesystemCacheLog(file_segments->front(), read_type); + appendFilesystemCacheLog(file_segments->front().range(), read_type); } } diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.h b/src/Disks/IO/CachedOnDiskReadBufferFromFile.h index 36cf8a54183..b4e7701de75 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.h +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.h @@ -90,7 +90,7 @@ private: bool completeFileSegmentAndGetNext(); - void appendFilesystemCacheLog(const FileSegment & file_segment, ReadType read_type); + void appendFilesystemCacheLog(const FileSegment::Range & file_segment_range, ReadType read_type); bool writeCache(char * data, size_t size, size_t offset, FileSegment & file_segment); diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp index 2cd90731f1d..16c1def7b11 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp @@ -90,8 +90,6 @@ void ReadBufferFromRemoteFSGather::appendUncachedReadInfo() .source_file_path = current_object.remote_path, .file_segment_range = { 0, current_object.bytes_size }, .cache_type = FilesystemCacheLogElement::CacheType::READ_FROM_FS_BYPASSING_CACHE, - .file_segment_key = {}, - .file_segment_offset = {}, .file_segment_size = current_object.bytes_size, .read_from_cache_attempted = false, }; diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index de8ae33433a..91d1c63e832 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -806,13 +806,6 @@ bool FileCache::tryReserve(FileSegment & file_segment, const size_t size) return true; } -void FileCache::removeKey(const Key & key) -{ - assertInitialized(); - auto locked_key = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::THROW); - locked_key->removeAll(); -} - void FileCache::removeKeyIfExists(const Key & key) { assertInitialized(); @@ -825,14 +818,7 @@ void FileCache::removeKeyIfExists(const Key & key) /// But if we have multiple replicated zero-copy tables on the same server /// it became possible to start removing something from cache when it is used /// by other "zero-copy" tables. That is why it's not an error. 
- locked_key->removeAll(/* if_releasable */true); -} - -void FileCache::removeFileSegment(const Key & key, size_t offset) -{ - assertInitialized(); - auto locked_key = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::THROW); - locked_key->removeFileSegment(offset); + locked_key->removeAllReleasable(); } void FileCache::removePathIfExists(const String & path) @@ -844,12 +830,22 @@ void FileCache::removeAllReleasable() { assertInitialized(); - metadata.iterate([](LockedKey & locked_key) { locked_key.removeAll(/* if_releasable */true); }); + auto lock = lockCache(); + + main_priority->iterate([&](LockedKey & locked_key, const FileSegmentMetadataPtr & segment_metadata) + { + if (segment_metadata->releasable()) + { + auto file_segment = segment_metadata->file_segment; + locked_key.removeFileSegment(file_segment->offset(), file_segment->lock()); + return PriorityIterationResult::REMOVE_AND_CONTINUE; + } + return PriorityIterationResult::CONTINUE; + }, lock); if (stash) { /// Remove all access information. - auto lock = lockCache(); stash->records.clear(); stash->queue->removeAll(lock); } @@ -919,7 +915,7 @@ void FileCache::loadMetadata() continue; } - const auto key = Key::fromKeyString(key_directory.filename().string()); + const auto key = Key(unhexUInt(key_directory.filename().string().data())); auto locked_key = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::CREATE_EMPTY, /* is_initial_load */true); for (fs::directory_iterator offset_it{key_directory}; offset_it != fs::directory_iterator(); ++offset_it) @@ -1074,7 +1070,7 @@ FileSegmentsHolderPtr FileCache::getSnapshot() FileSegmentsHolderPtr FileCache::getSnapshot(const Key & key) { FileSegments file_segments; - auto locked_key = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::THROW_LOGICAL); + auto locked_key = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::THROW); for (const auto & [_, file_segment_metadata] : *locked_key->getKeyMetadata()) file_segments.push_back(FileSegment::getSnapshot(file_segment_metadata->file_segment)); return std::make_unique(std::move(file_segments)); diff --git a/src/Interpreters/Cache/FileCache.h b/src/Interpreters/Cache/FileCache.h index 2e6a5094758..0e3b17baa2f 100644 --- a/src/Interpreters/Cache/FileCache.h +++ b/src/Interpreters/Cache/FileCache.h @@ -83,19 +83,13 @@ public: FileSegmentsHolderPtr set(const Key & key, size_t offset, size_t size, const CreateFileSegmentSettings & settings); - /// Remove file segment by `key` and `offset`. Throws if file segment does not exist. - void removeFileSegment(const Key & key, size_t offset); - - /// Remove files by `key`. Throws if key does not exist. - void removeKey(const Key & key); - - /// Remove files by `key`. + /// Remove files by `key`. Removes files which might be used at the moment. void removeKeyIfExists(const Key & key); - /// Removes files by `path`. + /// Removes files by `path`. Removes files which might be used at the moment. void removePathIfExists(const String & path); - /// Remove files by `key`. + /// Remove files by `key`. Will not remove files which are used at the moment. 
void removeAllReleasable(); std::vector tryGetCachePaths(const Key & key); diff --git a/src/Interpreters/Cache/FileCacheKey.cpp b/src/Interpreters/Cache/FileCacheKey.cpp index 772fcd600bf..f97cdc058aa 100644 --- a/src/Interpreters/Cache/FileCacheKey.cpp +++ b/src/Interpreters/Cache/FileCacheKey.cpp @@ -28,9 +28,4 @@ FileCacheKey FileCacheKey::random() return FileCacheKey(UUIDHelpers::generateV4().toUnderType()); } -FileCacheKey FileCacheKey::fromKeyString(const std::string & key_str) -{ - return FileCacheKey(unhexUInt(key_str.data())); -} - } diff --git a/src/Interpreters/Cache/FileCacheKey.h b/src/Interpreters/Cache/FileCacheKey.h index e788cd5e7cd..bab8359732c 100644 --- a/src/Interpreters/Cache/FileCacheKey.h +++ b/src/Interpreters/Cache/FileCacheKey.h @@ -21,8 +21,6 @@ struct FileCacheKey static FileCacheKey random(); bool operator==(const FileCacheKey & other) const { return key == other.key; } - - static FileCacheKey fromKeyString(const std::string & key_str); }; using FileCacheKeyAndOffset = std::pair; diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index 0a2d58432e4..bfaa00eac2c 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -25,7 +25,6 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; - extern const int BAD_ARGUMENTS; } FileSegmentMetadata::FileSegmentMetadata(FileSegmentPtr && file_segment_) @@ -192,8 +191,6 @@ LockedKeyPtr CacheMetadata::lockKeyMetadata( if (it == end()) { if (key_not_found_policy == KeyNotFoundPolicy::THROW) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "No such key `{}` in cache", key); - else if (key_not_found_policy == KeyNotFoundPolicy::THROW_LOGICAL) throw Exception(ErrorCodes::LOGICAL_ERROR, "No such key `{}` in cache", key); else if (key_not_found_policy == KeyNotFoundPolicy::RETURN_NULL) return nullptr; @@ -218,8 +215,6 @@ LockedKeyPtr CacheMetadata::lockKeyMetadata( return locked_metadata; if (key_not_found_policy == KeyNotFoundPolicy::THROW) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "No such key `{}` in cache", key); - else if (key_not_found_policy == KeyNotFoundPolicy::THROW_LOGICAL) throw Exception(ErrorCodes::LOGICAL_ERROR, "No such key `{}` in cache", key); if (key_not_found_policy == KeyNotFoundPolicy::RETURN_NULL) @@ -563,11 +558,11 @@ bool LockedKey::isLastOwnerOfFileSegment(size_t offset) const return file_segment_metadata->file_segment.use_count() == 2; } -void LockedKey::removeAll(bool if_releasable) +void LockedKey::removeAllReleasable() { for (auto it = key_metadata->begin(); it != key_metadata->end();) { - if (if_releasable && !it->second->releasable()) + if (!it->second->releasable()) { ++it; continue; @@ -588,32 +583,17 @@ void LockedKey::removeAll(bool if_releasable) } } -KeyMetadata::iterator LockedKey::removeFileSegment(size_t offset) -{ - auto it = key_metadata->find(offset); - if (it == key_metadata->end()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no offset {}", offset); - - auto file_segment = it->second->file_segment; - return removeFileSegmentImpl(it, file_segment->lock()); -} - KeyMetadata::iterator LockedKey::removeFileSegment(size_t offset, const FileSegmentGuard::Lock & segment_lock) { auto it = key_metadata->find(offset); if (it == key_metadata->end()) throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no offset {}", offset); - return removeFileSegmentImpl(it, segment_lock); -} - -KeyMetadata::iterator LockedKey::removeFileSegmentImpl(KeyMetadata::iterator it, const FileSegmentGuard::Lock & 
segment_lock) -{ auto file_segment = it->second->file_segment; LOG_DEBUG( key_metadata->log, "Remove from cache. Key: {}, offset: {}, size: {}", - getKey(), file_segment->offset(), file_segment->reserved_size); + getKey(), offset, file_segment->reserved_size); chassert(file_segment->assertCorrectnessUnlocked(segment_lock)); diff --git a/src/Interpreters/Cache/Metadata.h b/src/Interpreters/Cache/Metadata.h index 42d74338e12..503c19f4150 100644 --- a/src/Interpreters/Cache/Metadata.h +++ b/src/Interpreters/Cache/Metadata.h @@ -87,7 +87,7 @@ struct CacheMetadata : public std::unordered_map, { public: using Key = FileCacheKey; - using IterateCacheMetadataFunc = std::function; + using IterateCacheMetadataFunc = std::function; explicit CacheMetadata(const std::string & path_); @@ -106,7 +106,6 @@ public: enum class KeyNotFoundPolicy { THROW, - THROW_LOGICAL, CREATE_EMPTY, RETURN_NULL, }; @@ -170,10 +169,9 @@ struct LockedKey : private boost::noncopyable std::shared_ptr getKeyMetadata() const { return key_metadata; } std::shared_ptr getKeyMetadata() { return key_metadata; } - void removeAll(bool if_releasable = true); + void removeAllReleasable(); KeyMetadata::iterator removeFileSegment(size_t offset, const FileSegmentGuard::Lock &); - KeyMetadata::iterator removeFileSegment(size_t offset); void shrinkFileSegmentToDownloadedSize(size_t offset, const FileSegmentGuard::Lock &); @@ -190,8 +188,6 @@ struct LockedKey : private boost::noncopyable std::string toString() const; private: - KeyMetadata::iterator removeFileSegmentImpl(KeyMetadata::iterator it, const FileSegmentGuard::Lock &); - const std::shared_ptr key_metadata; KeyGuard::Lock lock; /// `lock` must be destructed before `key_metadata`. }; diff --git a/src/Interpreters/FilesystemCacheLog.cpp b/src/Interpreters/FilesystemCacheLog.cpp index b660db064d1..17f0fda71ec 100644 --- a/src/Interpreters/FilesystemCacheLog.cpp +++ b/src/Interpreters/FilesystemCacheLog.cpp @@ -40,8 +40,6 @@ NamesAndTypesList FilesystemCacheLogElement::getNamesAndTypes() {"source_file_path", std::make_shared()}, {"file_segment_range", std::make_shared(types)}, {"total_requested_range", std::make_shared(types)}, - {"key", std::make_shared()}, - {"offset", std::make_shared()}, {"size", std::make_shared()}, {"read_type", std::make_shared()}, {"read_from_cache_attempted", std::make_shared()}, @@ -62,8 +60,6 @@ void FilesystemCacheLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(source_file_path); columns[i++]->insert(Tuple{file_segment_range.first, file_segment_range.second}); columns[i++]->insert(Tuple{requested_range.first, requested_range.second}); - columns[i++]->insert(file_segment_key); - columns[i++]->insert(file_segment_offset); columns[i++]->insert(file_segment_size); columns[i++]->insert(typeToString(cache_type)); columns[i++]->insert(read_from_cache_attempted); diff --git a/src/Interpreters/FilesystemCacheLog.h b/src/Interpreters/FilesystemCacheLog.h index d6dd00e5463..1b22d561c51 100644 --- a/src/Interpreters/FilesystemCacheLog.h +++ b/src/Interpreters/FilesystemCacheLog.h @@ -39,8 +39,6 @@ struct FilesystemCacheLogElement std::pair file_segment_range{}; std::pair requested_range{}; CacheType cache_type{}; - std::string file_segment_key; - size_t file_segment_offset; size_t file_segment_size; bool read_from_cache_attempted; String read_buffer_id; diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index e1ff8676bc7..f2d011b12d1 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp 
+++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -370,18 +370,7 @@ BlockIO InterpreterSystemQuery::execute() else { auto cache = FileCacheFactory::instance().getByName(query.filesystem_cache_name).cache; - if (query.delete_key.empty()) - { - cache->removeAllReleasable(); - } - else - { - auto key = FileCacheKey::fromKeyString(query.delete_key); - if (query.delete_offset.has_value()) - cache->removeFileSegment(key, query.delete_offset.value()); - else - cache->removeKey(key); - } + cache->removeAllReleasable(); } break; } diff --git a/src/Parsers/ASTSystemQuery.cpp b/src/Parsers/ASTSystemQuery.cpp index 9c5e7bff61e..a91449ff035 100644 --- a/src/Parsers/ASTSystemQuery.cpp +++ b/src/Parsers/ASTSystemQuery.cpp @@ -210,15 +210,7 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &, else if (type == Type::DROP_FILESYSTEM_CACHE) { if (!filesystem_cache_name.empty()) - { settings.ostr << (settings.hilite ? hilite_none : "") << " " << filesystem_cache_name; - if (!delete_key.empty()) - { - settings.ostr << (settings.hilite ? hilite_none : "") << " KEY " << delete_key; - if (delete_offset.has_value()) - settings.ostr << (settings.hilite ? hilite_none : "") << " OFFSET " << delete_offset.value(); - } - } } else if (type == Type::UNFREEZE) { diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index ebc3e9cd430..ca4802d9a9b 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -107,8 +107,6 @@ public: UInt64 seconds{}; String filesystem_cache_name; - std::string delete_key; - std::optional delete_offset; String backup_name; diff --git a/src/Parsers/ParserSystemQuery.cpp b/src/Parsers/ParserSystemQuery.cpp index ef71e994d56..48dbe60e241 100644 --- a/src/Parsers/ParserSystemQuery.cpp +++ b/src/Parsers/ParserSystemQuery.cpp @@ -405,15 +405,7 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & ParserLiteral path_parser; ASTPtr ast; if (path_parser.parse(pos, ast, expected)) - { res->filesystem_cache_name = ast->as()->value.safeGet(); - if (ParserKeyword{"KEY"}.ignore(pos, expected) && ParserIdentifier().parse(pos, ast, expected)) - { - res->delete_key = ast->as()->name(); - if (ParserKeyword{"OFFSET"}.ignore(pos, expected) && ParserLiteral().parse(pos, ast, expected)) - res->delete_offset = ast->as()->value.safeGet(); - } - } if (!parseQueryWithOnCluster(res, pos, expected)) return false; break; diff --git a/tests/queries/0_stateless/02808_filesystem_cache_drop_query.reference b/tests/queries/0_stateless/02808_filesystem_cache_drop_query.reference deleted file mode 100644 index d80fc78e03d..00000000000 --- a/tests/queries/0_stateless/02808_filesystem_cache_drop_query.reference +++ /dev/null @@ -1,4 +0,0 @@ -1 -0 -1 -0 diff --git a/tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh b/tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh deleted file mode 100755 index 9d987d0ebf2..00000000000 --- a/tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh +++ /dev/null @@ -1,67 +0,0 @@ -#!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-s3-storage, no-random-settings - -# set -x - -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. 
"$CUR_DIR"/../shell_config.sh - - -disk_name="${CLICKHOUSE_TEST_UNIQUE_NAME}" -$CLICKHOUSE_CLIENT -nm --query """ -DROP TABLE IF EXISTS test; -CREATE TABLE test (a Int32, b String) -ENGINE = MergeTree() ORDER BY tuple() -SETTINGS disk = disk_$disk_name(type = cache, max_size = '100Ki', path = ${CLICKHOUSE_TEST_UNIQUE_NAME}, disk = s3_disk); - -INSERT INTO test SELECT 1, 'test'; -""" - -query_id=$RANDOM - -$CLICKHOUSE_CLIENT --query_id "$query_id" --query "SELECT * FROM test FORMAT Null SETTINGS enable_filesystem_cache_log = 1" - -${CLICKHOUSE_CLIENT} -q " system flush logs" - -key=$($CLICKHOUSE_CLIENT -nm --query """ -SELECT key FROM system.filesystem_cache_log WHERE query_id = '$query_id' ORDER BY size DESC LIMIT 1; -""") - -offset=$($CLICKHOUSE_CLIENT -nm --query """ -SELECT offset FROM system.filesystem_cache_log WHERE query_id = '$query_id' ORDER BY size DESC LIMIT 1; -""") - -$CLICKHOUSE_CLIENT -nm --query """ -SELECT count() FROM system.filesystem_cache WHERE key = '$key' AND file_segment_range_begin = $offset; -""" - -$CLICKHOUSE_CLIENT -nm --query """ -SYSTEM DROP FILESYSTEM CACHE '$disk_name' KEY $key OFFSET $offset; -""" - -$CLICKHOUSE_CLIENT -nm --query """ -SELECT count() FROM system.filesystem_cache WHERE key = '$key' AND file_segment_range_begin = $offset; -""" - -query_id=$RANDOM$RANDOM - -$CLICKHOUSE_CLIENT --query_id "$query_id" --query "SELECT * FROM test FORMAT Null SETTINGS enable_filesystem_cache_log = 1" - -${CLICKHOUSE_CLIENT} -q " system flush logs" - -key=$($CLICKHOUSE_CLIENT -nm --query """ -SELECT key FROM system.filesystem_cache_log WHERE query_id = '$query_id' ORDER BY size DESC LIMIT 1; -""") - -$CLICKHOUSE_CLIENT -nm --query """ -SELECT count() FROM system.filesystem_cache WHERE key = '$key'; -""" - -$CLICKHOUSE_CLIENT -nm --query """ -SYSTEM DROP FILESYSTEM CACHE '$disk_name' KEY $key -""" - -$CLICKHOUSE_CLIENT -nm --query """ -SELECT count() FROM system.filesystem_cache WHERE key = '$key'; -""" From acd17c7974637714138a76fb83f73ec31946aa79 Mon Sep 17 00:00:00 2001 From: Victor Krasnov Date: Fri, 7 Jul 2023 10:40:04 +0800 Subject: [PATCH 1325/1997] Make a deal with the "Style check" --- src/Loggers/Loggers.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Loggers/Loggers.cpp b/src/Loggers/Loggers.cpp index 1e169190ca4..4c85ea79a63 100644 --- a/src/Loggers/Loggers.cpp +++ b/src/Loggers/Loggers.cpp @@ -39,7 +39,7 @@ static std::string renderFileNameTemplate(time_t now, const std::string & file_p fs::path path{file_path}; std::tm buf; localtime_r(&now, &buf); - std::stringstream ss; + std::ostringstream ss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM ss << std::put_time(&buf, file_path.c_str()); return path.replace_filename(ss.str()); } From d0ad416e352f39e20b034c5ee1b51cb9efdc6aec Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 7 Jul 2023 04:41:47 +0200 Subject: [PATCH 1326/1997] Fix flaky test detach_attach_partition_race --- .../0_stateless/01164_detach_attach_partition_race.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01164_detach_attach_partition_race.sh b/tests/queries/0_stateless/01164_detach_attach_partition_race.sh index 7640b9dddf2..3aec4c3445d 100755 --- a/tests/queries/0_stateless/01164_detach_attach_partition_race.sh +++ b/tests/queries/0_stateless/01164_detach_attach_partition_race.sh @@ -2,9 +2,12 @@ # Tags: race CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh +$CLICKHOUSE_CLIENT -q "drop table if exists mt" + $CLICKHOUSE_CLIENT -q "create table mt (n int) engine=MergeTree order by n settings parts_to_throw_insert=1000" $CLICKHOUSE_CLIENT -q "insert into mt values (1)" $CLICKHOUSE_CLIENT -q "insert into mt values (2)" @@ -13,7 +16,9 @@ $CLICKHOUSE_CLIENT -q "insert into mt values (3)" function thread_insert() { while true; do - $CLICKHOUSE_CLIENT -q "insert into mt values (rand())"; + # It might be the case that the threads are terminated and exited, but some children didn't and they are still sending queries when we are dropping tables. + # That's why the "Table doesn't exist" error is allowed, while other errors don't. + $CLICKHOUSE_CLIENT -q "insert into mt values (rand())" 2>&1 | tr -d '\n' | rg -v "Table .+ doesn't exist"; done } From 7080d85d2de6c743cc5759fa2a50d1ada1d51068 Mon Sep 17 00:00:00 2001 From: Victor Krasnov Date: Fri, 7 Jul 2023 10:54:36 +0800 Subject: [PATCH 1327/1997] Amend the documentation --- .../settings.md | 49 +++++++++++++++++- .../settings.md | 50 ++++++++++++++++++- 2 files changed, 95 insertions(+), 4 deletions(-) diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index bad7e388377..48361b0f157 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -1201,13 +1201,58 @@ Keys: - `console` – Send `log` and `errorlog` to the console instead of file. To enable, set to `1` or `true`. - `stream_compress` – Compress `log` and `errorlog` with `lz4` stream compression. To enable, set to `1` or `true`. +Both log and error log file names (only file names, not directories) support date and time format placeholders. + +**Placeholders** +Using the following placeholders, you can define a pattern for the resulting file name. “Example” column shows formatting result for `2023-07-06 18:32:07`. + +| Placeholder | Description | Example | +|-------------|---------------------------------------------------------------------------------------------------------------------|--------------------------| +| %% | Literal % | % | +| %n | New-line character | | +| %t | Horizontal tab character | | +| %Y | Year as a decimal number, e.g. 2017 | 2023 | +| %y | Last 2 digits of year as a decimal number (range [00,99]) | 23 | +| %C | First 2 digits of year as a decimal number (range [00,99]) | 20 | +| %G | Four-digit [ISO 8601 week-based year](https://en.wikipedia.org/wiki/ISO_8601#Week_dates), i.e. the year that contains the specified week. Normally useful only with %V | 2023 | +| %g | Last 2 digits of [ISO 8601 week-based year](https://en.wikipedia.org/wiki/ISO_8601#Week_dates), i.e. the year that contains the specified week. | 23 | +| %b | Abbreviated month name, e.g. Oct (locale dependent) | Jul | +| %h | Synonym of %b | Jul | +| %B | Full month name, e.g. October (locale dependent) | July | +| %m | Month as a decimal number (range [01,12]) | 07 | +| %U | Week of the year as a decimal number (Sunday is the first day of the week) (range [00,53]) | 27 | +| %W | Week of the year as a decimal number (Monday is the first day of the week) (range [00,53]) | 27 | +| %V | ISO 8601 week number (range [01,53]) | 27 | +| %j | Day of the year as a decimal number (range [001,366]) | 187 | +| %d | Day of the month as a zero-padded decimal number (range [01,31]). Single digit is preceded by zero. 
| 06 | +| %e | Day of the month as a space-padded decimal number (range [1,31]). Single digit is preceded by a space. |   6 | +| %a | Abbreviated weekday name, e.g. Fri (locale dependent) | Thu | +| %A | Full weekday name, e.g. Friday (locale dependent) | Thursday | +| %w | Weekday as a integer number with Sunday as 0 (range [0-6]) | 4 | +| %u | Weekday as a decimal number, where Monday is 1 (ISO 8601 format) (range [1-7]) | 4 | +| %H | Hour as a decimal number, 24 hour clock (range [00-23]) | 18 | +| %I | Hour as a decimal number, 12 hour clock (range [01,12]) | 06 | +| %M | Minute as a decimal number (range [00,59]) | 32 | +| %S | Second as a decimal number (range [00,60]) | 07 | +| %c | Standard date and time string, e.g. Sun Oct 17 04:41:13 2010 (locale dependent) | Thu Jul 6 18:32:07 2023 | +| %x | Localized date representation (locale dependent) | 07/06/23 | +| %X | Localized time representation, e.g. 18:40:20 or 6:40:20 PM (locale dependent) | 18:32:07 | +| %D | Short MM/DD/YY date, equivalent to %m/%d/%y | 07/06/23 | +| %F | Short YYYY-MM-DD date, equivalent to %Y-%m-%d | 2023-07-06 | +| %r | Localized 12-hour clock time (locale dependent) | 06:32:07 PM | +| %R | Equivalent to "%H:%M" | 18:32 | +| %T | Equivalent to "%H:%M:%S" (the ISO 8601 time format) | 18:32:07 | +| %p | Localized a.m. or p.m. designation (locale dependent) | PM | +| %z | Offset from UTC in the ISO 8601 format (e.g. -0430), or no characters if the time zone information is not available | +0800 | +| %Z | Locale-dependent time zone name or abbreviation, or no characters if the time zone information is not available | Z AWST | + **Example** ``` xml trace - /var/log/clickhouse-server/clickhouse-server.log - /var/log/clickhouse-server/clickhouse-server.err.log + /var/log/clickhouse-server/clickhouse-server-%F-%T.log + /var/log/clickhouse-server/clickhouse-server-%F-%T.err.log 1000M 10 true diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md index 5430469ea18..421df3fe3eb 100644 --- a/docs/ru/operations/server-configuration-parameters/settings.md +++ b/docs/ru/operations/server-configuration-parameters/settings.md @@ -575,14 +575,60 @@ ClickHouse поддерживает динамическое изменение - `errorlog` - Файл лога ошибок. - `size` - Размер файла. Действует для `log` и `errorlog`. Как только файл достиг размера `size`, ClickHouse архивирует и переименовывает его, а на его месте создает новый файл лога. - `count` - Количество заархивированных файлов логов, которые сохраняет ClickHouse. +- `stream_compress` – Сжимать `log` и `errorlog` с помощью алгоритма `lz4`. Чтобы активировать, узтановите значение `1` или `true`. + +Имена файлов `log` и `errorlog` (только имя файла, а не директорий) поддерживают спецификаторы шаблонов даты и времени. + +**Спецификаторы форматирования** +С помощью следующих спецификаторов, можно определить шаблон для формирования имени файла. Столбец “Пример” показывает возможные значения на момент времени `2023-07-06 18:32:07`. 
+ +| Спецификатор | Описание | Пример | +|--------------|---------------------------------------------------------------------------------------------------------------------|--------------------------| +| %% | Литерал % | % | +| %n | Символ новой строки | | +| %t | Символ горизонтальной табуляции | | +| %Y | Год как десятичное число, например, 2017 | 2023 | +| %y | Последние 2 цифры года в виде десятичного числа (диапазон [00,99]) | 23 | +| %C | Первые 2 цифры года в виде десятичного числа (диапазон [00,99]) | 20 | +| %G | Год по неделям согласно [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Week_dates), то есть год, который содержит указанную неделю. Обычно используется вместе с %V. | 2023 | +| %g | Последние 2 цифры [года по неделям ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Week_dates), т.е. года, содержащего указанную неделю (диапазон [00,99]). | 23 | +| %b | Сокращённое название месяца, например Oct (зависит от локали) | Jul | +| %h | Синоним %b | Jul | +| %B | Полное название месяца, например, October (зависит от локали) | July | +| %m | Месяц в виде десятичного числа (диапазон [01,12]) | 07 | +| %U | Неделя года в виде десятичного числа (воскресенье - первый день недели) (диапазон [00,53]) | 27 | +| %W | Неделя года в виде десятичного числа (понедельник - первый день недели) (диапазон [00,53]) | 27 | +| %V | Неделя года ISO 8601 (диапазон [01,53]) | 27 | +| %j | День года в виде десятичного числа (диапазон [001,366]) | 187 | +| %d | День месяца в виде десятичного числа (диапазон [01,31]) Перед одиночной цифрой ставится ноль. | 06 | +| %e | День месяца в виде десятичного числа (диапазон [1,31]). Перед одиночной цифрой ставится пробел. |   6 | +| %a | Сокращённое название дня недели, например, Fri (зависит от локали) | Thu | +| %A | Полный день недели, например, Friday (зависит от локали) | Thursday | +| %w | День недели в виде десятичного числа, где воскресенье равно 0 (диапазон [0-6]) | 4 | +| %u | День недели в виде десятичного числа, где понедельник равен 1 (формат ISO 8601) (диапазон [1-7]) | 4 | +| %H | Час в виде десятичного числа, 24-часовой формат (диапазон [00-23]) | 18 | +| %I | Час в виде десятичного числа, 12-часовой формат (диапазон [01,12]) | 06 | +| %M | Минуты в виде десятичного числа (диапазон [00,59]) | 32 | +| %S | Секунды как десятичное число (диапазон [00,60]) | 07 | +| %c | Стандартная строка даты и времени, например, Sun Oct 17 04:41:13 2010 (зависит от локали) | Thu Jul 6 18:32:07 2023 | +| %x | Локализованное представление даты (зависит от локали) | 07/06/23 | +| %X | Локализованное представление времени, например, 18:40:20 или 6:40:20 PM (зависит от локали) | 18:32:07 | +| %D | Эквивалентно "%m/%d/%y" | 07/06/23 | +| %F | Эквивалентно "%Y-%m-%d" (формат даты ISO 8601) | 2023-07-06 | +| %r | Локализованное 12-часовое время (зависит от локали) | 06:32:07 PM | +| %R | Эквивалентно "%H:%M" | 18:32 | +| %T | Эквивалентно "%H:%M:%S" (формат времени ISO 8601) | 18:32:07 | +| %p | Локализованное обозначение a.m. или p.m. 
(зависит от локали) | PM | +| %z | Смещение от UTC в формате ISO 8601 (например, -0430), или без символов, если информация о часовом поясе недоступна | +0800 | +| %Z | Зависящее от локали название или аббревиатура часового пояса, если информация о часовом поясе доступна | Z AWST | **Пример** ``` xml trace - /var/log/clickhouse-server/clickhouse-server.log - /var/log/clickhouse-server/clickhouse-server.err.log + /var/log/clickhouse-server/clickhouse-server-%F-%T.log + /var/log/clickhouse-server/clickhouse-server-%F-%T.err.log 1000M 10 From 63fbde41fee5fb8c0133dc5a576ed4e3caa5c3f2 Mon Sep 17 00:00:00 2001 From: Victor Krasnov Date: Fri, 7 Jul 2023 11:01:39 +0800 Subject: [PATCH 1328/1997] Reformat cluster.py (add empty line) --- tests/integration/helpers/cluster.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 5b583b865de..d4b1ee76712 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -72,6 +72,7 @@ CLICKHOUSE_LOG_FILE = "/var/log/clickhouse-server/clickhouse-server.log" CLICKHOUSE_ERROR_LOG_FILE = "/var/log/clickhouse-server/clickhouse-server.err.log" + # to create docker-compose env file def _create_env_file(path, variables): logging.debug(f"Env {variables} stored in {path}") From 2246e86159824f9e658ca28ecb796295a1b8585c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 7 Jul 2023 05:30:32 +0200 Subject: [PATCH 1329/1997] Fix error in subquery operators --- .../AggregateFunctionMinMaxAny.h | 48 +++++++++++-------- .../02812_subquery_operators.reference | 6 +++ .../0_stateless/02812_subquery_operators.sql | 6 +++ 3 files changed, 41 insertions(+), 19 deletions(-) create mode 100644 tests/queries/0_stateless/02812_subquery_operators.reference create mode 100644 tests/queries/0_stateless/02812_subquery_operators.sql diff --git a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h index 5312df32459..6bfa6895a5c 100644 --- a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h +++ b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h @@ -51,7 +51,8 @@ private: T value = T{}; public: - static constexpr bool is_nullable = false; + static constexpr bool result_is_nullable = false; + static constexpr bool should_skip_null_arguments = true; static constexpr bool is_any = false; bool has() const @@ -501,7 +502,8 @@ private: char small_data[MAX_SMALL_STRING_SIZE]; /// Including the terminating zero. public: - static constexpr bool is_nullable = false; + static constexpr bool result_is_nullable = false; + static constexpr bool should_skip_null_arguments = true; static constexpr bool is_any = false; bool has() const @@ -769,7 +771,7 @@ static_assert( /// For any other value types. 
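
Before the SingleValueDataGeneric hunks that follow: this commit splits the old is_nullable flag into two independent properties, result_is_nullable (the aggregate may return NULL) and should_skip_null_arguments (NULL inputs are filtered out before the aggregate sees them). That distinction is what lets singleValueOrNull observe NULL inputs while still producing a Nullable result. A hedged, framework-free model of the contract it implements for subquery operators such as x = ALL (SELECT ...):

#include <optional>

// Illustrative sketch, not the real aggregate-function framework: keep the
// single distinct value seen so far, or collapse to NULL (std::nullopt) as
// soon as a second distinct value arrives.
struct SingleValueOrNull
{
    std::optional<int> value;
    bool conflict = false;

    void add(int v)
    {
        if (!value)
            value = v;
        else if (*value != v)
            conflict = true;
    }

    std::optional<int> result() const
    {
        // Zero values or two-and-more distinct values both yield NULL.
        if (conflict || !value)
            return std::nullopt;
        return value;
    }
};

int main()
{
    SingleValueOrNull agg;
    agg.add(123);
    return agg.result() == std::optional<int>{123} ? 0 : 1;
}
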
-template +template struct SingleValueDataGeneric { private: @@ -779,12 +781,13 @@ private: bool has_value = false; public: - static constexpr bool is_nullable = IS_NULLABLE; + static constexpr bool result_is_nullable = RESULT_IS_NULLABLE; + static constexpr bool should_skip_null_arguments = !RESULT_IS_NULLABLE; static constexpr bool is_any = false; bool has() const { - if constexpr (is_nullable) + if constexpr (result_is_nullable) return has_value; return !value.isNull(); } @@ -820,14 +823,14 @@ public: void change(const IColumn & column, size_t row_num, Arena *) { column.get(row_num, value); - if constexpr (is_nullable) + if constexpr (result_is_nullable) has_value = true; } void change(const Self & to, Arena *) { value = to.value; - if constexpr (is_nullable) + if constexpr (result_is_nullable) has_value = true; } @@ -844,7 +847,7 @@ public: bool changeFirstTime(const Self & to, Arena * arena) { - if (!has() && (is_nullable || to.has())) + if (!has() && (result_is_nullable || to.has())) { change(to, arena); return true; @@ -879,7 +882,7 @@ public: } else { - if constexpr (is_nullable) + if constexpr (result_is_nullable) { Field new_value; column.get(row_num, new_value); @@ -910,7 +913,7 @@ public: { if (!to.has()) return false; - if constexpr (is_nullable) + if constexpr (result_is_nullable) { if (!has()) { @@ -945,7 +948,7 @@ public: } else { - if constexpr (is_nullable) + if constexpr (result_is_nullable) { Field new_value; column.get(row_num, new_value); @@ -975,7 +978,7 @@ public: { if (!to.has()) return false; - if constexpr (is_nullable) + if constexpr (result_is_nullable) { if (!value.isNull() && (to.value.isNull() || value < to.value)) { @@ -1138,13 +1141,20 @@ struct AggregateFunctionAnyLastData : Data #endif }; + +/** The aggregate function 'singleValueOrNull' is used to implement subquery operators, + * such as x = ALL (SELECT ...) + * It checks if there is only one unique non-NULL value in the data. + * If there is only one unique value - returns it. + * If there are zero or at least two distinct values - returns NULL. 
+ */ template struct AggregateFunctionSingleValueOrNullData : Data { - static constexpr bool is_nullable = true; - using Self = AggregateFunctionSingleValueOrNullData; + static constexpr bool result_is_nullable = true; + bool first_value = true; bool is_null = false; @@ -1166,7 +1176,7 @@ struct AggregateFunctionSingleValueOrNullData : Data if (!to.has()) return; - if (first_value) + if (first_value && !to.first_value) { first_value = false; this->change(to, arena); @@ -1311,7 +1321,7 @@ public: static DataTypePtr createResultType(const DataTypePtr & type_) { - if constexpr (Data::is_nullable) + if constexpr (Data::result_is_nullable) return makeNullable(type_); return type_; } @@ -1431,13 +1441,13 @@ public: } AggregateFunctionPtr getOwnNullAdapter( - const AggregateFunctionPtr & nested_function, + const AggregateFunctionPtr & original_function, const DataTypes & /*arguments*/, const Array & /*params*/, const AggregateFunctionProperties & /*properties*/) const override { - if (Data::is_nullable) - return nested_function; + if (Data::result_is_nullable && !Data::should_skip_null_arguments) + return original_function; return nullptr; } diff --git a/tests/queries/0_stateless/02812_subquery_operators.reference b/tests/queries/0_stateless/02812_subquery_operators.reference new file mode 100644 index 00000000000..aed0a046f99 --- /dev/null +++ b/tests/queries/0_stateless/02812_subquery_operators.reference @@ -0,0 +1,6 @@ + +Hello +Hello +123 +1 + ['\0'] [] \0 [''] diff --git a/tests/queries/0_stateless/02812_subquery_operators.sql b/tests/queries/0_stateless/02812_subquery_operators.sql new file mode 100644 index 00000000000..b0638b43e89 --- /dev/null +++ b/tests/queries/0_stateless/02812_subquery_operators.sql @@ -0,0 +1,6 @@ +SELECT singleValueOrNull(toNullable('')); +SELECT singleValueOrNull(toNullable('Hello')); +SELECT singleValueOrNull((SELECT 'Hello')); +SELECT singleValueOrNull(toNullable(123)); +SELECT '' = ALL (SELECT toNullable('')); +SELECT '', ['\0'], [], singleValueOrNull(( SELECT '\0' ) ), ['']; From 3edee4174c040b079015ce6524c0d4c56926e348 Mon Sep 17 00:00:00 2001 From: Victor Krasnov Date: Fri, 7 Jul 2023 11:34:03 +0800 Subject: [PATCH 1330/1997] Add AWST time zone abbreviation to the ignore list --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 2802e52c288..6ddca6db538 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -12,6 +12,7 @@ ARMv ASLR ASOF ASan +AWST Actian ActionsMenu ActiveRecord From 87ea1b6667ed9a79272e3b77c529369f2acc4e4e Mon Sep 17 00:00:00 2001 From: Victor Krasnov Date: Fri, 7 Jul 2023 13:01:30 +0800 Subject: [PATCH 1331/1997] Recover the integration test --- .../test.py | 58 +++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/tests/integration/test_render_log_file_name_templates/test.py b/tests/integration/test_render_log_file_name_templates/test.py index e69de29bb2d..9fa87056d2c 100644 --- a/tests/integration/test_render_log_file_name_templates/test.py +++ b/tests/integration/test_render_log_file_name_templates/test.py @@ -0,0 +1,58 @@ +import pytest +import logging +from helpers.cluster import ClickHouseCluster +from datetime import datetime + + +log_dir = "/var/log/clickhouse-server/" +cluster = ClickHouseCluster(__file__) + + +@pytest.fixture(scope="module") +def started_cluster(): + cluster.add_instance( + 
"file-names-from-config", + main_configs=["configs/config-file-template.xml"], + clickhouse_log_file=None, + clickhouse_error_log_file=None, + ) + cluster.add_instance( + "file-names-from-params", + clickhouse_log_file=log_dir + "clickhouse-server-%Y-%m.log", + clickhouse_error_log_file=log_dir + "clickhouse-server-%Y-%m.err.log", + ) + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_check_file_names(started_cluster): + now = datetime.now() + log_file = ( + log_dir + f"clickhouse-server-{now.strftime('%Y')}-{now.strftime('%m')}.log" + ) + err_log_file = ( + log_dir + f"clickhouse-server-{now.strftime('%Y')}-{now.strftime('%m')}.err.log" + ) + logging.debug(f"log_file {log_file} err_log_file {err_log_file}") + + for name, instance in started_cluster.instances.items(): + files = instance.exec_in_container( + ["bash", "-c", f"ls -lh {log_dir}"], nothrow=True + ) + + logging.debug(f"check instance '{name}': {log_dir} contains: {files}") + + assert ( + instance.exec_in_container(["bash", "-c", f"ls {log_file}"], nothrow=True) + == log_file + "\n" + ) + + assert ( + instance.exec_in_container( + ["bash", "-c", f"ls {err_log_file}"], nothrow=True + ) + == err_log_file + "\n" + ) From 4c44c1f6ea422356bbed589aa5053fcd08139cb6 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 7 Jul 2023 06:32:42 +0000 Subject: [PATCH 1332/1997] Wait inside the function --- tests/queries/0_stateless/02481_async_insert_race_long.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/02481_async_insert_race_long.sh b/tests/queries/0_stateless/02481_async_insert_race_long.sh index c4b026c6aba..d8153967e9a 100755 --- a/tests/queries/0_stateless/02481_async_insert_race_long.sh +++ b/tests/queries/0_stateless/02481_async_insert_race_long.sh @@ -32,6 +32,8 @@ function insert3() ${MY_CLICKHOUSE_CLIENT} --wait_for_async_insert 1 -q "INSERT INTO async_inserts_race VALUES (7, 'g') (8, 'h')" & sleep 0.05 done + + wait } function select1() From 95fedaedff3ad3e3cdb15d3cc2b06ab6d9ea1e9b Mon Sep 17 00:00:00 2001 From: Victor Krasnov Date: Fri, 7 Jul 2023 15:16:10 +0800 Subject: [PATCH 1333/1997] Refine the integration test code --- .../__init__.py | 58 ------------------- .../test.py | 8 +-- 2 files changed, 2 insertions(+), 64 deletions(-) diff --git a/tests/integration/test_render_log_file_name_templates/__init__.py b/tests/integration/test_render_log_file_name_templates/__init__.py index 9fa87056d2c..e69de29bb2d 100644 --- a/tests/integration/test_render_log_file_name_templates/__init__.py +++ b/tests/integration/test_render_log_file_name_templates/__init__.py @@ -1,58 +0,0 @@ -import pytest -import logging -from helpers.cluster import ClickHouseCluster -from datetime import datetime - - -log_dir = "/var/log/clickhouse-server/" -cluster = ClickHouseCluster(__file__) - - -@pytest.fixture(scope="module") -def started_cluster(): - cluster.add_instance( - "file-names-from-config", - main_configs=["configs/config-file-template.xml"], - clickhouse_log_file=None, - clickhouse_error_log_file=None, - ) - cluster.add_instance( - "file-names-from-params", - clickhouse_log_file=log_dir + "clickhouse-server-%Y-%m.log", - clickhouse_error_log_file=log_dir + "clickhouse-server-%Y-%m.err.log", - ) - try: - cluster.start() - yield cluster - finally: - cluster.shutdown() - - -def test_check_file_names(started_cluster): - now = datetime.now() - log_file = ( - log_dir + f"clickhouse-server-{now.strftime('%Y')}-{now.strftime('%m')}.log" - ) - err_log_file = ( - log_dir + 
f"clickhouse-server-{now.strftime('%Y')}-{now.strftime('%m')}.err.log" - ) - logging.debug(f"log_file {log_file} err_log_file {err_log_file}") - - for name, instance in started_cluster.instances.items(): - files = instance.exec_in_container( - ["bash", "-c", f"ls -lh {log_dir}"], nothrow=True - ) - - logging.debug(f"check instance '{name}': {log_dir} contains: {files}") - - assert ( - instance.exec_in_container(["bash", "-c", f"ls {log_file}"], nothrow=True) - == log_file + "\n" - ) - - assert ( - instance.exec_in_container( - ["bash", "-c", f"ls {err_log_file}"], nothrow=True - ) - == err_log_file + "\n" - ) diff --git a/tests/integration/test_render_log_file_name_templates/test.py b/tests/integration/test_render_log_file_name_templates/test.py index 9fa87056d2c..58df32b823e 100644 --- a/tests/integration/test_render_log_file_name_templates/test.py +++ b/tests/integration/test_render_log_file_name_templates/test.py @@ -30,12 +30,8 @@ def started_cluster(): def test_check_file_names(started_cluster): now = datetime.now() - log_file = ( - log_dir + f"clickhouse-server-{now.strftime('%Y')}-{now.strftime('%m')}.log" - ) - err_log_file = ( - log_dir + f"clickhouse-server-{now.strftime('%Y')}-{now.strftime('%m')}.err.log" - ) + log_file = log_dir + f"clickhouse-server-{now.strftime('%Y-%m')}.log" + err_log_file = log_dir + f"clickhouse-server-{now.strftime('%Y-%m')}.err.log" logging.debug(f"log_file {log_file} err_log_file {err_log_file}") for name, instance in started_cluster.instances.items(): From fb2affcae31afa1558706592860cc8f32e44ecde Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 7 Jul 2023 08:37:08 +0000 Subject: [PATCH 1334/1997] Dump all rules --- tests/integration/helpers/network.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/helpers/network.py b/tests/integration/helpers/network.py index 60b46926589..fe3a858b867 100644 --- a/tests/integration/helpers/network.py +++ b/tests/integration/helpers/network.py @@ -161,7 +161,7 @@ class _NetworkManager: self._exec_run(cmd, privileged=True) def dump_rules(self): - cmd = ["iptables", "-L", "DOCKER-USER"] + cmd = ["iptables", "-L"] return self._exec_run(cmd, privileged=True) @staticmethod From 5b102ce7d44d678a674d29e4140a60950c69f537 Mon Sep 17 00:00:00 2001 From: Victor Krasnov Date: Fri, 7 Jul 2023 17:21:47 +0800 Subject: [PATCH 1335/1997] Amend English version of settings.md --- .../server-configuration-parameters/settings.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 48361b0f157..82dac74e647 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -1201,12 +1201,12 @@ Keys: - `console` – Send `log` and `errorlog` to the console instead of file. To enable, set to `1` or `true`. - `stream_compress` – Compress `log` and `errorlog` with `lz4` stream compression. To enable, set to `1` or `true`. -Both log and error log file names (only file names, not directories) support date and time format placeholders. +Both log and error log file names (only file names, not directories) support date and time format specifiers. -**Placeholders** -Using the following placeholders, you can define a pattern for the resulting file name. “Example” column shows formatting result for `2023-07-06 18:32:07`. 
+**Format specifiers** +Using the following format specifiers, you can define a pattern for the resulting file name. “Example” column shows possible results for `2023-07-06 18:32:07`. -| Placeholder | Description | Example | +| Specifier | Description | Example | |-------------|---------------------------------------------------------------------------------------------------------------------|--------------------------| | %% | Literal % | % | | %n | New-line character | | From 23bd23802fc160a34e09db83c87fda53ef645e19 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 7 Jul 2023 12:26:15 +0300 Subject: [PATCH 1336/1997] CacheDictionary request only unique keys from source --- src/Dictionaries/CacheDictionary.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/Dictionaries/CacheDictionary.cpp b/src/Dictionaries/CacheDictionary.cpp index c5c88a9f142..e27e25ea7c4 100644 --- a/src/Dictionaries/CacheDictionary.cpp +++ b/src/Dictionaries/CacheDictionary.cpp @@ -552,13 +552,14 @@ void CacheDictionary::update(CacheDictionaryUpdateUnitPtr Date: Fri, 7 Jul 2023 09:58:35 +0000 Subject: [PATCH 1337/1997] Skip parallel keepermap test --- tests/integration/parallel_skip.json | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration/parallel_skip.json b/tests/integration/parallel_skip.json index e9089fcde73..d060218456a 100644 --- a/tests/integration/parallel_skip.json +++ b/tests/integration/parallel_skip.json @@ -66,5 +66,7 @@ "test_server_reload/test.py::test_remove_http_port", "test_server_reload/test.py::test_remove_mysql_port", "test_server_reload/test.py::test_remove_postgresql_port", - "test_server_reload/test.py::test_remove_tcp_port" + "test_server_reload/test.py::test_remove_tcp_port", + + "test_keeper_map/test.py::test_keeper_map_without_zk" ] From 2ada80aa109991f664f6a9495eddcc013215b94d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 7 Jul 2023 13:32:39 +0300 Subject: [PATCH 1338/1997] Update 02360_send_logs_level_colors.sh --- tests/queries/0_stateless/02360_send_logs_level_colors.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02360_send_logs_level_colors.sh b/tests/queries/0_stateless/02360_send_logs_level_colors.sh index a9b7d4dd3c1..127c94c88e2 100755 --- a/tests/queries/0_stateless/02360_send_logs_level_colors.sh +++ b/tests/queries/0_stateless/02360_send_logs_level_colors.sh @@ -21,7 +21,7 @@ spawn bash -c "$command" expect 1 EOF - rg -F $'\x1b' "$file_name" && cat "$file_name" || echo "ASCII text" + grep -F $'\x1b' "$file_name" && cat "$file_name" || echo "ASCII text" } run "$CLICKHOUSE_CLIENT -q 'SELECT 1' 2>$file_name" From 602392bb6206590e0d24df05eabf69a970767756 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Fri, 7 Jul 2023 12:37:16 +0200 Subject: [PATCH 1339/1997] Print short fault info only from safe fields --- src/Daemon/BaseDaemon.cpp | 83 ++++++++++++++++++++++++--------------- 1 file changed, 52 insertions(+), 31 deletions(-) diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index 6d29523a354..f766880bd34 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -310,6 +310,56 @@ private: { ThreadStatus thread_status; + /// First log those fields that are safe to access and that should not cause new fault. + /// That way we will have some duplicated info in the log but we don't loose important info + /// in case of double fault. 
+ + std::string signal_description = "Unknown signal"; + + /// Some of these are not really signals, but our own indications on failure reason. + if (sig == StdTerminate) + signal_description = "std::terminate"; + else if (sig == SanitizerTrap) + signal_description = "sanitizer trap"; + else if (sig >= 0) + signal_description = strsignal(sig); // NOLINT(concurrency-mt-unsafe) // it is not thread-safe but ok in this context + + String error_message; + + if (sig != SanitizerTrap) + error_message = signalToErrorMessage(sig, info, *context); + else + error_message = "Sanitizer trap."; + + LOG_FATAL(log, "########## Short fault info ############"); + + LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) Received signal {} ({})", + VERSION_STRING, VERSION_OFFICIAL, daemon.build_id, daemon.git_hash, + thread_num, signal_description, sig); + + LOG_FATAL(log, fmt::runtime(error_message)); + + String bare_stacktrace_str; + if (stack_trace.getSize()) + { + /// Write bare stack trace (addresses) just in case if we will fail to print symbolized stack trace. + /// NOTE: This still require memory allocations and mutex lock inside logger. + /// BTW we can also print it to stderr using write syscalls. + + WriteBufferFromOwnString bare_stacktrace; + writeString("Stack trace:", bare_stacktrace); + for (size_t i = stack_trace.getOffset(); i < stack_trace.getSize(); ++i) + { + writeChar(' ', bare_stacktrace); + writePointerHex(stack_trace.getFramePointers()[i], bare_stacktrace); + } + + LOG_FATAL(log, fmt::runtime(bare_stacktrace.str())); + bare_stacktrace_str = bare_stacktrace.str(); + } + + /// Now try to access potentially unsafe data in thread_ptr. + String query_id; String query; @@ -326,16 +376,6 @@ private: } } - std::string signal_description = "Unknown signal"; - - /// Some of these are not really signals, but our own indications on failure reason. - if (sig == StdTerminate) - signal_description = "std::terminate"; - else if (sig == SanitizerTrap) - signal_description = "sanitizer trap"; - else if (sig >= 0) - signal_description = strsignal(sig); // NOLINT(concurrency-mt-unsafe) // it is not thread-safe but ok in this context - LOG_FATAL(log, "########################################"); if (query_id.empty()) @@ -351,30 +391,11 @@ private: thread_num, query_id, query, signal_description, sig); } - String error_message; - - if (sig != SanitizerTrap) - error_message = signalToErrorMessage(sig, info, *context); - else - error_message = "Sanitizer trap."; - LOG_FATAL(log, fmt::runtime(error_message)); - if (stack_trace.getSize()) + if (!bare_stacktrace_str.empty()) { - /// Write bare stack trace (addresses) just in case if we will fail to print symbolized stack trace. - /// NOTE: This still require memory allocations and mutex lock inside logger. - /// BTW we can also print it to stderr using write syscalls. - - WriteBufferFromOwnString bare_stacktrace; - writeString("Stack trace:", bare_stacktrace); - for (size_t i = stack_trace.getOffset(); i < stack_trace.getSize(); ++i) - { - writeChar(' ', bare_stacktrace); - writePointerHex(stack_trace.getFramePointers()[i], bare_stacktrace); - } - - LOG_FATAL(log, fmt::runtime(bare_stacktrace.str())); + LOG_FATAL(log, fmt::runtime(bare_stacktrace_str)); } /// Write symbolized stack trace line by line for better grep-ability. 
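
The BaseDaemon patch above is a two-phase hardening of the fatal-error handler: everything derivable from the signal context is logged first, and only then does the handler touch thread-local state (query id, query text) that may itself be damaged. A condensed sketch of the idea, assuming a POSIX environment (simplified; the real handler also emits the bare stack trace in phase one, and fprintf/strsignal are not strictly async-signal-safe, which is tolerable only because the process is already dying):

#include <csignal>
#include <cstdio>
#include <cstring>

static void fatalSignalHandler(int sig)
{
    // Phase 1: only data derived from the signal itself. If a second fault
    // happens later, this "short fault info" has already reached the log.
    std::fprintf(stderr, "########## Short fault info ############\n");
    std::fprintf(stderr, "Received signal %d (%s)\n", sig, strsignal(sig));

    // Phase 2: potentially unsafe reads of thread state would go here;
    // dumpQueryInfoFromThreadPtr() is an illustrative placeholder, not a real call.

    std::signal(sig, SIG_DFL);
    std::raise(sig); // re-raise so the default action still occurs
}

int main()
{
    std::signal(SIGSEGV, fatalSignalHandler);
    return 0;
}
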
From cd3080428ea3da6a71169c929e959a0c3f9c5d5b Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 7 Jul 2023 10:58:01 +0000 Subject: [PATCH 1340/1997] Fix async connect to hosts with multiple ips --- src/Client/Connection.cpp | 2 + src/Client/Connection.h | 4 ++ src/Client/ConnectionEstablisher.cpp | 7 +- src/Client/ConnectionEstablisher.h | 2 + .../configs/enable_hedged.xml | 8 +++ .../configs/listen_host.xml | 4 ++ .../test.py | 65 +++++++++++++++++++ 7 files changed, 91 insertions(+), 1 deletion(-) create mode 100644 tests/integration/test_async_connect_to_multiple_ips/configs/enable_hedged.xml create mode 100644 tests/integration/test_async_connect_to_multiple_ips/configs/listen_host.xml create mode 100644 tests/integration/test_async_connect_to_multiple_ips/test.py diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index cd102f46ffe..cac5600fbcb 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -105,6 +105,8 @@ void Connection::connect(const ConnectionTimeouts & timeouts) for (auto it = addresses.begin(); it != addresses.end();) { + have_more_addresses_to_connect = it != std::prev(addresses.end()); + if (connected) disconnect(); diff --git a/src/Client/Connection.h b/src/Client/Connection.h index cb3f2507cb9..f4daf8e3aeb 100644 --- a/src/Client/Connection.h +++ b/src/Client/Connection.h @@ -159,6 +159,8 @@ public: out->setAsyncCallback(async_callback); } + bool haveMoreAddressesToConnect() const { return have_more_addresses_to_connect; } + private: String host; UInt16 port; @@ -227,6 +229,8 @@ private: std::shared_ptr maybe_compressed_out; std::unique_ptr block_out; + bool have_more_addresses_to_connect = false; + /// Logger is created lazily, for avoid to run DNS request in constructor. class LoggerWrapper { diff --git a/src/Client/ConnectionEstablisher.cpp b/src/Client/ConnectionEstablisher.cpp index 897fb5fde73..439025447ca 100644 --- a/src/Client/ConnectionEstablisher.cpp +++ b/src/Client/ConnectionEstablisher.cpp @@ -179,7 +179,7 @@ bool ConnectionEstablisherAsync::checkTimeout() is_timeout_alarmed = true; } - if (is_timeout_alarmed && !is_socket_ready) + if (is_timeout_alarmed && !is_socket_ready && !haveMoreAddressesToConnect()) { /// In not async case timeout exception would be thrown and caught in ConnectionEstablisher::run, /// but in async case we process timeout outside and cannot throw exception. So, we just save fail message. 
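
Before the checkTimeout() hunk below, the shape of this fix is worth spelling out: when a host resolves to several addresses, a connect timeout on one endpoint must not be reported as a final failure while untried addresses remain. A minimal sketch with illustrative names (connectTo() stands in for the real socket code; have_more mirrors the have_more_addresses_to_connect flag added above):

#include <string>
#include <vector>

// Stub for a single connection attempt that may time out.
static bool connectTo(const std::string & /*address*/) { return false; }

static bool connectToAny(const std::vector<std::string> & addresses)
{
    for (size_t i = 0; i < addresses.size(); ++i)
    {
        if (connectTo(addresses[i]))
            return true;

        // A timeout here is only terminal once the address list is exhausted.
        bool have_more = i + 1 < addresses.size();
        if (!have_more)
            return false;
    }
    return false;
}

int main()
{
    // E.g. a stale IPv6 entry followed by a working IPv4 one, as in the test below.
    return connectToAny({"2001:3984:3989::1:1118", "10.5.95.11"}) ? 1 : 0;
}

The addresses in main() echo the integration test added by this commit, where a wrong IPv6 entry is written to /etc/hosts ahead of node1's correct IPv4 address.
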
@@ -225,6 +225,11 @@ void ConnectionEstablisherAsync::resetResult() } } +bool ConnectionEstablisherAsync::haveMoreAddressesToConnect() +{ + return !result.entry.isNull() && result.entry->haveMoreAddressesToConnect(); +} + #endif } diff --git a/src/Client/ConnectionEstablisher.h b/src/Client/ConnectionEstablisher.h index 5b58563dc01..a8126900d3b 100644 --- a/src/Client/ConnectionEstablisher.h +++ b/src/Client/ConnectionEstablisher.h @@ -104,6 +104,8 @@ private: void resetResult(); + bool haveMoreAddressesToConnect(); + ConnectionEstablisher connection_establisher; TryResult result; std::string fail_message; diff --git a/tests/integration/test_async_connect_to_multiple_ips/configs/enable_hedged.xml b/tests/integration/test_async_connect_to_multiple_ips/configs/enable_hedged.xml new file mode 100644 index 00000000000..238370176af --- /dev/null +++ b/tests/integration/test_async_connect_to_multiple_ips/configs/enable_hedged.xml @@ -0,0 +1,8 @@ + + + + 1 + 0 + + + diff --git a/tests/integration/test_async_connect_to_multiple_ips/configs/listen_host.xml b/tests/integration/test_async_connect_to_multiple_ips/configs/listen_host.xml new file mode 100644 index 00000000000..df0247fd651 --- /dev/null +++ b/tests/integration/test_async_connect_to_multiple_ips/configs/listen_host.xml @@ -0,0 +1,4 @@ + + :: + + diff --git a/tests/integration/test_async_connect_to_multiple_ips/test.py b/tests/integration/test_async_connect_to_multiple_ips/test.py new file mode 100644 index 00000000000..0c18a316d4b --- /dev/null +++ b/tests/integration/test_async_connect_to_multiple_ips/test.py @@ -0,0 +1,65 @@ +import pytest +from helpers.cluster import ClickHouseCluster + + +cluster = ClickHouseCluster(__file__) + + +@pytest.fixture(scope="module") +def cluster_without_dns_cache_update(): + try: + cluster.start() + + yield cluster + + except Exception as ex: + print(ex) + + finally: + cluster.shutdown() + pass + + +node1 = cluster.add_instance( + "node1", + main_configs=["configs/listen_host.xml"], + user_configs=["configs/enable_hedged.xml"], + with_zookeeper=True, + ipv4_address="10.5.95.11", +) + +node2 = cluster.add_instance( + "node2", + main_configs=["configs/listen_host.xml"], + user_configs=["configs/enable_hedged.xml"], + with_zookeeper=True, + ipv4_address="10.5.95.12", +) + +# node1 - source with table, have invalid ipv6 +# node2 - destination, doing remote query +def test(cluster_without_dns_cache_update): + node1.query( + "CREATE TABLE test(t Date, label UInt8) ENGINE = MergeTree PARTITION BY t ORDER BY label;" + ) + node1.query( + "INSERT INTO test SELECT toDate('2022-12-28'), 1;" + ) + assert node1.query( + 'SELECT count(*) FROM test' + ) == '1\n' + + wrong_ip = '2001:3984:3989::1:1118' + + node2.exec_in_container( + (["bash", "-c", "echo '{} {}' >> /etc/hosts".format(wrong_ip, node1.name)]) + ) + node2.exec_in_container( + (["bash", "-c", "echo '{} {}' >> /etc/hosts".format(node1.ipv4_address, node1.name)]) + ) + + assert node1.query("SELECT count(*) from test") == "1\n" + node2.query("SYSTEM DROP DNS CACHE") + node1.query("SYSTEM DROP DNS CACHE") + assert node2.query(f"SELECT count(*) FROM remote('{node1.name}', default.test) limit 1;") == "1\n" + From fc94cc8b87fceb8b6631b72e34a6c10fdc197f83 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 7 Jul 2023 10:59:27 +0000 Subject: [PATCH 1341/1997] Update config for test --- .../test_async_connect_to_multiple_ips/configs/enable_hedged.xml | 1 - 1 file changed, 1 deletion(-) diff --git 
a/tests/integration/test_async_connect_to_multiple_ips/configs/enable_hedged.xml b/tests/integration/test_async_connect_to_multiple_ips/configs/enable_hedged.xml index 238370176af..399d886ee6a 100644 --- a/tests/integration/test_async_connect_to_multiple_ips/configs/enable_hedged.xml +++ b/tests/integration/test_async_connect_to_multiple_ips/configs/enable_hedged.xml @@ -2,7 +2,6 @@ 1 - 0 From e618dd05cc73b7ad38296e7c28f66b6f077343f8 Mon Sep 17 00:00:00 2001 From: Alexander Sapin Date: Fri, 7 Jul 2023 13:03:44 +0200 Subject: [PATCH 1342/1997] Fix clang tidy and race --- programs/server/Server.cpp | 1 + src/Storages/StorageReplicatedMergeTree.cpp | 16 ++++++++++++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 58cf3e5d210..4b47da9affb 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -2285,6 +2285,7 @@ void Server::updateServers( Poco::Util::AbstractConfiguration & previous_config = latest_config ? *latest_config : this->config(); std::vector all_servers; + all_servers.reserve(servers.size() + servers_to_start_before_tables.size()); for (auto & server : servers) all_servers.push_back(&server); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 0f5a52b275c..504cf0326f0 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -4890,14 +4890,23 @@ void StorageReplicatedMergeTree::shutdown() if (shutdown_called.exchange(true)) return; - if (!shutdown_prepared_called.load()) - flushAndPrepareForShutdown(); + flushAndPrepareForShutdown(); auto settings_ptr = getSettings(); if (!shutdown_deadline.has_value()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Shutdown deadline is not set in shutdown"); - waitForUniquePartsToBeFetchedByOtherReplicas(*shutdown_deadline); + try + { + waitForUniquePartsToBeFetchedByOtherReplicas(*shutdown_deadline); + } + catch (const Exception & ex) + { + if (ex.code() == ErrorCodes::LOGICAL_ERROR) + throw; + + tryLogCurrentException(log, __PRETTY_FUNCTION__); + } session_expired_callback_handler.reset(); stopOutdatedDataPartsLoadingTask(); @@ -4905,7 +4914,6 @@ void StorageReplicatedMergeTree::shutdown() partialShutdown(); part_moves_between_shards_orchestrator.shutdown(); - background_operations_assignee.finish(); { auto lock = queue.lockQueue(); From 50bda59a0d226b108ab1521ae6499d35bab01ad0 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Fri, 7 Jul 2023 11:05:42 +0000 Subject: [PATCH 1343/1997] Fix typo --- .../test_s3_zero_copy_replication/test.py | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/integration/test_s3_zero_copy_replication/test.py b/tests/integration/test_s3_zero_copy_replication/test.py index bc13c127610..2a4e0eece08 100644 --- a/tests/integration/test_s3_zero_copy_replication/test.py +++ b/tests/integration/test_s3_zero_copy_replication/test.py @@ -48,7 +48,7 @@ def get_large_objects_count(cluster, size=100, folder="data"): return counter -def check_objects_exisis(cluster, object_list, folder="data"): +def check_objects_exist(cluster, object_list, folder="data"): minio = cluster.minio_client for obj in object_list: if obj: @@ -466,7 +466,7 @@ def s3_zero_copy_unfreeze_base(cluster, unfreeze_query_template): assert objects01 == objects02 - check_objects_exisis(cluster, objects01) + check_objects_exist(cluster, objects01) node1.query("TRUNCATE TABLE 
unfreeze_test") node2.query("SYSTEM SYNC REPLICA unfreeze_test", timeout=30) @@ -477,12 +477,12 @@ def s3_zero_copy_unfreeze_base(cluster, unfreeze_query_template): assert objects01 == objects11 assert objects01 == objects12 - check_objects_exisis(cluster, objects11) + check_objects_exist(cluster, objects11) node1.query(f"{unfreeze_query_template} 'freeze_backup1'") wait_mutations(node1, "unfreeze_test", 10) - check_objects_exisis(cluster, objects12) + check_objects_exist(cluster, objects12) node2.query(f"{unfreeze_query_template} 'freeze_backup2'") wait_mutations(node2, "unfreeze_test", 10) @@ -540,8 +540,8 @@ def s3_zero_copy_drop_detached(cluster, unfreeze_query_template): wait_mutations(node1, "drop_detached_test", 10) wait_mutations(node2, "drop_detached_test", 10) - check_objects_exisis(cluster, objects1) - check_objects_exisis(cluster, objects2) + check_objects_exist(cluster, objects1) + check_objects_exist(cluster, objects2) node2.query( "ALTER TABLE drop_detached_test DROP DETACHED PARTITION '1'", @@ -551,8 +551,8 @@ def s3_zero_copy_drop_detached(cluster, unfreeze_query_template): wait_mutations(node1, "drop_detached_test", 10) wait_mutations(node2, "drop_detached_test", 10) - check_objects_exisis(cluster, objects1) - check_objects_exisis(cluster, objects2) + check_objects_exist(cluster, objects1) + check_objects_exist(cluster, objects2) node1.query( "ALTER TABLE drop_detached_test DROP DETACHED PARTITION '1'", @@ -562,7 +562,7 @@ def s3_zero_copy_drop_detached(cluster, unfreeze_query_template): wait_mutations(node1, "drop_detached_test", 10) wait_mutations(node2, "drop_detached_test", 10) - check_objects_exisis(cluster, objects1) + check_objects_exist(cluster, objects1) check_objects_not_exisis(cluster, objects_diff) node1.query( @@ -573,7 +573,7 @@ def s3_zero_copy_drop_detached(cluster, unfreeze_query_template): wait_mutations(node1, "drop_detached_test", 10) wait_mutations(node2, "drop_detached_test", 10) - check_objects_exisis(cluster, objects1) + check_objects_exist(cluster, objects1) node2.query( "ALTER TABLE drop_detached_test DROP DETACHED PARTITION '0'", @@ -682,7 +682,7 @@ def test_s3_zero_copy_keeps_data_after_mutation(started_cluster): wait_for_active_parts(node2, 4, "zero_copy_mutation") objects1 = node1.get_table_objects("zero_copy_mutation") - check_objects_exisis(cluster, objects1) + check_objects_exist(cluster, objects1) node1.query( """ @@ -710,7 +710,7 @@ def test_s3_zero_copy_keeps_data_after_mutation(started_cluster): nodeY = node2 objectsY = nodeY.get_table_objects("zero_copy_mutation") - check_objects_exisis(cluster, objectsY) + check_objects_exist(cluster, objectsY) nodeX.query( """ @@ -745,7 +745,7 @@ def test_s3_zero_copy_keeps_data_after_mutation(started_cluster): """ ) - check_objects_exisis(cluster, objectsY) + check_objects_exist(cluster, objectsY) nodeY.query( """ From d439db31397e8576a6e49e209bf069612ef9d2f5 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Fri, 7 Jul 2023 13:10:55 +0200 Subject: [PATCH 1344/1997] Print just signal number first, and only then get its description --- src/Daemon/BaseDaemon.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index f766880bd34..422f6ffb63f 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -314,6 +314,11 @@ private: /// That way we will have some duplicated info in the log but we don't loose important info /// in case of double fault. 
+ LOG_FATAL(log, "########## Short fault info ############"); + LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) Received signal {}", + VERSION_STRING, VERSION_OFFICIAL, daemon.build_id, daemon.git_hash, + thread_num, sig); + std::string signal_description = "Unknown signal"; /// Some of these are not really signals, but our own indications on failure reason. @@ -324,6 +329,8 @@ private: else if (sig >= 0) signal_description = strsignal(sig); // NOLINT(concurrency-mt-unsafe) // it is not thread-safe but ok in this context + LOG_FATAL(log, "Signal description: {}", signal_description); + String error_message; if (sig != SanitizerTrap) @@ -331,12 +338,6 @@ private: else error_message = "Sanitizer trap."; - LOG_FATAL(log, "########## Short fault info ############"); - - LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) Received signal {} ({})", - VERSION_STRING, VERSION_OFFICIAL, daemon.build_id, daemon.git_hash, - thread_num, signal_description, sig); - LOG_FATAL(log, fmt::runtime(error_message)); String bare_stacktrace_str; From 05649c7b384cb412fa9e25150413460cc969893e Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Fri, 7 Jul 2023 13:22:52 +0200 Subject: [PATCH 1345/1997] Removed duplicate header --- src/Storages/System/attachSystemTables.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Storages/System/attachSystemTables.cpp b/src/Storages/System/attachSystemTables.cpp index a9873c821ce..84965b3196b 100644 --- a/src/Storages/System/attachSystemTables.cpp +++ b/src/Storages/System/attachSystemTables.cpp @@ -1,4 +1,3 @@ -#include "Storages/System/StorageSystemJemalloc.h" #include "config.h" #include From 3dd9c09579887d5627a2486b3e0cddcc15b2487d Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Fri, 7 Jul 2023 11:25:48 +0000 Subject: [PATCH 1346/1997] Just save --- .../Optimizations/optimizePrewhere.cpp | 79 +++++++++++++++++++ .../MergeTreeBaseSelectProcessor.cpp | 2 + 2 files changed, 81 insertions(+) diff --git a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp index ca8a412bf2e..bcd3244b5a9 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB { @@ -60,6 +61,74 @@ void matchDAGOutputNodesOrderWithHeader(ActionsDAGPtr & actions_dag, const Block namespace QueryPlanOptimizations { +#ifdef WHATEVERSOMETHING +static void removeAliases(ActionsDAG * dag) +{ + using Node = ActionsDAG::Node; + struct Frame + { + const ActionsDAG::Node * node; + const ActionsDAG::Node * parent; + size_t next_child = 0; + }; + std::vector stack; + std::vector> aliases; + + /// collect aliases + auto output_nodes = dag->getOutputs(); + for (const auto * output_node : output_nodes) + { + stack.push_back({output_node, nullptr}); + while (!stack.empty()) + { + auto & frame = stack.back(); + const auto * parent = frame.parent; + const auto * node = frame.node; + + if (frame.next_child < node->children.size()) + { + auto next_frame = Frame{.node = node->children[frame.next_child], .parent = node}; + ++frame.next_child; + stack.push_back(next_frame); + continue; + } + + if (parent && node->type == ActionsDAG::ActionType::ALIAS) + aliases.emplace_back(const_cast(node), const_cast(parent)); + + stack.pop_back(); + } + } + + /// remove aliases from output nodes if any + for(auto it = 
output_nodes.begin(); it != output_nodes.end();) + { + if ((*it)->type == ActionsDAG::ActionType::ALIAS) + it = output_nodes.erase(it); + else + ++it; + } + + LOG_DEBUG(&Poco::Logger::get(__PRETTY_FUNCTION__), "aliases found: {}", aliases.size()); + + /// disconnect aliases + for(auto [alias, parent]: aliases) + { + /// find alias in parent's children and replace it with alias child + for (auto & child : parent->children) + { + if (child == alias) + { + child = alias->children.front(); + break; + } + } + } + + /// remove aliases + dag->removeUnusedActions(); +} +#endif void optimizePrewhere(Stack & stack, QueryPlan::Nodes & nodes) { @@ -162,6 +231,8 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes & nodes) storage.supportedPrewhereColumns(), &Poco::Logger::get("QueryPlanOptimizePrewhere")}; + LOG_DEBUG(&Poco::Logger::get(__PRETTY_FUNCTION__), "filter expression\n{}", filter_step->getExpression()->dumpDAG()); + auto optimize_result = where_optimizer.optimize(filter_step->getExpression(), filter_step->getFilterColumnName(), read_from_merge_tree->getContext(), @@ -178,6 +249,10 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes & nodes) prewhere_info->need_filter = true; auto & prewhere_filter_actions = optimize_result->prewhere_filter_actions; + LOG_DEBUG(&Poco::Logger::get(__PRETTY_FUNCTION__), "prewhere_filter_actions\n{}", prewhere_filter_actions->dumpDAG()); + + // removeAliases(prewhere_filter_actions.get()); + // LOG_DEBUG(&Poco::Logger::get(__PRETTY_FUNCTION__), "removeAliases\n{}", prewhere_filter_actions->dumpDAG()); ActionsChain actions_chain; @@ -260,7 +335,9 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes & nodes) prewhere_info->prewhere_column_name = prewere_filter_node_name; prewhere_info->remove_prewhere_column = !prewhere_actions_chain_node->getChildRequiredOutputColumnsNames().contains(prewere_filter_node_name); + LOG_DEBUG(&Poco::Logger::get(__PRETTY_FUNCTION__), "header BEFORE prewhere update\n{}", read_from_merge_tree->getOutputStream().header.dumpStructure()); read_from_merge_tree->updatePrewhereInfo(prewhere_info); + LOG_DEBUG(&Poco::Logger::get(__PRETTY_FUNCTION__), "header AFTER prewhere update\n{}", read_from_merge_tree->getOutputStream().header.dumpStructure()); QueryPlan::Node * replace_old_filter_node = nullptr; bool remove_filter_node = false; @@ -321,10 +398,12 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes & nodes) bool apply_match_step = false; + LOG_DEBUG(&Poco::Logger::get(__PRETTY_FUNCTION__), "read header\n{}", read_from_merge_tree->getOutputStream().header.dumpStructure()); /// If column order does not match old filter step column order, match dag output nodes with header if (!blocksHaveEqualStructure(read_from_merge_tree->getOutputStream().header, filter_step->getOutputStream().header)) { apply_match_step = true; + LOG_DEBUG(&Poco::Logger::get(__PRETTY_FUNCTION__), "rename_actions_dag\n{}", rename_actions_dag->dumpDAG()); matchDAGOutputNodesOrderWithHeader(rename_actions_dag, filter_step->getOutputStream().header); } diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp index 48adf36e678..d3d8c0f2bc8 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp @@ -644,6 +644,7 @@ Block IMergeTreeSelectAlgorithm::applyPrewhereActions(Block block, const Prewher if (prewhere_info->prewhere_actions) { block = prewhere_info->prewhere_actions->updateHeader(std::move(block)); + 
LOG_DEBUG(&Poco::Logger::get(__PRETTY_FUNCTION__), "updateHeader()\n{}", block.dumpStructure()); auto & prewhere_column = block.getByName(prewhere_info->prewhere_column_name); if (!prewhere_column.type->canBeUsedInBooleanContext()) @@ -654,6 +655,7 @@ Block IMergeTreeSelectAlgorithm::applyPrewhereActions(Block block, const Prewher if (prewhere_info->remove_prewhere_column) { + LOG_DEBUG(&Poco::Logger::get(__PRETTY_FUNCTION__), "remove_column_name\n{}", prewhere_info->prewhere_column_name); block.erase(prewhere_info->prewhere_column_name); } else if (prewhere_info->need_filter) From 6d798e0bde13416488409718fd2db6191dde1197 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 7 Jul 2023 10:16:36 +0000 Subject: [PATCH 1347/1997] Better check for current_thread --- src/Common/ThreadStatus.cpp | 9 +++++---- src/Common/ThreadStatus.h | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index 7a602afe7e7..b39ea7e8ea8 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -199,13 +199,14 @@ ThreadStatus::~ThreadStatus() if (deleter) deleter(); + chassert(!check_current_thread_on_destruction || current_thread == this); + /// Only change current_thread if it's currently being used by this ThreadStatus /// For example, PushingToViews chain creates and deletes ThreadStatus instances while running in the main query thread - if (check_current_thread_on_destruction) - { - assert(current_thread == this); + if (current_thread == this) current_thread = nullptr; - } + else if (check_current_thread_on_destruction) + LOG_ERROR(log, "current_thread contains invalid address"); } void ThreadStatus::updatePerformanceCounters() diff --git a/src/Common/ThreadStatus.h b/src/Common/ThreadStatus.h index 7c8dbdb68bd..aa1e3eea6e5 100644 --- a/src/Common/ThreadStatus.h +++ b/src/Common/ThreadStatus.h @@ -224,7 +224,7 @@ private: Poco::Logger * log = nullptr; - bool check_current_thread_on_destruction; + [[maybe_unused]] bool check_current_thread_on_destruction; public: explicit ThreadStatus(bool check_current_thread_on_destruction_ = true); From d9d0e9062a4f30775b1b0d32121fef3da1ea33bf Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 7 Jul 2023 11:41:01 +0000 Subject: [PATCH 1348/1997] Remove maybe_unused --- src/Common/ThreadStatus.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/ThreadStatus.h b/src/Common/ThreadStatus.h index aa1e3eea6e5..7c8dbdb68bd 100644 --- a/src/Common/ThreadStatus.h +++ b/src/Common/ThreadStatus.h @@ -224,7 +224,7 @@ private: Poco::Logger * log = nullptr; - [[maybe_unused]] bool check_current_thread_on_destruction; + bool check_current_thread_on_destruction; public: explicit ThreadStatus(bool check_current_thread_on_destruction_ = true); From 36e52efc3e7602e43628246562b2db70ca85e765 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 7 Jul 2023 11:57:12 +0000 Subject: [PATCH 1349/1997] Remove timeout --- .../01164_detach_attach_partition_race.sh | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/queries/0_stateless/01164_detach_attach_partition_race.sh b/tests/queries/0_stateless/01164_detach_attach_partition_race.sh index 3aec4c3445d..e645cb5aae7 100755 --- a/tests/queries/0_stateless/01164_detach_attach_partition_race.sh +++ b/tests/queries/0_stateless/01164_detach_attach_partition_race.sh @@ -2,7 +2,6 @@ # Tags: race CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) 
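# Note on the rewrite below, inferred from the comment it removes: bounding
# each worker with bash's built-in SECONDS counter instead of an external
# `timeout` lets every started query run to completion, so no orphaned child
# keeps sending queries after the loops exit - which is also why the
# "Table ... doesn't exist" workaround becomes unnecessary.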
-CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh @@ -15,16 +14,16 @@ $CLICKHOUSE_CLIENT -q "insert into mt values (3)" function thread_insert() { - while true; do - # It might be the case that the threads are terminated and exited, but some children didn't and they are still sending queries when we are dropping tables. - # That's why the "Table doesn't exist" error is allowed, while other errors don't. - $CLICKHOUSE_CLIENT -q "insert into mt values (rand())" 2>&1 | tr -d '\n' | rg -v "Table .+ doesn't exist"; + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do + $CLICKHOUSE_CLIENT -q "insert into mt values (rand())"; done } function thread_detach_attach() { - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do $CLICKHOUSE_CLIENT -q "alter table mt detach partition id 'all'"; $CLICKHOUSE_CLIENT -q "alter table mt attach partition id 'all'"; done @@ -32,7 +31,8 @@ function thread_detach_attach() function thread_drop_detached() { - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do $CLICKHOUSE_CLIENT --allow_drop_detached 1 -q "alter table mt drop detached partition id 'all'"; done } @@ -43,10 +43,10 @@ export -f thread_drop_detached; TIMEOUT=10 -timeout $TIMEOUT bash -c thread_insert & -timeout $TIMEOUT bash -c thread_detach_attach 2> /dev/null & -timeout $TIMEOUT bash -c thread_detach_attach 2> /dev/null & -timeout $TIMEOUT bash -c thread_drop_detached 2> /dev/null & +thread_insert $TIMEOUT & +thread_detach_attach $TIMEOUT 2> /dev/null & +thread_detach_attach $TIMEOUT 2> /dev/null & +thread_drop_detached $TIMEOUT 2> /dev/null & wait From 1e0d97c282b1415aed77dd7198ab244a84c7aea9 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 7 Jul 2023 12:19:10 +0000 Subject: [PATCH 1350/1997] Do not remove inputs after ActionsDAG::merge --- src/Interpreters/ActionsDAG.cpp | 2 +- .../0_stateless/02812_bug_with_unused_join_columns.reference | 0 .../queries/0_stateless/02812_bug_with_unused_join_columns.sql | 1 + 3 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02812_bug_with_unused_join_columns.reference create mode 100644 tests/queries/0_stateless/02812_bug_with_unused_join_columns.sql diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 906875dd314..46c14c503e4 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -1366,7 +1366,7 @@ ActionsDAGPtr ActionsDAG::merge(ActionsDAG && first, ActionsDAG && second) first.mergeInplace(std::move(second)); /// Drop unused inputs and, probably, some actions. 
- first.removeUnusedActions(); + first.removeUnusedActions(false); return std::make_shared(std::move(first)); } diff --git a/tests/queries/0_stateless/02812_bug_with_unused_join_columns.reference b/tests/queries/0_stateless/02812_bug_with_unused_join_columns.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02812_bug_with_unused_join_columns.sql b/tests/queries/0_stateless/02812_bug_with_unused_join_columns.sql new file mode 100644 index 00000000000..6c801b5b73e --- /dev/null +++ b/tests/queries/0_stateless/02812_bug_with_unused_join_columns.sql @@ -0,0 +1 @@ +SELECT concat(func.name, comb.name) AS x FROM system.functions AS func JOIN system.aggregate_function_combinators AS comb using name WHERE is_aggregate settings allow_experimental_analyzer=1; From fa7fe5277c99c036ff488997aab46b36c6901610 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 7 Jul 2023 12:25:13 +0000 Subject: [PATCH 1351/1997] Better comment. --- src/Interpreters/ActionsDAG.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 46c14c503e4..2f9fc7e5746 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -1365,7 +1365,7 @@ ActionsDAGPtr ActionsDAG::merge(ActionsDAG && first, ActionsDAG && second) { first.mergeInplace(std::move(second)); - /// Drop unused inputs and, probably, some actions. + /// Some actions could become unused. Do not drop inputs to preserve the header. first.removeUnusedActions(false); return std::make_shared(std::move(first)); From 39d0b309bd730748b52acfb32de729e8f8496f83 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Fri, 7 Jul 2023 13:15:26 +0000 Subject: [PATCH 1352/1997] Make own function with slices --- src/Functions/HasSubsequenceImpl.h | 187 ++++++++---------- src/Functions/hasSubsequence.cpp | 2 +- .../hasSubsequenceCaseInsensitive.cpp | 2 +- .../hasSubsequenceCaseInsensitiveUTF8.cpp | 2 +- src/Functions/hasSubsequenceUTF8.cpp | 2 +- 5 files changed, 84 insertions(+), 111 deletions(-) diff --git a/src/Functions/HasSubsequenceImpl.h b/src/Functions/HasSubsequenceImpl.h index bcb8e8e99e6..1396e64ade5 100644 --- a/src/Functions/HasSubsequenceImpl.h +++ b/src/Functions/HasSubsequenceImpl.h @@ -1,124 +1,109 @@ #pragma once + +#include +#include +#include +#include +#include namespace DB { namespace { -template -struct HasSubsequenceImpl -{ - using ResultType = UInt8; - static constexpr bool use_default_implementation_for_constants = false; - static constexpr bool supports_start_pos = false; +using namespace GatherUtils; + +template +class FunctionsHasSubsequenceImpl : public IFunction +{ +public: static constexpr auto name = Name::name; - static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {};} + static FunctionPtr create(ContextPtr) { return std::make_shared(); } - static void vectorConstant( - const ColumnString::Chars & haystack_data, - const ColumnString::Offsets & haystack_offsets, - const String & needle, - const ColumnPtr & /*start_pos*/, - PaddedPODArray & res, - [[maybe_unused]] ColumnUInt8 * /*res_null*/) + String getName() const override { return name; } + + bool isVariadic() const override { return false; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + size_t getNumberOfArguments() const override { return 2; } + + bool useDefaultImplementationForConstants() const override { return false; } + + 
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {};} + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { - if (needle.empty()) - { - for (auto & r : res) - r = 1; - return; - } + if (!isString(arguments[0])) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument of function {}", + arguments[0]->getName(), getName()); - ColumnString::Offset prev_haystack_offset = 0; - for (size_t i = 0; i < haystack_offsets.size(); ++i) - { - size_t haystack_size = haystack_offsets[i] - prev_haystack_offset - 1; - const char * haystack = reinterpret_cast(&haystack_data[prev_haystack_offset]); - res[i] = hasSubsequence(haystack, haystack_size, needle.c_str(), needle.size()); - prev_haystack_offset = haystack_offsets[i]; - } + if (!isString(arguments[1])) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument of function {}", + arguments[1]->getName(), getName()); + + return std::make_shared>(); } - static void vectorVector( - const ColumnString::Chars & haystack_data, - const ColumnString::Offsets & haystack_offsets, - const ColumnString::Chars & needle_data, - const ColumnString::Offsets & needle_offsets, - const ColumnPtr & /*start_pos*/, - PaddedPODArray & res, - ColumnUInt8 * /*res_null*/) + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t input_rows_count) const override { - ColumnString::Offset prev_haystack_offset = 0; - ColumnString::Offset prev_needle_offset = 0; + const ColumnPtr & column_haystack = arguments[0].column; + const ColumnPtr & column_needle = arguments[1].column; - size_t size = haystack_offsets.size(); + const ColumnConst * haystack_const_string = checkAndGetColumnConst(column_haystack.get()); + const ColumnConst * needle_const_string = checkAndGetColumnConst(column_needle.get()); + const ColumnString * haystack_string = checkAndGetColumn(&*column_haystack); + const ColumnString * needle_string = checkAndGetColumn(&*column_needle); - for (size_t i = 0; i < size; ++i) - { - size_t needle_size = needle_offsets[i] - prev_needle_offset - 1; - size_t haystack_size = haystack_offsets[i] - prev_haystack_offset - 1; + auto col_res = ColumnVector::create(); + typename ColumnVector::Container & vec_res = col_res->getData(); + vec_res.resize(input_rows_count); - if (0 == needle_size) - { - res[i] = 1; - } - else - { - const char * needle = reinterpret_cast(&needle_data[prev_needle_offset]); - const char * haystack = reinterpret_cast(&haystack_data[prev_haystack_offset]); - res[i] = hasSubsequence(haystack, haystack_size, needle, needle_size); - } + if (haystack_string && needle_string) + execute(StringSource{*haystack_string}, StringSource{*needle_string}, vec_res); + else if (haystack_string && needle_const_string) + execute(StringSource{*haystack_string}, ConstSource{*needle_const_string}, vec_res); + else if (haystack_const_string && needle_string) + execute(ConstSource{*haystack_const_string}, StringSource{*needle_string}, vec_res); + else if (haystack_const_string && needle_const_string) + execute(ConstSource{*haystack_const_string}, ConstSource{*needle_const_string}, vec_res); + else + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal column {}, first argument of function {} must be a string", + arguments[0].column->getName(), + getName()); - prev_haystack_offset = haystack_offsets[i]; - prev_needle_offset = needle_offsets[i]; - } + return col_res; } - static void constantVector( - const String & haystack, - 
const ColumnString::Chars & needle_data, - const ColumnString::Offsets & needle_offsets, - const ColumnPtr & /*start_pos*/, - PaddedPODArray & res, - ColumnUInt8 * /*res_null*/) +private: + + template + void execute( + SourceHaystack && haystacks, + SourceNeedle && needles, + PaddedPODArray & res_data) const { - ColumnString::Offset prev_needle_offset = 0; + size_t row_num = 0; - size_t size = needle_offsets.size(); - - for (size_t i = 0; i < size; ++i) + while (!haystacks.isEnd()) { - size_t needle_size = needle_offsets[i] - prev_needle_offset - 1; + [[maybe_unused]] auto haystack_slice = haystacks.getWhole(); + [[maybe_unused]] auto needle_slice = needles.getWhole(); - if (0 == needle_size) - { - res[i] = 1; - } - else - { - const char * needle = reinterpret_cast(&needle_data[prev_needle_offset]); - res[i] = hasSubsequence(haystack.c_str(), haystack.size(), needle, needle_size); - } - prev_needle_offset = needle_offsets[i]; - } - } + auto haystack = std::string(reinterpret_cast(haystack_slice.data), haystack_slice.size); + auto needle = std::string(reinterpret_cast(needle_slice.data), needle_slice.size); - static void constantConstant( - String haystack, - String needle, - const ColumnPtr & /*start_pos*/, - PaddedPODArray & res, - ColumnUInt8 * /*res_null*/) - { - size_t size = res.size(); - Impl::toLowerIfNeed(haystack); - Impl::toLowerIfNeed(needle); + Impl::toLowerIfNeed(haystack); + Impl::toLowerIfNeed(needle); - UInt8 result = hasSubsequence(haystack.c_str(), haystack.size(), needle.c_str(), needle.size()); - - for (size_t i = 0; i < size; ++i) - { - res[i] = result; + res_data[row_num] = hasSubsequence(haystack.c_str(), haystack.size(), needle.c_str(), needle.size()); + haystacks.next(); + needles.next(); + ++row_num; } } @@ -130,18 +115,6 @@ struct HasSubsequenceImpl ++j; return j == needle_size; } - - template - static void vectorFixedConstant(Args &&...) - { - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Function '{}' doesn't support FixedString haystack argument", name); - } - - template - static void vectorFixedVector(Args &&...) 
- { - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Function '{}' doesn't support FixedString haystack argument", name); - } }; } diff --git a/src/Functions/hasSubsequence.cpp b/src/Functions/hasSubsequence.cpp index bb1f295cee4..900e80f5524 100644 --- a/src/Functions/hasSubsequence.cpp +++ b/src/Functions/hasSubsequence.cpp @@ -18,7 +18,7 @@ struct NameHasSubsequence static constexpr auto name = "hasSubsequence"; }; -using FunctionHasSubsequence = FunctionsStringSearch>; +using FunctionHasSubsequence = FunctionsHasSubsequenceImpl; } REGISTER_FUNCTION(hasSubsequence) diff --git a/src/Functions/hasSubsequenceCaseInsensitive.cpp b/src/Functions/hasSubsequenceCaseInsensitive.cpp index fe50ada9be9..dbac62d7f09 100644 --- a/src/Functions/hasSubsequenceCaseInsensitive.cpp +++ b/src/Functions/hasSubsequenceCaseInsensitive.cpp @@ -17,7 +17,7 @@ struct NameHasSubsequenceCaseInsensitive static constexpr auto name = "hasSubsequenceCaseInsensitive"; }; -using FunctionHasSubsequenceCaseInsensitive = FunctionsStringSearch>; +using FunctionHasSubsequenceCaseInsensitive = FunctionsHasSubsequenceImpl; } REGISTER_FUNCTION(hasSubsequenceCaseInsensitive) diff --git a/src/Functions/hasSubsequenceCaseInsensitiveUTF8.cpp b/src/Functions/hasSubsequenceCaseInsensitiveUTF8.cpp index 2908c284a25..c104ff52857 100644 --- a/src/Functions/hasSubsequenceCaseInsensitiveUTF8.cpp +++ b/src/Functions/hasSubsequenceCaseInsensitiveUTF8.cpp @@ -17,7 +17,7 @@ struct NameHasSubsequenceCaseInsensitiveUTF8 static constexpr auto name = "hasSubsequenceCaseInsensitiveUTF8"; }; -using FunctionHasSubsequenceCaseInsensitiveUTF8 = FunctionsStringSearch>; +using FunctionHasSubsequenceCaseInsensitiveUTF8 = FunctionsHasSubsequenceImpl; } REGISTER_FUNCTION(hasSubsequenceCaseInsensitiveUTF8) diff --git a/src/Functions/hasSubsequenceUTF8.cpp b/src/Functions/hasSubsequenceUTF8.cpp index c0811de6575..c67ce7d9c74 100644 --- a/src/Functions/hasSubsequenceUTF8.cpp +++ b/src/Functions/hasSubsequenceUTF8.cpp @@ -18,7 +18,7 @@ struct NameHasSubsequenceUTF8 static constexpr auto name = "hasSubsequenceUTF8"; }; -using FunctionHasSubsequenceUTF8 = FunctionsStringSearch>; +using FunctionHasSubsequenceUTF8 = FunctionsHasSubsequenceImpl; } REGISTER_FUNCTION(hasSubsequenceUTF8) From ee33000fc24367166ebf56772b0be4ca0ee25192 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 7 Jul 2023 14:08:54 +0000 Subject: [PATCH 1353/1997] Fixing tests. 
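Presumably this fixes fallout from keeping inputs alive in ActionsDAG::merge: with removeUnusedActions(false), only nodes that are actually registered in the DAG's `inputs` list should be force-kept, otherwise stray INPUT-typed nodes can never be pruned. A minimal standalone sketch of that membership guard, with hypothetical simplified types rather than the real ActionsDAG interface:

```cpp
#include <stack>
#include <unordered_set>
#include <vector>

enum class ActionType { INPUT, FUNCTION, ALIAS };

struct Node
{
    ActionType type = ActionType::FUNCTION;
    std::vector<const Node *> children;
};

/// Everything reachable from the outputs stays; an INPUT-typed node is
/// force-kept only when it is also registered in `inputs`, so an orphaned
/// INPUT node remains removable.
std::unordered_set<const Node *> collectUsedNodes(
    const std::vector<Node> & nodes,
    const std::vector<const Node *> & inputs,
    const std::vector<const Node *> & outputs,
    bool allow_remove_inputs)
{
    std::unordered_set<const Node *> used_inputs(inputs.begin(), inputs.end());
    std::unordered_set<const Node *> visited;
    std::stack<const Node *> stack;

    for (const auto * node : outputs)
        if (visited.insert(node).second)
            stack.push(node);

    for (const auto & node : nodes)
        if (node.type == ActionType::INPUT && !allow_remove_inputs && used_inputs.contains(&node))
            if (visited.insert(&node).second)
                stack.push(&node);

    while (!stack.empty())
    {
        const auto * node = stack.top();
        stack.pop();
        for (const auto * child : node->children)
            if (visited.insert(child).second)
                stack.push(child);
    }

    return visited;
}
```

The real change below folds this check into the existing traversal instead of running it as a separate pass.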
---
 src/Interpreters/ActionsDAG.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp
index 2f9fc7e5746..e68e2580231 100644
--- a/src/Interpreters/ActionsDAG.cpp
+++ b/src/Interpreters/ActionsDAG.cpp
@@ -465,8 +465,12 @@ void ActionsDAG::removeUnusedActions(const Names & required_names, bool allow_re
 void ActionsDAG::removeUnusedActions(bool allow_remove_inputs, bool allow_constant_folding)
 {
     std::unordered_set<const Node *> visited_nodes;
+    std::unordered_set<const Node *> used_inputs;
     std::stack<const Node *> stack;

+    for (const auto * input : inputs)
+        used_inputs.insert(input);
+
     for (const auto * node : outputs)
     {
         visited_nodes.insert(node);
@@ -484,7 +488,7 @@
             stack.push(&node);
         }

-        if (node.type == ActionType::INPUT && !allow_remove_inputs)
+        if (node.type == ActionType::INPUT && !allow_remove_inputs && used_inputs.contains(&node))
            visited_nodes.insert(&node);
     }

From e08f140d62988cd0340ec75f441891a2c01539c3 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov
Date: Fri, 7 Jul 2023 17:32:10 +0300
Subject: [PATCH 1354/1997] Update 02254_projection_broken_part.sh

---
 tests/queries/0_stateless/02254_projection_broken_part.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/02254_projection_broken_part.sh b/tests/queries/0_stateless/02254_projection_broken_part.sh
index 6ba5093f234..3521d1d9d16 100755
--- a/tests/queries/0_stateless/02254_projection_broken_part.sh
+++ b/tests/queries/0_stateless/02254_projection_broken_part.sh
@@ -26,7 +26,7 @@ path=$($CLICKHOUSE_CLIENT -q "select path from system.parts where database='$CLI
 $CLICKHOUSE_CLIENT -q "select throwIf(substring('$path', 1, 1) != '/', 'Path is relative: $path')" || exit
 rm -f "$path/ab.proj/data.bin"

-$CLICKHOUSE_CLIENT -q "select 3, sum(b) from projection_broken_parts_1 group by a;" 2>/dev/null
+$CLICKHOUSE_CLIENT -q "select 3, sum(b) from projection_broken_parts_1 group by a format Null;" 2>/dev/null

 num_tries=0
 while ! $CLICKHOUSE_CLIENT -q "select 4, sum(b) from projection_broken_parts_1 group by a format Null;" 2>/dev/null; do
From 05b7da78130b21367b69a2cc22a319be11de8207 Mon Sep 17 00:00:00 2001
From: DanRoscigno
Date: Fri, 7 Jul 2023 10:32:44 -0400
Subject: [PATCH 1355/1997] add doc note for MongoDB Atlas

---
 docs/en/engines/table-engines/integrations/mongodb.md | 9 +++++++++
 docs/en/sql-reference/table-functions/mongodb.md      | 8 ++++++++
 2 files changed, 17 insertions(+)

diff --git a/docs/en/engines/table-engines/integrations/mongodb.md b/docs/en/engines/table-engines/integrations/mongodb.md
index 912f81573db..f87e8da8b5b 100644
--- a/docs/en/engines/table-engines/integrations/mongodb.md
+++ b/docs/en/engines/table-engines/integrations/mongodb.md
@@ -33,6 +33,15 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name

 - `options` — MongoDB connection string options (optional parameter).
+:::tip +If you are using the MongoDB Atlas cloud offering please add these options: + +``` +'connectTimeoutMS=10000&ssl=true&authSource=admin' +``` + +::: + ## Usage Example {#usage-example} Create a table in ClickHouse which allows to read data from MongoDB collection: diff --git a/docs/en/sql-reference/table-functions/mongodb.md b/docs/en/sql-reference/table-functions/mongodb.md index aad60a7003c..a483414c0d4 100644 --- a/docs/en/sql-reference/table-functions/mongodb.md +++ b/docs/en/sql-reference/table-functions/mongodb.md @@ -30,6 +30,14 @@ mongodb(host:port, database, collection, user, password, structure [, options]) - `options` - MongoDB connection string options (optional parameter). +:::tip +If you are using the MongoDB Atlas cloud offering please add these options: + +``` +'connectTimeoutMS=10000&ssl=true&authSource=admin' +``` + +::: **Returned Value** From ed37b01b515ea204223dd03cee5482ee6faad351 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 7 Jul 2023 14:39:11 +0000 Subject: [PATCH 1356/1997] Fix style --- .../__init__.py | 0 .../test.py | 29 ++++++++++++------- 2 files changed, 18 insertions(+), 11 deletions(-) create mode 100644 tests/integration/test_async_connect_to_multiple_ips/__init__.py diff --git a/tests/integration/test_async_connect_to_multiple_ips/__init__.py b/tests/integration/test_async_connect_to_multiple_ips/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_async_connect_to_multiple_ips/test.py b/tests/integration/test_async_connect_to_multiple_ips/test.py index 0c18a316d4b..acc4d24d0fa 100644 --- a/tests/integration/test_async_connect_to_multiple_ips/test.py +++ b/tests/integration/test_async_connect_to_multiple_ips/test.py @@ -36,30 +36,37 @@ node2 = cluster.add_instance( ipv4_address="10.5.95.12", ) + # node1 - source with table, have invalid ipv6 # node2 - destination, doing remote query def test(cluster_without_dns_cache_update): node1.query( "CREATE TABLE test(t Date, label UInt8) ENGINE = MergeTree PARTITION BY t ORDER BY label;" ) - node1.query( - "INSERT INTO test SELECT toDate('2022-12-28'), 1;" - ) - assert node1.query( - 'SELECT count(*) FROM test' - ) == '1\n' - - wrong_ip = '2001:3984:3989::1:1118' + node1.query("INSERT INTO test SELECT toDate('2022-12-28'), 1;") + assert node1.query("SELECT count(*) FROM test") == "1\n" + + wrong_ip = "2001:3984:3989::1:1118" node2.exec_in_container( (["bash", "-c", "echo '{} {}' >> /etc/hosts".format(wrong_ip, node1.name)]) ) node2.exec_in_container( - (["bash", "-c", "echo '{} {}' >> /etc/hosts".format(node1.ipv4_address, node1.name)]) + ( + [ + "bash", + "-c", + "echo '{} {}' >> /etc/hosts".format(node1.ipv4_address, node1.name), + ] + ) ) assert node1.query("SELECT count(*) from test") == "1\n" node2.query("SYSTEM DROP DNS CACHE") node1.query("SYSTEM DROP DNS CACHE") - assert node2.query(f"SELECT count(*) FROM remote('{node1.name}', default.test) limit 1;") == "1\n" - + assert ( + node2.query( + f"SELECT count(*) FROM remote('{node1.name}', default.test) limit 1;" + ) + == "1\n" + ) From 0bd16d47be2c2040ab1d6787e953b0c4154ee0a1 Mon Sep 17 00:00:00 2001 From: Slach Date: Fri, 7 Jul 2023 19:44:20 +0500 Subject: [PATCH 1357/1997] fix documentation insconsistency about additional_tables_filter during reproduce https://github.com/ClickHouse/ClickHouse/issues/51948 Signed-off-by: Slach --- docs/en/operations/settings/settings.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/en/operations/settings/settings.md 
b/docs/en/operations/settings/settings.md index 5f6cf98646b..195a9e26b53 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -17,7 +17,8 @@ Default value: 0. **Example** ``` sql -insert into table_1 values (1, 'a'), (2, 'bb'), (3, 'ccc'), (4, 'dddd'); +INSERT INTO table_1 VALUES (1, 'a'), (2, 'bb'), (3, 'ccc'), (4, 'dddd'); +SELECT * FROM table_1; ``` ```response ┌─x─┬─y────┐ @@ -30,7 +31,7 @@ insert into table_1 values (1, 'a'), (2, 'bb'), (3, 'ccc'), (4, 'dddd'); ```sql SELECT * FROM table_1 -SETTINGS additional_table_filters = (('table_1', 'x != 2')) +SETTINGS additional_table_filters = {'table_1': 'x != 2'} ``` ```response ┌─x─┬─y────┐ @@ -50,7 +51,8 @@ Default value: `''`. **Example** ``` sql -insert into table_1 values (1, 'a'), (2, 'bb'), (3, 'ccc'), (4, 'dddd'); +INSERT INTO table_1 VALUES (1, 'a'), (2, 'bb'), (3, 'ccc'), (4, 'dddd'); +SElECT * FROM table_1; ``` ```response ┌─x─┬─y────┐ From 50ea0855bf622ede96cb9726d5010d03c8dbebf4 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 7 Jul 2023 17:47:09 +0300 Subject: [PATCH 1358/1997] Update 02439_merge_selecting_partitions.sql (#51862) * Update 02439_merge_selecting_partitions.sql * Update 02439_merge_selecting_partitions.reference * Update 02439_merge_selecting_partitions.reference * fix --- .../0_stateless/02439_merge_selecting_partitions.reference | 1 - .../0_stateless/02439_merge_selecting_partitions.sql | 6 ++++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/02439_merge_selecting_partitions.reference b/tests/queries/0_stateless/02439_merge_selecting_partitions.reference index e836994b3aa..e69de29bb2d 100644 --- a/tests/queries/0_stateless/02439_merge_selecting_partitions.reference +++ b/tests/queries/0_stateless/02439_merge_selecting_partitions.reference @@ -1 +0,0 @@ -/test/02439/s1/default/block_numbers/123 diff --git a/tests/queries/0_stateless/02439_merge_selecting_partitions.sql b/tests/queries/0_stateless/02439_merge_selecting_partitions.sql index 88ce2834d6b..3d0c0af84d5 100644 --- a/tests/queries/0_stateless/02439_merge_selecting_partitions.sql +++ b/tests/queries/0_stateless/02439_merge_selecting_partitions.sql @@ -22,7 +22,9 @@ select sleepEachRow(3) as higher_probability_of_reproducing_the_issue format Nul system flush logs; -- it should not list unneeded partitions where we cannot merge anything -select distinct path from system.zookeeper_log where path like '/test/02439/s1/' || currentDatabase() || '/block_numbers/%' - and op_num in ('List', 'SimpleList', 'FilteredList') and path not like '%/block_numbers/1'; +select * from system.zookeeper_log where path like '/test/02439/s1/' || currentDatabase() || '/block_numbers/%' + and op_num in ('List', 'SimpleList', 'FilteredList') + and path not like '%/block_numbers/1' and path not like '%/block_numbers/123' + and event_time >= now() - interval 1 minute; drop table rmt; From eed1ecb6ba7ba4fdebd1c572881d064c66a0a102 Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 7 Jul 2023 15:01:23 +0000 Subject: [PATCH 1359/1997] Revert "Remove parts in order for object storage always" This reverts commit c35294317dbff31b8ff8b48f6256162d6d5dc02e. 
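Concurrent part removal is only unsafe when parts can share hardlinked blobs, which requires zero-copy replication; the revert narrows the in-order path back to that case instead of applying it to every remote disk. A condensed sketch of the restored decision, written here as a hypothetical free function (in the real code this logic is inline in MergeTreeData::grabOldParts):

```cpp
#include <memory>
#include <vector>

struct Disk
{
    bool supportZeroCopyReplication() const { return support_zero_copy; }
    bool support_zero_copy = false;
};

/// Removal order matters only if replication is on, the zero-copy setting is
/// enabled, and at least one disk can actually share blobs between replicas.
bool needRemovePartsInOrder(
    bool supports_replication,
    bool allow_remote_fs_zero_copy_replication,
    const std::vector<std::shared_ptr<Disk>> & disks)
{
    if (!supports_replication || !allow_remote_fs_zero_copy_replication)
        return false;

    for (const auto & disk : disks)
        if (disk->supportZeroCopyReplication())
            return true;

    return false;
}
```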
--- src/Storages/MergeTree/MergeTreeData.cpp | 34 +++++++++--------------- 1 file changed, 12 insertions(+), 22 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 0ef71895999..fa9bfd38a23 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -2137,20 +2137,20 @@ MergeTreeData::DataPartsVector MergeTreeData::grabOldParts(bool force) /// Please don't use "zero-copy replication" (a non-production feature) in production. /// It is not ready for production usage. Don't use it. - /// It also is disabled for any object storage, because it can lead to race conditions on blob removal. - /// (see comment at `clearPartsFromFilesystemImpl`). - bool need_remove_parts_in_order = false; + bool need_remove_parts_in_order = supportsReplication() && getSettings()->allow_remote_fs_zero_copy_replication; - if (supportsReplication()) + if (need_remove_parts_in_order) { + bool has_zero_copy_disk = false; for (const auto & disk : getDisks()) { - if (disk->isRemote()) + if (disk->supportZeroCopyReplication()) { - need_remove_parts_in_order = true; + has_zero_copy_disk = true; break; } } + need_remove_parts_in_order = has_zero_copy_disk; } std::vector parts_to_delete; @@ -2394,28 +2394,18 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t std::mutex part_names_mutex; auto runner = threadPoolCallbackRunner(getPartsCleaningThreadPool().get(), "PartsCleaning"); - /** Straightforward concurrent parts removal can be applied for the case - * when we have parts on object storage disk + at least some of them were mutated - * (thus, can contains hardlinks to files in the previous parts). - * If we are deleting parts that contains hardlinks to the same file we may face into race condition - * and delete only local metadata files, but not the blobs on object storage. - * Given that, we remove in parallel only "independent" parts that don't have such hardlinks. - * Note that it also may be applicable for the regular MergeTree, fixed only for Replicated. - * - * To avoid this we need to fix race conditions on parts and blob removal. - */ + /// This flag disallow straightforward concurrent parts removal. It's required only in case + /// when we have parts on zero-copy disk + at least some of them were mutated. bool remove_parts_in_order = false; - if (dynamic_cast(this) != nullptr) + if (settings->allow_remote_fs_zero_copy_replication && dynamic_cast(this) != nullptr) { remove_parts_in_order = std::any_of( parts_to_remove.begin(), parts_to_remove.end(), - [] (const auto & data_part) - { - return data_part->isStoredOnRemoteDisk() && data_part->info.getMutationVersion() > 0; - } + [] (const auto & data_part) { return data_part->isStoredOnRemoteDiskWithZeroCopySupport() && data_part->info.getMutationVersion() > 0; } ); } + if (!remove_parts_in_order) { /// NOTE: Under heavy system load you may get "Cannot schedule a task" from ThreadPool. @@ -2451,7 +2441,7 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t /// NOTE: Under heavy system load you may get "Cannot schedule a task" from ThreadPool. 
LOG_DEBUG( - log, "Removing {} parts from filesystem (concurrently in order): Parts: [{}]", parts_to_remove.size(), fmt::join(parts_to_remove, ", ")); + log, "Removing {} parts from filesystem (concurrently): Parts: [{}]", parts_to_remove.size(), fmt::join(parts_to_remove, ", ")); /// We have "zero copy replication" parts and we are going to remove them in parallel. /// The problem is that all parts in a mutation chain must be removed sequentially to avoid "key does not exits" issues. From 227e415d6d71ca49b486052513786c5f050a6279 Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 7 Jul 2023 15:08:21 +0000 Subject: [PATCH 1360/1997] Check refcount in `RemoveManyObjectStorageOperation::finalize` instead of `execute` --- .../DiskObjectStorageTransaction.cpp | 39 ++++++++++++------- src/Disks/ObjectStorages/IMetadataStorage.h | 5 ++- .../MetadataStorageFromDisk.cpp | 7 +++- .../ObjectStorages/MetadataStorageFromDisk.h | 5 ++- ...taStorageFromDiskTransactionOperations.cpp | 5 +++ ...dataStorageFromDiskTransactionOperations.h | 12 ++++++ .../MetadataStorageFromPlainObjectStorage.cpp | 5 ++- .../MetadataStorageFromPlainObjectStorage.h | 5 ++- 8 files changed, 63 insertions(+), 20 deletions(-) diff --git a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp index bd66ada492f..f3dbac445a5 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp @@ -6,6 +6,8 @@ #include #include +#include + namespace DB { @@ -150,7 +152,15 @@ struct RemoveManyObjectStorageOperation final : public IDiskObjectStorageOperati RemoveBatchRequest remove_paths; bool keep_all_batch_data; NameSet file_names_remove_metadata_only; - StoredObjects objects_to_remove; + + struct ObjectsToRemove + { + StoredObjects objects; + UnlinkMetadataFileOperationOutcomePtr unlink_outcome; + }; + + std::vector objects_to_remove; + bool remove_from_cache = false; RemoveManyObjectStorageOperation( @@ -174,7 +184,6 @@ struct RemoveManyObjectStorageOperation final : public IDiskObjectStorageOperati { for (const auto & [path, if_exists] : remove_paths) { - if (!metadata_storage.exists(path)) { if (if_exists) @@ -188,14 +197,12 @@ struct RemoveManyObjectStorageOperation final : public IDiskObjectStorageOperati try { - uint32_t hardlink_count = metadata_storage.getHardlinkCount(path); - auto objects = metadata_storage.getStorageObjects(path); - - tx->unlinkMetadata(path); - - /// File is really redundant - if (hardlink_count == 0 && !keep_all_batch_data && !file_names_remove_metadata_only.contains(fs::path(path).filename())) - std::move(objects.begin(), objects.end(), std::back_inserter(objects_to_remove)); + auto unlink_outcome = tx->unlinkMetadata(path); + if (unlink_outcome && !keep_all_batch_data && !file_names_remove_metadata_only.contains(fs::path(path).filename())) + { + auto objects = metadata_storage.getStorageObjects(path); + objects_to_remove.emplace_back(ObjectsToRemove{std::move(objects), std::move(unlink_outcome)}); + } } catch (const Exception & e) { @@ -215,15 +222,21 @@ struct RemoveManyObjectStorageOperation final : public IDiskObjectStorageOperati void undo() override { - } void finalize() override { + StoredObjects remove_from_remote; + for (auto && [objects, unlink_outcome] : objects_to_remove) + { + if (unlink_outcome->num_hardlinks == 0) + std::move(objects.begin(), objects.end(), std::back_inserter(remove_from_remote)); + } + /// Read comment inside RemoveObjectStorageOperation class 
/// TL;DR Don't pay any attention to 404 status code - if (!objects_to_remove.empty()) - object_storage.removeObjectsIfExist(objects_to_remove); + if (!remove_from_remote.empty()) + object_storage.removeObjectsIfExist(remove_from_remote); } }; diff --git a/src/Disks/ObjectStorages/IMetadataStorage.h b/src/Disks/ObjectStorages/IMetadataStorage.h index 00150df9fa3..264c481ee08 100644 --- a/src/Disks/ObjectStorages/IMetadataStorage.h +++ b/src/Disks/ObjectStorages/IMetadataStorage.h @@ -22,6 +22,8 @@ namespace ErrorCodes } class IMetadataStorage; +struct UnlinkMetadataFileOperationOutcome; +using UnlinkMetadataFileOperationOutcomePtr = std::shared_ptr; /// Tries to provide some "transactions" interface, which allow /// to execute (commit) operations simultaneously. We don't provide @@ -127,9 +129,10 @@ public: /// Unlink metadata file and do something special if required /// By default just remove file (unlink file). - virtual void unlinkMetadata(const std::string & path) + virtual UnlinkMetadataFileOperationOutcomePtr unlinkMetadata(const std::string & path) { unlinkFile(path); + return nullptr; } virtual ~IMetadataTransaction() = default; diff --git a/src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp b/src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp index 9461a82845f..53428c2f6e1 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp @@ -340,9 +340,12 @@ void MetadataStorageFromDiskTransaction::addBlobToMetadata(const std::string & p addOperation(std::make_unique(path, blob_name, metadata_storage.object_storage_root_path, size_in_bytes, *metadata_storage.disk, metadata_storage)); } -void MetadataStorageFromDiskTransaction::unlinkMetadata(const std::string & path) +UnlinkMetadataFileOperationOutcomePtr MetadataStorageFromDiskTransaction::unlinkMetadata(const std::string & path) { - addOperation(std::make_unique(path, *metadata_storage.disk, metadata_storage)); + auto operation = std::make_unique(path, *metadata_storage.getDisk(), metadata_storage); + auto result = operation->outcome; + addOperation(std::move(operation)); + return result; } } diff --git a/src/Disks/ObjectStorages/MetadataStorageFromDisk.h b/src/Disks/ObjectStorages/MetadataStorageFromDisk.h index 5273f0b041e..b518f5e3622 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromDisk.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromDisk.h @@ -11,6 +11,9 @@ namespace DB { +struct UnlinkMetadataFileOperationOutcome; +using UnlinkMetadataFileOperationOutcomePtr = std::shared_ptr; + /// Store metadata on a separate disk /// (used for object storages, like S3 and related). 
class MetadataStorageFromDisk final : public IMetadataStorage @@ -131,7 +134,7 @@ public: void replaceFile(const std::string & path_from, const std::string & path_to) override; - void unlinkMetadata(const std::string & path) override; + UnlinkMetadataFileOperationOutcomePtr unlinkMetadata(const std::string & path) override; }; diff --git a/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp b/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp index 7463622cb06..78e8764f8fc 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp @@ -319,6 +319,8 @@ void UnlinkMetadataFileOperation::execute(std::unique_lock & metada write_operation = std::make_unique(path, disk, metadata->serializeToString()); write_operation->execute(metadata_lock); } + outcome->num_hardlinks = ref_count; + unlink_operation = std::make_unique(path, disk); unlink_operation->execute(metadata_lock); } @@ -334,6 +336,9 @@ void UnlinkMetadataFileOperation::undo() if (write_operation) write_operation->undo(); + + /// Update outcome to reflect the fact that we have restored the file. + outcome->num_hardlinks++; } void SetReadonlyFileOperation::execute(std::unique_lock & metadata_lock) diff --git a/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h b/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h index d8e4892a0a5..4662ebc3423 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h @@ -3,6 +3,8 @@ #include #include +#include + namespace DB { class MetadataStorageFromDisk; @@ -242,9 +244,19 @@ private: std::unique_ptr write_operation; }; +/// Return the result of operation to the caller. +/// It is used in `IDiskObjectStorageOperation::finalize` after metadata transaction executed to make decision on blob removal. +struct UnlinkMetadataFileOperationOutcome +{ + UInt32 num_hardlinks = std::numeric_limits::max(); +}; + +using UnlinkMetadataFileOperationOutcomePtr = std::shared_ptr; struct UnlinkMetadataFileOperation final : public IMetadataOperation { + const UnlinkMetadataFileOperationOutcomePtr outcome = std::make_shared(); + UnlinkMetadataFileOperation( const std::string & path_, IDisk & disk_, diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp index c119e9f3adc..3650c7eaac8 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp @@ -135,9 +135,10 @@ void MetadataStorageFromPlainObjectStorageTransaction::addBlobToMetadata( { /// Noop, local metadata files is only one file, it is the metadata file itself. } -void MetadataStorageFromPlainObjectStorageTransaction::unlinkMetadata(const std::string &) + +UnlinkMetadataFileOperationOutcomePtr MetadataStorageFromPlainObjectStorageTransaction::unlinkMetadata(const std::string &) { - /// Noop, no separate metadata. 
+ return nullptr; } } diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h index fb5b6d0757c..bd068c1362f 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h @@ -9,6 +9,9 @@ namespace DB { +struct UnlinkMetadataFileOperationOutcome; +using UnlinkMetadataFileOperationOutcomePtr = std::shared_ptr; + /// Object storage is used as a filesystem, in a limited form: /// - no directory concept, files only /// - no stat/chmod/... @@ -104,7 +107,7 @@ public: void unlinkFile(const std::string & path) override; - void unlinkMetadata(const std::string & path) override; + UnlinkMetadataFileOperationOutcomePtr unlinkMetadata(const std::string & path) override; void commit() override { From 8266067e1a650453968f278f64e20bd4addc7aa2 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 7 Jul 2023 19:09:55 +0300 Subject: [PATCH 1361/1997] Fixed style check --- src/Dictionaries/CacheDictionary.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Dictionaries/CacheDictionary.cpp b/src/Dictionaries/CacheDictionary.cpp index e27e25ea7c4..3011151ef00 100644 --- a/src/Dictionaries/CacheDictionary.cpp +++ b/src/Dictionaries/CacheDictionary.cpp @@ -549,12 +549,12 @@ void CacheDictionary::update(CacheDictionaryUpdateUnitPtr Date: Fri, 7 Jul 2023 18:39:20 +0200 Subject: [PATCH 1362/1997] comments for the tests --- ...nal_block_structure_mismatch_bug.reference | 1 - ...791_final_block_structure_mismatch_bug.sql | 38 ++++++++----------- 2 files changed, 15 insertions(+), 24 deletions(-) diff --git a/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.reference b/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.reference index ca810c46a2d..a8401b1cae8 100644 --- a/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.reference +++ b/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.reference @@ -7,4 +7,3 @@ 1 2 3 -2 diff --git a/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.sql b/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.sql index a82e43d81f4..394e3bff87b 100644 --- a/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.sql +++ b/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.sql @@ -17,10 +17,18 @@ INSERT INTO test_block_mismatch VALUES (1, toDateTime('2023-02-02 12:12:12')); INSERT INTO test_block_mismatch VALUES (1, toDateTime('2023-02-02 12:12:12')); SELECT count(*) FROM test_block_mismatch FINAL; +optimize table test_block_mismatch final; +system stop merges test_block_mismatch; + INSERT INTO test_block_mismatch VALUES (2, toDateTime('2023-01-01 12:12:12')); INSERT INTO test_block_mismatch VALUES (2, toDateTime('2023-01-01 12:12:12')); +-- one lonely part in 2023-02-02 partition and 3 parts in 2023-01-01 partition. +-- lonely part will not be processed by PartsSplitter and 2023-01-01's parts will be - previously this led to the `Block structure mismatch in Pipe::unitePipes` exception. 
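+-- (in other words: FINAL now has to unite a pipe produced by PartsSplitter for
+-- the three-part partition with a plain pipe for the lonely part, and the two
+-- must agree on the header)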
SELECT count(*) FROM test_block_mismatch FINAL; + +-- variations of the test above with slightly modified table definitions + CREATE TABLE test_block_mismatch_sk1 ( a UInt32, @@ -39,10 +47,14 @@ INSERT INTO test_block_mismatch_sk1 VALUES (1, toDateTime('2023-02-02 12:12:12') INSERT INTO test_block_mismatch_sk1 VALUES (1, toDateTime('2023-02-02 12:12:12')); SELECT count(*) FROM test_block_mismatch_sk1 FINAL; +optimize table test_block_mismatch_sk1 final; +system stop merges test_block_mismatch_sk1; + INSERT INTO test_block_mismatch_sk1 VALUES (2, toDateTime('2023-01-01 12:12:12')); INSERT INTO test_block_mismatch_sk1 VALUES (2, toDateTime('2023-01-01 12:12:12')); SELECT count(*) FROM test_block_mismatch_sk1 FINAL; + CREATE TABLE test_block_mismatch_sk2 ( a UInt32, @@ -61,29 +73,9 @@ INSERT INTO test_block_mismatch_sk2 VALUES (1, toDateTime('2023-02-02 12:12:12') INSERT INTO test_block_mismatch_sk2 VALUES (1, toDateTime('2023-02-02 12:12:12')); SELECT count(*) FROM test_block_mismatch_sk2 FINAL; +optimize table test_block_mismatch_sk2 final; +system stop merges test_block_mismatch_sk2; + INSERT INTO test_block_mismatch_sk2 VALUES (2, toDateTime('2023-01-01 12:12:12')); INSERT INTO test_block_mismatch_sk2 VALUES (2, toDateTime('2023-01-01 12:12:12')); SELECT count(*) FROM test_block_mismatch_sk2 FINAL; - -CREATE TABLE test_block_mismatch_magic_row_dist -( - a UInt32, - b DateTime -) -ENGINE = ReplacingMergeTree -PARTITION BY toYYYYMM(b) -ORDER BY (toDate(b), a); - -INSERT INTO test_block_mismatch_magic_row_dist VALUES (1, toDateTime('2023-02-02 12:12:12')); -INSERT INTO test_block_mismatch_magic_row_dist VALUES (1, toDateTime('2023-02-02 12:12:12')); -INSERT INTO test_block_mismatch_magic_row_dist VALUES (1, toDateTime('2023-02-02 12:12:12')); -INSERT INTO test_block_mismatch_magic_row_dist VALUES (1, toDateTime('2023-02-02 12:12:12')); - -optimize table test_block_mismatch_magic_row_dist final; - -system stop merges test_block_mismatch_magic_row_dist; - -INSERT INTO test_block_mismatch_magic_row_dist VALUES (1, toDateTime('2023-01-01 12:12:12')); -INSERT INTO test_block_mismatch_magic_row_dist VALUES (1, toDateTime('2023-01-01 12:12:12')); - -SELECT count(*) FROM test_block_mismatch_magic_row_dist FINAL; From 93b76c93210bccfda6d6b2413bf07cf48c4f9fa3 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 7 Jul 2023 16:40:47 +0200 Subject: [PATCH 1363/1997] better logs on shutdown --- base/base/getThreadId.cpp | 41 +++++++++++++++++----------- base/base/getThreadId.h | 2 ++ src/Daemon/BaseDaemon.cpp | 1 + src/Interpreters/Context.cpp | 41 ++++++++++++++++------------ src/Interpreters/DatabaseCatalog.cpp | 2 ++ 5 files changed, 53 insertions(+), 34 deletions(-) diff --git a/base/base/getThreadId.cpp b/base/base/getThreadId.cpp index b6c22bb8856..a42d79c5698 100644 --- a/base/base/getThreadId.cpp +++ b/base/base/getThreadId.cpp @@ -15,25 +15,34 @@ static thread_local uint64_t current_tid = 0; + +static void setCurrentThreadId() +{ +#if defined(OS_ANDROID) + current_tid = gettid(); +#elif defined(OS_LINUX) + current_tid = static_cast(syscall(SYS_gettid)); /// This call is always successful. - man gettid +#elif defined(OS_FREEBSD) + current_tid = pthread_getthreadid_np(); +#elif defined(OS_SUNOS) + // On Solaris-derived systems, this returns the ID of the LWP, analogous + // to a thread. 
+    current_tid = static_cast<uint64_t>(pthread_self());
+#else
+    if (0 != pthread_threadid_np(nullptr, &current_tid))
+        throw std::logic_error("pthread_threadid_np returned error");
+#endif
+}
+
 uint64_t getThreadId()
 {
     if (!current_tid)
-    {
-#if defined(OS_ANDROID)
-        current_tid = gettid();
-#elif defined(OS_LINUX)
-        current_tid = static_cast<uint64_t>(syscall(SYS_gettid)); /// This call is always successful. - man gettid
-#elif defined(OS_FREEBSD)
-        current_tid = pthread_getthreadid_np();
-#elif defined(OS_SUNOS)
-        // On Solaris-derived systems, this returns the ID of the LWP, analogous
-        // to a thread.
-        current_tid = static_cast<uint64_t>(pthread_self());
-#else
-        if (0 != pthread_threadid_np(nullptr, &current_tid))
-            throw std::logic_error("pthread_threadid_np returned error");
-#endif
-    }
+        setCurrentThreadId();

     return current_tid;
 }
+
+void updateCurrentThreadIdAfterFork()
+{
+    setCurrentThreadId();
+}
diff --git a/base/base/getThreadId.h b/base/base/getThreadId.h
index a1b5ff5f3e8..f90c76029e1 100644
--- a/base/base/getThreadId.h
+++ b/base/base/getThreadId.h
@@ -3,3 +3,5 @@

 /// Obtain thread id from OS. The value is cached in thread local variable.
 uint64_t getThreadId();
+
+void updateCurrentThreadIdAfterFork();
diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp
index 6d29523a354..d63e9976437 100644
--- a/src/Daemon/BaseDaemon.cpp
+++ b/src/Daemon/BaseDaemon.cpp
@@ -1101,6 +1101,7 @@ void BaseDaemon::setupWatchdog()

     if (0 == pid)
     {
+        updateCurrentThreadIdAfterFork();
         logger().information("Forked a child process to watch");
 #if defined(OS_LINUX)
         if (0 != prctl(PR_SET_PDEATHSIG, SIGKILL))
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index 7482450d529..7b3d419cce4 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -176,6 +176,15 @@ namespace ErrorCodes
     extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH;
 }

+#define SHUTDOWN(log, desc, ptr, method) do \
+{ \
+    if (ptr) \
+    { \
+        LOG_DEBUG(log, "Shutting down " desc); \
+        ptr->method; \
+    } \
+} while (false) \
+

 /** Set of known objects (environment), that could be used in query.
  * Shared (global) part. Order of members (especially, order of destruction) is very important.
@@ -479,35 +488,29 @@ struct ContextSharedPart : boost::noncopyable
         /// Stop periodic reloading of the configuration files.
         /// This must be done first because otherwise the reloading may pass a changed config
         /// to some destroyed parts of ContextSharedPart.
-        if (external_dictionaries_loader)
-            external_dictionaries_loader->enablePeriodicUpdates(false);
-        if (external_user_defined_executable_functions_loader)
-            external_user_defined_executable_functions_loader->enablePeriodicUpdates(false);
-        if (user_defined_sql_objects_loader)
-            user_defined_sql_objects_loader->stopWatching();
+        SHUTDOWN(log, "dictionaries loader", external_dictionaries_loader, enablePeriodicUpdates(false));
+        SHUTDOWN(log, "UDFs loader", external_user_defined_executable_functions_loader, enablePeriodicUpdates(false));
+        SHUTDOWN(log, "another UDFs loader", user_defined_sql_objects_loader, stopWatching());
+
+        LOG_TRACE(log, "Shutting down named sessions");
         Session::shutdownNamedSessions();

         /// Waiting for current backups/restores to be finished. This must be done before `DatabaseCatalog::shutdown()`.
-        if (backups_worker)
-            backups_worker->shutdown();
+        SHUTDOWN(log, "backups worker", backups_worker, shutdown());

         /** After system_logs have been shut down it is guaranteed that no system table gets created or written to.
* Note that part changes at shutdown won't be logged to part log. */ - if (system_logs) - system_logs->shutdown(); + SHUTDOWN(log, "system logs", system_logs, shutdown()); + LOG_TRACE(log, "Shutting down database catalog"); DatabaseCatalog::shutdown(); - if (merge_mutate_executor) - merge_mutate_executor->wait(); - if (fetch_executor) - fetch_executor->wait(); - if (moves_executor) - moves_executor->wait(); - if (common_executor) - common_executor->wait(); + SHUTDOWN(log, "merges executor", merge_mutate_executor, wait()); + SHUTDOWN(log, "fetches executor", fetch_executor, wait()); + SHUTDOWN(log, "moves executor", moves_executor, wait()); + SHUTDOWN(log, "common executor", common_executor, wait()); TransactionLog::shutdownIfAny(); @@ -533,10 +536,12 @@ struct ContextSharedPart : boost::noncopyable /// DDLWorker should be deleted without lock, cause its internal thread can /// take it as well, which will cause deadlock. + LOG_TRACE(log, "Shutting down DDLWorker"); delete_ddl_worker.reset(); /// Background operations in cache use background schedule pool. /// Deactivate them before destructing it. + LOG_TRACE(log, "Shutting down caches"); const auto & caches = FileCacheFactory::instance().getAll(); for (const auto & [_, cache] : caches) cache->cache->deactivateBackgroundOperations(); diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 4cb2f6e3b3d..271330bc64a 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -227,9 +227,11 @@ void DatabaseCatalog::shutdownImpl() databases_with_delayed_shutdown.push_back(database.second); continue; } + LOG_TRACE(log, "Shutting down database {}", database.first); database.second->shutdown(); } + LOG_TRACE(log, "Shutting down system databases"); for (auto & database : databases_with_delayed_shutdown) { database->shutdown(); From 88911e1378900d6687e05f08c6cbe592b5d32001 Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 7 Jul 2023 16:42:03 +0000 Subject: [PATCH 1364/1997] Check refcount in finalize for other RemoveObjectStorageOperations --- .../DiskObjectStorageTransaction.cpp | 50 +++++++++---------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp index f3dbac445a5..0ae577602b1 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp @@ -65,11 +65,18 @@ struct PureMetadataObjectStorageOperation final : public IDiskObjectStorageOpera std::string getInfoForLog() const override { return fmt::format("PureMetadataObjectStorageOperation"); } }; + +struct ObjectsToRemove +{ + StoredObjects objects; + UnlinkMetadataFileOperationOutcomePtr unlink_outcome; +}; + struct RemoveObjectStorageOperation final : public IDiskObjectStorageOperation { std::string path; bool delete_metadata_only; - StoredObjects objects_to_remove; + ObjectsToRemove objects_to_remove; bool if_exists; bool remove_from_cache = false; @@ -105,15 +112,12 @@ struct RemoveObjectStorageOperation final : public IDiskObjectStorageOperation try { - uint32_t hardlink_count = metadata_storage.getHardlinkCount(path); auto objects = metadata_storage.getStorageObjects(path); - tx->unlinkMetadata(path); + auto unlink_outcome = tx->unlinkMetadata(path); - if (hardlink_count == 0) - { - objects_to_remove = std::move(objects); - } + if (unlink_outcome) + objects_to_remove = ObjectsToRemove{std::move(objects), 
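+ /// The outcome records how many hardlinks remain after unlinking; finalize() removes the blobs only when that count is zero.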
std::move(unlink_outcome)}; } catch (const Exception & e) { @@ -142,8 +146,11 @@ struct RemoveObjectStorageOperation final : public IDiskObjectStorageOperation /// due to network error or similar. And when it retries an operation it may receive /// a 404 HTTP code. We don't want to treat this code as a real error for deletion process /// (e.g. throwing some exceptions) and thus we just use method `removeObjectsIfExists` - if (!delete_metadata_only && !objects_to_remove.empty()) - object_storage.removeObjectsIfExist(objects_to_remove); + if (!delete_metadata_only && !objects_to_remove.objects.empty() + && objects_to_remove.unlink_outcome->num_hardlinks == 0) + { + object_storage.removeObjectsIfExist(objects_to_remove.objects); + } } }; @@ -153,12 +160,6 @@ struct RemoveManyObjectStorageOperati bool keep_all_batch_data; NameSet file_names_remove_metadata_only; - struct ObjectsToRemove - { - StoredObjects objects; - UnlinkMetadataFileOperationOutcomePtr unlink_outcome; - }; - std::vector objects_to_remove; bool remove_from_cache = false; @@ -197,10 +198,10 @@ struct RemoveManyObjectStorageOperati try { + auto objects = metadata_storage.getStorageObjects(path); auto unlink_outcome = tx->unlinkMetadata(path); if (unlink_outcome && !keep_all_batch_data && !file_names_remove_metadata_only.contains(fs::path(path).filename())) { - auto objects = metadata_storage.getStorageObjects(path); objects_to_remove.emplace_back(ObjectsToRemove{std::move(objects), std::move(unlink_outcome)}); } } @@ -244,10 +245,9 @@ struct RemoveManyObjectStorageOperation final : public IDiskObjectStorageOperati struct RemoveRecursiveObjectStorageOperation final : public IDiskObjectStorageOperation { std::string path; - std::unordered_map objects_to_remove; + std::unordered_map objects_to_remove_by_path; bool keep_all_batch_data; NameSet file_names_remove_metadata_only; - StoredObjects objects_to_remove_from_cache; RemoveRecursiveObjectStorageOperation( IObjectStorage & object_storage_, @@ -274,14 +274,11 @@ struct RemoveRecursiveObjectStorageOperation final : public IDiskObjectStorageOp { try { - uint32_t hardlink_count = metadata_storage.getHardlinkCount(path_to_remove); auto objects_paths = metadata_storage.getStorageObjects(path_to_remove); - - tx->unlinkMetadata(path_to_remove); - - if (hardlink_count == 0) + auto unlink_outcome = tx->unlinkMetadata(path_to_remove); + if (unlink_outcome) { - objects_to_remove[path_to_remove] = std::move(objects_paths); + objects_to_remove_by_path[path_to_remove] = ObjectsToRemove{std::move(objects_paths), std::move(unlink_outcome)}; } } catch (const Exception & e) @@ -331,11 +328,12 @@ struct RemoveRecursiveObjectStorageOp if (!keep_all_batch_data) { StoredObjects remove_from_remote; - for (auto && [local_path, remote_paths] : objects_to_remove) + for (auto && [local_path, objects_to_remove] : objects_to_remove_by_path) { if (!file_names_remove_metadata_only.contains(fs::path(local_path).filename())) { - std::move(remote_paths.begin(), remote_paths.end(), std::back_inserter(remove_from_remote)); + if (objects_to_remove.unlink_outcome->num_hardlinks == 0) + std::move(objects_to_remove.objects.begin(), objects_to_remove.objects.end(), std::back_inserter(remove_from_remote)); } } /// Read comment inside RemoveObjectStorageOperation class From 39a440fa0e99849d710e09bd031de5a52708fd6f Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Fri, 7 Jul 2023 22:08:31
+0000 Subject: [PATCH 1365/1997] Build sort description based on original header --- .../QueryPlan/ReadFromMergeTree.cpp | 41 +++++++++++++++---- 1 file changed, 34 insertions(+), 7 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 32bfa6935df..3d1e2650188 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -139,17 +139,42 @@ static bool checkAllPartsOnRemoteFS(const RangesInDataParts & parts) /// build sort description for output stream static void updateSortDescriptionForOutputStream( - DataStream & output_stream, const Names & sorting_key_columns, const int sort_direction, InputOrderInfoPtr input_order_info) + DataStream & output_stream, const Names & sorting_key_columns, const int sort_direction, InputOrderInfoPtr input_order_info, PrewhereInfoPtr prewhere_info) { + Block original_header = output_stream.header.cloneEmpty(); + /// build original header + if (prewhere_info && prewhere_info->prewhere_actions) + { + FindOriginalNodeForOutputName original_column_finder(prewhere_info->prewhere_actions); + + for (auto & column : original_header) + { + const auto * original_node = original_column_finder.find(column.name); + if (original_node) + { + LOG_DEBUG( + &Poco::Logger::get(__PRETTY_FUNCTION__), + "Found original column '{}' for '{}'", + original_node->result_name, + column.name); + column.name = original_node->result_name; + } + } + } + SortDescription sort_description; const Block & header = output_stream.header; - for (const auto & column_name : sorting_key_columns) + for (const auto & sorting_key : sorting_key_columns) { - if (std::find_if(header.begin(), header.end(), [&](ColumnWithTypeAndName const & col) { return col.name == column_name; }) - == header.end()) + const auto it = std::find_if( + original_header.begin(), original_header.end(), [&sorting_key](const auto & column) { return column.name == sorting_key; }); + if (it == original_header.end()) break; - sort_description.emplace_back(column_name, sort_direction); + + const size_t column_pos = std::distance(original_header.begin(), it); + sort_description.emplace_back((header.begin() + column_pos)->name, sort_direction); } + if (!sort_description.empty()) { if (input_order_info) @@ -283,7 +308,8 @@ ReadFromMergeTree::ReadFromMergeTree( *output_stream, storage_snapshot->getMetadataForQuery()->getSortingKeyColumns(), getSortDirection(), - query_info.getInputOrderInfo()); + query_info.getInputOrderInfo(), + prewhere_info); } @@ -1575,7 +1601,8 @@ void ReadFromMergeTree::updatePrewhereInfo(const PrewhereInfoPtr & prewhere_info *output_stream, storage_snapshot->getMetadataForQuery()->getSortingKeyColumns(), getSortDirection(), - query_info.getInputOrderInfo()); + query_info.getInputOrderInfo(), + prewhere_info); } bool ReadFromMergeTree::requestOutputEachPartitionThroughSeparatePort() From a96874850ec0faaf049cce01feee6c4a572d7961 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 8 Jul 2023 00:55:57 +0200 Subject: [PATCH 1366/1997] Revert "Merge pull request #48115 from save-my-heart/throw_non_parametric_function" This reverts commit 5f930aeb2619bda8f27f3cfc6ba01ffaf48c3d64, reversing changes made to 35572321a14d617cfd110a48d8d3416615bd75c9. 
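For reference, the reverted check rejected parameter lists up front with FUNCTION_CANNOT_HAVE_PARAMETERS (error 309); the test deleted below exercised exactly that:

    SELECT * FROM system.numbers WHERE number > toUInt64(10)(number) LIMIT 10; -- { serverError 309 }
    SELECT sum_udf(1)(1, 2); -- { serverError 309 }

After the revert, such parameter lists are no longer rejected by this early check.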
--- .../UserDefined/UserDefinedSQLFunctionVisitor.cpp | 7 ------- src/Interpreters/ActionsVisitor.cpp | 7 ------- .../0_stateless/02701_non_parametric_function.reference | 0 .../0_stateless/02701_non_parametric_function.sql | 9 --------- 4 files changed, 23 deletions(-) delete mode 100644 tests/queries/0_stateless/02701_non_parametric_function.reference delete mode 100644 tests/queries/0_stateless/02701_non_parametric_function.sql diff --git a/src/Functions/UserDefined/UserDefinedSQLFunctionVisitor.cpp b/src/Functions/UserDefined/UserDefinedSQLFunctionVisitor.cpp index 597e4efe35e..360d1cdf76c 100644 --- a/src/Functions/UserDefined/UserDefinedSQLFunctionVisitor.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLFunctionVisitor.cpp @@ -20,7 +20,6 @@ namespace DB namespace ErrorCodes { extern const int UNSUPPORTED_METHOD; - extern const int FUNCTION_CANNOT_HAVE_PARAMETERS; } void UserDefinedSQLFunctionVisitor::visit(ASTPtr & ast) @@ -139,12 +138,6 @@ ASTPtr UserDefinedSQLFunctionVisitor::tryToReplaceFunction(const ASTFunction & f if (!user_defined_function) return nullptr; - /// All UDFs are not parametric for now. - if (function.parameters) - { - throw Exception(ErrorCodes::FUNCTION_CANNOT_HAVE_PARAMETERS, "Function {} is not parametric", function.name); - } - const auto & function_arguments_list = function.children.at(0)->as(); auto & function_arguments = function_arguments_list->children; diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 10502b7e66d..01f2d4cf22e 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -78,7 +78,6 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION; - extern const int FUNCTION_CANNOT_HAVE_PARAMETERS; } static NamesAndTypesList::iterator findColumn(const String & name, NamesAndTypesList & cols) @@ -1106,12 +1105,6 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & } } - /// Normal functions are not parametric for now. - if (node.parameters) - { - throw Exception(ErrorCodes::FUNCTION_CANNOT_HAVE_PARAMETERS, "Function {} is not parametric", node.name); - } - Names argument_names; DataTypes argument_types; bool arguments_present = true; diff --git a/tests/queries/0_stateless/02701_non_parametric_function.reference b/tests/queries/0_stateless/02701_non_parametric_function.reference deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/queries/0_stateless/02701_non_parametric_function.sql b/tests/queries/0_stateless/02701_non_parametric_function.sql deleted file mode 100644 index 5261fa7b082..00000000000 --- a/tests/queries/0_stateless/02701_non_parametric_function.sql +++ /dev/null @@ -1,9 +0,0 @@ --- Tags: no-parallel - -SELECT * FROM system.numbers WHERE number > toUInt64(10)(number) LIMIT 10; -- { serverError 309 } - -CREATE FUNCTION IF NOT EXISTS sum_udf as (x, y) -> (x + y); - -SELECT sum_udf(1)(1, 2); -- { serverError 309 } - -DROP FUNCTION IF EXISTS sum_udf; From f4696d762cb3e15878b99c51bcad9ee15a8972c1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 8 Jul 2023 00:56:42 +0200 Subject: [PATCH 1367/1997] Revert "Merge pull request #49419 from ClickHouse/fix-function-parameter-exception" This reverts commit b921476a3be536b17b967391cefab3888c0c96b2, reversing changes made to 7896d307379bc813665fa5b11d08c202ea67f4fb. 
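This removes the analyzer-side counterpart of the same check: parameters on ordinary and lambda functions are no longer rejected during analysis, and the is_executable_udf special case becomes unnecessary (executable UDFs receive their parameters through UserDefinedExecutableFunctionFactory in any case). 00261_storage_aliases_and_array_join returns to the analyzer tech-debt list.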
--- src/Analyzer/Passes/QueryAnalysisPass.cpp | 15 --------------- tests/analyzer_tech_debt.txt | 1 + 2 files changed, 1 insertion(+), 15 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 163092f1b7f..da8933aabaa 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -116,7 +116,6 @@ namespace ErrorCodes extern const int UNKNOWN_TABLE; extern const int ILLEGAL_COLUMN; extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH; - extern const int FUNCTION_CANNOT_HAVE_PARAMETERS; } /** Query analyzer implementation overview. Please check documentation in QueryAnalysisPass.h first. @@ -4897,11 +4896,6 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi lambda_expression_untyped->formatASTForErrorMessage(), scope.scope_node->formatASTForErrorMessage()); - if (!parameters.empty()) - { - throw Exception(ErrorCodes::FUNCTION_CANNOT_HAVE_PARAMETERS, "Function {} is not parametric", function_node.formatASTForErrorMessage()); - } - auto lambda_expression_clone = lambda_expression_untyped->clone(); IdentifierResolveScope lambda_scope(lambda_expression_clone, &scope /*parent_scope*/); @@ -5018,12 +5012,9 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi } FunctionOverloadResolverPtr function = UserDefinedExecutableFunctionFactory::instance().tryGet(function_name, scope.context, parameters); - bool is_executable_udf = false; if (!function) function = FunctionFactory::instance().tryGet(function_name, scope.context); - else - is_executable_udf = true; if (!function) { @@ -5074,12 +5065,6 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi return result_projection_names; } - /// Executable UDFs may have parameters. They are checked in UserDefinedExecutableFunctionFactory. - if (!parameters.empty() && !is_executable_udf) - { - throw Exception(ErrorCodes::FUNCTION_CANNOT_HAVE_PARAMETERS, "Function {} is not parametric", function_name); - } - /** For lambda arguments we need to initialize lambda argument types DataTypeFunction using `getLambdaArgumentTypes` function. * Then each lambda arguments are initialized with columns, where column source is lambda. * This information is important for later steps of query processing. diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index f7cc13dd2e2..f838a19940a 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -111,6 +111,7 @@ 00917_multiple_joins_denny_crane 00725_join_on_bug_1 00636_partition_key_parts_pruning +00261_storage_aliases_and_array_join 01825_type_json_multiple_files 01281_group_by_limit_memory_tracking 02723_zookeeper_name From f4696d762cb3e15878b99c51bcad9ee15a8972c1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 8 Jul 2023 02:03:23 +0300 Subject: [PATCH 1368/1997] Correct example about parametric executable UDFs. --- docs/en/sql-reference/functions/udf.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/udf.md b/docs/en/sql-reference/functions/udf.md index 9c6b1b0c66b..51734beed03 100644 --- a/docs/en/sql-reference/functions/udf.md +++ b/docs/en/sql-reference/functions/udf.md @@ -171,12 +171,13 @@ Result: └──────────────────────────────┘ ``` -Executable user defined functions can take constant parameters configured in `command` setting (works only for user defined functions with `executable` type).
+Executable user defined functions can take constant parameters configured in `command` setting (works only for user defined functions with `executable` type). It also requires the `execute_direct` option (to ensure no shell argument expansion vulnerability). File `test_function_parameter_python.xml` (`/etc/clickhouse-server/test_function_parameter_python.xml` with default path settings). ```xml executable + true test_function_parameter_python String From 6990f078a0bf87f23d478e83c51001b7cb0d4b8a Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> Date: Fri, 7 Jul 2023 19:19:30 -0400 Subject: [PATCH 1369/1997] cleaner way --- src/Daemon/BaseDaemon.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index a75aac7a08e..af2d355d335 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -154,10 +154,7 @@ static void signalHandler(int sig, siginfo_t * info, void * context) writePODBinary(*info, out); writePODBinary(signal_context, out); writePODBinary(stack_trace, out); - if (Exception::enable_job_stack_trace) - writeVectorBinary(Exception::thread_frame_pointers, out); - else - writeVarUInt(0, out); + writeVectorBinary(Exception::enable_job_stack_trace ? Exception::thread_frame_pointers : std::vector{}, out); writeBinary(static_cast(getThreadId()), out); writePODBinary(current_thread, out); From 9144a2dbb2a17af72304267edfe5a81ee7daa0b9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 8 Jul 2023 05:23:13 +0200 Subject: [PATCH 1370/1997] Fix unrelated messages from LSan in clickhouse-client --- tests/clickhouse-test | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 4860ce0fac9..95470f77987 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -57,6 +57,8 @@ MESSAGES_TO_RETRY = [ "ConnectionPoolWithFailover: Connection failed at try", "DB::Exception: New table appeared in database being dropped or detached. Try again", "is already started to be removing by another replica right now", + # This is from LSan, and it indicates its own internal problem: + "Unable to get registers from thread", ] MAX_RETRIES = 3 From c828db572078bb68bbcd20c6850073030d4addac Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 8 Jul 2023 05:57:23 +0200 Subject: [PATCH 1371/1997] Allow OOM in AST Fuzzer with Sanitizers --- docker/test/fuzzer/run-fuzzer.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index d2c8de7a211..5cda0831a84 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -291,7 +291,7 @@ quit if [ "$server_died" == 1 ] then # The server has died. - if ! rg --text -o 'Received signal.*|Logical error.*|Assertion.*failed|Failed assertion.*|.*runtime error: .*|.*is located.*|(SUMMARY|ERROR): [a-zA-Z]+Sanitizer:.*|.*_LIBCPP_ASSERT.*' server.log > description.txt + if ! rg --text -o 'Received signal.*|Logical error.*|Assertion.*failed|Failed assertion.*|.*runtime error: .*|.*is located.*|(SUMMARY|ERROR): [a-zA-Z]+Sanitizer:.*|.*_LIBCPP_ASSERT.*|.*Child process was terminated by signal 9.*' server.log > description.txt then echo "Lost connection to server. See the logs." 
> description.txt fi From 1bdcd29da2bfc4cab02a0db5dedeb7d0515ac49c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 8 Jul 2023 06:02:38 +0200 Subject: [PATCH 1372/1997] Disable one test under Analyzer --- tests/analyzer_tech_debt.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index f7cc13dd2e2..a10f72e743a 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -128,3 +128,4 @@ 02784_parallel_replicas_automatic_disabling 02581_share_big_sets_between_mutation_tasks_long 02581_share_big_sets_between_multiple_mutations_tasks_long +00992_system_parts_race_condition_zookeeper_long From adbd85b975aba4618ddf2a934422559410eeea48 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 8 Jul 2023 06:26:44 +0200 Subject: [PATCH 1373/1997] Fix Docker --- tests/integration/conftest.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 749f4aa1cde..5933883f7b0 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -42,6 +42,13 @@ def cleanup_environment(): logging.debug(f"Docker ps before start:{r.stdout}") else: logging.debug(f"No running containers") + + logging.debug("Pruning Docker networks") + run_and_check( + ["docker network prune"], + shell=True, + nothrow=True, + ) except Exception as e: logging.exception(f"cleanup_environment:{str(e)}") pass From cdbf279b65cca972ce63dd7fd835d2b46359f7f3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 8 Jul 2023 06:46:28 +0200 Subject: [PATCH 1374/1997] Fix test 01825_type_json_from_map --- tests/queries/0_stateless/01825_type_json_from_map.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01825_type_json_from_map.sql b/tests/queries/0_stateless/01825_type_json_from_map.sql index 2480aca1667..51e60843a1a 100644 --- a/tests/queries/0_stateless/01825_type_json_from_map.sql +++ b/tests/queries/0_stateless/01825_type_json_from_map.sql @@ -1,4 +1,5 @@ --- Tags: no-fasttest +-- Tags: no-fasttest, no-random-merge-tree-settings +-- For example, it is 4 times slower with --merge_max_block_size=5967 --index_granularity=55 --min_bytes_for_wide_part=847510133 DROP TABLE IF EXISTS t_json; DROP TABLE IF EXISTS t_map; From 0b0caec9c435aaf0df3e01ef64bf06397d11f2ce Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 8 Jul 2023 07:51:17 +0300 Subject: [PATCH 1375/1997] Update Context.cpp --- src/Interpreters/Context.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 7b3d419cce4..8df8723123f 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -181,7 +181,7 @@ namespace ErrorCodes if (ptr) \ { \ LOG_DEBUG(log, "Shutting down " desc); \ - ptr->method; \ + (ptr)->method; \ } \ } while (false) \ From 4de02c243816f907643eefbbe4743861660b6d99 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 8 Jul 2023 07:04:33 +0200 Subject: [PATCH 1376/1997] Fix test 02354_distributed_with_external_aggregation_memory_usage --- ...distributed_with_external_aggregation_memory_usage.sql | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql b/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql index 548660e36b1..c8ec40bb0a7 100644 --- 
a/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql +++ b/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql @@ -1,5 +1,7 @@ -- Tags: long, no-tsan, no-msan, no-asan, no-ubsan, no-debug, no-s3-storage +DROP TABLE IF EXISTS t_2354_dist_with_external_aggr; + create table t_2354_dist_with_external_aggr(a UInt64, b String, c FixedString(100)) engine = MergeTree order by tuple(); insert into t_2354_dist_with_external_aggr select number, toString(number) as s, toFixedString(s, 100) from numbers_mt(5e7); @@ -15,8 +17,12 @@ set max_bytes_before_external_group_by = '2G', -- whole aggregation state of local aggregation uncompressed is 5.8G -- it is hard to provide an accurate estimation for memory usage, so 4G is just the actual value taken from the logs + delta +-- also avoid using localhost, so the queries will go over separate connections +-- (otherwise the memory usage for merge will be counted together with the localhost query) select a, b, c, sum(a) as s -from remote('127.0.0.{1,2}', currentDatabase(), t_2354_dist_with_external_aggr) +from remote('127.0.0.{2,3}', currentDatabase(), t_2354_dist_with_external_aggr) group by a, b, c format Null settings max_memory_usage = '4Gi'; + +DROP TABLE t_2354_dist_with_external_aggr; From 8e4c8f118cf64fcd77524439508b838c05a58fcf Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 8 Jul 2023 09:07:05 +0200 Subject: [PATCH 1377/1997] Fix disaster in integration tests, part 2 --- tests/integration/ci-runner.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/integration/ci-runner.py b/tests/integration/ci-runner.py index d6d17abe725..43184574e6e 100755 --- a/tests/integration/ci-runner.py +++ b/tests/integration/ci-runner.py @@ -406,9 +406,9 @@ class ClickhouseIntegrationTestsRunner: out_file_full = os.path.join(self.result_path, "runner_get_all_tests.log") cmd = ( "cd {repo_path}/tests/integration && " - "timeout -s 9 1h ./runner {runner_opts} {image_cmd} ' --setup-plan' " - "| tee {out_file_full} | grep '::' | sed 's/ (fixtures used:.*//g' | sed 's/^ *//g' | sed 's/ *$//g' " - "| grep -v 'SKIPPED' | sort -u > {out_file}".format( + "timeout --signal=KILL 1h ./runner {runner_opts} {image_cmd} ' --setup-plan' " + "| tee '{out_file_full}' | grep -F '::' | sed -r 's/ \(fixtures used:.*//g; s/^ *//g; s/ *$//g' " + "| grep -v -F 'SKIPPED' | sort --unique > {out_file}".format( repo_path=repo_path, runner_opts=self._get_runner_opts(), image_cmd=image_cmd, @@ -626,7 +626,7 @@ class ClickhouseIntegrationTestsRunner: info_basename = test_group_str + "_" + str(i) + ".nfo" info_path = os.path.join(repo_path, "tests/integration", info_basename) - test_cmd = " ".join([test for test in sorted(test_names)]) + test_cmd = " ".join([f"'{test}'" for test in sorted(test_names)]) parallel_cmd = ( " --parallel {} ".format(num_workers) if num_workers > 0 else "" ) From df31034820c245030b16fddd7b9b3e06c07b0d51 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 7 Jul 2023 20:29:59 +0200 Subject: [PATCH 1378/1997] rollback merge tasks on exception --- src/Storages/MergeTree/IExecutableTask.h | 11 +++++++---- .../MergeTree/MergeFromLogEntryTask.cpp | 2 +- .../MergeTree/MergeFromLogEntryTask.h | 2 +- .../MergeTree/MergePlainMergeTreeTask.cpp | 19 ++++++++++++++++--- .../MergeTree/MergePlainMergeTreeTask.h | 5 +++-- .../MergeTree/MergeTreeBackgroundExecutor.cpp | 17 ++++++++++------- .../MergeTree/MutateFromLogEntryTask.cpp | 2 +- .../MergeTree/MutateFromLogEntryTask.h 
| 2 +- .../MergeTree/MutatePlainMergeTreeTask.cpp | 4 ++-- .../MergeTree/MutatePlainMergeTreeTask.h | 5 +++-- src/Storages/MergeTree/MutateTask.cpp | 15 +++++++++------ .../ReplicatedMergeMutateTaskBase.cpp | 2 +- .../MergeTree/ReplicatedMergeMutateTaskBase.h | 3 ++- .../MergeTree/tests/gtest_executor.cpp | 10 ++++++---- src/Storages/StorageMergeTree.cpp | 2 +- 15 files changed, 64 insertions(+), 37 deletions(-) diff --git a/src/Storages/MergeTree/IExecutableTask.h b/src/Storages/MergeTree/IExecutableTask.h index d0c2d4a840e..738056e0ea0 100644 --- a/src/Storages/MergeTree/IExecutableTask.h +++ b/src/Storages/MergeTree/IExecutableTask.h @@ -32,8 +32,9 @@ public: using TaskResultCallback = std::function; virtual bool executeStep() = 0; virtual void onCompleted() = 0; - virtual StorageID getStorageID() = 0; - virtual Priority getPriority() = 0; + virtual StorageID getStorageID() const = 0; + virtual String getQueryId() const = 0; + virtual Priority getPriority() const = 0; virtual ~IExecutableTask() = default; }; @@ -63,12 +64,14 @@ public: } void onCompleted() override { job_result_callback(!res); } - StorageID getStorageID() override { return id; } - Priority getPriority() override + StorageID getStorageID() const override { return id; } + Priority getPriority() const override { throw Exception(ErrorCodes::LOGICAL_ERROR, "getPriority() method is not supported by LambdaAdapter"); } + String getQueryId() const override { return id.getShortName() + "::lambda"; } + private: bool res = false; std::function job_to_execute; diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp index 17582e7df98..9f54c554c85 100644 --- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp @@ -287,7 +287,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare() task_context = Context::createCopy(storage.getContext()); task_context->makeQueryContext(); - task_context->setCurrentQueryId(""); + task_context->setCurrentQueryId(getQueryId()); /// Add merge to list merge_mutate_entry = storage.getContext()->getMergeList().insert( diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.h b/src/Storages/MergeTree/MergeFromLogEntryTask.h index 62908f79fb4..16e69a568ba 100644 --- a/src/Storages/MergeTree/MergeFromLogEntryTask.h +++ b/src/Storages/MergeTree/MergeFromLogEntryTask.h @@ -24,7 +24,7 @@ public: StorageReplicatedMergeTree & storage_, IExecutableTask::TaskResultCallback & task_result_callback_); - Priority getPriority() override { return priority; } + Priority getPriority() const override { return priority; } protected: /// Both return false if we can't execute merge. 
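(Illustrative sketch, not taken from the patch: the class, table, and part names below are invented. It shows the shape of an IExecutableTask implementation after this interface change, following the pattern of the tasks below.)

class ExampleTask : public IExecutableTask
{
public:
    bool executeStep() override { return false; }   /// single-step task: nothing more to do
    void onCompleted() override {}
    StorageID getStorageID() const override { return {"test", "example"}; }
    Priority getPriority() const override { return {}; }
    /// The query id ties log lines produced by this background task to a concrete part.
    String getQueryId() const override { return getStorageID().getShortName() + "::all_1_1_0"; }
};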
diff --git a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp index 9302bdf11de..3f5753a0c95 100644 --- a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp @@ -3,8 +3,10 @@ #include #include #include +#include #include #include +#include namespace DB @@ -16,7 +18,7 @@ namespace ErrorCodes } -StorageID MergePlainMergeTreeTask::getStorageID() +StorageID MergePlainMergeTreeTask::getStorageID() const { return storage.getStorageID(); } @@ -77,7 +79,6 @@ bool MergePlainMergeTreeTask::executeStep() throw Exception(ErrorCodes::LOGICAL_ERROR, "Task with state SUCCESS mustn't be executed again"); } } - return false; } @@ -145,16 +146,28 @@ void MergePlainMergeTreeTask::finish() storage.merger_mutator.renameMergedTemporaryPart(new_part, future_part->parts, txn, transaction); transaction.commit(); + ThreadFuzzer::maybeInjectSleep(); + ThreadFuzzer::maybeInjectMemoryLimitException(); + write_part_log({}); storage.incrementMergedPartsProfileEvent(new_part->getType()); transfer_profile_counters_to_initial_query(); + + if (auto txn_ = txn_holder.getTransaction()) + { + /// Explicitly commit the transaction if we own it (it's a background merge, not OPTIMIZE) + TransactionLog::instance().commitTransaction(txn_, /* throw_on_unknown_status */ false); + ThreadFuzzer::maybeInjectSleep(); + ThreadFuzzer::maybeInjectMemoryLimitException(); + } + } ContextMutablePtr MergePlainMergeTreeTask::createTaskContext() const { auto context = Context::createCopy(storage.getContext()); context->makeQueryContext(); - auto queryId = storage.getStorageID().getShortName() + "::" + future_part->name; + auto queryId = getQueryId(); context->setCurrentQueryId(queryId); return context; } diff --git a/src/Storages/MergeTree/MergePlainMergeTreeTask.h b/src/Storages/MergeTree/MergePlainMergeTreeTask.h index 95df8c90c9b..5cc9c0e50d3 100644 --- a/src/Storages/MergeTree/MergePlainMergeTreeTask.h +++ b/src/Storages/MergeTree/MergePlainMergeTreeTask.h @@ -39,8 +39,9 @@ public: bool executeStep() override; void onCompleted() override; - StorageID getStorageID() override; - Priority getPriority() override { return priority; } + StorageID getStorageID() const override; + Priority getPriority() const override { return priority; } + String getQueryId() const override { return getStorageID().getShortName() + "::" + merge_mutate_entry->future_part->name; } void setCurrentTransaction(MergeTreeTransactionHolder && txn_holder_, MergeTreeTransactionPtr && txn_) { diff --git a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp index d4f8d1140a2..6eab4337162 100644 --- a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp @@ -136,7 +136,7 @@ bool MergeTreeBackgroundExecutor::trySchedule(ExecutableTaskPtr task) return true; } -void printExceptionWithRespectToAbort(Poco::Logger * log) +void printExceptionWithRespectToAbort(Poco::Logger * log, const String & query_id) { std::exception_ptr ex = std::current_exception(); @@ -155,14 +155,14 @@ void printExceptionWithRespectToAbort(Poco::Logger * log) if (e.code() == ErrorCodes::ABORTED) LOG_DEBUG(log, getExceptionMessageAndPattern(e, /* with_stacktrace */ false)); else - tryLogCurrentException(__PRETTY_FUNCTION__); + tryLogCurrentException(log, "Exception while executing background task {" + query_id + "}"); }); } catch (...) 
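// Unknown exception type: it is still logged together with the query id, so the failing background task can be identified.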
{ NOEXCEPT_SCOPE({ ALLOW_ALLOCATIONS_IN_SCOPE; - tryLogCurrentException(__PRETTY_FUNCTION__); + tryLogCurrentException(log, "Exception while executing background task {" + query_id + "}"); }); } } @@ -239,7 +241,9 @@ void MergeTreeBackgroundExecutor::routine(TaskRuntimeDataPtr item) has_tasks.notify_one(); }; - auto release_task = [this, &erase_from_active, &on_task_done](TaskRuntimeDataPtr && item_) + String query_id; + + auto release_task = [this, &erase_from_active, &on_task_done, &query_id](TaskRuntimeDataPtr && item_) { std::lock_guard guard(mutex); @@ -256,7 +258,7 @@ } catch (...) { - printExceptionWithRespectToAbort(log); + printExceptionWithRespectToAbort(log, query_id); } on_task_done(std::move(item_)); @@ -267,11 +269,12 @@ try { ALLOW_ALLOCATIONS_IN_SCOPE; + query_id = item->task->getQueryId(); need_execute_again = item->task->executeStep(); } catch (...) { - printExceptionWithRespectToAbort(log); + printExceptionWithRespectToAbort(log, query_id); /// Release the task with exception context. /// An exception context is needed to properly delete write buffers without finalization release_task(std::move(item)); @@ -298,7 +301,7 @@ } catch (...) { - printExceptionWithRespectToAbort(log); + printExceptionWithRespectToAbort(log, query_id); on_task_done(std::move(item)); return; } diff --git a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp index ba55fb400ca..6cb9d50436e 100644 --- a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp @@ -191,7 +191,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MutateFromLogEntryTask::prepare() task_context = Context::createCopy(storage.getContext()); task_context->makeQueryContext(); - task_context->setCurrentQueryId(""); + task_context->setCurrentQueryId(getQueryId()); merge_mutate_entry = storage.getContext()->getMergeList().insert( storage.getStorageID(), diff --git a/src/Storages/MergeTree/MutateFromLogEntryTask.h b/src/Storages/MergeTree/MutateFromLogEntryTask.h index b6d3f5d4b6b..42d8307e948 100644 --- a/src/Storages/MergeTree/MutateFromLogEntryTask.h +++ b/src/Storages/MergeTree/MutateFromLogEntryTask.h @@ -31,7 +31,7 @@ public: {} - Priority getPriority() override { return priority; } + Priority getPriority() const override { return priority; } private: diff --git a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp index 3180431d31b..bf8e879e3d0 100644 --- a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp @@ -13,7 +13,7 @@ namespace ErrorCodes } -StorageID MutatePlainMergeTreeTask::getStorageID() +StorageID MutatePlainMergeTreeTask::getStorageID() const { return storage.getStorageID(); } @@ -137,7 +137,7 @@ ContextMutablePtr MutatePlainMergeTreeTask::createTaskContext() const { auto context = Context::createCopy(storage.getContext()); context->makeQueryContext(); - auto queryId = storage.getStorageID().getShortName() + "::" + future_part->name; + auto queryId = getQueryId(); context->setCurrentQueryId(queryId); return context; } diff --git a/src/Storages/MergeTree/MutatePlainMergeTreeTask.h b/src/Storages/MergeTree/MutatePlainMergeTreeTask.h index bd03c276256..ef11780a873 100644 ---
a/src/Storages/MergeTree/MutatePlainMergeTreeTask.h +++ b/src/Storages/MergeTree/MutatePlainMergeTreeTask.h @@ -41,8 +41,9 @@ public: bool executeStep() override; void onCompleted() override; - StorageID getStorageID() override; - Priority getPriority() override { return priority; } + StorageID getStorageID() const override; + Priority getPriority() const override { return priority; } + String getQueryId() const override { return getStorageID().getShortName() + "::" + merge_mutate_entry->future_part->name; } private: diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index f4a071b8f27..be512884756 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -884,8 +884,9 @@ public: } void onCompleted() override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } - StorageID getStorageID() override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } - Priority getPriority() override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } + StorageID getStorageID() const override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } + Priority getPriority() const override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } + String getQueryId() const override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } bool executeStep() override { @@ -1206,8 +1207,9 @@ public: explicit MutateAllPartColumnsTask(MutationContextPtr ctx_) : ctx(ctx_) {} void onCompleted() override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } - StorageID getStorageID() override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } - Priority getPriority() override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } + StorageID getStorageID() const override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } + Priority getPriority() const override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } + String getQueryId() const override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } bool executeStep() override { @@ -1434,8 +1436,9 @@ public: explicit MutateSomePartColumnsTask(MutationContextPtr ctx_) : ctx(ctx_) {} void onCompleted() override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } - StorageID getStorageID() override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } - Priority getPriority() override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } + StorageID getStorageID() const override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } + Priority getPriority() const override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } + String getQueryId() const override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } bool executeStep() override { diff --git a/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp b/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp index 61356558e16..b4748ee77ea 100644 --- a/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp @@ -16,7 +16,7 @@ namespace ErrorCodes extern const int PART_IS_TEMPORARILY_LOCKED; } -StorageID ReplicatedMergeMutateTaskBase::getStorageID() +StorageID ReplicatedMergeMutateTaskBase::getStorageID() const { return storage.getStorageID(); } diff --git a/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.h 
b/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.h index 1e7f9834245..ba514f11f20 100644 --- a/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.h +++ b/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.h @@ -33,7 +33,8 @@ public: ~ReplicatedMergeMutateTaskBase() override = default; void onCompleted() override; - StorageID getStorageID() override; + StorageID getStorageID() const override; + String getQueryId() const override { return getStorageID().getShortName() + "::" + selected_entry->log_entry->new_part_name; } bool executeStep() override; protected: diff --git a/src/Storages/MergeTree/tests/gtest_executor.cpp b/src/Storages/MergeTree/tests/gtest_executor.cpp index 5815b74284a..6f34eb4dfbd 100644 --- a/src/Storages/MergeTree/tests/gtest_executor.cpp +++ b/src/Storages/MergeTree/tests/gtest_executor.cpp @@ -39,7 +39,7 @@ public: return false; } - StorageID getStorageID() override + StorageID getStorageID() const override { return {"test", name}; } @@ -51,7 +51,8 @@ public: throw std::runtime_error("Unlucky..."); } - Priority getPriority() override { return {}; } + Priority getPriority() const override { return {}; } + String getQueryId() const override { return {}; } private: std::mt19937 generator; @@ -79,14 +80,15 @@ public: return --step_count; } - StorageID getStorageID() override + StorageID getStorageID() const override { return {"test", name}; } void onCompleted() override {} - Priority getPriority() override { return priority; } + Priority getPriority() const override { return priority; } + String getQueryId() const override { return "test::lambda"; } private: String name; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 4c0c0c8e3fa..add1d112c1a 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -1269,7 +1269,7 @@ bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assign { /// TODO Transactions: avoid beginning transaction if there is nothing to merge. txn = TransactionLog::instance().beginTransaction(); - transaction_for_merge = MergeTreeTransactionHolder{txn, /* autocommit = */ true}; + transaction_for_merge = MergeTreeTransactionHolder{txn, /* autocommit = */ false}; } bool has_mutations = false; From 44ae3a0986c941f234a7cb63468e77b626d10713 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Sat, 8 Jul 2023 14:58:38 +0200 Subject: [PATCH 1379/1997] fix a bug in projections --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 13 ++++++++++++- src/Storages/MergeTree/IMergeTreeDataPart.h | 9 ++++++++- src/Storages/MergeTree/MergeTreeData.cpp | 14 +++++++++++++- src/Storages/MergeTree/MutateTask.cpp | 2 +- src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp | 4 ++-- src/Storages/StorageMergeTree.cpp | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- 7 files changed, 38 insertions(+), 8 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index b9591864869..9309f0d4df6 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -312,15 +312,20 @@ IMergeTreeDataPart::IMergeTreeDataPart( const IMergeTreeDataPart * parent_part_) : DataPartStorageHolder(data_part_storage_) , storage(storage_) - , name(name_) + , mutable_name(name_) + , name(mutable_name) , info(info_) , index_granularity_info(storage_, part_type_) , part_type(part_type_) , parent_part(parent_part_) + , parent_part_name(parent_part ? 
parent_part->name : "") , use_metadata_cache(storage.use_metadata_cache) { if (parent_part) + { + chassert(parent_part_name.starts_with(parent_part->info.partition_id)); /// Make sure there's no prefix state = MergeTreeDataPartState::Active; + } incrementStateMetric(state); incrementTypeMetric(part_type); @@ -337,6 +342,12 @@ IMergeTreeDataPart::~IMergeTreeDataPart() decrementTypeMetric(part_type); } +void IMergeTreeDataPart::setName(const String & new_name) +{ + mutable_name = new_name; + for (auto & proj_part : projection_parts) + proj_part.second->parent_part_name = new_name; +} String IMergeTreeDataPart::getNewName(const MergeTreePartInfo & new_part_info) const { diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 92dbe084081..2c0cf37b3a5 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -200,9 +200,14 @@ public: /// If token is not empty, block id is calculated based on it instead of block data String getZeroLevelPartBlockID(std::string_view token) const; + void setName(const String & new_name); + const MergeTreeData & storage; - String name; +private: + String mutable_name; +public: + const String & name; // const ref to private mutable_name MergeTreePartInfo info; /// Part unique identifier. @@ -386,6 +391,7 @@ public: bool isProjectionPart() const { return parent_part != nullptr; } const IMergeTreeDataPart * getParentPart() const { return parent_part; } + String getParentPartName() const { return parent_part_name; } const std::map> & getProjectionParts() const { return projection_parts; } @@ -519,6 +525,7 @@ protected: /// Not null when it's a projection part. const IMergeTreeDataPart * parent_part; + String parent_part_name; std::map> projection_parts; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index b7fde55880e..f81726863b2 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -7455,7 +7455,19 @@ void MergeTreeData::reportBrokenPart(MergeTreeData::DataPartPtr data_part) const return; if (data_part->isProjectionPart()) - data_part = data_part->getParentPart()->shared_from_this(); + { + String parent_part_name = data_part->getParentPartName(); + auto parent_part = getPartIfExists(parent_part_name, {DataPartState::PreActive, DataPartState::Active, DataPartState::Outdated}); + + if (!parent_part) + { + LOG_WARNING(log, "Did not find parent part {} for potentially broken projection part {}", + parent_part_name, data_part->getDataPartStorage().getFullPath()); + return; + } + + data_part = parent_part; + } if (data_part->getDataPartStorage().isBroken()) { diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index f4a071b8f27..41f767cc4de 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -917,7 +917,7 @@ public: { LOG_DEBUG(log, "Merged a projection part in level {}", current_level); selected_parts[0]->renameTo(projection.name + ".proj", true); - selected_parts[0]->name = projection.name; + selected_parts[0]->setName(projection.name); selected_parts[0]->is_temp = false; ctx->new_data_part->addProjectionPart(name, std::move(selected_parts[0])); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 4128654a632..22e2ab945eb 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ 
b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -788,7 +788,7 @@ std::pair, bool> ReplicatedMergeTreeSinkImpl:: part->info.level = 0; part->info.mutation = 0; - part->name = part->getNewName(part->info); + part->setName(part->getNewName(part->info)); StorageReplicatedMergeTree::LogEntry log_entry; @@ -914,7 +914,7 @@ std::pair, bool> ReplicatedMergeTreeSinkImpl:: /// Note that it may also appear on filesystem right now in PreActive state due to concurrent inserts of the same data. /// It will be checked when we will try to rename directory. - part->name = existing_part_name; + part->setName(existing_part_name); part->info = MergeTreePartInfo::fromPartName(existing_part_name, storage.format_version); /// Used only for exception messages. block_number = part->info.min_block; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 4c0c0c8e3fa..d427a857f07 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -2260,7 +2260,7 @@ void StorageMergeTree::fillNewPartName(MutableDataPartPtr & part, DataPartsLock { part->info.min_block = part->info.max_block = increment.get(); part->info.mutation = 0; - part->name = part->getNewName(part->info); + part->setName(part->getNewName(part->info)); } } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 2da18f69baf..8a21da69460 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -9262,7 +9262,7 @@ bool StorageReplicatedMergeTree::createEmptyPartInsteadOfLost(zkutil::ZooKeeperP } MergeTreeData::MutableDataPartPtr new_data_part = createEmptyPart(new_part_info, partition, lost_part_name, NO_TRANSACTION_PTR); - new_data_part->name = lost_part_name; + new_data_part->setName(lost_part_name); try { From 85531f32cfb5339c45dade1b84c2a20f0a694cfe Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 8 Jul 2023 19:32:44 +0300 Subject: [PATCH 1380/1997] Update 02804_clusterAllReplicas_insert.sql --- tests/queries/0_stateless/02804_clusterAllReplicas_insert.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02804_clusterAllReplicas_insert.sql b/tests/queries/0_stateless/02804_clusterAllReplicas_insert.sql index 05bda19eb9e..c39d9e7d78b 100644 --- a/tests/queries/0_stateless/02804_clusterAllReplicas_insert.sql +++ b/tests/queries/0_stateless/02804_clusterAllReplicas_insert.sql @@ -3,3 +3,4 @@ create table data (key Int) engine=Memory(); -- NOTE: internal_replication is false, so INSERT will be done only into one shard insert into function clusterAllReplicas(test_cluster_two_shards, currentDatabase(), data, rand()) values (2); select * from data order by key; +drop table data; From 2a8c7d0ea23e2b7a41d03d32b0fb44513fa309e0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 8 Jul 2023 21:52:19 +0300 Subject: [PATCH 1381/1997] Update src/Parsers/ParserCreateQuery.cpp Co-authored-by: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> --- src/Parsers/ParserCreateQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index c4c02ab7417..415d3321eb5 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -304,7 +304,7 @@ bool ParserTablePropertiesDeclarationList::parseImpl(Pos & pos, ASTPtr & node, E for (const auto & elem : list->children) { - if (auto *cd = elem->as()) + if (auto * cd = elem->as()) { if 
(cd->primary_key_specifier) { From a10aa9ad50db5bd3b95a7ebe4ccce4bf10c8e1f6 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 3 May 2023 10:09:11 +0200 Subject: [PATCH 1382/1997] Force libunwind usage (removes gcc_eh support) libunwind is reentrant and signal safe, and works faster than gcc_eh (plus it has some custom patches for problems that have been found during its usage in ClickHouse). gcc_eh may be missing in the system (if gcc was not installed), and even if it exists, clickhouse uses -nodefaultlibs, so some care should be taken to make it work. Also this library is tiny and there shouldn't be any problem requiring it always (there is already a tendency to require some contrib libraries, e.g. poco). Signed-off-by: Azat Khuzhin --- CMakeLists.txt | 1 - cmake/darwin/default_libs.cmake | 1 + cmake/target.cmake | 1 - cmake/unwind.cmake | 14 +------------- contrib/jemalloc-cmake/CMakeLists.txt | 17 +++++++---------- contrib/libcxx-cmake/CMakeLists.txt | 4 +--- contrib/libcxxabi-cmake/CMakeLists.txt | 6 ++---- docker/test/fasttest/run.sh | 1 - docs/en/development/build-cross-riscv.md | 2 +- programs/server/Server.cpp | 6 +----- src/Common/QueryProfiler.cpp | 8 ++++---- src/Common/QueryProfiler.h | 4 ++-- src/Common/StackTrace.cpp | 9 +-------- src/Common/config.h.in | 1 - .../System/StorageSystemBuildOptions.cpp.in | 1 - 15 files changed, 21 insertions(+), 55 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 06ee98b5ee1..45c3c422d7a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -87,7 +87,6 @@ if (ENABLE_FUZZING) set (ENABLE_CLICKHOUSE_ODBC_BRIDGE OFF) set (ENABLE_LIBRARIES 0) set (ENABLE_SSL 1) - set (USE_UNWIND ON) set (ENABLE_EMBEDDED_COMPILER 0) set (ENABLE_EXAMPLES 0) set (ENABLE_UTILS 0) diff --git a/cmake/darwin/default_libs.cmake b/cmake/darwin/default_libs.cmake index 812847e6201..42b8473cb75 100644 --- a/cmake/darwin/default_libs.cmake +++ b/cmake/darwin/default_libs.cmake @@ -15,6 +15,7 @@ set(CMAKE_OSX_DEPLOYMENT_TARGET 10.15) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) +include (cmake/unwind.cmake) include (cmake/cxx.cmake) link_libraries(global-group) diff --git a/cmake/target.cmake b/cmake/target.cmake index 0791da87bf0..ffab08f1103 100644 --- a/cmake/target.cmake +++ b/cmake/target.cmake @@ -40,7 +40,6 @@ if (CMAKE_CROSSCOMPILING) set (OPENSSL_NO_ASM ON CACHE INTERNAL "") set (ENABLE_JEMALLOC ON CACHE INTERNAL "") set (ENABLE_PARQUET OFF CACHE INTERNAL "") - set (USE_UNWIND OFF CACHE INTERNAL "") set (ENABLE_GRPC OFF CACHE INTERNAL "") set (ENABLE_HDFS OFF CACHE INTERNAL "") set (ENABLE_MYSQL OFF CACHE INTERNAL "") diff --git a/cmake/unwind.cmake b/cmake/unwind.cmake index c9f5f30a5d6..84e4f01b752 100644 --- a/cmake/unwind.cmake +++ b/cmake/unwind.cmake @@ -1,13 +1 @@ -option (USE_UNWIND "Enable libunwind (better stacktraces)" ${ENABLE_LIBRARIES}) - -if (USE_UNWIND) - add_subdirectory(contrib/libunwind-cmake) - set (UNWIND_LIBRARIES unwind) - set (EXCEPTION_HANDLING_LIBRARY ${UNWIND_LIBRARIES}) - - message (STATUS "Using libunwind: ${UNWIND_LIBRARIES}") -else () - set (EXCEPTION_HANDLING_LIBRARY gcc_eh) -endif () - -message (STATUS "Using exception handler: ${EXCEPTION_HANDLING_LIBRARY}") +add_subdirectory(contrib/libunwind-cmake) diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index 97f723bb540..20025dfc63e 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -170,16 +170,13 @@ endif () target_compile_definitions(_jemalloc PRIVATE
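# (jemalloc's profiler stays enabled; its backtraces now always come from LLVM libunwind, as the note below explains)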
-DJEMALLOC_PROF=1) -if (USE_UNWIND) - # jemalloc provides support for two different libunwind flavors: the original HP libunwind and the one coming with gcc / g++ / libstdc++. - # The latter is identified by `JEMALLOC_PROF_LIBGCC` and uses `_Unwind_Backtrace` method instead of `unw_backtrace`. - # At the time ClickHouse uses LLVM libunwind which follows libgcc's way of backtracing. - - # ClickHouse has to provide `unw_backtrace` method by the means of [commit 8e2b31e](https://github.com/ClickHouse/libunwind/commit/8e2b31e766dd502f6df74909e04a7dbdf5182eb1). - - target_compile_definitions (_jemalloc PRIVATE -DJEMALLOC_PROF_LIBGCC=1) - target_link_libraries (_jemalloc PRIVATE unwind) -endif () +# jemalloc provides support for two different libunwind flavors: the original HP libunwind and the one coming with gcc / g++ / libstdc++. +# The latter is identified by `JEMALLOC_PROF_LIBGCC` and uses `_Unwind_Backtrace` method instead of `unw_backtrace`. +# At the time ClickHouse uses LLVM libunwind which follows libgcc's way of backtracing. +# +# ClickHouse has to provide `unw_backtrace` method by the means of [commit 8e2b31e](https://github.com/ClickHouse/libunwind/commit/8e2b31e766dd502f6df74909e04a7dbdf5182eb1). +target_compile_definitions (_jemalloc PRIVATE -DJEMALLOC_PROF_LIBGCC=1) +target_link_libraries (_jemalloc PRIVATE unwind) # for RTLD_NEXT target_compile_options(_jemalloc PRIVATE -D_GNU_SOURCE) diff --git a/contrib/libcxx-cmake/CMakeLists.txt b/contrib/libcxx-cmake/CMakeLists.txt index a13e4f0f60a..b7e59e2c9a3 100644 --- a/contrib/libcxx-cmake/CMakeLists.txt +++ b/contrib/libcxx-cmake/CMakeLists.txt @@ -61,9 +61,7 @@ target_include_directories(cxx SYSTEM BEFORE PUBLIC $<$:$ target_compile_definitions(cxx PRIVATE -D_LIBCPP_BUILDING_LIBRARY -DLIBCXX_BUILDING_LIBCXXABI) # Enable capturing stack traces for all exceptions. -if (USE_UNWIND) - target_compile_definitions(cxx PUBLIC -DSTD_EXCEPTION_HAS_STACK_TRACE=1) -endif () +target_compile_definitions(cxx PUBLIC -DSTD_EXCEPTION_HAS_STACK_TRACE=1) if (USE_MUSL) target_compile_definitions(cxx PUBLIC -D_LIBCPP_HAS_MUSL_LIBC=1) diff --git a/contrib/libcxxabi-cmake/CMakeLists.txt b/contrib/libcxxabi-cmake/CMakeLists.txt index 0473527912e..c7ee34e6e28 100644 --- a/contrib/libcxxabi-cmake/CMakeLists.txt +++ b/contrib/libcxxabi-cmake/CMakeLists.txt @@ -35,12 +35,10 @@ target_include_directories(cxxabi SYSTEM BEFORE ) target_compile_definitions(cxxabi PRIVATE -D_LIBCPP_BUILDING_LIBRARY) target_compile_options(cxxabi PRIVATE -nostdinc++ -fno-sanitize=undefined -Wno-macro-redefined) # If we don't disable UBSan, infinite recursion happens in dynamic_cast. -target_link_libraries(cxxabi PUBLIC ${EXCEPTION_HANDLING_LIBRARY}) +target_link_libraries(cxxabi PUBLIC unwind) # Enable capturing stack traces for all exceptions. 
-if (USE_UNWIND) - target_compile_definitions(cxxabi PUBLIC -DSTD_EXCEPTION_HAS_STACK_TRACE=1) -endif () +target_compile_definitions(cxxabi PUBLIC -DSTD_EXCEPTION_HAS_STACK_TRACE=1) install( TARGETS cxxabi diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 989ed9d2fbb..828c73e6781 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -166,7 +166,6 @@ function run_cmake "-DENABLE_UTILS=0" "-DENABLE_EMBEDDED_COMPILER=0" "-DENABLE_THINLTO=0" - "-DUSE_UNWIND=1" "-DENABLE_NURAFT=1" "-DENABLE_SIMDJSON=1" "-DENABLE_JEMALLOC=1" diff --git a/docs/en/development/build-cross-riscv.md b/docs/en/development/build-cross-riscv.md index e3550a046c7..c21353f7f73 100644 --- a/docs/en/development/build-cross-riscv.md +++ b/docs/en/development/build-cross-riscv.md @@ -23,7 +23,7 @@ sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" ``` bash cd ClickHouse mkdir build-riscv64 -CC=clang-16 CXX=clang++-16 cmake . -Bbuild-riscv64 -G Ninja -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-riscv64.cmake -DGLIBC_COMPATIBILITY=OFF -DENABLE_LDAP=OFF -DOPENSSL_NO_ASM=ON -DENABLE_JEMALLOC=ON -DENABLE_PARQUET=OFF -DUSE_UNWIND=OFF -DENABLE_GRPC=OFF -DENABLE_HDFS=OFF -DENABLE_MYSQL=OFF +CC=clang-16 CXX=clang++-16 cmake . -Bbuild-riscv64 -G Ninja -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-riscv64.cmake -DGLIBC_COMPATIBILITY=OFF -DENABLE_LDAP=OFF -DOPENSSL_NO_ASM=ON -DENABLE_JEMALLOC=ON -DENABLE_PARQUET=OFF -DENABLE_GRPC=OFF -DENABLE_HDFS=OFF -DENABLE_MYSQL=OFF ninja -C build-riscv64 ``` diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 7fbbcd39446..071f7d3177e 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1627,7 +1627,7 @@ try /// Init trace collector only after trace_log system table was created /// Disable it if we collect test coverage information, because it will work extremely slow. -#if USE_UNWIND && !WITH_COVERAGE +#if !WITH_COVERAGE /// Profilers cannot work reliably with any other libunwind or without PHDR cache. if (hasPHDRCache()) { @@ -1650,10 +1650,6 @@ try /// Describe multiple reasons when query profiler cannot work. -#if !USE_UNWIND - LOG_INFO(log, "Query Profiler and TraceCollector are disabled because they cannot work without bundled unwind (stack unwinding) library."); -#endif - #if WITH_COVERAGE LOG_INFO(log, "Query Profiler and TraceCollector are disabled because they work extremely slow with test coverage."); #endif diff --git a/src/Common/QueryProfiler.cpp b/src/Common/QueryProfiler.cpp index 313d4b77739..dc9f3610513 100644 --- a/src/Common/QueryProfiler.cpp +++ b/src/Common/QueryProfiler.cpp @@ -91,7 +91,7 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; } -#if USE_UNWIND +#ifndef __APPLE__ Timer::Timer() : log(&Poco::Logger::get("Timer")) {} @@ -209,13 +209,13 @@ QueryProfilerBase::QueryProfilerBase(UInt64 thread_id, int clock_t UNUSED(pause_signal); throw Exception(ErrorCodes::NOT_IMPLEMENTED, "QueryProfiler disabled because they cannot work under sanitizers"); -#elif !USE_UNWIND +#elif defined(__APPLE__) UNUSED(thread_id); UNUSED(clock_type); UNUSED(period); UNUSED(pause_signal); - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "QueryProfiler cannot work with stock libunwind"); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "QueryProfiler cannot work on OSX"); #else /// Sanity check. 
if (!hasPHDRCache()) @@ -264,7 +264,7 @@ QueryProfilerBase::~QueryProfilerBase() template void QueryProfilerBase::cleanup() { -#if USE_UNWIND +#ifndef __APPLE__ timer.stop(); signal_handler_disarmed = true; #endif diff --git a/src/Common/QueryProfiler.h b/src/Common/QueryProfiler.h index 6a9ed10e315..87432a4b699 100644 --- a/src/Common/QueryProfiler.h +++ b/src/Common/QueryProfiler.h @@ -28,7 +28,7 @@ namespace DB * Note that signal handler implementation is defined by template parameter. See QueryProfilerReal and QueryProfilerCPU. */ -#if USE_UNWIND +#ifndef __APPLE__ class Timer { public: @@ -60,7 +60,7 @@ private: Poco::Logger * log; -#if USE_UNWIND +#ifndef __APPLE__ inline static thread_local Timer timer = Timer(); #endif diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index aea0f854fe1..c13b63854e4 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -20,13 +20,10 @@ #include #include #include +#include #include "config.h" -#if USE_UNWIND -# include -#endif - namespace { /// Currently this variable is set up once on server startup. @@ -287,12 +284,8 @@ StackTrace::StackTrace(const ucontext_t & signal_context) void StackTrace::tryCapture() { -#if USE_UNWIND size = unw_backtrace(frame_pointers.data(), capacity); __msan_unpoison(frame_pointers.data(), size * sizeof(frame_pointers[0])); -#else - size = 0; -#endif } /// ClickHouse uses bundled libc++ so type names will be the same on every system thus it's safe to hardcode them diff --git a/src/Common/config.h.in b/src/Common/config.h.in index 71b4e098c8f..1cb13d3ae3e 100644 --- a/src/Common/config.h.in +++ b/src/Common/config.h.in @@ -9,7 +9,6 @@ #cmakedefine01 USE_AWS_S3 #cmakedefine01 USE_AZURE_BLOB_STORAGE #cmakedefine01 USE_BROTLI -#cmakedefine01 USE_UNWIND #cmakedefine01 USE_CASSANDRA #cmakedefine01 USE_SENTRY #cmakedefine01 USE_GRPC diff --git a/src/Storages/System/StorageSystemBuildOptions.cpp.in b/src/Storages/System/StorageSystemBuildOptions.cpp.in index c2d35c96ce5..4e7a25d7726 100644 --- a/src/Storages/System/StorageSystemBuildOptions.cpp.in +++ b/src/Storages/System/StorageSystemBuildOptions.cpp.in @@ -23,7 +23,6 @@ const char * auto_config_build[] "USE_EMBEDDED_COMPILER", "@USE_EMBEDDED_COMPILER@", "USE_GLIBC_COMPATIBILITY", "@GLIBC_COMPATIBILITY@", "USE_JEMALLOC", "@ENABLE_JEMALLOC@", - "USE_UNWIND", "@USE_UNWIND@", "USE_ICU", "@USE_ICU@", "USE_H3", "@USE_H3@", "USE_MYSQL", "@USE_MYSQL@", From 45d36b736a8d6b207fb9cf88f8f0ba8f2a7e0ce6 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 8 Jul 2023 23:14:02 +0000 Subject: [PATCH 1383/1997] Update version_date.tsv and changelogs after v23.6.2.18-stable --- docker/keeper/Dockerfile | 2 +- docker/server/Dockerfile.alpine | 2 +- docker/server/Dockerfile.ubuntu | 2 +- docs/changelogs/v23.6.2.18-stable.md | 25 +++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 2 ++ 5 files changed, 30 insertions(+), 3 deletions(-) create mode 100644 docs/changelogs/v23.6.2.18-stable.md diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index f13fcdc14d6..8a6324aef88 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ esac ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release" -ARG VERSION="23.6.1.1524" +ARG VERSION="23.6.2.18" ARG PACKAGES="clickhouse-keeper" # user/group precreated explicitly with fixed uid/gid on purpose. 
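Stepping back to the libunwind patch for a moment: the StackTrace::tryCapture() hunk above now calls unw_backtrace() unconditionally, with no gcc_eh fallback left. A sketch of what that call does in isolation, assuming a libunwind build that exports unw_backtrace (LLVM libunwind does so via the ClickHouse patch referenced in the jemalloc comment); link with -lunwind:

// Sketch: capturing raw frame pointers with libunwind's unw_backtrace(),
// mirroring what StackTrace::tryCapture() does before any symbolization.
// Assumes unw_backtrace() is exported by the libunwind in use.
#include <libunwind.h>
#include <array>
#include <cstdio>

int main()
{
    std::array<void *, 45> frames{}; // a small fixed capacity, in the spirit of StackTrace
    int size = unw_backtrace(frames.data(), static_cast<int>(frames.size()));
    for (int i = 0; i < size; ++i)
        std::printf("#%d %p\n", i, frames[i]); // raw addresses; symbol resolution is a separate step
    return 0;
}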
diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 5e5be3f6d73..7f453627601 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="23.6.1.1524" +ARG VERSION="23.6.2.18" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 8693193455f..1fa7b83ae16 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -23,7 +23,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="23.6.1.1524" +ARG VERSION="23.6.2.18" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docs/changelogs/v23.6.2.18-stable.md b/docs/changelogs/v23.6.2.18-stable.md new file mode 100644 index 00000000000..1f872a190ba --- /dev/null +++ b/docs/changelogs/v23.6.2.18-stable.md @@ -0,0 +1,25 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.6.2.18-stable (89f39a7ccfe) FIXME as compared to v23.6.1.1524-stable (d1c7e13d088) + +#### Build/Testing/Packaging Improvement +* Backported in [#51888](https://github.com/ClickHouse/ClickHouse/issues/51888): Update cargo dependencies. [#51721](https://github.com/ClickHouse/ClickHouse/pull/51721) ([Raúl Marín](https://github.com/Algunenano)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix reading from empty column in `parseSipHashKey` [#51804](https://github.com/ClickHouse/ClickHouse/pull/51804) ([Nikita Taranov](https://github.com/nickitat)). +* Allow parametric UDFs [#51964](https://github.com/ClickHouse/ClickHouse/pull/51964) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Remove the usage of Analyzer setting in the client [#51578](https://github.com/ClickHouse/ClickHouse/pull/51578) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix 02116_tuple_element with Analyzer [#51669](https://github.com/ClickHouse/ClickHouse/pull/51669) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix SQLLogic docker images [#51719](https://github.com/ClickHouse/ClickHouse/pull/51719) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix source image for sqllogic [#51728](https://github.com/ClickHouse/ClickHouse/pull/51728) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Pin for docker-ce [#51743](https://github.com/ClickHouse/ClickHouse/pull/51743) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+ diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 2a098d8c1da..dd46f6103d0 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,3 +1,4 @@ +v23.6.2.18-stable 2023-07-09 v23.6.1.1524-stable 2023-06-30 v23.5.4.25-stable 2023-06-29 v23.5.3.24-stable 2023-06-17 @@ -55,6 +56,7 @@ v22.9.4.32-stable 2022-10-26 v22.9.3.18-stable 2022-09-30 v22.9.2.7-stable 2022-09-23 v22.9.1.2603-stable 2022-09-22 +v22.8.20.11-lts 2023-07-09 v22.8.19.10-lts 2023-06-17 v22.8.18.31-lts 2023-06-12 v22.8.17.17-lts 2023-04-22 From c968fe808fc1b7693e53bb3d4f9adc03f41c7066 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 8 Jul 2023 23:17:41 +0000 Subject: [PATCH 1384/1997] Update version_date.tsv and changelogs after v22.8.20.11-lts --- docker/keeper/Dockerfile | 2 +- docker/server/Dockerfile.alpine | 2 +- docker/server/Dockerfile.ubuntu | 2 +- docs/changelogs/v22.8.20.11-lts.md | 20 ++++++++++++++++++++ utils/list-versions/version_date.tsv | 2 ++ 5 files changed, 25 insertions(+), 3 deletions(-) create mode 100644 docs/changelogs/v22.8.20.11-lts.md diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index f13fcdc14d6..8a6324aef88 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ esac ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release" -ARG VERSION="23.6.1.1524" +ARG VERSION="23.6.2.18" ARG PACKAGES="clickhouse-keeper" # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 5e5be3f6d73..7f453627601 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="23.6.1.1524" +ARG VERSION="23.6.2.18" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 8693193455f..1fa7b83ae16 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -23,7 +23,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="23.6.1.1524" +ARG VERSION="23.6.2.18" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docs/changelogs/v22.8.20.11-lts.md b/docs/changelogs/v22.8.20.11-lts.md new file mode 100644 index 00000000000..bd45ce9319a --- /dev/null +++ b/docs/changelogs/v22.8.20.11-lts.md @@ -0,0 +1,20 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v22.8.20.11-lts (c9ca79e24e8) FIXME as compared to v22.8.19.10-lts (989bc2fe8b0) + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix broken index analysis when binary operator contains a null constant argument [#50177](https://github.com/ClickHouse/ClickHouse/pull/50177) ([Amos Bird](https://github.com/amosbird)). 
+* Fix incorrect constant folding [#50536](https://github.com/ClickHouse/ClickHouse/pull/50536) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix fuzzer failure in ActionsDAG [#51301](https://github.com/ClickHouse/ClickHouse/pull/51301) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix segfault in MathUnary [#51499](https://github.com/ClickHouse/ClickHouse/pull/51499) ([Ilya Yatsishin](https://github.com/qoega)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Decoupled commits from [#51180](https://github.com/ClickHouse/ClickHouse/issues/51180) for backports [#51561](https://github.com/ClickHouse/ClickHouse/pull/51561) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 2a098d8c1da..dd46f6103d0 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,3 +1,4 @@ +v23.6.2.18-stable 2023-07-09 v23.6.1.1524-stable 2023-06-30 v23.5.4.25-stable 2023-06-29 v23.5.3.24-stable 2023-06-17 @@ -55,6 +56,7 @@ v22.9.4.32-stable 2022-10-26 v22.9.3.18-stable 2022-09-30 v22.9.2.7-stable 2022-09-23 v22.9.1.2603-stable 2022-09-22 +v22.8.20.11-lts 2023-07-09 v22.8.19.10-lts 2023-06-17 v22.8.18.31-lts 2023-06-12 v22.8.17.17-lts 2023-04-22 From 62bfa4ed93fb3796eccb0df041a9dfa057583c9b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 9 Jul 2023 02:21:48 +0200 Subject: [PATCH 1385/1997] Fix performance test for regexp cache --- src/Functions/Regexps.h | 4 +++- tests/performance/re2_regex_caching.xml | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/Functions/Regexps.h b/src/Functions/Regexps.h index 4bfd10bdbf5..aa8ae5b4054 100644 --- a/src/Functions/Regexps.h +++ b/src/Functions/Regexps.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -21,6 +22,7 @@ # include #endif + namespace ProfileEvents { extern const Event RegexpCreated; @@ -86,7 +88,7 @@ public: private: constexpr static size_t CACHE_SIZE = 100; /// collision probability - std::hash hasher; + DefaultHash hasher; struct Bucket { String pattern; /// key diff --git a/tests/performance/re2_regex_caching.xml b/tests/performance/re2_regex_caching.xml index 6edc83097ba..9778a8d4c0c 100644 --- a/tests/performance/re2_regex_caching.xml +++ b/tests/performance/re2_regex_caching.xml @@ -24,8 +24,8 @@ '.*' || toString(number) || '.' '.*' || toString(number % 10) || '.' 
- - '([a-zA-Z][a-zA-Z0-9]*)://([^ /]+)(/[^ ]*)?([^ @]+)@([^ @]+)([0-9][0-9]?)/([0-9][0-9]?)/([0-9][0-9]([0-9][0-9])?)(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])' || toString(number) + + '([a-zA-Z][a-zA-Z0-9]*)://([^ /]+)(/[^ ]*)?([^ @]+)@([^ @]+)([0-9][0-9]?)/([0-9][0-9]?)/([0-9][0-9]([0-9][0-9])?)(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])' || toString(number % 10) + + s3 + s3_common_disk/ + http://localhost:11111/test/common/ + clickhouse + clickhouse + 20000 + s3 s3_disk/ diff --git a/tests/queries/0_stateless/02801_backup_native_copy.reference b/tests/queries/0_stateless/02801_backup_native_copy.reference new file mode 100644 index 00000000000..659df5e9b25 --- /dev/null +++ b/tests/queries/0_stateless/02801_backup_native_copy.reference @@ -0,0 +1,4 @@ +BACKUP TABLE data TO S3(s3_conn, \'backups/default/data_native_copy\') SETTINGS native_copy = 1 1 +BACKUP TABLE data TO S3(s3_conn, \'backups/default/data_no_native_copy\') SETTINGS native_copy = 0 0 +RESTORE TABLE data AS data_native_copy FROM S3(s3_conn, \'backups/default/data_native_copy\') SETTINGS native_copy = 1 1 +RESTORE TABLE data AS data_no_native_copy FROM S3(s3_conn, \'backups/default/data_no_native_copy\') SETTINGS native_copy = 0 0 diff --git a/tests/queries/0_stateless/02801_backup_native_copy.sh b/tests/queries/0_stateless/02801_backup_native_copy.sh new file mode 100755 index 00000000000..966d7ae9ce8 --- /dev/null +++ b/tests/queries/0_stateless/02801_backup_native_copy.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash +# Tags: no-fasttest +# Tag: no-fasttest - requires S3 + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +set -e + +$CLICKHOUSE_CLIENT -nm -q " + drop table if exists data; + create table data (key Int) engine=MergeTree() order by tuple() settings disk='s3_common_disk'; + insert into data select * from numbers(10); +" + +query_id=$(random_str 10) +$CLICKHOUSE_CLIENT --format Null --query_id $query_id -q "BACKUP TABLE data TO S3(s3_conn, 'backups/$CLICKHOUSE_DATABASE/data_native_copy') SETTINGS native_copy=true" +$CLICKHOUSE_CLIENT -nm -q " + SYSTEM FLUSH LOGS; + SELECT query, ProfileEvents['S3CopyObject']>0 FROM system.query_log WHERE type = 'QueryFinish' AND event_date >= yesterday() AND current_database = '$CLICKHOUSE_DATABASE' AND query_id = '$query_id' +" + +query_id=$(random_str 10) +$CLICKHOUSE_CLIENT --format Null --query_id $query_id -q "BACKUP TABLE data TO S3(s3_conn, 'backups/$CLICKHOUSE_DATABASE/data_no_native_copy') SETTINGS native_copy=false" +$CLICKHOUSE_CLIENT -nm -q " + SYSTEM FLUSH LOGS; + SELECT query, ProfileEvents['S3CopyObject']>0 FROM system.query_log WHERE type = 'QueryFinish' AND event_date >= yesterday() AND current_database = '$CLICKHOUSE_DATABASE' AND query_id = '$query_id' +" + +query_id=$(random_str 10) +$CLICKHOUSE_CLIENT --send_logs_level=error --format Null --query_id $query_id -q "RESTORE TABLE data AS data_native_copy FROM S3(s3_conn, 'backups/$CLICKHOUSE_DATABASE/data_native_copy') SETTINGS native_copy=true" +$CLICKHOUSE_CLIENT -nm -q " + SYSTEM FLUSH LOGS; + SELECT query, ProfileEvents['S3CopyObject']>0 FROM system.query_log WHERE type = 'QueryFinish' AND event_date >= yesterday() AND current_database = '$CLICKHOUSE_DATABASE' AND query_id = '$query_id' +" + +query_id=$(random_str 10) +$CLICKHOUSE_CLIENT --send_logs_level=error --format Null --query_id $query_id -q "RESTORE TABLE data AS data_no_native_copy FROM 
S3(s3_conn, 'backups/$CLICKHOUSE_DATABASE/data_no_native_copy') SETTINGS native_copy=false" +$CLICKHOUSE_CLIENT -nm -q " + SYSTEM FLUSH LOGS; + SELECT query, ProfileEvents['S3CopyObject']>0 FROM system.query_log WHERE type = 'QueryFinish' AND event_date >= yesterday() AND current_database = '$CLICKHOUSE_DATABASE' AND query_id = '$query_id' +" From 5835e72fd6d5dd0225a0dda2f81887d6f61015fb Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 27 Jun 2023 16:20:27 +0200 Subject: [PATCH 1395/1997] More generic approach to disable native copy The previous patch implements this only for BACKUP/RESTORE, but it can be useful for regular disks as well, so add allow_native_copy for disks. Note that there is no s3_allow_native_copy query setting, since it would be redundant: it makes sense only for S3 disks, and not on a per-query basis. Signed-off-by: Azat Khuzhin --- src/Backups/BackupIO_S3.cpp | 34 +++++++++++++------ src/Backups/BackupIO_S3.h | 6 ++-- .../ObjectStorages/S3/S3ObjectStorage.cpp | 14 ++++++-- src/IO/S3/copyS3File.cpp | 22 +++++++++++- src/IO/S3/copyS3File.h | 20 +++++++++-- src/Storages/StorageS3Settings.cpp | 2 ++ src/Storages/StorageS3Settings.h | 1 + 7 files changed, 80 insertions(+), 19 deletions(-) diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index 01e6bc78949..6531948c872 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -101,15 +101,16 @@ namespace BackupReaderS3::BackupReaderS3( - const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool native_copy_, const ContextPtr & context_) + const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool native_copy, const ContextPtr & context_) : BackupReaderDefault(&Poco::Logger::get("BackupReaderS3"), context_) , s3_uri(s3_uri_) , client(makeS3Client(s3_uri_, access_key_id_, secret_access_key_, context_)) , request_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString()).request_settings) , data_source_description{DataSourceType::S3, s3_uri.endpoint, false, false} - , native_copy(native_copy_) { + request_settings.updateFromSettings(context_->getSettingsRef()); request_settings.max_single_read_retries = context_->getSettingsRef().s3_max_single_read_retries; // FIXME: Avoid taking value for endpoint + request_settings.allow_native_copy = native_copy; } BackupReaderS3::~BackupReaderS3() = default; @@ -139,11 +140,10 @@ void BackupReaderS3::copyFileToDisk(const String & path_in_backup, size_t file_s /// Use the native copy as a more optimal way to copy a file from S3 to S3 if it's possible. /// We don't check for `has_throttling` here because the native copy almost doesn't use network. auto destination_data_source_description = destination_disk->getDataSourceDescription(); - if (native_copy && destination_data_source_description.sameKind(data_source_description) + if (destination_data_source_description.sameKind(data_source_description) && (destination_data_source_description.is_encrypted == encrypted_in_backup)) { - /// Use native copy, the more optimal way.
- LOG_TRACE(log, "Copying {} from S3 to disk {} using native copy", path_in_backup, destination_disk->getName()); + LOG_TRACE(log, "Copying {} from S3 to disk {}", path_in_backup, destination_disk->getName()); auto write_blob_function = [&](const Strings & blob_path, WriteMode mode, const std::optional & object_attributes) -> size_t { /// Object storage always uses mode `Rewrite` because it simulates append using metadata and different files. @@ -152,7 +152,13 @@ void BackupReaderS3::copyFileToDisk(const String & path_in_backup, size_t file_s "Blob writing function called with unexpected blob_path.size={} or mode={}", blob_path.size(), mode); + auto create_read_buffer = [this, path_in_backup] + { + return readFile(path_in_backup); + }; + copyS3File( + create_read_buffer, client, s3_uri.bucket, fs::path(s3_uri.key) / path_in_backup, @@ -178,16 +184,16 @@ void BackupReaderS3::copyFileToDisk(const String & path_in_backup, size_t file_s BackupWriterS3::BackupWriterS3( - const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool native_copy_, const ContextPtr & context_) + const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool native_copy, const ContextPtr & context_) : BackupWriterDefault(&Poco::Logger::get("BackupWriterS3"), context_) , s3_uri(s3_uri_) , client(makeS3Client(s3_uri_, access_key_id_, secret_access_key_, context_)) , request_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString()).request_settings) , data_source_description{DataSourceType::S3, s3_uri.endpoint, false, false} - , native_copy(native_copy_) { request_settings.updateFromSettings(context_->getSettingsRef()); request_settings.max_single_read_retries = context_->getSettingsRef().s3_max_single_read_retries; // FIXME: Avoid taking value for endpoint + request_settings.allow_native_copy = native_copy; } void BackupWriterS3::copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path, @@ -196,15 +202,23 @@ void BackupWriterS3::copyFileFromDisk(const String & path_in_backup, DiskPtr src /// Use the native copy as a more optimal way to copy a file from S3 to S3 if it's possible. /// We don't check for `has_throttling` here because the native copy almost doesn't use network. auto source_data_source_description = src_disk->getDataSourceDescription(); - if (native_copy && source_data_source_description.sameKind(data_source_description) && (source_data_source_description.is_encrypted == copy_encrypted)) + if (source_data_source_description.sameKind(data_source_description) && (source_data_source_description.is_encrypted == copy_encrypted)) { /// getBlobPath() can return more than 3 elements if the file is stored as multiple objects in S3 bucket. /// In this case we can't use the native copy. if (auto blob_path = src_disk->getBlobPath(src_path); blob_path.size() == 2) { - /// Use native copy, the more optimal way. 
- LOG_TRACE(log, "Copying file {} from disk {} to S3 using native copy", src_path, src_disk->getName()); + auto create_read_buffer = [src_disk, src_path, copy_encrypted, settings = read_settings.adjustBufferSize(start_pos + length)] + { + if (copy_encrypted) + return src_disk->readEncryptedFile(src_path, settings); + else + return src_disk->readFile(src_path, settings); + }; + + LOG_TRACE(log, "Copying file {} from disk {} to S3", src_path, src_disk->getName()); copyS3File( + create_read_buffer, client, /* src_bucket */ blob_path[1], /* src_key= */ blob_path[0], diff --git a/src/Backups/BackupIO_S3.h b/src/Backups/BackupIO_S3.h index d02e45370f9..16b2abfea3d 100644 --- a/src/Backups/BackupIO_S3.h +++ b/src/Backups/BackupIO_S3.h @@ -17,7 +17,7 @@ namespace DB class BackupReaderS3 : public BackupReaderDefault { public: - BackupReaderS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool native_copy_, const ContextPtr & context_); + BackupReaderS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool native_copy, const ContextPtr & context_); ~BackupReaderS3() override; bool fileExists(const String & file_name) override; @@ -32,14 +32,13 @@ private: const std::shared_ptr client; S3Settings::RequestSettings request_settings; const DataSourceDescription data_source_description; - const bool native_copy; }; class BackupWriterS3 : public BackupWriterDefault { public: - BackupWriterS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool native_copy_, const ContextPtr & context_); + BackupWriterS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool native_copy, const ContextPtr & context_); ~BackupWriterS3() override; bool fileExists(const String & file_name) override; @@ -62,7 +61,6 @@ private: S3Settings::RequestSettings request_settings; std::optional supports_batch_delete; const DataSourceDescription data_source_description; - const bool native_copy; }; } diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index e46ca3d0828..3c19af188dc 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -435,7 +435,12 @@ void S3ObjectStorage::copyObjectToAnotherObjectStorage( // NOLINT auto settings_ptr = s3_settings.get(); auto size = S3::getObjectSize(*client_ptr, bucket, object_from.remote_path, {}, settings_ptr->request_settings, /* for_disk_s3= */ true); auto scheduler = threadPoolCallbackRunner(getThreadPoolWriter(), "S3ObjStor_copy"); - copyS3File(client_ptr, bucket, object_from.remote_path, 0, size, dest_s3->bucket, object_to.remote_path, + auto create_read_buffer = [this, object_from] + { + return readObject(object_from); + }; + + copyS3File(create_read_buffer, client_ptr, bucket, object_from.remote_path, 0, size, dest_s3->bucket, object_to.remote_path, settings_ptr->request_settings, object_to_attributes, scheduler, /* for_disk_s3= */ true); } else @@ -451,7 +456,12 @@ void S3ObjectStorage::copyObject( // NOLINT auto settings_ptr = s3_settings.get(); auto size = S3::getObjectSize(*client_ptr, bucket, object_from.remote_path, {}, settings_ptr->request_settings, /* for_disk_s3= */ true); auto scheduler = threadPoolCallbackRunner(getThreadPoolWriter(), "S3ObjStor_copy"); - copyS3File(client_ptr, bucket, object_from.remote_path, 0, size, bucket, object_to.remote_path, + auto create_read_buffer = [this, 
object_from] + { + return readObject(object_from); + }; + + copyS3File(create_read_buffer, client_ptr, bucket, object_from.remote_path, 0, size, bucket, object_to.remote_path, settings_ptr->request_settings, object_to_attributes, scheduler, /* for_disk_s3= */ true); } diff --git a/src/IO/S3/copyS3File.cpp b/src/IO/S3/copyS3File.cpp index 7886b84cd00..3f18d3b2145 100644 --- a/src/IO/S3/copyS3File.cpp +++ b/src/IO/S3/copyS3File.cpp @@ -809,7 +809,7 @@ void copyDataToS3File( } -void copyS3File( +void copyS3FileNative( const std::shared_ptr & s3_client, const String & src_bucket, const String & src_key, @@ -826,6 +826,26 @@ void copyS3File( helper.performCopy(); } +void copyS3File( + const CreateReadBuffer & create_read_buffer, + const std::shared_ptr & s3_client, + const String & src_bucket, + const String & src_key, + size_t src_offset, + size_t src_size, + const String & dest_bucket, + const String & dest_key, + const S3Settings::RequestSettings & settings, + const std::optional> & object_metadata, + ThreadPoolCallbackRunner schedule, + bool for_disk_s3) +{ + if (settings.allow_native_copy) + copyS3FileNative(s3_client, src_bucket, src_key, src_offset, src_size, dest_bucket, dest_key, settings, object_metadata, schedule, for_disk_s3); + else + copyDataToS3File(create_read_buffer, src_offset, src_size, s3_client, dest_bucket, dest_key, settings, object_metadata, schedule, for_disk_s3); +} + } #endif diff --git a/src/IO/S3/copyS3File.h b/src/IO/S3/copyS3File.h index 618ef419a9b..d41f34c103c 100644 --- a/src/IO/S3/copyS3File.h +++ b/src/IO/S3/copyS3File.h @@ -19,9 +19,9 @@ using CreateReadBuffer = std::function()>; /// Copies a file from S3 to S3. /// The same functionality can be done by using the function copyData() and the classes ReadBufferFromS3 and WriteBufferFromS3 -/// however copyS3File() is faster and spends less network traffic and memory. +/// however copyS3FileNative() is faster and spends less network traffic and memory. /// The parameters `src_offset` and `src_size` specify a part in the source to copy. 
-void copyS3File( +void copyS3FileNative( const std::shared_ptr & s3_client, const String & src_bucket, const String & src_key, @@ -51,6 +51,22 @@ void copyDataToS3File( ThreadPoolCallbackRunner schedule_ = {}, bool for_disk_s3 = false); +/// Tries to copy file using native copy (copyS3FileNative()), if this is not +/// possible it will fallback to read-write copy (copyDataToS3File()) +void copyS3File( + const CreateReadBuffer & create_read_buffer, + const std::shared_ptr & s3_client, + const String & src_bucket, + const String & src_key, + size_t src_offset, + size_t src_size, + const String & dest_bucket, + const String & dest_key, + const S3Settings::RequestSettings & settings, + const std::optional> & object_metadata = std::nullopt, + ThreadPoolCallbackRunner schedule_ = {}, + bool for_disk_s3 = false); + } #endif diff --git a/src/Storages/StorageS3Settings.cpp b/src/Storages/StorageS3Settings.cpp index 89e6ee46b4d..0dc8d8d897b 100644 --- a/src/Storages/StorageS3Settings.cpp +++ b/src/Storages/StorageS3Settings.cpp @@ -182,6 +182,7 @@ S3Settings::RequestSettings::RequestSettings(const NamedCollection & collection) max_single_read_retries = collection.getOrDefault("max_single_read_retries", max_single_read_retries); max_connections = collection.getOrDefault("max_connections", max_connections); list_object_keys_size = collection.getOrDefault("list_object_keys_size", list_object_keys_size); + allow_native_copy = collection.getOrDefault("allow_native_copy", allow_native_copy); throw_on_zero_files_match = collection.getOrDefault("throw_on_zero_files_match", throw_on_zero_files_match); } @@ -197,6 +198,7 @@ S3Settings::RequestSettings::RequestSettings( max_connections = config.getUInt64(key + "max_connections", settings.s3_max_connections); check_objects_after_upload = config.getBool(key + "check_objects_after_upload", settings.s3_check_objects_after_upload); list_object_keys_size = config.getUInt64(key + "list_object_keys_size", settings.s3_list_object_keys_size); + allow_native_copy = config.getBool(key + "allow_native_copy", allow_native_copy); throw_on_zero_files_match = config.getBool(key + "throw_on_zero_files_match", settings.s3_throw_on_zero_files_match); retry_attempts = config.getUInt64(key + "retry_attempts", settings.s3_retry_attempts); request_timeout_ms = config.getUInt64(key + "request_timeout_ms", settings.s3_request_timeout_ms); diff --git a/src/Storages/StorageS3Settings.h b/src/Storages/StorageS3Settings.h index 991e323acb6..581665a7dc5 100644 --- a/src/Storages/StorageS3Settings.h +++ b/src/Storages/StorageS3Settings.h @@ -71,6 +71,7 @@ struct S3Settings size_t retry_attempts = 10; size_t request_timeout_ms = 3000; size_t long_request_timeout_ms = 30000; // TODO: Take this from config like request_timeout_ms + bool allow_native_copy = true; bool throw_on_zero_files_match = false; From 5d63b8be0d317af7b2ee1fdfd7dc76daeeec3afd Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 27 Jun 2023 16:48:30 +0200 Subject: [PATCH 1396/1997] Add a test for allow_native_copy using clickhouse-disks (first ever) Signed-off-by: Azat Khuzhin --- .../02802_clickhouse_disks_s3_copy.reference | 4 +++ .../02802_clickhouse_disks_s3_copy.sh | 26 +++++++++++++++++++ .../02802_clickhouse_disks_s3_copy.xml | 21 +++++++++++++++ 3 files changed, 51 insertions(+) create mode 100644 tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.reference create mode 100755 tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh create mode 100644 
tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.xml diff --git a/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.reference b/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.reference new file mode 100644 index 00000000000..96860a2f90a --- /dev/null +++ b/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.reference @@ -0,0 +1,4 @@ +s3_plain_native_copy +Single operation copy has completed. +s3_plain_no_native_copy +Single part upload has completed. diff --git a/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh b/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh new file mode 100755 index 00000000000..f879b7a5621 --- /dev/null +++ b/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +# Tags: no-fasttest +# Tag no-fasttest: requires S3 + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +config="${BASH_SOURCE[0]/.sh/.xml}" + +function run_test_for_disk() +{ + local disk=$1 && shift + + echo "$disk" + + clickhouse-disks -C "$config" --disk "$disk" write --input "$config" $CLICKHOUSE_DATABASE/test + clickhouse-disks -C "$config" --log-level test --disk "$disk" copy $CLICKHOUSE_DATABASE/test $CLICKHOUSE_DATABASE/test.copy |& { + grep -o -e "Single part upload has completed." -e "Single operation copy has completed." + } + clickhouse-disks -C "$config" --disk "$disk" remove $CLICKHOUSE_DATABASE/test + clickhouse-disks -C "$config" --disk "$disk" remove $CLICKHOUSE_DATABASE/test.copy +} + +run_test_for_disk s3_plain_native_copy +run_test_for_disk s3_plain_no_native_copy diff --git a/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.xml b/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.xml new file mode 100644 index 00000000000..d4235a70903 --- /dev/null +++ b/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.xml @@ -0,0 +1,21 @@ + + + + + s3_plain + http://localhost:11111/test/clickhouse-disks/ + clickhouse + clickhouse + true + + + + s3_plain + http://localhost:11111/test/clickhouse-disks/ + clickhouse + clickhouse + false + + + + From 84c720b33e9ffe44c79658af57f5985b38b8a728 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 28 Jun 2023 10:52:00 +0200 Subject: [PATCH 1397/1997] Beautify test_backup_restore_s3 (using per-query profile events) Signed-off-by: Azat Khuzhin --- .../test_backup_restore_s3/test.py | 148 +++++++++--------- 1 file changed, 75 insertions(+), 73 deletions(-) diff --git a/tests/integration/test_backup_restore_s3/test.py b/tests/integration/test_backup_restore_s3/test.py index 0285500d044..bb14fa4824b 100644 --- a/tests/integration/test_backup_restore_s3/test.py +++ b/tests/integration/test_backup_restore_s3/test.py @@ -2,6 +2,7 @@ from typing import Dict, Iterable import pytest from helpers.cluster import ClickHouseCluster from helpers.test_tools import TSV +import uuid cluster = ClickHouseCluster(__file__) @@ -37,32 +38,31 @@ def new_backup_name(): return f"backup{backup_id_counter}" -def get_events(events_names: Iterable[str]) -> Dict[str, int]: - _events = TSV( +def get_events_for_query(query_id: str) -> Dict[str, int]: + events = TSV( node.query( - f"SELECT event, value FROM system.events WHERE event in {events_names} SETTINGS system_events_show_zero_values = 1;" + f""" + SYSTEM FLUSH LOGS; + + WITH arrayJoin(ProfileEvents) as pe + SELECT pe.1, pe.2 + FROM system.query_log + WHERE query_id = '{query_id}' + """ ) ) return { event: int(value) for event, value in
[line.split("\t") for line in _events.lines] + for event, value in [line.split("\t") for line in events.lines] } def check_backup_and_restore( - storage_policy, backup_destination, size=1000, backup_name=None, check_events=False + storage_policy, + backup_destination, + size=1000, + backup_name=None, ): - s3_backup_events = ( - "WriteBufferFromS3Microseconds", - "WriteBufferFromS3Bytes", - "WriteBufferFromS3RequestsErrors", - ) - s3_restore_events = ( - "ReadBufferFromS3Microseconds", - "ReadBufferFromS3Bytes", - "ReadBufferFromS3RequestsErrors", - ) - node.query( f""" DROP TABLE IF EXISTS data SYNC; @@ -72,16 +72,17 @@ def check_backup_and_restore( """ ) try: - events_before_backups = get_events(s3_backup_events) - node.query(f"BACKUP TABLE data TO {backup_destination}") - events_after_backups = get_events(s3_backup_events) - events_before_restore = get_events(s3_restore_events) + backup_query_id = uuid.uuid4().hex + node.query( + f"BACKUP TABLE data TO {backup_destination}", query_id=backup_query_id + ) + restore_query_id = uuid.uuid4().hex node.query( f""" RESTORE TABLE data AS data_restored FROM {backup_destination}; - """ + """, + query_id=restore_query_id, ) - events_after_restore = get_events(s3_restore_events) node.query( """ SELECT throwIf( @@ -91,55 +92,10 @@ def check_backup_and_restore( ); """ ) - if check_events and backup_name: - objects = node.cluster.minio_client.list_objects( - "root", f"data/backups/multipart/{backup_name}/" - ) - backup_meta_size = 0 - for obj in objects: - if ".backup" in obj.object_name: - backup_meta_size = obj.size - break - backup_total_size = int( - node.query( - f"SELECT sum(total_size) FROM system.backups WHERE status = 'BACKUP_CREATED' AND name like '%{backup_name}%'" - ).strip() - ) - restore_total_size = int( - node.query( - f"SELECT sum(total_size) FROM system.backups WHERE status = 'RESTORED' AND name like '%{backup_name}%'" - ).strip() - ) - # backup - # NOTE: ~35 bytes is used by .lock file, so set up 100 bytes to avoid flaky test - assert ( - abs( - backup_total_size - - ( - events_after_backups["WriteBufferFromS3Bytes"] - - events_before_backups["WriteBufferFromS3Bytes"] - - backup_meta_size - ) - ) - < 100 - ) - assert ( - events_after_backups["WriteBufferFromS3Microseconds"] - > events_before_backups["WriteBufferFromS3Microseconds"] - ) - assert events_after_backups["WriteBufferFromS3RequestsErrors"] == 0 - # restore - assert ( - events_after_restore["ReadBufferFromS3Bytes"] - - events_before_restore["ReadBufferFromS3Bytes"] - - backup_meta_size - == restore_total_size - ) - assert ( - events_after_restore["ReadBufferFromS3Microseconds"] - > events_before_restore["ReadBufferFromS3Microseconds"] - ) - assert events_after_restore["ReadBufferFromS3RequestsErrors"] == 0 + return [ + get_events_for_query(backup_query_id), + get_events_for_query(restore_query_id), + ] finally: node.query( """ @@ -224,17 +180,63 @@ def test_backup_to_s3_multipart(): storage_policy = "default" backup_name = new_backup_name() backup_destination = f"S3('http://minio1:9001/root/data/backups/multipart/{backup_name}', 'minio', 'minio123')" - check_backup_and_restore( + (backup_events, restore_events) = check_backup_and_restore( storage_policy, backup_destination, size=1000000, backup_name=backup_name, - check_events=True, ) assert node.contains_in_log( f"copyDataToS3File: Multipart upload has completed. 
Bucket: root, Key: data/backups/multipart/{backup_name}" ) + s3_backup_events = ( + "WriteBufferFromS3Microseconds", + "WriteBufferFromS3Bytes", + "WriteBufferFromS3RequestsErrors", + ) + s3_restore_events = ( + "ReadBufferFromS3Microseconds", + "ReadBufferFromS3Bytes", + "ReadBufferFromS3RequestsErrors", + ) + + objects = node.cluster.minio_client.list_objects( + "root", f"data/backups/multipart/{backup_name}/" + ) + backup_meta_size = 0 + for obj in objects: + if ".backup" in obj.object_name: + backup_meta_size = obj.size + break + backup_total_size = int( + node.query( + f"SELECT sum(total_size) FROM system.backups WHERE status = 'BACKUP_CREATED' AND name like '%{backup_name}%'" + ).strip() + ) + restore_total_size = int( + node.query( + f"SELECT sum(total_size) FROM system.backups WHERE status = 'RESTORED' AND name like '%{backup_name}%'" + ).strip() + ) + # backup + # NOTE: ~35 bytes is used by .lock file, so set up 100 bytes to avoid flaky test + assert ( + abs( + backup_total_size + - (backup_events["WriteBufferFromS3Bytes"] - backup_meta_size) + ) + < 100 + ) + assert backup_events["WriteBufferFromS3Microseconds"] > 0 + assert "WriteBufferFromS3RequestsErrors" not in backup_events + # restore + assert ( + restore_events["ReadBufferFromS3Bytes"] - backup_meta_size == restore_total_size + ) + assert restore_events["ReadBufferFromS3Microseconds"] > 0 + assert "ReadBufferFromS3RequestsErrors" not in restore_events + def test_backup_to_s3_native_copy(): storage_policy = "policy_s3" From 29dc9abfcab495f66689826fdbb8ee7a81ab4c7d Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 28 Jun 2023 10:58:23 +0200 Subject: [PATCH 1398/1997] Fix test_backup_restore_s3 after logging for native copying changed Check profile events instead of some odd logs. Signed-off-by: Azat Khuzhin --- .../test_backup_restore_s3/test.py | 27 ++++++++++++------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/tests/integration/test_backup_restore_s3/test.py b/tests/integration/test_backup_restore_s3/test.py index bb14fa4824b..8701bf0d832 100644 --- a/tests/integration/test_backup_restore_s3/test.py +++ b/tests/integration/test_backup_restore_s3/test.py @@ -244,9 +244,12 @@ def test_backup_to_s3_native_copy(): backup_destination = ( f"S3('http://minio1:9001/root/data/backups/{backup_name}', 'minio', 'minio123')" ) - check_backup_and_restore(storage_policy, backup_destination) - assert node.contains_in_log("BackupWriterS3.*using native copy") - assert node.contains_in_log("BackupReaderS3.*using native copy") + (backup_events, restore_events) = check_backup_and_restore( + storage_policy, backup_destination + ) + # single part upload + assert backup_events["S3CopyObject"] > 0 + assert restore_events["S3CopyObject"] > 0 assert node.contains_in_log( f"copyS3File: Single operation copy has completed. Bucket: root, Key: data/backups/{backup_name}" ) @@ -258,9 +261,12 @@ def test_backup_to_s3_native_copy_other_bucket(): backup_destination = ( f"S3('http://minio1:9001/root/data/backups/{backup_name}', 'minio', 'minio123')" ) - check_backup_and_restore(storage_policy, backup_destination) - assert node.contains_in_log("BackupWriterS3.*using native copy") - assert node.contains_in_log("BackupReaderS3.*using native copy") + (backup_events, restore_events) = check_backup_and_restore( + storage_policy, backup_destination + ) + # single part upload + assert backup_events["S3CopyObject"] > 0 + assert restore_events["S3CopyObject"] > 0 assert node.contains_in_log( f"copyS3File: Single operation copy has completed. 
Bucket: root, Key: data/backups/{backup_name}" ) @@ -270,9 +276,12 @@ def test_backup_to_s3_native_copy_multipart(): storage_policy = "policy_s3" backup_name = new_backup_name() backup_destination = f"S3('http://minio1:9001/root/data/backups/multipart/{backup_name}', 'minio', 'minio123')" - check_backup_and_restore(storage_policy, backup_destination, size=1000000) - assert node.contains_in_log("BackupWriterS3.*using native copy") - assert node.contains_in_log("BackupReaderS3.*using native copy") + (backup_events, restore_events) = check_backup_and_restore( + storage_policy, backup_destination, size=1000000 + ) + # multi part upload + assert backup_events["S3CreateMultipartUpload"] > 0 + assert restore_events["S3CreateMultipartUpload"] > 0 assert node.contains_in_log( f"copyS3File: Multipart upload has completed. Bucket: root, Key: data/backups/multipart/{backup_name}/" ) From 1590ffa3b1eee26d66ae3aec3ac32c63acdea153 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 28 Jun 2023 17:22:57 +0200 Subject: [PATCH 1399/1997] Remove copyS3FileNative() Signed-off-by: Azat Khuzhin --- src/IO/S3/copyS3File.cpp | 22 ++++------------------ src/IO/S3/copyS3File.h | 25 +++++++------------------ 2 files changed, 11 insertions(+), 36 deletions(-) diff --git a/src/IO/S3/copyS3File.cpp b/src/IO/S3/copyS3File.cpp index 3f18d3b2145..2c6557d97e7 100644 --- a/src/IO/S3/copyS3File.cpp +++ b/src/IO/S3/copyS3File.cpp @@ -809,23 +809,6 @@ void copyDataToS3File( } -void copyS3FileNative( - const std::shared_ptr & s3_client, - const String & src_bucket, - const String & src_key, - size_t src_offset, - size_t src_size, - const String & dest_bucket, - const String & dest_key, - const S3Settings::RequestSettings & settings, - const std::optional> & object_metadata, - ThreadPoolCallbackRunner schedule, - bool for_disk_s3) -{ - CopyFileHelper helper{s3_client, src_bucket, src_key, src_offset, src_size, dest_bucket, dest_key, settings, object_metadata, schedule, for_disk_s3}; - helper.performCopy(); -} - void copyS3File( const CreateReadBuffer & create_read_buffer, const std::shared_ptr & s3_client, @@ -841,7 +824,10 @@ void copyS3File( bool for_disk_s3) { if (settings.allow_native_copy) - copyS3FileNative(s3_client, src_bucket, src_key, src_offset, src_size, dest_bucket, dest_key, settings, object_metadata, schedule, for_disk_s3); + { + CopyFileHelper helper{s3_client, src_bucket, src_key, src_offset, src_size, dest_bucket, dest_key, settings, object_metadata, schedule, for_disk_s3}; + helper.performCopy(); + } else copyDataToS3File(create_read_buffer, src_offset, src_size, s3_client, dest_bucket, dest_key, settings, object_metadata, schedule, for_disk_s3); } diff --git a/src/IO/S3/copyS3File.h b/src/IO/S3/copyS3File.h index d41f34c103c..2c848076e9b 100644 --- a/src/IO/S3/copyS3File.h +++ b/src/IO/S3/copyS3File.h @@ -19,9 +19,14 @@ using CreateReadBuffer = std::function()>; /// Copies a file from S3 to S3. /// The same functionality can be done by using the function copyData() and the classes ReadBufferFromS3 and WriteBufferFromS3 -/// however copyS3FileNative() is faster and spends less network traffic and memory. +/// however copyS3File() is faster and spends less network traffic and memory. /// The parameters `src_offset` and `src_size` specify a part in the source to copy. /// +/// Note that it tries to copy the file using native copy (CopyObject), but if it +/// has been disabled (with settings.allow_native_copy) it falls back to +/// read-write copy (copyDataToS3File()).
+void copyS3File( + const CreateReadBuffer & create_read_buffer, const std::shared_ptr & s3_client, const String & src_bucket, const String & src_key, @@ -51,22 +56,6 @@ void copyDataToS3File( ThreadPoolCallbackRunner schedule_ = {}, bool for_disk_s3 = false); -/// Tries to copy file using native copy (copyS3FileNative()), if this is not -/// possible it will fallback to read-write copy (copyDataToS3File()) -void copyS3File( - const CreateReadBuffer & create_read_buffer, - const std::shared_ptr & s3_client, - const String & src_bucket, - const String & src_key, - size_t src_offset, - size_t src_size, - const String & dest_bucket, - const String & dest_key, - const S3Settings::RequestSettings & settings, - const std::optional> & object_metadata = std::nullopt, - ThreadPoolCallbackRunner schedule_ = {}, - bool for_disk_s3 = false); - } #endif From 559d3281782c22fa380e85e188d2a15e404a4c19 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 28 Jun 2023 17:16:02 +0200 Subject: [PATCH 1400/1997] Rename BACKUP setting native_copy to allow_s3_native_copy Signed-off-by: Azat Khuzhin --- src/Backups/BackupFactory.h | 2 +- src/Backups/BackupIO_S3.cpp | 8 ++++---- src/Backups/BackupIO_S3.h | 4 ++-- src/Backups/BackupSettings.cpp | 2 +- src/Backups/BackupSettings.h | 2 +- src/Backups/BackupsWorker.cpp | 4 ++-- src/Backups/RestoreSettings.cpp | 2 +- src/Backups/RestoreSettings.h | 2 +- src/Backups/registerBackupEngineS3.cpp | 4 ++-- .../0_stateless/02801_backup_native_copy.reference | 8 ++++---- tests/queries/0_stateless/02801_backup_native_copy.sh | 8 ++++---- 11 files changed, 23 insertions(+), 23 deletions(-) diff --git a/src/Backups/BackupFactory.h b/src/Backups/BackupFactory.h index 642f5cb07b9..e95aeddb086 100644 --- a/src/Backups/BackupFactory.h +++ b/src/Backups/BackupFactory.h @@ -35,7 +35,7 @@ public: std::shared_ptr backup_coordination; std::optional backup_uuid; bool deduplicate_files = true; - bool native_copy = true; + bool allow_s3_native_copy = true; }; static BackupFactory & instance(); diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index 6531948c872..60fea9e2008 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -101,7 +101,7 @@ namespace BackupReaderS3::BackupReaderS3( - const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool native_copy, const ContextPtr & context_) + const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const ContextPtr & context_) : BackupReaderDefault(&Poco::Logger::get("BackupReaderS3"), context_) , s3_uri(s3_uri_) , client(makeS3Client(s3_uri_, access_key_id_, secret_access_key_, context_)) @@ -110,7 +110,7 @@ BackupReaderS3::BackupReaderS3( { request_settings.updateFromSettings(context_->getSettingsRef()); request_settings.max_single_read_retries = context_->getSettingsRef().s3_max_single_read_retries; // FIXME: Avoid taking value for endpoint - request_settings.allow_native_copy = native_copy; + request_settings.allow_native_copy = allow_s3_native_copy; } BackupReaderS3::~BackupReaderS3() = default; @@ -184,7 +184,7 @@ void BackupReaderS3::copyFileToDisk(const String & path_in_backup, size_t file_s BackupWriterS3::BackupWriterS3( - const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool native_copy, const ContextPtr & context_) + const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const ContextPtr & 
context_) : BackupWriterDefault(&Poco::Logger::get("BackupWriterS3"), context_) , s3_uri(s3_uri_) , client(makeS3Client(s3_uri_, access_key_id_, secret_access_key_, context_)) @@ -193,7 +193,7 @@ BackupWriterS3::BackupWriterS3( { request_settings.updateFromSettings(context_->getSettingsRef()); request_settings.max_single_read_retries = context_->getSettingsRef().s3_max_single_read_retries; // FIXME: Avoid taking value for endpoint - request_settings.allow_native_copy = native_copy; + request_settings.allow_native_copy = allow_s3_native_copy; } void BackupWriterS3::copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path, diff --git a/src/Backups/BackupIO_S3.h b/src/Backups/BackupIO_S3.h index 16b2abfea3d..a93d6119786 100644 --- a/src/Backups/BackupIO_S3.h +++ b/src/Backups/BackupIO_S3.h @@ -17,7 +17,7 @@ namespace DB class BackupReaderS3 : public BackupReaderDefault { public: - BackupReaderS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool native_copy, const ContextPtr & context_); + BackupReaderS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const ContextPtr & context_); ~BackupReaderS3() override; bool fileExists(const String & file_name) override; @@ -38,7 +38,7 @@ private: class BackupWriterS3 : public BackupWriterDefault { public: - BackupWriterS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool native_copy, const ContextPtr & context_); + BackupWriterS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const ContextPtr & context_); ~BackupWriterS3() override; bool fileExists(const String & file_name) override; diff --git a/src/Backups/BackupSettings.cpp b/src/Backups/BackupSettings.cpp index 8e9fe7956f9..b6d776d0347 100644 --- a/src/Backups/BackupSettings.cpp +++ b/src/Backups/BackupSettings.cpp @@ -25,7 +25,7 @@ namespace ErrorCodes M(Bool, async) \ M(Bool, decrypt_files_from_encrypted_disks) \ M(Bool, deduplicate_files) \ - M(Bool, native_copy) \ + M(Bool, allow_s3_native_copy) \ M(UInt64, shard_num) \ M(UInt64, replica_num) \ M(Bool, internal) \ diff --git a/src/Backups/BackupSettings.h b/src/Backups/BackupSettings.h index e21b70ee25f..7cec2d9693d 100644 --- a/src/Backups/BackupSettings.h +++ b/src/Backups/BackupSettings.h @@ -39,7 +39,7 @@ struct BackupSettings bool deduplicate_files = true; /// Whether native copy is allowed (optimization for cloud storages, that sometimes could have bugs) - bool native_copy = true; + bool allow_s3_native_copy = true; /// 1-based shard index to store in the backup. 0 means all shards. /// Can only be used with BACKUP ON CLUSTER. diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp index fddd4f34bb6..c08b110075e 100644 --- a/src/Backups/BackupsWorker.cpp +++ b/src/Backups/BackupsWorker.cpp @@ -348,7 +348,7 @@ void BackupsWorker::doBackup( backup_create_params.backup_coordination = backup_coordination; backup_create_params.backup_uuid = backup_settings.backup_uuid; backup_create_params.deduplicate_files = backup_settings.deduplicate_files; - backup_create_params.native_copy = backup_settings.native_copy; + backup_create_params.allow_s3_native_copy = backup_settings.allow_s3_native_copy; BackupMutablePtr backup = BackupFactory::instance().createBackup(backup_create_params); /// Write the backup. 
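For readers tracking what the renamed setting actually switches: after the patches above, copyS3File() consults request_settings.allow_native_copy and either performs a server-side copy or streams the data through the client. A much-simplified sketch of that dispatch, with illustrative stand-in types rather than ClickHouse's real signatures:

// Simplified sketch of the dispatch that allow_s3_native_copy ultimately
// controls. All types here are illustrative stand-ins, not ClickHouse code.
#include <functional>
#include <iostream>
#include <memory>
#include <string>

struct RequestSettings { bool allow_native_copy = true; };
struct ReadBuffer { std::string data; }; // stand-in for a streaming source
using CreateReadBuffer = std::function<std::unique_ptr<ReadBuffer>()>;

// Stand-in for CopyObject: the server moves the bytes, no data crosses the client.
static void nativeServerSideCopy(const std::string & src, const std::string & dst)
{
    std::cout << "CopyObject " << src << " -> " << dst << '\n';
}

// Stand-in for the read + re-upload path used when native copy is disabled.
static void streamingCopy(const ReadBuffer & in, const std::string & dst)
{
    std::cout << "upload " << in.data.size() << " bytes -> " << dst << '\n';
}

static void copyFile(const CreateReadBuffer & create_read_buffer,
                     const std::string & src, const std::string & dst,
                     const RequestSettings & settings)
{
    if (settings.allow_native_copy)
        nativeServerSideCopy(src, dst);
    else
        streamingCopy(*create_read_buffer(), dst); // reader is built only on the fallback path
}

int main()
{
    auto make_reader = [] { return std::make_unique<ReadBuffer>(ReadBuffer{"0123456789"}); };
    copyFile(make_reader, "bucket/src", "bucket/dst", {.allow_native_copy = true});
    copyFile(make_reader, "bucket/src", "bucket/dst", {.allow_native_copy = false});
}

Passing a factory rather than a ready buffer is the point: on the native path no read buffer is ever constructed, so the cheap case stays cheap.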
@@ -648,7 +648,7 @@ void BackupsWorker::doRestore( backup_open_params.backup_info = backup_info; backup_open_params.base_backup_info = restore_settings.base_backup_info; backup_open_params.password = restore_settings.password; - backup_open_params.native_copy = restore_settings.native_copy; + backup_open_params.allow_s3_native_copy = restore_settings.allow_s3_native_copy; BackupPtr backup = BackupFactory::instance().createBackup(backup_open_params); String current_database = context->getCurrentDatabase(); diff --git a/src/Backups/RestoreSettings.cpp b/src/Backups/RestoreSettings.cpp index 4dd75911a91..2009ca4c1ff 100644 --- a/src/Backups/RestoreSettings.cpp +++ b/src/Backups/RestoreSettings.cpp @@ -161,7 +161,7 @@ namespace M(RestoreAccessCreationMode, create_access) \ M(Bool, allow_unresolved_access_dependencies) \ M(RestoreUDFCreationMode, create_function) \ - M(Bool, native_copy) \ + M(Bool, allow_s3_native_copy) \ M(Bool, internal) \ M(String, host_id) \ M(OptionalUUID, restore_uuid) diff --git a/src/Backups/RestoreSettings.h b/src/Backups/RestoreSettings.h index 59d73c83d69..1861e219dba 100644 --- a/src/Backups/RestoreSettings.h +++ b/src/Backups/RestoreSettings.h @@ -108,7 +108,7 @@ struct RestoreSettings RestoreUDFCreationMode create_function = RestoreUDFCreationMode::kCreateIfNotExists; /// Whether native copy is allowed (optimization for cloud storages, that sometimes could have bugs) - bool native_copy = true; + bool allow_s3_native_copy = true; /// Internal, should not be specified by user. bool internal = false; diff --git a/src/Backups/registerBackupEngineS3.cpp b/src/Backups/registerBackupEngineS3.cpp index ef8ced94590..bd705e4d70f 100644 --- a/src/Backups/registerBackupEngineS3.cpp +++ b/src/Backups/registerBackupEngineS3.cpp @@ -107,12 +107,12 @@ void registerBackupEngineS3(BackupFactory & factory) if (params.open_mode == IBackup::OpenMode::READ) { - auto reader = std::make_shared(S3::URI{s3_uri}, access_key_id, secret_access_key, params.native_copy, params.context); + auto reader = std::make_shared(S3::URI{s3_uri}, access_key_id, secret_access_key, params.allow_s3_native_copy, params.context); return std::make_unique(backup_name_for_logging, archive_params, params.base_backup_info, reader, params.context); } else { - auto writer = std::make_shared(S3::URI{s3_uri}, access_key_id, secret_access_key, params.native_copy, params.context); + auto writer = std::make_shared(S3::URI{s3_uri}, access_key_id, secret_access_key, params.allow_s3_native_copy, params.context); return std::make_unique( backup_name_for_logging, archive_params, diff --git a/tests/queries/0_stateless/02801_backup_native_copy.reference b/tests/queries/0_stateless/02801_backup_native_copy.reference index 659df5e9b25..f9b008cde2e 100644 --- a/tests/queries/0_stateless/02801_backup_native_copy.reference +++ b/tests/queries/0_stateless/02801_backup_native_copy.reference @@ -1,4 +1,4 @@ -BACKUP TABLE data TO S3(s3_conn, \'backups/default/data_native_copy\') SETTINGS native_copy = 1 1 -BACKUP TABLE data TO S3(s3_conn, \'backups/default/data_no_native_copy\') SETTINGS native_copy = 0 0 -RESTORE TABLE data AS data_native_copy FROM S3(s3_conn, \'backups/default/data_native_copy\') SETTINGS native_copy = 1 1 -RESTORE TABLE data AS data_no_native_copy FROM S3(s3_conn, \'backups/default/data_no_native_copy\') SETTINGS native_copy = 0 0 +BACKUP TABLE data TO S3(s3_conn, \'backups/default/data_native_copy\') SETTINGS allow_s3_native_copy = 1 1 +BACKUP TABLE data TO S3(s3_conn, \'backups/default/data_no_native_copy\') 
SETTINGS allow_s3_native_copy = 0 0 +RESTORE TABLE data AS data_native_copy FROM S3(s3_conn, \'backups/default/data_native_copy\') SETTINGS allow_s3_native_copy = 1 1 +RESTORE TABLE data AS data_no_native_copy FROM S3(s3_conn, \'backups/default/data_no_native_copy\') SETTINGS allow_s3_native_copy = 0 0 diff --git a/tests/queries/0_stateless/02801_backup_native_copy.sh b/tests/queries/0_stateless/02801_backup_native_copy.sh index 966d7ae9ce8..015dcb19b82 100755 --- a/tests/queries/0_stateless/02801_backup_native_copy.sh +++ b/tests/queries/0_stateless/02801_backup_native_copy.sh @@ -15,28 +15,28 @@ $CLICKHOUSE_CLIENT -nm -q " " query_id=$(random_str 10) -$CLICKHOUSE_CLIENT --format Null --query_id $query_id -q "BACKUP TABLE data TO S3(s3_conn, 'backups/$CLICKHOUSE_DATABASE/data_native_copy') SETTINGS native_copy=true" +$CLICKHOUSE_CLIENT --format Null --query_id $query_id -q "BACKUP TABLE data TO S3(s3_conn, 'backups/$CLICKHOUSE_DATABASE/data_native_copy') SETTINGS allow_s3_native_copy=true" $CLICKHOUSE_CLIENT -nm -q " SYSTEM FLUSH LOGS; SELECT query, ProfileEvents['S3CopyObject']>0 FROM system.query_log WHERE type = 'QueryFinish' AND event_date >= yesterday() AND current_database = '$CLICKHOUSE_DATABASE' AND query_id = '$query_id' " query_id=$(random_str 10) -$CLICKHOUSE_CLIENT --format Null --query_id $query_id -q "BACKUP TABLE data TO S3(s3_conn, 'backups/$CLICKHOUSE_DATABASE/data_no_native_copy') SETTINGS native_copy=false" +$CLICKHOUSE_CLIENT --format Null --query_id $query_id -q "BACKUP TABLE data TO S3(s3_conn, 'backups/$CLICKHOUSE_DATABASE/data_no_native_copy') SETTINGS allow_s3_native_copy=false" $CLICKHOUSE_CLIENT -nm -q " SYSTEM FLUSH LOGS; SELECT query, ProfileEvents['S3CopyObject']>0 FROM system.query_log WHERE type = 'QueryFinish' AND event_date >= yesterday() AND current_database = '$CLICKHOUSE_DATABASE' AND query_id = '$query_id' " query_id=$(random_str 10) -$CLICKHOUSE_CLIENT --send_logs_level=error --format Null --query_id $query_id -q "RESTORE TABLE data AS data_native_copy FROM S3(s3_conn, 'backups/$CLICKHOUSE_DATABASE/data_native_copy') SETTINGS native_copy=true" +$CLICKHOUSE_CLIENT --send_logs_level=error --format Null --query_id $query_id -q "RESTORE TABLE data AS data_native_copy FROM S3(s3_conn, 'backups/$CLICKHOUSE_DATABASE/data_native_copy') SETTINGS allow_s3_native_copy=true" $CLICKHOUSE_CLIENT -nm -q " SYSTEM FLUSH LOGS; SELECT query, ProfileEvents['S3CopyObject']>0 FROM system.query_log WHERE type = 'QueryFinish' AND event_date >= yesterday() AND current_database = '$CLICKHOUSE_DATABASE' AND query_id = '$query_id' " query_id=$(random_str 10) -$CLICKHOUSE_CLIENT --send_logs_level=error --format Null --query_id $query_id -q "RESTORE TABLE data AS data_no_native_copy FROM S3(s3_conn, 'backups/$CLICKHOUSE_DATABASE/data_no_native_copy') SETTINGS native_copy=false" +$CLICKHOUSE_CLIENT --send_logs_level=error --format Null --query_id $query_id -q "RESTORE TABLE data AS data_no_native_copy FROM S3(s3_conn, 'backups/$CLICKHOUSE_DATABASE/data_no_native_copy') SETTINGS allow_s3_native_copy=false" $CLICKHOUSE_CLIENT -nm -q " SYSTEM FLUSH LOGS; SELECT query, ProfileEvents['S3CopyObject']>0 FROM system.query_log WHERE type = 'QueryFinish' AND event_date >= yesterday() AND current_database = '$CLICKHOUSE_DATABASE' AND query_id = '$query_id' From 1844ac37d76ac1a660681acb6b79af8af860d5ff Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 28 Jun 2023 19:12:52 +0200 Subject: [PATCH 1401/1997] Remove create_read_buffer argument for copyS3File() Signed-off-by: Azat Khuzhin --- 
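A minimal before/after sketch of the call-site change, assembled from the S3ObjectStorage.cpp hunks below (names such as client_ptr, object_from and scheduler are taken from that context; the fragment is illustrative, not a standalone compilable unit):

    /// Before: every caller had to supply a read-buffer factory,
    /// even when the native CopyObject path never used it.
    auto create_read_buffer = [this, object_from] { return readObject(object_from); };
    copyS3File(create_read_buffer, client_ptr, bucket, object_from.remote_path, 0, size,
        dest_s3->bucket, object_to.remote_path, settings_ptr->request_settings,
        object_to_attributes, scheduler, /* for_disk_s3= */ true);

    /// After: copyS3File() constructs the read buffer internally, and only when it
    /// actually has to fall back to the read-write copy (copyDataToS3File()).
    copyS3File(client_ptr, bucket, object_from.remote_path, 0, size,
        dest_s3->bucket, object_to.remote_path, settings_ptr->request_settings,
        object_to_attributes, scheduler, /* for_disk_s3= */ true);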
src/Backups/BackupIO_S3.cpp | 15 --------------- src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp | 14 ++------------ src/IO/S3/copyS3File.cpp | 7 ++++++- src/IO/S3/copyS3File.h | 6 +++--- 4 files changed, 11 insertions(+), 31 deletions(-) diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index 60fea9e2008..d487ec6e80e 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -152,13 +152,7 @@ void BackupReaderS3::copyFileToDisk(const String & path_in_backup, size_t file_s "Blob writing function called with unexpected blob_path.size={} or mode={}", blob_path.size(), mode); - auto create_read_buffer = [this, path_in_backup] - { - return readFile(path_in_backup); - }; - copyS3File( - create_read_buffer, client, s3_uri.bucket, fs::path(s3_uri.key) / path_in_backup, @@ -208,17 +202,8 @@ void BackupWriterS3::copyFileFromDisk(const String & path_in_backup, DiskPtr src /// In this case we can't use the native copy. if (auto blob_path = src_disk->getBlobPath(src_path); blob_path.size() == 2) { - auto create_read_buffer = [src_disk, src_path, copy_encrypted, settings = read_settings.adjustBufferSize(start_pos + length)] - { - if (copy_encrypted) - return src_disk->readEncryptedFile(src_path, settings); - else - return src_disk->readFile(src_path, settings); - }; - LOG_TRACE(log, "Copying file {} from disk {} to S3", src_path, src_disk->getName()); copyS3File( - create_read_buffer, client, /* src_bucket */ blob_path[1], /* src_key= */ blob_path[0], diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 3c19af188dc..e46ca3d0828 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -435,12 +435,7 @@ void S3ObjectStorage::copyObjectToAnotherObjectStorage( // NOLINT auto settings_ptr = s3_settings.get(); auto size = S3::getObjectSize(*client_ptr, bucket, object_from.remote_path, {}, settings_ptr->request_settings, /* for_disk_s3= */ true); auto scheduler = threadPoolCallbackRunner(getThreadPoolWriter(), "S3ObjStor_copy"); - auto create_read_buffer = [this, object_from] - { - return readObject(object_from); - }; - - copyS3File(create_read_buffer, client_ptr, bucket, object_from.remote_path, 0, size, dest_s3->bucket, object_to.remote_path, + copyS3File(client_ptr, bucket, object_from.remote_path, 0, size, dest_s3->bucket, object_to.remote_path, settings_ptr->request_settings, object_to_attributes, scheduler, /* for_disk_s3= */ true); } else @@ -456,12 +451,7 @@ void S3ObjectStorage::copyObject( // NOLINT auto settings_ptr = s3_settings.get(); auto size = S3::getObjectSize(*client_ptr, bucket, object_from.remote_path, {}, settings_ptr->request_settings, /* for_disk_s3= */ true); auto scheduler = threadPoolCallbackRunner(getThreadPoolWriter(), "S3ObjStor_copy"); - auto create_read_buffer = [this, object_from] - { - return readObject(object_from); - }; - - copyS3File(create_read_buffer, client_ptr, bucket, object_from.remote_path, 0, size, bucket, object_to.remote_path, + copyS3File(client_ptr, bucket, object_from.remote_path, 0, size, bucket, object_to.remote_path, settings_ptr->request_settings, object_to_attributes, scheduler, /* for_disk_s3= */ true); } diff --git a/src/IO/S3/copyS3File.cpp b/src/IO/S3/copyS3File.cpp index 2c6557d97e7..2de2ccd0f9f 100644 --- a/src/IO/S3/copyS3File.cpp +++ b/src/IO/S3/copyS3File.cpp @@ -810,7 +810,6 @@ void copyDataToS3File( void copyS3File( - const CreateReadBuffer & create_read_buffer, const std::shared_ptr 
& s3_client, const String & src_bucket, const String & src_key, @@ -829,7 +828,13 @@ void copyS3File( helper.performCopy(); } else + { + auto create_read_buffer = [&] + { + return std::make_unique(s3_client, src_bucket, src_key, "", settings, Context::getGlobalContextInstance()->getReadSettings()); + }; copyDataToS3File(create_read_buffer, src_offset, src_size, s3_client, dest_bucket, dest_key, settings, object_metadata, schedule, for_disk_s3); + } } } diff --git a/src/IO/S3/copyS3File.h b/src/IO/S3/copyS3File.h index 2c848076e9b..5d35e5ebe2d 100644 --- a/src/IO/S3/copyS3File.h +++ b/src/IO/S3/copyS3File.h @@ -23,10 +23,10 @@ using CreateReadBuffer = std::function()>; /// The parameters `src_offset` and `src_size` specify a part in the source to copy. /// /// Note, that it tries to copy file using native copy (CopyObject), but if it -/// has been disabled (with settings.allow_native_copy) it is fallbacks to -/// read-write copy (copyDataToS3File()). +/// has been disabled (with settings.allow_native_copy) or the request failed +/// because of a known issue, it falls back to the read-write copy +/// (copyDataToS3File()). void copyS3File( - const CreateReadBuffer & create_read_buffer, const std::shared_ptr & s3_client, const String & src_bucket, const String & src_key, From b95836363085160a20bddfceaaf0709a0e721870 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 8 Jul 2023 14:32:34 +0200 Subject: [PATCH 1402/1997] tests: temporarily fix the 02802_clickhouse_disks_s3_copy In #51135 the behavior of `clickhouse-disks copy` was changed; let's temporarily update the test (and continue the discussion about this change in that PR). Signed-off-by: Azat Khuzhin --- tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh b/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh index f879b7a5621..33321607728 100755 --- a/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh +++ b/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh @@ -19,6 +19,8 @@ function run_test_for_disk() grep -o -e "Single part upload has completed." -e "Single operation copy has completed."
} clickhouse-disks -C "$config" --disk "$disk" remove $CLICKHOUSE_DATABASE/test + # NOTE: this is because "copy" works like "cp -R from to/" instead of "cp from to" + clickhouse-disks -C "$config" --disk "$disk" remove $CLICKHOUSE_DATABASE/test.copy/test clickhouse-disks -C "$config" --disk "$disk" remove $CLICKHOUSE_DATABASE/test.copy } From ac972661f9718b9d15e5bb49c63b2dff7d296fe3 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 9 Jul 2023 08:21:18 +0200 Subject: [PATCH 1403/1997] Add exclusion for "API mode: {}" from S3 in 00002_log_and_exception_messages_formatting Signed-off-by: Azat Khuzhin --- .../0_stateless/00002_log_and_exception_messages_formatting.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql index acb6117f937..86fe01dc0e3 100644 --- a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql +++ b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql @@ -36,7 +36,7 @@ create temporary table known_short_messages (s String) as select * from (select 'Database {} doesn''t exist', 'Dictionary ({}) not found', 'Unknown table function {}', 'Unknown format {}', 'Unknown explain kind ''{}''', 'Unknown setting {}', 'Unknown input format {}', 'Unknown identifier: ''{}''', 'User name is empty', 'Expected function, got: {}', -'Attempt to read after eof', 'String size is too big ({}), maximum: {}' +'Attempt to read after eof', 'String size is too big ({}), maximum: {}', 'API mode: {}' ] as arr) array join arr; -- Check that we don't have too many short meaningless message patterns. From d52041345401bdd1a02c2482546da2d5c21793cb Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 9 Jul 2023 09:20:03 +0200 Subject: [PATCH 1404/1997] Cleanup SymbolIndex after reload got removed Remove MultiVersion for the SymbolIndex structure, since after #51873 it is useless. Follow-up for: #51873 Signed-off-by: Azat Khuzhin --- src/Common/StackTrace.cpp | 6 ++---- src/Common/SymbolIndex.cpp | 11 +++-------- src/Common/SymbolIndex.h | 9 +++------ src/Common/examples/symbol_index.cpp | 3 +-- src/Common/getResource.cpp | 2 +- src/Daemon/BaseDaemon.cpp | 2 +- src/Daemon/SentryWriter.cpp | 2 +- src/Functions/addressToLine.h | 3 +-- src/Functions/addressToSymbol.cpp | 3 +-- src/Functions/serverConstants.cpp | 2 +- src/Interpreters/CrashLog.cpp | 2 +- 11 files changed, 16 insertions(+), 29 deletions(-) diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index c13b63854e4..b323f1e4363 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -208,8 +208,7 @@ void StackTrace::symbolize( const StackTrace::FramePointers & frame_pointers, [[maybe_unused]] size_t offset, size_t size, StackTrace::Frames & frames) { #if defined(__ELF__) && !defined(OS_FREEBSD) - auto symbol_index_ptr = DB::SymbolIndex::instance(); - const DB::SymbolIndex & symbol_index = *symbol_index_ptr; + const DB::SymbolIndex & symbol_index = DB::SymbolIndex::instance(); std::unordered_map dwarfs; for (size_t i = 0; i < offset; ++i) @@ -341,8 +340,7 @@ toStringEveryLineImpl([[maybe_unused]] bool fatal, const StackTraceRefTriple & s using enum DB::Dwarf::LocationInfoMode; const auto mode = fatal ?
FULL_WITH_INLINE : FAST; - auto symbol_index_ptr = DB::SymbolIndex::instance(); - const DB::SymbolIndex & symbol_index = *symbol_index_ptr; + const DB::SymbolIndex & symbol_index = DB::SymbolIndex::instance(); std::unordered_map dwarfs; for (size_t i = stack_trace.offset; i < stack_trace.size; ++i) diff --git a/src/Common/SymbolIndex.cpp b/src/Common/SymbolIndex.cpp index 4c7f3827125..cb02bb3ff75 100644 --- a/src/Common/SymbolIndex.cpp +++ b/src/Common/SymbolIndex.cpp @@ -509,7 +509,7 @@ const T * find(const void * address, const std::vector & vec) } -void SymbolIndex::update() +void SymbolIndex::load() { dl_iterate_phdr(collectSymbols, &data); @@ -549,17 +549,12 @@ String SymbolIndex::getBuildIDHex() const return build_id_hex; } -MultiVersion & SymbolIndex::instanceImpl() +const SymbolIndex & SymbolIndex::instance() { - static MultiVersion instance(std::unique_ptr(new SymbolIndex)); + static SymbolIndex instance; return instance; } -MultiVersion::Version SymbolIndex::instance() -{ - return instanceImpl().get(); -} - } #endif diff --git a/src/Common/SymbolIndex.h b/src/Common/SymbolIndex.h index 773f59b7914..4fd108434d5 100644 --- a/src/Common/SymbolIndex.h +++ b/src/Common/SymbolIndex.h @@ -8,8 +8,6 @@ #include #include -#include - namespace DB { @@ -20,10 +18,10 @@ namespace DB class SymbolIndex : private boost::noncopyable { protected: - SymbolIndex() { update(); } + SymbolIndex() { load(); } public: - static MultiVersion::Version instance(); + static const SymbolIndex & instance(); struct Symbol { @@ -89,8 +87,7 @@ public: private: Data data; - void update(); - static MultiVersion & instanceImpl(); + void load(); }; } diff --git a/src/Common/examples/symbol_index.cpp b/src/Common/examples/symbol_index.cpp index 13a49fd65ad..ca9c26f27d6 100644 --- a/src/Common/examples/symbol_index.cpp +++ b/src/Common/examples/symbol_index.cpp @@ -22,8 +22,7 @@ int main(int argc, char ** argv) return 1; } - auto symbol_index_ptr = SymbolIndex::instance(); - const SymbolIndex & symbol_index = *symbol_index_ptr; + const SymbolIndex & symbol_index = SymbolIndex::instance(); for (const auto & elem : symbol_index.symbols()) std::cout << elem.name << ": " << elem.address_begin << " ... " << elem.address_end << "\n"; diff --git a/src/Common/getResource.cpp b/src/Common/getResource.cpp index fe603fcc550..72ba24c2f44 100644 --- a/src/Common/getResource.cpp +++ b/src/Common/getResource.cpp @@ -16,7 +16,7 @@ std::string_view getResource(std::string_view name) #if defined USE_MUSL /// If static linking is used, we cannot use dlsym and have to parse ELF symbol table by ourself. 
- return DB::SymbolIndex::instance()->getResource(name_replaced); + return DB::SymbolIndex::instance().getResource(name_replaced); #else // In most `dlsym(3)` APIs, one passes the symbol name as it appears via diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index bf6c3b4cdcf..319d2bc8b5b 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -986,7 +986,7 @@ void BaseDaemon::initializeTerminationAndSignalProcessing() signal_listener_thread.start(*signal_listener); #if defined(__ELF__) && !defined(OS_FREEBSD) - String build_id_hex = SymbolIndex::instance()->getBuildIDHex(); + String build_id_hex = SymbolIndex::instance().getBuildIDHex(); if (build_id_hex.empty()) build_id = ""; else diff --git a/src/Daemon/SentryWriter.cpp b/src/Daemon/SentryWriter.cpp index 041d3292841..e38d339d088 100644 --- a/src/Daemon/SentryWriter.cpp +++ b/src/Daemon/SentryWriter.cpp @@ -150,7 +150,7 @@ void SentryWriter::onFault(int sig, const std::string & error_message, const Sta sentry_set_extra("signal_number", sentry_value_new_int32(sig)); #if defined(__ELF__) && !defined(OS_FREEBSD) - const String & build_id_hex = DB::SymbolIndex::instance()->getBuildIDHex(); + const String & build_id_hex = DB::SymbolIndex::instance().getBuildIDHex(); sentry_set_tag("build_id", build_id_hex.c_str()); #endif diff --git a/src/Functions/addressToLine.h b/src/Functions/addressToLine.h index 1410e55d9a9..5c1611fe173 100644 --- a/src/Functions/addressToLine.h +++ b/src/Functions/addressToLine.h @@ -90,8 +90,7 @@ protected: ResultT impl(uintptr_t addr) const { - auto symbol_index_ptr = SymbolIndex::instance(); - const SymbolIndex & symbol_index = *symbol_index_ptr; + const SymbolIndex & symbol_index = SymbolIndex::instance(); if (const auto * object = symbol_index.findObject(reinterpret_cast(addr))) { diff --git a/src/Functions/addressToSymbol.cpp b/src/Functions/addressToSymbol.cpp index 95d57f6d296..cc5ad4c4fdf 100644 --- a/src/Functions/addressToSymbol.cpp +++ b/src/Functions/addressToSymbol.cpp @@ -68,8 +68,7 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - auto symbol_index_ptr = SymbolIndex::instance(); - const SymbolIndex & symbol_index = *symbol_index_ptr; + const SymbolIndex & symbol_index = SymbolIndex::instance(); const ColumnPtr & column = arguments[0].column; const ColumnUInt64 * column_concrete = checkAndGetColumn(column.get()); diff --git a/src/Functions/serverConstants.cpp b/src/Functions/serverConstants.cpp index 0fda53414de..4294f97d771 100644 --- a/src/Functions/serverConstants.cpp +++ b/src/Functions/serverConstants.cpp @@ -27,7 +27,7 @@ namespace public: static constexpr auto name = "buildId"; static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - explicit FunctionBuildId(ContextPtr context) : FunctionConstantBase(SymbolIndex::instance()->getBuildIDHex(), context->isDistributed()) {} + explicit FunctionBuildId(ContextPtr context) : FunctionConstantBase(SymbolIndex::instance().getBuildIDHex(), context->isDistributed()) {} }; #endif diff --git a/src/Interpreters/CrashLog.cpp b/src/Interpreters/CrashLog.cpp index f1f0ffb6f60..08c08ffecd1 100644 --- a/src/Interpreters/CrashLog.cpp +++ b/src/Interpreters/CrashLog.cpp @@ -52,7 +52,7 @@ void CrashLogElement::appendToBlock(MutableColumns & columns) const String build_id_hex; #if defined(__ELF__) && !defined(OS_FREEBSD) - build_id_hex = SymbolIndex::instance()->getBuildIDHex(); + build_id_hex = 
SymbolIndex::instance().getBuildIDHex(); #endif columns[i++]->insert(build_id_hex); } From 3c18a181c997f1f43e759d72eeadcc5d4f35142d Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 9 Jul 2023 13:54:18 +0200 Subject: [PATCH 1405/1997] Fix using of pools from the main thread Otherwise it is not possible to use clickhouse-disks with S3: $ clickhouse-disks -C /src/ch/clickhouse/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.xml --log-level test --disk s3_plain_native_copy copy default/test default/test.copy Failed to make request to: http://localhost:11111/test?list-type=2&max-keys=1&prefix=clickhouse-disks%2Fdefault%2Ftest.copy: Code: 49. DB::Exception: current_thread is not initialized. (LOGICAL_ERROR), Stack trace (when copying this message, always include the lines below): 0. ./.cmake-llvm16/./contrib/llvm-project/libcxx/include/exception:134: Poco::Exception::Exception(String const&, int) @ 0x000000001ad7c872 in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 1. ./.cmake-llvm16/./src/Common/Exception.cpp:94: DB::Exception::Exception(DB::Exception::MessageMasked&&, int, bool) @ 0x0000000011e2c4b7 in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 2. ./.cmake-llvm16/./contrib/llvm-project/libcxx/include/string:1499: DB::Exception::Exception(int, char const (&) [34]) @ 0x000000000d341e58 in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 3. ./.cmake-llvm16/./src/Common/MemoryTrackerSwitcher.h:19: DB::(anonymous namespace)::SingleEndpointHTTPSessionPool::allocObject() @ 0x0000000012010e5a in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 4. ./.cmake-llvm16/./src/Common/PoolBase.h:174: PoolBase::get(long) @ 0x0000000012011a6f in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 5. ./.cmake-llvm16/./contrib/llvm-project/libcxx/include/variant:797: DB::makePooledHTTPSession(Poco::URI const&, Poco::URI const&, DB::ConnectionTimeouts const&, unsigned long, bool, bool) @ 0x000000001200ec69 in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 6. ./.cmake-llvm16/./src/IO/HTTPCommon.cpp:0: DB::makePooledHTTPSession(Poco::URI const&, DB::ConnectionTimeouts const&, unsigned long, bool, bool) @ 0x000000001200d909 in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 7. ./.cmake-llvm16/./contrib/llvm-project/libcxx/include/variant:797: void DB::S3::PocoHTTPClient::makeRequestInternalImpl(Aws::Http::HttpRequest&, DB::S3::ClientConfigurationPerRequest const&, std::shared_ptr&, Aws::Utils::RateLimits::RateLimiterInterface*, Aws::Utils::RateLimits::RateLimiterInterface*) const @ 0x00000000163f5157 in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 8. ./.cmake-llvm16/./contrib/llvm-project/libcxx/include/string:1499: DB::S3::PocoHTTPClient::makeRequestInternal(Aws::Http::HttpRequest&, std::shared_ptr&, Aws::Utils::RateLimits::RateLimiterInterface*, Aws::Utils::RateLimits::RateLimiterInterface*) const @ 0x00000000163f465d in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 9. ./.cmake-llvm16/./contrib/llvm-project/libcxx/include/__memory/shared_ptr.h:622: DB::S3::PocoHTTPClient::MakeRequest(std::shared_ptr const&, Aws::Utils::RateLimits::RateLimiterInterface*, Aws::Utils::RateLimits::RateLimiterInterface*) const @ 0x00000000163f4454 in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 10. 
./.cmake-llvm16/./contrib/aws/src/aws-cpp-sdk-core/source/client/AWSClient.cpp:506: Aws::Client::AWSClient::AttemptOneRequest(std::shared_ptr const&, Aws::AmazonWebServiceRequest const&, char const*, char const*, char const*) const @ 0x000000001ae2a922 in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 11. ./.cmake-llvm16/./contrib/llvm-project/libcxx/include/__memory/shared_ptr.h:612: Aws::Client::AWSClient::AttemptExhaustively(Aws::Http::URI const&, Aws::AmazonWebServiceRequest const&, Aws::Http::HttpMethod, char const*, char const*, char const*) const @ 0x000000001ae28299 in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 12. ./.cmake-llvm16/./contrib/aws/src/aws-cpp-sdk-core/include/aws/core/utils/Outcome.h:160: Aws::Client::AWSXMLClient::MakeRequest(Aws::Http::URI const&, Aws::AmazonWebServiceRequest const&, Aws::Http::HttpMethod, char const*, char const*, char const*) const @ 0x000000001ae3c9ed in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 13. ./.cmake-llvm16/./contrib/aws/src/aws-cpp-sdk-core/source/client/AWSXmlClient.cpp:66: Aws::Client::AWSXMLClient::MakeRequest(Aws::AmazonWebServiceRequest const&, Aws::Endpoint::AWSEndpoint const&, Aws::Http::HttpMethod, char const*, char const*, char const*) const @ 0x000000001ae3c995 in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 14. ./.cmake-llvm16/./contrib/aws/generated/src/aws-cpp-sdk-s3/source/S3Client.cpp:0: Aws::S3::S3Client::ListObjectsV2(Aws::S3::Model::ListObjectsV2Request const&) const @ 0x000000001aee6666 in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 15. ./.cmake-llvm16/./contrib/aws/src/aws-cpp-sdk-core/include/aws/core/utils/Outcome.h:160: DB::S3::Client::ListObjectsV2(DB::S3::ExtendedRequest const&) const @ 0x00000000163cee42 in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 16. ./.cmake-llvm16/./contrib/aws/src/aws-cpp-sdk-core/include/aws/core/utils/Outcome.h:120: DB::S3ObjectStorage::listObjects(String const&, std::vector>&, int) const @ 0x0000000016b582e2 in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 17. ./.cmake-llvm16/./contrib/llvm-project/libcxx/include/vector:543: DB::IObjectStorage::existsOrHasAnyChild(String const&) const @ 0x000000001644ebe9 in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 18. ./.cmake-llvm16/./src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp:0: DB::MetadataStorageFromPlainObjectStorage::exists(String const&) const @ 0x0000000016b54a64 in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 19. ./.cmake-llvm16/./src/Disks/IDisk.cpp:145: DB::IDisk::copyDirectoryContent(String const&, std::shared_ptr const&, String const&) @ 0x0000000016b38fa0 in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 20. ./.cmake-llvm16/./contrib/llvm-project/libcxx/include/string:1499: DB::CommandCopy::execute(std::vector> const&, std::shared_ptr&, Poco::Util::LayeredConfiguration&) @ 0x0000000012050403 in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 21. ./.cmake-llvm16/./contrib/llvm-project/libcxx/include/vector:434: DB::DisksApp::main(std::vector> const&) @ 0x000000001204bf02 in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 22. ./.cmake-llvm16/./base/poco/Util/src/Application.cpp:0: Poco::Util::Application::run() @ 0x000000001ac7a666 in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 23. ./.cmake-llvm16/./programs/disks/DisksApp.cpp:0: mainEntryClickHouseDisks(int, char**) @ 0x000000001204c550 in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 24. 
./.cmake-llvm16/./programs/main.cpp:0: main @ 0x000000000cfbadc4 in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 25. ? @ 0x00007ffff7dc9850 in ? 26. __libc_start_main @ 0x00007ffff7dc990a in ? 27. _start @ 0x000000000cfba1ee in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse (version 23.7.1.1) AWSXmlClient: HTTP response code: -1 Resolved remote host IP address: Request ID: Exception name: Error message: Code: 49. DB::Exception: current_thread is not initialized. (LOGICAL_ERROR) (version 23.7.1.1) 0 response headers: If the signature check failed. This could be because of a time skew. Attempting to adjust the signer. Request failed, now waiting 1600 ms before attempting again. Signed-off-by: Azat Khuzhin --- src/Common/MemoryTrackerSwitcher.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/Common/MemoryTrackerSwitcher.h b/src/Common/MemoryTrackerSwitcher.h index 0fefcbb280a..3c99fd12353 100644 --- a/src/Common/MemoryTrackerSwitcher.h +++ b/src/Common/MemoryTrackerSwitcher.h @@ -6,17 +6,13 @@ namespace DB { -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - struct MemoryTrackerSwitcher { explicit MemoryTrackerSwitcher(MemoryTracker * new_tracker) { + /// current_thread is not initialized for the main thread, so simply do not switch anything if (!current_thread) - throw Exception(ErrorCodes::LOGICAL_ERROR, "current_thread is not initialized"); + return; auto * thread_tracker = CurrentThread::getMemoryTracker(); prev_untracked_memory = current_thread->untracked_memory; @@ -28,6 +24,10 @@ struct MemoryTrackerSwitcher ~MemoryTrackerSwitcher() { + /// current_thread is not initialized for the main thread, so simply do not switch anything + if (!current_thread) + return; + CurrentThread::flushUntrackedMemory(); auto * thread_tracker = CurrentThread::getMemoryTracker(); @@ -35,6 +35,7 @@ struct MemoryTrackerSwitcher thread_tracker->setParent(prev_memory_tracker_parent); } +private: MemoryTracker * prev_memory_tracker_parent = nullptr; Int64 prev_untracked_memory = 0; }; From 3b954a2952477bee203a5e00c2cbb9f6a50ae274 Mon Sep 17 00:00:00 2001 From: Konstantin Ilchenko Date: Sun, 9 Jul 2023 14:38:16 +0200 Subject: [PATCH 1406/1997] [DOCS] Add REMOVE SAMPLE BY to docs --- .../statements/alter/sample-by.md | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/docs/en/sql-reference/statements/alter/sample-by.md b/docs/en/sql-reference/statements/alter/sample-by.md index b20f3c7b5d3..ccad792f853 100644 --- a/docs/en/sql-reference/statements/alter/sample-by.md +++ b/docs/en/sql-reference/statements/alter/sample-by.md @@ -5,15 +5,28 @@ sidebar_label: SAMPLE BY title: "Manipulating Sampling-Key Expressions" --- -Syntax: +# Manipulating SAMPLE BY expression + +The following operations are available: + +## MODIFY ``` sql ALTER TABLE [db].name [ON CLUSTER cluster] MODIFY SAMPLE BY new_expression ``` -The command changes the [sampling key](../../../engines/table-engines/mergetree-family/mergetree.md) of the table to `new_expression` (an expression or a tuple of expressions). +The command changes the [sampling key](../../../engines/table-engines/mergetree-family/mergetree.md) of the table to `new_expression` (an expression or a tuple of expressions). The primary key must contain the new sample key. -The command is lightweight in the sense that it only changes metadata. The primary key must contain the new sample key. 
+## REMOVE + +``` sql +ALTER TABLE [db].name [ON CLUSTER cluster] REMOVE SAMPLE BY +``` + +The command removes the [sampling key](../../../engines/table-engines/mergetree-family/mergetree.md) of the table. + + +The commands `MODIFY` and `REMOVE` are lightweight in the sense that they only change metadata or remove files. :::note It only works for tables in the [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) family (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) tables). From 2db092f9d82537e7bac4f31568a0d1c21dbc5799 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 8 Jul 2023 21:06:00 +0200 Subject: [PATCH 1407/1997] Cleanup remote_servers in dist config.xml Initially the dist config did not have this many clusters; they were added whenever someone needed a new cluster for tests. So let's move them into the clusters.xml that is deployed only for tests, and leave only the default cluster. Also clean up some configs in the repo that had been copied from the dist config (as for the test_config_* integration tests, this should be OK, since there is more_clusters.xml as well, which covers additional cases). Signed-off-by: Azat Khuzhin --- .../internal/platform/data/file_test.go | 4 +- .../testdata/configs/xml/config.xml | 67 -------- .../testdata/configs/yaml/config.yaml | 40 ----- .../testdata/configs/yandex_xml/config.xml | 67 -------- programs/server/config.d/more_clusters.xml | 49 ------ programs/server/config.xml | 155 +---------------- programs/server/config.yaml.example | 44 +---- tests/config/config.d/clusters.xml | 157 ++++++++++++++++++ .../configs/config.xml | 38 ----- .../test_config_xml_full/configs/config.xml | 85 ---------- .../test_config_xml_main/configs/config.xml | 67 -------- .../configs/config.xml | 67 -------- .../test_config_yaml_full/configs/config.yaml | 46 ----- .../test_config_yaml_main/configs/config.yaml | 40 ----- .../configs/disable_lazy_load.xml | 12 +- .../configs/overrides.xml | 12 ++ .../test_dictionaries_dependency/test.py | 8 +- ...torage_configuration.xml => overrides.xml} | 17 ++ .../test.py | 2 +- .../configs/macros.xml | 1 - .../test_https_replication/configs/config.xml | 25 --- .../{named_collections.xml => overrides.xml} | 12 ++ .../test_mask_sensitive_info/test.py | 2 +- .../configs/config.d/remote_servers.xml | 14 ++ .../test_storage_hdfs/configs/cluster.xml | 15 ++ .../test_storage_url/configs/conf.xml | 34 ++++ utils/clickhouse-diagnostics/README.md | 75 --------- 27 files changed, 289 insertions(+), 866 deletions(-) delete mode 100644 programs/server/config.d/more_clusters.xml create mode 100644 tests/integration/test_dictionaries_dependency/configs/overrides.xml rename tests/integration/test_distributed_storage_configuration/configs/config.d/{storage_configuration.xml => overrides.xml} (54%) rename tests/integration/test_mask_sensitive_info/configs/{named_collections.xml => overrides.xml} (65%) diff --git a/programs/diagnostics/internal/platform/data/file_test.go b/programs/diagnostics/internal/platform/data/file_test.go index 938c34281f1..5df1f8cc359 100644 --- a/programs/diagnostics/internal/platform/data/file_test.go +++ b/programs/diagnostics/internal/platform/data/file_test.go @@ -135,7 +135,7 @@ func TestConfigFileFrameCopy(t *testing.T) { sizes := map[string]int64{ "users.xml": int64(2017), "default-password.xml": int64(188), - "config.xml": int64(61662), + "config.xml": int64(59506), "server-include.xml": int64(168), "user-include.xml": int64(559), } @@ -189,7 +189,7 @@ func
TestConfigFileFrameCopy(t *testing.T) { sizes := map[string]int64{ "users.yaml": int64(1023), "default-password.yaml": int64(132), - "config.yaml": int64(42512), + "config.yaml": int64(41633), "server-include.yaml": int64(21), "user-include.yaml": int64(120), } diff --git a/programs/diagnostics/testdata/configs/xml/config.xml b/programs/diagnostics/testdata/configs/xml/config.xml index 21a0821f89d..c08b0b2970f 100644 --- a/programs/diagnostics/testdata/configs/xml/config.xml +++ b/programs/diagnostics/testdata/configs/xml/config.xml @@ -649,73 +649,6 @@ - - - - localhost - 9000 - - - - - localhost - 9000 - - - - - - - 127.0.0.1 - 9000 - - - - - 127.0.0.2 - 9000 - - - - - - true - - 127.0.0.1 - 9000 - - - - true - - 127.0.0.2 - 9000 - - - - - - - localhost - 9440 - 1 - - - - - - - localhost - 9000 - - - - - localhost - 1 - - - - + + + - - - - false - - 127.0.0.1 - 9000 - - - 127.0.0.2 - 9000 - - - 127.0.0.3 - 9000 - - - - - - - false - - 127.0.0.1 - 9000 - - - 127.0.0.2 - 9000 - - - 127.0.0.3 - 9000 - - - 127.0.0.4 - 9000 - - - 127.0.0.5 - 9000 - - - 127.0.0.6 - 9000 - - - 127.0.0.7 - 9000 - - - 127.0.0.8 - 9000 - - - 127.0.0.9 - 9000 - - - 127.0.0.10 - 9000 - - - - 127.0.0.11 - 1234 - - - - - - - localhost - 9000 - - - - - localhost - 9000 - - - - - - - 127.0.0.1 - 9000 - - - - - 127.0.0.2 - 9000 - - - - - - true - - 127.0.0.1 - 9000 - - - - true - - 127.0.0.2 - 9000 - - - - - - - localhost - 9440 - 1 - - - - - - - localhost - 9000 - - - - - localhost - 1 - - - + + + 127.0.0.11 + 1234 + + + + + + false + + 127.0.0.1 + 9000 + + + 127.0.0.2 + 9000 + + + 127.0.0.3 + 9000 + + + + + + + + localhost + 9000 + + + + + localhost + 9000 + + + + + + true + + 127.0.0.1 + 9000 + + + + true + + 127.0.0.2 + 9000 + + + + + + + localhost + 9440 + 1 + + + + + + + localhost + 9000 + + + + + localhost + 1 + + + diff --git a/tests/integration/test_config_corresponding_root/configs/config.xml b/tests/integration/test_config_corresponding_root/configs/config.xml index 72014646161..9a38d02a036 100644 --- a/tests/integration/test_config_corresponding_root/configs/config.xml +++ b/tests/integration/test_config_corresponding_root/configs/config.xml @@ -136,7 +136,6 @@ https://clickhouse.com/docs/en/table_engines/distributed/ --> - @@ -145,43 +144,6 @@ - - - - localhost - 9000 - - - - - localhost - 9000 - - - - - - - localhost - 9440 - 1 - - - - - - - localhost - 9000 - - - - - localhost - 1 - - - diff --git a/tests/integration/test_config_xml_full/configs/config.xml b/tests/integration/test_config_xml_full/configs/config.xml index 4e3d1def5fc..d142df18af8 100644 --- a/tests/integration/test_config_xml_full/configs/config.xml +++ b/tests/integration/test_config_xml_full/configs/config.xml @@ -565,91 +565,6 @@ - - - - localhost - 9000 - - - - - localhost - 9000 - - - - - - - 127.0.0.1 - 9000 - - - - - 127.0.0.2 - 9000 - - - - - - true - - 127.0.0.1 - 9000 - - - - true - - 127.0.0.2 - 9000 - - - - - - - localhost - 9440 - 1 - - - - - - - localhost - 9440 - - - - - - - - localhost - 9440 - - - - - - - - localhost - 9000 - - - - - localhost - 1 - - - - - - - - - - localhost - 9000 - - - - - - - localhost - 9440 - 1 - - - - - - 0 + 0 From 6fd27b6cd882b31f73ecd27ca7ae0bb2f0d25854 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 18 Jul 2023 22:19:35 +0200 Subject: [PATCH 1712/1997] Fix build --- src/Storages/StorageMergeTree.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 085d532b09c..32e100edc4d 100644 --- 
a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -45,6 +45,7 @@ #include #include + namespace DB { @@ -940,7 +941,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( SelectPartsDecision select_decision = SelectPartsDecision::CANNOT_SELECT; - auto is_background_memory_usage_ok = [](String * disable_reason) -> bool + auto is_background_memory_usage_ok = [](String & disable_reason) -> bool { if (canEnqueueBackgroundTask()) return true; From ff6e5ff1c547494ed7c6320c5d62bf789d433ae2 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 18 Jul 2023 20:23:55 +0000 Subject: [PATCH 1713/1997] Automatic style fix --- tests/integration/test_concurrent_ttl_merges/test.py | 10 ++++++++-- .../test_shutdown_wait_unfinished_queries/test.py | 10 ++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_concurrent_ttl_merges/test.py b/tests/integration/test_concurrent_ttl_merges/test.py index f6ba3834c92..96264e53522 100644 --- a/tests/integration/test_concurrent_ttl_merges/test.py +++ b/tests/integration/test_concurrent_ttl_merges/test.py @@ -7,10 +7,16 @@ from helpers.test_tools import assert_eq_with_retry, TSV cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( - "node1", main_configs=["configs/fast_background_pool.xml"], user_configs=["configs/users.xml"], with_zookeeper=True + "node1", + main_configs=["configs/fast_background_pool.xml"], + user_configs=["configs/users.xml"], + with_zookeeper=True, ) node2 = cluster.add_instance( - "node2", main_configs=["configs/fast_background_pool.xml"], user_configs=["configs/users.xml"], with_zookeeper=True + "node2", + main_configs=["configs/fast_background_pool.xml"], + user_configs=["configs/users.xml"], + with_zookeeper=True, ) diff --git a/tests/integration/test_shutdown_wait_unfinished_queries/test.py b/tests/integration/test_shutdown_wait_unfinished_queries/test.py index 71f8b9a759d..074667fc92f 100644 --- a/tests/integration/test_shutdown_wait_unfinished_queries/test.py +++ b/tests/integration/test_shutdown_wait_unfinished_queries/test.py @@ -6,10 +6,16 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) node_wait_queries = cluster.add_instance( - "node_wait_queries", main_configs=["configs/config_wait.xml"], user_configs=["configs/users.xml"], stay_alive=True + "node_wait_queries", + main_configs=["configs/config_wait.xml"], + user_configs=["configs/users.xml"], + stay_alive=True, ) node_kill_queries = cluster.add_instance( - "node_kill_queries", main_configs=["configs/config_kill.xml"], user_configs=["configs/users.xml"], stay_alive=True + "node_kill_queries", + main_configs=["configs/config_kill.xml"], + user_configs=["configs/users.xml"], + stay_alive=True, ) global result From 3715c7f461dc9a0c48ea3cfac52ef52c47a53c64 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 19 Jul 2023 01:08:14 +0200 Subject: [PATCH 1714/1997] Fix error in a test --- tests/queries/0_stateless/02293_selected_rows_and_merges.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02293_selected_rows_and_merges.sh b/tests/queries/0_stateless/02293_selected_rows_and_merges.sh index 76c562c9744..2f281d27814 100755 --- a/tests/queries/0_stateless/02293_selected_rows_and_merges.sh +++ b/tests/queries/0_stateless/02293_selected_rows_and_merges.sh @@ -24,4 +24,4 @@ ${CLICKHOUSE_CLIENT} -q "system flush logs" # Here for mutation all values are 0, cause mutation is executed async. 
# It's pretty hard to write a test with total counter. -${CLICKHOUSE_CLIENT} -q "select ProfileEvents['SelectedRows'] > 10, ProfileEvents['SelectedBytes'], ProfileEvents['MergedRows'], ProfileEvents['MergedUncompressedBytes'] from system.query_log where query_id = '$query_id' and type = 'QueryFinish' and query like 'alter%' and current_database = currentDatabase()" +${CLICKHOUSE_CLIENT} -q "select ProfileEvents['SelectedRows'] > 10, ProfileEvents['SelectedBytes'] > 1000, ProfileEvents['MergedRows'], ProfileEvents['MergedUncompressedBytes'] from system.query_log where query_id = '$query_id' and type = 'QueryFinish' and query like 'alter%' and current_database = currentDatabase()" From c724816cb8403c07d2d4c4601e0c4c9dcfc16e5f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 19 Jul 2023 01:15:16 +0200 Subject: [PATCH 1715/1997] Fix test --- .../configs/config.d/merge_tree.xml | 5 +++++ .../configs/config.d/users.xml | 5 ----- .../configs/config.xml | 22 ------------------- .../test_merge_tree_s3_failover/test.py | 1 + 4 files changed, 6 insertions(+), 27 deletions(-) create mode 100644 tests/integration/test_merge_tree_s3_failover/configs/config.d/merge_tree.xml delete mode 100644 tests/integration/test_merge_tree_s3_failover/configs/config.d/users.xml delete mode 100644 tests/integration/test_merge_tree_s3_failover/configs/config.xml diff --git a/tests/integration/test_merge_tree_s3_failover/configs/config.d/merge_tree.xml b/tests/integration/test_merge_tree_s3_failover/configs/config.d/merge_tree.xml new file mode 100644 index 00000000000..c58c957b596 --- /dev/null +++ b/tests/integration/test_merge_tree_s3_failover/configs/config.d/merge_tree.xml @@ -0,0 +1,5 @@ + + + 1.0 + + diff --git a/tests/integration/test_merge_tree_s3_failover/configs/config.d/users.xml b/tests/integration/test_merge_tree_s3_failover/configs/config.d/users.xml deleted file mode 100644 index 0011583a68c..00000000000 --- a/tests/integration/test_merge_tree_s3_failover/configs/config.d/users.xml +++ /dev/null @@ -1,5 +0,0 @@ - - - - - diff --git a/tests/integration/test_merge_tree_s3_failover/configs/config.xml b/tests/integration/test_merge_tree_s3_failover/configs/config.xml deleted file mode 100644 index 743d75d9a21..00000000000 --- a/tests/integration/test_merge_tree_s3_failover/configs/config.xml +++ /dev/null @@ -1,22 +0,0 @@ - - 9000 - 127.0.0.1 - - - - true - none - - AcceptCertificateHandler - - - - - 500 - ./clickhouse/ - users.xml - - - 1.0 - - diff --git a/tests/integration/test_merge_tree_s3_failover/test.py b/tests/integration/test_merge_tree_s3_failover/test.py index 90dda631924..57ca5ed5ffd 100644 --- a/tests/integration/test_merge_tree_s3_failover/test.py +++ b/tests/integration/test_merge_tree_s3_failover/test.py @@ -67,6 +67,7 @@ def cluster(): "configs/config.d/storage_conf.xml", "configs/config.d/instant_moves.xml", "configs/config.d/part_log.xml", + "configs/config.d/merge_tree.xml" ], with_minio=True, ) From 3c8141529f0f8d4d7c48c077e91af77ee9885ad8 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 18 Jul 2023 23:25:21 +0000 Subject: [PATCH 1716/1997] Automatic style fix --- tests/integration/test_merge_tree_s3_failover/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_merge_tree_s3_failover/test.py b/tests/integration/test_merge_tree_s3_failover/test.py index 57ca5ed5ffd..b47d741e78e 100644 --- a/tests/integration/test_merge_tree_s3_failover/test.py +++ b/tests/integration/test_merge_tree_s3_failover/test.py @@ -67,7 +67,7 @@ def 
cluster(): "configs/config.d/storage_conf.xml", "configs/config.d/instant_moves.xml", "configs/config.d/part_log.xml", - "configs/config.d/merge_tree.xml" + "configs/config.d/merge_tree.xml", ], with_minio=True, ) From a19a1001f063ce6d992ffc08d6d05d3ef7342b66 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 19 Jul 2023 01:28:22 +0200 Subject: [PATCH 1717/1997] Fix 01111_create_drop_replicated_db_stress --- .../01111_create_drop_replicated_db_stress.sh | 37 +++++++++---------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh b/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh index 4d341e5b8a3..cc63af3676b 100755 --- a/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh +++ b/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh @@ -8,7 +8,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) function create_db() { - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do SHARD=$(($RANDOM % 2)) REPLICA=$(($RANDOM % 2)) SUFFIX=$(($RANDOM % 16)) @@ -24,7 +25,8 @@ function create_db() function drop_db() { - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do database=$($CLICKHOUSE_CLIENT -q "select name from system.databases where name like '${CLICKHOUSE_DATABASE}%' order by rand() limit 1") if [[ "$database" == "$CLICKHOUSE_DATABASE" ]]; then continue; fi if [ -z "$database" ]; then continue; fi @@ -36,7 +38,8 @@ function drop_db() function sync_db() { - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do database=$($CLICKHOUSE_CLIENT -q "select name from system.databases where name like '${CLICKHOUSE_DATABASE}%' order by rand() limit 1") if [ -z "$database" ]; then continue; fi $CLICKHOUSE_CLIENT --receive_timeout=1 -q \ @@ -47,7 +50,8 @@ function sync_db() function create_table() { - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do database=$($CLICKHOUSE_CLIENT -q "select name from system.databases where name like '${CLICKHOUSE_DATABASE}%' order by rand() limit 1") if [ -z "$database" ]; then continue; fi $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=0 -q \ @@ -59,7 +63,8 @@ function create_table() function alter_table() { - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do table=$($CLICKHOUSE_CLIENT -q "select database || '.' || name from system.tables where database like '${CLICKHOUSE_DATABASE}%' order by rand() limit 1") if [ -z "$table" ]; then continue; fi $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=0 -q \ @@ -71,7 +76,8 @@ function alter_table() function insert() { - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do table=$($CLICKHOUSE_CLIENT -q "select database || '.' 
|| name from system.tables where database like '${CLICKHOUSE_DATABASE}%' order by rand() limit 1") if [ -z "$table" ]; then continue; fi $CLICKHOUSE_CLIENT -q \ @@ -81,23 +87,16 @@ function insert() -export -f create_db -export -f drop_db -export -f sync_db -export -f create_table -export -f alter_table -export -f insert - TIMEOUT=30 -timeout $TIMEOUT bash -c create_db & -timeout $TIMEOUT bash -c sync_db & -timeout $TIMEOUT bash -c create_table & -timeout $TIMEOUT bash -c alter_table & -timeout $TIMEOUT bash -c insert & +create_db $TIMEOUT & +sync_db $TIMEOUT & +create_table $TIMEOUT & +alter_table $TIMEOUT & +insert $TIMEOUT & sleep 1 # give other queries a head start -timeout $TIMEOUT bash -c drop_db & +drop_db $TIMEOUT & wait From 6d915042a23ed0fd320b98118d9527e312d49ffe Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 19 Jul 2023 01:44:20 +0200 Subject: [PATCH 1718/1997] Fix ugly code --- src/Processors/Formats/Impl/ArrowFieldIndexUtil.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Processors/Formats/Impl/ArrowFieldIndexUtil.h b/src/Processors/Formats/Impl/ArrowFieldIndexUtil.h index 4beffbcf869..909133dfa4a 100644 --- a/src/Processors/Formats/Impl/ArrowFieldIndexUtil.h +++ b/src/Processors/Formats/Impl/ArrowFieldIndexUtil.h @@ -75,7 +75,7 @@ public: { if (!allow_missing_columns) throw Exception( - ErrorCodes::THERE_IS_NO_COLUMN, "Not found field({}) in arrow schema:{}.", named_col.name, schema.ToString()); + ErrorCodes::THERE_IS_NO_COLUMN, "Not found field ({}) in the Arrow schema: {}.", named_col.name, schema.ToString()); else continue; } @@ -168,4 +168,3 @@ private: }; } #endif - From 0789f388c3f6acbfdb42f44ee6463b3d646ddc27 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 19 Jul 2023 02:45:56 +0300 Subject: [PATCH 1719/1997] Update ArrowFieldIndexUtil.h --- src/Processors/Formats/Impl/ArrowFieldIndexUtil.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Formats/Impl/ArrowFieldIndexUtil.h b/src/Processors/Formats/Impl/ArrowFieldIndexUtil.h index 909133dfa4a..b7adaa35335 100644 --- a/src/Processors/Formats/Impl/ArrowFieldIndexUtil.h +++ b/src/Processors/Formats/Impl/ArrowFieldIndexUtil.h @@ -75,7 +75,7 @@ public: { if (!allow_missing_columns) throw Exception( - ErrorCodes::THERE_IS_NO_COLUMN, "Not found field ({}) in the Arrow schema: {}.", named_col.name, schema.ToString()); + ErrorCodes::THERE_IS_NO_COLUMN, "Not found field ({}) in the following Arrow schema:\n{}\n", named_col.name, schema.ToString()); else continue; } From d666272b7666967cf1d1bed3804673e3beb1ca64 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 19 Jul 2023 05:29:12 +0200 Subject: [PATCH 1720/1997] Enable `allow_vertical_merges_from_compact_to_wide_parts` by default --- src/Storages/MergeTree/MergeTreeSettings.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index dc24327712c..783fde088dc 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -160,7 +160,7 @@ struct Settings; M(UInt64, min_marks_to_honor_max_concurrent_queries, 0, "Minimal number of marks to honor the MergeTree-level's max_concurrent_queries (0 - disabled). 
Queries will still be limited by other max_concurrent_queries settings.", 0) \ M(UInt64, min_bytes_to_rebalance_partition_over_jbod, 0, "Minimal amount of bytes to enable part rebalance over JBOD array (0 - disabled).", 0) \ M(Bool, check_sample_column_is_correct, true, "Check columns or columns by hash for sampling are unsigned integer.", 0) \ - M(Bool, allow_vertical_merges_from_compact_to_wide_parts, false, "Allows vertical merges from compact to wide parts. This settings must have the same value on all replicas", 0) \ + M(Bool, allow_vertical_merges_from_compact_to_wide_parts, true, "Allows vertical merges from compact to wide parts. This settings must have the same value on all replicas", 0) \ M(Bool, enable_the_endpoint_id_with_zookeeper_name_prefix, false, "Enable the endpoint id with zookeeper name prefix for the replicated merge tree table", 0) \ M(UInt64, zero_copy_merge_mutation_min_parts_size_sleep_before_lock, 1ULL * 1024 * 1024 * 1024, "If zero copy replication is enabled sleep random amount of time before trying to lock depending on parts size for merge or mutation", 0) \ \ @@ -169,8 +169,9 @@ struct Settings; M(UInt64, part_moves_between_shards_delay_seconds, 30, "Time to wait before/after moving parts between shards.", 0) \ M(Bool, use_metadata_cache, false, "Experimental feature to speed up parts loading process by using MergeTree metadata cache", 0) \ M(Bool, allow_remote_fs_zero_copy_replication, false, "Don't use this setting in production, because it is not ready.", 0) \ - M(String, remote_fs_zero_copy_zookeeper_path, "/clickhouse/zero_copy", "ZooKeeper path for Zero-copy table-independet info.", 0) \ + M(String, remote_fs_zero_copy_zookeeper_path, "/clickhouse/zero_copy", "ZooKeeper path for zero-copy table-independent info.", 0) \ M(Bool, remote_fs_zero_copy_path_compatible_mode, false, "Run zero-copy in compatible mode during conversion process.", 0) \ + \ /** Compress marks and primary key. 
*/ \ M(Bool, compress_marks, true, "Marks support compression, reduce mark file size and speed up network transmission.", 0) \ M(Bool, compress_primary_key, true, "Primary key support compression, reduce primary key file size and speed up network transmission.", 0) \ From c3b8978023fae8adaa98a111f6253be50ee72a35 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Wed, 19 Jul 2023 11:53:03 +0800 Subject: [PATCH 1721/1997] Don't use minmax_count projections when counting nullable columns --- .../optimizeUseAggregateProjection.cpp | 32 ++++--------------- ..._count_projection_count_nullable.reference | 1 + ...minmax_count_projection_count_nullable.sql | 9 ++++++ 3 files changed, 17 insertions(+), 25 deletions(-) create mode 100644 tests/queries/0_stateless/01710_minmax_count_projection_count_nullable.reference create mode 100644 tests/queries/0_stateless/01710_minmax_count_projection_count_nullable.sql diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp index f183bdca7a9..4f25118958f 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp @@ -92,18 +92,6 @@ static AggregateProjectionInfo getAggregatingProjectionInfo( return info; } -static bool hasNullableOrMissingColumn(const DAGIndex & index, const Names & names) -{ - for (const auto & query_name : names) - { - auto jt = index.find(query_name); - if (jt == index.end() || jt->second->result_type->isNullable()) - return true; - } - - return false; -} - struct AggregateFunctionMatch { const AggregateDescription * description = nullptr; @@ -170,20 +158,14 @@ std::optional matchAggregateFunctions( } /// This is a special case for the function count(). - /// We can assume that 'count(expr) == count()' if expr is not nullable. - if (typeid_cast(candidate.function.get())) + /// We can assume that 'count(expr) == count()' if expr is not nullable, + /// which can be verified by simply casting to `AggregateFunctionCount *`. + if (typeid_cast(aggregate.function.get())) { - bool has_nullable_or_missing_arg = false; - has_nullable_or_missing_arg |= hasNullableOrMissingColumn(query_index, aggregate.argument_names); - has_nullable_or_missing_arg |= hasNullableOrMissingColumn(proj_index, candidate.argument_names); - - if (!has_nullable_or_missing_arg) - { - /// we can ignore arguments for count() - found_match = true; - res.push_back({&candidate, DataTypes()}); - break; - } + /// we can ignore arguments for count() + found_match = true; + res.push_back({&candidate, DataTypes()}); + break; } /// Now, function names and types matched. 
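The semantics at stake, as a small self-contained sketch (a hypothetical demo program, not part of the patch; it mirrors the test data below, where only 1 of 5 values is non-NULL): in SQL, count(expr) counts only non-NULL values, while a minmax_count projection stores count(), the total row count, so the two are interchangeable only for non-nullable arguments.

    #include <cstddef>
    #include <iostream>
    #include <optional>
    #include <string>
    #include <vector>

    int main()
    {
        /// Mirrors the test below: 5 rows, only the row where number == 3 is non-NULL.
        std::vector<std::optional<std::string>> val(5);
        val[3] = "some value";

        std::size_t count_all_rows = val.size(); /// what the projection's count() stores: 5
        std::size_t count_non_null = 0;          /// what count(val) must return: 1
        for (const auto & v : val)
            count_non_null += v.has_value();

        std::cout << count_all_rows << ' ' << count_non_null << '\n'; /// prints "5 1"
    }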
diff --git a/tests/queries/0_stateless/01710_minmax_count_projection_count_nullable.reference b/tests/queries/0_stateless/01710_minmax_count_projection_count_nullable.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/01710_minmax_count_projection_count_nullable.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/01710_minmax_count_projection_count_nullable.sql b/tests/queries/0_stateless/01710_minmax_count_projection_count_nullable.sql new file mode 100644 index 00000000000..048d725e0a0 --- /dev/null +++ b/tests/queries/0_stateless/01710_minmax_count_projection_count_nullable.sql @@ -0,0 +1,9 @@ +DROP TABLE IF EXISTS test; + +CREATE TABLE test (`val` LowCardinality(Nullable(String))) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192; + +insert into test select number == 3 ? 'some value' : null from numbers(5); + +SELECT count(val) FROM test SETTINGS optimize_use_implicit_projections = 1; + +DROP TABLE test; From 65de310137a4e192499119128aa069375eb007c8 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Wed, 19 Jul 2023 06:15:57 +0000 Subject: [PATCH 1722/1997] Return back SystemLogBase --- src/Common/SystemLogBase.cpp | 40 +++++++++++++++++++++++++++++++++- src/Common/SystemLogBase.h | 33 ++++++++++++++++++++++++++++ src/Interpreters/SystemLog.cpp | 29 ++---------------------- src/Interpreters/SystemLog.h | 16 +++----------- 4 files changed, 77 insertions(+), 41 deletions(-) diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp index 1d0673e30dd..baee7021c35 100644 --- a/src/Common/SystemLogBase.cpp +++ b/src/Common/SystemLogBase.cpp @@ -202,7 +202,45 @@ void SystemLogQueue::shutdown() flush_event.notify_all(); } -#define INSTANTIATE_SYSTEM_LOG_BASE(ELEMENT) template class SystemLogQueue; +template +SystemLogBase::SystemLogBase( + const String& name, + size_t flush_interval_milliseconds_, + std::shared_ptr> queue_) + : queue(queue_ ? queue_ : std::make_shared>(name, flush_interval_milliseconds_)) +{ +} + +template +void SystemLogBase::startup() +{ + std::lock_guard lock(queue->mutex); + saving_thread = std::make_unique([this] { savingThreadFunction(); }); +} + +template +void SystemLogBase::add(const LogElement & element) +{ + queue->push(element); +} + +template +void SystemLogBase::flush(bool force) +{ + uint64_t this_thread_requested_offset = queue->notifyFlush(force); + if (this_thread_requested_offset == uint64_t(-1)) + return; + + queue->waitFlush(this_thread_requested_offset); +} + +template +void SystemLogBase::notifyFlush(bool force) { queue->notifyFlush(force); } + +#define INSTANTIATE_SYSTEM_LOG_BASE(ELEMENT) template class SystemLogBase; SYSTEM_LOG_ELEMENTS(INSTANTIATE_SYSTEM_LOG_BASE) +#define INSTANTIATE_SYSTEM_LOG_QUEUE(ELEMENT) template class SystemLogQueue; +SYSTEM_LOG_ELEMENTS(INSTANTIATE_SYSTEM_LOG_QUEUE) + } diff --git a/src/Common/SystemLogBase.h b/src/Common/SystemLogBase.h index 9436137d4a8..5718182e115 100644 --- a/src/Common/SystemLogBase.h +++ b/src/Common/SystemLogBase.h @@ -121,4 +121,37 @@ private: const size_t flush_interval_milliseconds; }; + + +template +class SystemLogBase : public ISystemLog +{ +public: + using Self = SystemLogBase; + + SystemLogBase( + const String& name, + size_t flush_interval_milliseconds_, + std::shared_ptr> queue_ = nullptr); + + void startup() override; + + /** Append a record into log. + * Writing to table will be done asynchronously and in case of failure, record could be lost. 
+ */ + void add(const LogElement & element); + + /// Flush data in the buffer to disk. Block the thread until the data is stored on disk. + void flush(bool force) override; + + /// Non-blocking flush data in the buffer to disk. + void notifyFlush(bool force); + + String getName() const override { return LogElement::name(); } + + static const char * getDefaultOrderBy() { return "event_date, event_time"; } + +protected: + std::shared_ptr> queue; +}; } diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index 3193baa551f..674210cbaad 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -334,23 +334,16 @@ SystemLog::SystemLog( const String & storage_def_, size_t flush_interval_milliseconds_, std::shared_ptr> queue_) - : WithContext(context_) + : Base(database_name_ + "." + table_name_, flush_interval_milliseconds_, queue_) + , WithContext(context_) , log(&Poco::Logger::get("SystemLog (" + database_name_ + "." + table_name_ + ")")) , table_id(database_name_, table_name_) , storage_def(storage_def_) , create_query(serializeAST(*getCreateTableQuery())) - , queue(queue_ ? queue_ : std::make_shared>(database_name_ + "." + table_name_, flush_interval_milliseconds_)) { assert(database_name_ == DatabaseCatalog::SYSTEM_DATABASE); } -template -void SystemLog::startup() -{ - std::lock_guard lock(queue->mutex); - saving_thread = std::make_unique([this] { savingThreadFunction(); }); -} - template void SystemLog::shutdown() { @@ -618,24 +611,6 @@ ASTPtr SystemLog::getCreateTableQuery() return create; } -template -void SystemLog::add(const LogElement & element) -{ - queue->push(element); -} - -template -void SystemLog::flush(bool force) -{ - uint64_t this_thread_requested_offset = queue->notifyFlush(force); - if (this_thread_requested_offset == uint64_t(-1)) - return; - - queue->waitFlush(this_thread_requested_offset); -} - -template -void SystemLog::notifyFlush(bool force) { queue->notifyFlush(force); } #define INSTANTIATE_SYSTEM_LOG(ELEMENT) template class SystemLog; SYSTEM_LOG_ELEMENTS(INSTANTIATE_SYSTEM_LOG) diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index 6f61e075b49..91fb7f49221 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -89,10 +89,11 @@ struct SystemLogs template -class SystemLog : public ISystemLog, private boost::noncopyable, WithContext +class SystemLog : public SystemLogBase, private boost::noncopyable, WithContext { public: using Self = SystemLog; + using Base = SystemLogBase; /** Parameter: table name where to write log. * If table is not exists, then it get created with specified engine. @@ -110,23 +111,12 @@ public: size_t flush_interval_milliseconds_, std::shared_ptr> queue_ = nullptr); - void startup() override; /** Append a record into log. * Writing to table will be done asynchronously and in case of failure, record could be lost. */ - void add(const LogElement & element); void shutdown() override; - String getName() const override { return LogElement::name(); } - static const char * getDefaultOrderBy() { return "event_date, event_time"; } - - /// Flush data in the buffer to disk. Block the thread until the data is stored on disk. - void flush(bool force) override; - - /// Non-blocking flush data in the buffer to disk. 
- void notifyFlush(bool force); - void stopFlushThread() override; protected: @@ -134,6 +124,7 @@ protected: using ISystemLog::is_shutdown; using ISystemLog::saving_thread; + using Base::queue; private: @@ -144,7 +135,6 @@ private: String create_query; String old_create_query; bool is_prepared = false; - std::shared_ptr> queue; /** Creates new table if it does not exist. * Renames old table if its structure is not suitable. From ee0453ed00ab5ecb232557e29d4e1f6365d83cd0 Mon Sep 17 00:00:00 2001 From: Chen768959 <67011523+Chen768959@users.noreply.github.com> Date: Wed, 19 Jul 2023 14:18:50 +0800 Subject: [PATCH 1723/1997] fix issue#50582 tests Reproduced issue #50582, which occurs when sorting column contains constants and triggers the FinishSortingTransform. --- .../02815_fix_not_found_constants_col_in_block.reference | 2 ++ .../02815_fix_not_found_constants_col_in_block.sql | 5 +++++ 2 files changed, 7 insertions(+) create mode 100644 tests/queries/0_stateless/02815_fix_not_found_constants_col_in_block.reference create mode 100644 tests/queries/0_stateless/02815_fix_not_found_constants_col_in_block.sql diff --git a/tests/queries/0_stateless/02815_fix_not_found_constants_col_in_block.reference b/tests/queries/0_stateless/02815_fix_not_found_constants_col_in_block.reference new file mode 100644 index 00000000000..f2d4d23d9e3 --- /dev/null +++ b/tests/queries/0_stateless/02815_fix_not_found_constants_col_in_block.reference @@ -0,0 +1,2 @@ +\N 1 19000 +\N 1 19000 diff --git a/tests/queries/0_stateless/02815_fix_not_found_constants_col_in_block.sql b/tests/queries/0_stateless/02815_fix_not_found_constants_col_in_block.sql new file mode 100644 index 00000000000..c56d59c72d6 --- /dev/null +++ b/tests/queries/0_stateless/02815_fix_not_found_constants_col_in_block.sql @@ -0,0 +1,5 @@ +DROP TABLE IF EXISTS t0; +CREATE TABLE t0 (vkey UInt32, c0 Float32, primary key(c0)) engine = AggregatingMergeTree; +insert into t0 values (19000, 1); +select null as c_2_0, ref_2.c0 as c_2_1, ref_2.vkey as c_2_2 from t0 as ref_2 order by c_2_0 asc, c_2_1 asc, c_2_2 asc; +select null as c_2_0, ref_2.c0 as c_2_1, ref_2.vkey as c_2_2 from t0 as ref_2 order by c_2_0 asc, c_2_1 asc; From d601d86fad94250ca3b749baa4478679cd6e1973 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Wed, 19 Jul 2023 07:22:25 +0000 Subject: [PATCH 1724/1997] Remove empty line --- src/Common/SystemLogBase.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Common/SystemLogBase.h b/src/Common/SystemLogBase.h index 5718182e115..fa9f9b6f72e 100644 --- a/src/Common/SystemLogBase.h +++ b/src/Common/SystemLogBase.h @@ -122,7 +122,6 @@ private: }; - template class SystemLogBase : public ISystemLog { From 629e0e0269dc96f88f781eb8a0a711667d50c92b Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Wed, 19 Jul 2023 16:06:21 +0800 Subject: [PATCH 1725/1997] Fix projection analysis with primary key analysis --- .../QueryPlan/Optimizations/optimizeTree.cpp | 8 +++++--- .../QueryPlan/Optimizations/projectionsCommon.cpp | 3 ++- .../QueryPlan/Optimizations/projectionsCommon.h | 1 - ..._projection_with_query_plan_optimization.reference | 1 + ...normal_projection_with_query_plan_optimization.sql | 11 +++++++++++ 5 files changed, 19 insertions(+), 5 deletions(-) create mode 100644 tests/queries/0_stateless/01710_normal_projection_with_query_plan_optimization.reference create mode 100644 tests/queries/0_stateless/01710_normal_projection_with_query_plan_optimization.sql diff --git a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp 
b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp index 6cb76d540f7..01d192bb1f3 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp @@ -146,8 +146,13 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s } } + /// NOTE: optimizePrewhere can modify the stack. + optimizePrewhere(stack, nodes); + optimizePrimaryKeyCondition(stack); + if (optimization_settings.optimize_projection) { + /// Normal projection optimization relies on PK optimization if (optimizeUseNormalProjections(stack, nodes)) { ++num_applied_projection; @@ -164,9 +169,6 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s } } - /// NOTE: optimizePrewhere can modify the stack. - optimizePrewhere(stack, nodes); - optimizePrimaryKeyCondition(stack); enableMemoryBoundMerging(*stack.back().node, nodes); stack.pop_back(); diff --git a/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp b/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp index cb76ffa84ba..7ddda29cad4 100644 --- a/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp +++ b/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp @@ -131,7 +131,8 @@ bool QueryDAG::buildImpl(QueryPlan::Node & node, ActionsDAG::NodeRawConstPtrs & if (prewhere_info->prewhere_actions) { appendExpression(prewhere_info->prewhere_actions); - if (const auto * filter_expression = findInOutputs(*dag, prewhere_info->prewhere_column_name, prewhere_info->remove_prewhere_column)) + if (const auto * filter_expression + = findInOutputs(*dag, prewhere_info->prewhere_column_name, prewhere_info->remove_prewhere_column)) filter_nodes.push_back(filter_expression); else return false; diff --git a/src/Processors/QueryPlan/Optimizations/projectionsCommon.h b/src/Processors/QueryPlan/Optimizations/projectionsCommon.h index 1e9ab67c8fe..35daccad115 100644 --- a/src/Processors/QueryPlan/Optimizations/projectionsCommon.h +++ b/src/Processors/QueryPlan/Optimizations/projectionsCommon.h @@ -38,7 +38,6 @@ std::shared_ptr getMaxAddedBlocks(ReadFromMergeTree * rea /// This is a common DAG which is a merge of DAGs from Filter and Expression steps chain. /// Additionally, for all the Filter steps, we collect filter conditions into filter_nodes. -/// Flag remove_last_filter_node is set in case if the last step is a Filter step and it should remove filter column. 
struct QueryDAG { ActionsDAGPtr dag; diff --git a/tests/queries/0_stateless/01710_normal_projection_with_query_plan_optimization.reference b/tests/queries/0_stateless/01710_normal_projection_with_query_plan_optimization.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/01710_normal_projection_with_query_plan_optimization.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/01710_normal_projection_with_query_plan_optimization.sql b/tests/queries/0_stateless/01710_normal_projection_with_query_plan_optimization.sql new file mode 100644 index 00000000000..30306ec5637 --- /dev/null +++ b/tests/queries/0_stateless/01710_normal_projection_with_query_plan_optimization.sql @@ -0,0 +1,11 @@ +drop table if exists t; + +CREATE TABLE t (id UInt64, id2 UInt64, id3 UInt64, PROJECTION t_reverse (SELECT id, id2, id3 ORDER BY id2, id, id3)) ENGINE = MergeTree ORDER BY (id) settings index_granularity = 4; + +insert into t SELECT number, -number, number FROM numbers(10000); + +set max_rows_to_read = 4; + +select count() from t where id = 3; + +drop table t; From 549026f0ae8041ba40f4557922c480f2f07715bf Mon Sep 17 00:00:00 2001 From: Chen768959 <67011523+Chen768959@users.noreply.github.com> Date: Wed, 19 Jul 2023 16:11:14 +0800 Subject: [PATCH 1726/1997] fix style error fix Trailing whitespaces --- src/Processors/Transforms/FinishSortingTransform.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Processors/Transforms/FinishSortingTransform.cpp b/src/Processors/Transforms/FinishSortingTransform.cpp index dd61472bc37..066928446f2 100644 --- a/src/Processors/Transforms/FinishSortingTransform.cpp +++ b/src/Processors/Transforms/FinishSortingTransform.cpp @@ -50,7 +50,6 @@ FinishSortingTransform::FinishSortingTransform( description_sorted_without_constants.push_back(column_description); } } - /// The target description is modified in SortingTransform constructor. /// To avoid doing the same actions with description_sorted just copy it from prefix of target description. 
for (const auto & column_sort_desc : description_sorted_without_constants) From a86baab88b4444d5bf34e529bb737817daa20096 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Wed, 19 Jul 2023 07:49:30 +0000 Subject: [PATCH 1727/1997] Fix test_replicated_database 'node doesn't exist' flakiness --- .../test_replicated_database/test.py | 148 +++++++++--------- 1 file changed, 75 insertions(+), 73 deletions(-) diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py index b3ba8d4737f..17dd2adcde4 100644 --- a/tests/integration/test_replicated_database/test.py +++ b/tests/integration/test_replicated_database/test.py @@ -132,14 +132,15 @@ def test_create_replicated_table(started_cluster): @pytest.mark.parametrize("engine", ["MergeTree", "ReplicatedMergeTree"]) def test_simple_alter_table(started_cluster, engine): + database = f"test_simple_alter_table_{engine}" main_node.query( - "CREATE DATABASE test_simple_alter_table ENGINE = Replicated('/test/simple_alter_table', 'shard1', 'replica1');" + f"CREATE DATABASE {database} ENGINE = Replicated('/test/{database}', 'shard1', 'replica1');" ) dummy_node.query( - "CREATE DATABASE test_simple_alter_table ENGINE = Replicated('/test/simple_alter_table', 'shard1', 'replica2');" + f"CREATE DATABASE {database} ENGINE = Replicated('/test/{database}', 'shard1', 'replica2');" ) # test_simple_alter_table - name = "test_simple_alter_table.alter_test_{}".format(engine) + name = f"{database}.alter_test" main_node.query( "CREATE TABLE {} " "(CounterID UInt32, StartDate Date, UserID UInt32, VisitID UInt32, NestedColumn Nested(A UInt8, S String), ToDrop UInt32) " @@ -187,10 +188,9 @@ def test_simple_alter_table(started_cluster, engine): # test_create_replica_after_delay competing_node.query( - "CREATE DATABASE IF NOT EXISTS test_simple_alter_table ENGINE = Replicated('/test/simple_alter_table', 'shard1', 'replica3');" + f"CREATE DATABASE IF NOT EXISTS {database} ENGINE = Replicated('/test/{database}', 'shard1', 'replica3');" ) - name = "test_simple_alter_table.alter_test_{}".format(engine) main_node.query("ALTER TABLE {} ADD COLUMN Added3 UInt32;".format(name)) main_node.query("ALTER TABLE {} DROP COLUMN AddedNested1;".format(name)) main_node.query("ALTER TABLE {} RENAME COLUMN Added1 TO AddedNested1;".format(name)) @@ -210,21 +210,23 @@ def test_simple_alter_table(started_cluster, engine): ) assert_create_query([main_node, dummy_node, competing_node], name, expected) - main_node.query("DROP DATABASE test_simple_alter_table SYNC") - dummy_node.query("DROP DATABASE test_simple_alter_table SYNC") - competing_node.query("DROP DATABASE test_simple_alter_table SYNC") + main_node.query(f"DROP DATABASE {database} SYNC") + dummy_node.query(f"DROP DATABASE {database} SYNC") + competing_node.query(f"DROP DATABASE {database} SYNC") @pytest.mark.parametrize("engine", ["MergeTree", "ReplicatedMergeTree"]) def test_delete_from_table(started_cluster, engine): + database = f"delete_from_table_{engine}" + main_node.query( - "CREATE DATABASE delete_from_table ENGINE = Replicated('/test/simple_alter_table', 'shard1', 'replica1');" + f"CREATE DATABASE {database} ENGINE = Replicated('/test/{database}', 'shard1', 'replica1');" ) dummy_node.query( - "CREATE DATABASE delete_from_table ENGINE = Replicated('/test/simple_alter_table', 'shard2', 'replica1');" + f"CREATE DATABASE {database} ENGINE = Replicated('/test/{database}', 'shard2', 'replica1');" ) - name = "delete_from_table.delete_test_{}".format(engine) + name = 
f"{database}.delete_test" main_node.query( "CREATE TABLE {} " "(id UInt64, value String) " @@ -241,7 +243,7 @@ def test_delete_from_table(started_cluster, engine): table_for_select = name if not "Replicated" in engine: - table_for_select = "cluster('delete_from_table', {})".format(name) + table_for_select = f"cluster('{database}', {name})" for node in [main_node, dummy_node]: assert_eq_with_retry( node, @@ -249,8 +251,8 @@ def test_delete_from_table(started_cluster, engine): expected, ) - main_node.query("DROP DATABASE delete_from_table SYNC") - dummy_node.query("DROP DATABASE delete_from_table SYNC") + main_node.query(f"DROP DATABASE {database} SYNC") + dummy_node.query(f"DROP DATABASE {database} SYNC") def get_table_uuid(database, name): @@ -278,18 +280,18 @@ def fixture_attachable_part(started_cluster): @pytest.mark.parametrize("engine", ["MergeTree", "ReplicatedMergeTree"]) def test_alter_attach(started_cluster, attachable_part, engine): + database = f"alter_attach_{engine}" main_node.query( - "CREATE DATABASE alter_attach ENGINE = Replicated('/test/alter_attach', 'shard1', 'replica1');" + f"CREATE DATABASE {database} ENGINE = Replicated('/test/{database}', 'shard1', 'replica1');" ) dummy_node.query( - "CREATE DATABASE alter_attach ENGINE = Replicated('/test/alter_attach', 'shard1', 'replica2');" + f"CREATE DATABASE {database} ENGINE = Replicated('/test/{database}', 'shard1', 'replica2');" ) - name = "alter_attach_test_{}".format(engine) main_node.query( - f"CREATE TABLE alter_attach.{name} (CounterID UInt32) ENGINE = {engine} ORDER BY (CounterID)" + f"CREATE TABLE {database}.alter_attach_test (CounterID UInt32) ENGINE = {engine} ORDER BY (CounterID)" ) - table_uuid = get_table_uuid("alter_attach", name) + table_uuid = get_table_uuid(database, "alter_attach_test") # Provide and attach a part to the main node shutil.copytree( attachable_part, @@ -298,146 +300,146 @@ def test_alter_attach(started_cluster, attachable_part, engine): f"database/store/{table_uuid[:3]}/{table_uuid}/detached/all_1_1_0", ), ) - main_node.query(f"ALTER TABLE alter_attach.{name} ATTACH PART 'all_1_1_0'") + main_node.query(f"ALTER TABLE {database}.alter_attach_test ATTACH PART 'all_1_1_0'") # On the main node, data is attached - assert main_node.query(f"SELECT CounterID FROM alter_attach.{name}") == "123\n" + assert main_node.query(f"SELECT CounterID FROM {database}.alter_attach_test") == "123\n" # On the other node, data is replicated only if using a Replicated table engine if engine == "ReplicatedMergeTree": - assert dummy_node.query(f"SELECT CounterID FROM alter_attach.{name}") == "123\n" + assert dummy_node.query(f"SELECT CounterID FROM {database}.alter_attach_test") == "123\n" else: - assert dummy_node.query(f"SELECT CounterID FROM alter_attach.{name}") == "" - main_node.query("DROP DATABASE alter_attach SYNC") - dummy_node.query("DROP DATABASE alter_attach SYNC") + assert dummy_node.query(f"SELECT CounterID FROM {database}.alter_attach_test") == "" + main_node.query(f"DROP DATABASE {database} SYNC") + dummy_node.query(f"DROP DATABASE {database} SYNC") @pytest.mark.parametrize("engine", ["MergeTree", "ReplicatedMergeTree"]) def test_alter_drop_part(started_cluster, engine): + database = f"alter_drop_part_{engine}" main_node.query( - "CREATE DATABASE alter_drop_part ENGINE = Replicated('/test/alter_drop_part', 'shard1', 'replica1');" + f"CREATE DATABASE {database} ENGINE = Replicated('/test/{database}', 'shard1', 'replica1');" ) dummy_node.query( - "CREATE DATABASE alter_drop_part ENGINE = 
Replicated('/test/alter_drop_part', 'shard1', 'replica2');" + f"CREATE DATABASE {database} ENGINE = Replicated('/test/{database}', 'shard1', 'replica2');" ) - table = f"alter_drop_{engine}" part_name = "all_0_0_0" if engine == "ReplicatedMergeTree" else "all_1_1_0" main_node.query( - f"CREATE TABLE alter_drop_part.{table} (CounterID UInt32) ENGINE = {engine} ORDER BY (CounterID)" + f"CREATE TABLE {database}.alter_drop_part (CounterID UInt32) ENGINE = {engine} ORDER BY (CounterID)" ) - main_node.query(f"INSERT INTO alter_drop_part.{table} VALUES (123)") + main_node.query(f"INSERT INTO {database}.alter_drop_part VALUES (123)") if engine == "MergeTree": - dummy_node.query(f"INSERT INTO alter_drop_part.{table} VALUES (456)") - main_node.query(f"ALTER TABLE alter_drop_part.{table} DROP PART '{part_name}'") - assert main_node.query(f"SELECT CounterID FROM alter_drop_part.{table}") == "" + dummy_node.query(f"INSERT INTO {database}.alter_drop_part VALUES (456)") + main_node.query(f"ALTER TABLE {database}.alter_drop_part DROP PART '{part_name}'") + assert main_node.query(f"SELECT CounterID FROM {database}.alter_drop_part") == "" if engine == "ReplicatedMergeTree": # The DROP operation is still replicated at the table engine level - assert dummy_node.query(f"SELECT CounterID FROM alter_drop_part.{table}") == "" + assert dummy_node.query(f"SELECT CounterID FROM {database}.alter_drop_part") == "" else: assert ( - dummy_node.query(f"SELECT CounterID FROM alter_drop_part.{table}") + dummy_node.query(f"SELECT CounterID FROM {database}.alter_drop_part") == "456\n" ) - main_node.query("DROP DATABASE alter_drop_part SYNC") - dummy_node.query("DROP DATABASE alter_drop_part SYNC") + main_node.query(f"DROP DATABASE {database} SYNC") + dummy_node.query(f"DROP DATABASE {database} SYNC") @pytest.mark.parametrize("engine", ["MergeTree", "ReplicatedMergeTree"]) def test_alter_detach_part(started_cluster, engine): + database = f"alter_detach_part_{engine}" main_node.query( - "CREATE DATABASE alter_detach_part ENGINE = Replicated('/test/alter_detach_part', 'shard1', 'replica1');" + f"CREATE DATABASE {database} ENGINE = Replicated('/test/{database}', 'shard1', 'replica1');" ) dummy_node.query( - "CREATE DATABASE alter_detach_part ENGINE = Replicated('/test/alter_detach_part', 'shard1', 'replica2');" + f"CREATE DATABASE {database} ENGINE = Replicated('/test/{database}', 'shard1', 'replica2');" ) - table = f"alter_detach_{engine}" part_name = "all_0_0_0" if engine == "ReplicatedMergeTree" else "all_1_1_0" main_node.query( - f"CREATE TABLE alter_detach_part.{table} (CounterID UInt32) ENGINE = {engine} ORDER BY (CounterID)" + f"CREATE TABLE {database}.alter_detach (CounterID UInt32) ENGINE = {engine} ORDER BY (CounterID)" ) - main_node.query(f"INSERT INTO alter_detach_part.{table} VALUES (123)") + main_node.query(f"INSERT INTO {database}.alter_detach VALUES (123)") if engine == "MergeTree": - dummy_node.query(f"INSERT INTO alter_detach_part.{table} VALUES (456)") - main_node.query(f"ALTER TABLE alter_detach_part.{table} DETACH PART '{part_name}'") - detached_parts_query = f"SELECT name FROM system.detached_parts WHERE database='alter_detach_part' AND table='{table}'" + dummy_node.query(f"INSERT INTO {database}.alter_detach VALUES (456)") + main_node.query(f"ALTER TABLE {database}.alter_detach DETACH PART '{part_name}'") + detached_parts_query = f"SELECT name FROM system.detached_parts WHERE database='{database}' AND table='alter_detach'" assert main_node.query(detached_parts_query) == f"{part_name}\n" if engine == 
"ReplicatedMergeTree": # The detach operation is still replicated at the table engine level assert dummy_node.query(detached_parts_query) == f"{part_name}\n" else: assert dummy_node.query(detached_parts_query) == "" - main_node.query("DROP DATABASE alter_detach_part SYNC") - dummy_node.query("DROP DATABASE alter_detach_part SYNC") + main_node.query(f"DROP DATABASE {database} SYNC") + dummy_node.query(f"DROP DATABASE {database} SYNC") @pytest.mark.parametrize("engine", ["MergeTree", "ReplicatedMergeTree"]) def test_alter_drop_detached_part(started_cluster, engine): + database = f"alter_drop_detached_part_{engine}" main_node.query( - "CREATE DATABASE alter_drop_detached_part ENGINE = Replicated('/test/alter_drop_detached_part', 'shard1', 'replica1');" + f"CREATE DATABASE {database} ENGINE = Replicated('/test/{database}', 'shard1', 'replica1');" ) dummy_node.query( - "CREATE DATABASE alter_drop_detached_part ENGINE = Replicated('/test/alter_drop_detached_part', 'shard1', 'replica2');" + f"CREATE DATABASE {database} ENGINE = Replicated('/test/{database}', 'shard1', 'replica2');" ) - table = f"alter_drop_detached_{engine}" part_name = "all_0_0_0" if engine == "ReplicatedMergeTree" else "all_1_1_0" main_node.query( - f"CREATE TABLE alter_drop_detached_part.{table} (CounterID UInt32) ENGINE = {engine} ORDER BY (CounterID)" + f"CREATE TABLE {database}.alter_drop_detached (CounterID UInt32) ENGINE = {engine} ORDER BY (CounterID)" ) - main_node.query(f"INSERT INTO alter_drop_detached_part.{table} VALUES (123)") + main_node.query(f"INSERT INTO {database}.alter_drop_detached VALUES (123)") main_node.query( - f"ALTER TABLE alter_drop_detached_part.{table} DETACH PART '{part_name}'" + f"ALTER TABLE {database}.alter_drop_detached DETACH PART '{part_name}'" ) if engine == "MergeTree": - dummy_node.query(f"INSERT INTO alter_drop_detached_part.{table} VALUES (456)") + dummy_node.query(f"INSERT INTO {database}.alter_drop_detached VALUES (456)") dummy_node.query( - f"ALTER TABLE alter_drop_detached_part.{table} DETACH PART '{part_name}'" + f"ALTER TABLE {database}.alter_drop_detached DETACH PART '{part_name}'" ) main_node.query( - f"ALTER TABLE alter_drop_detached_part.{table} DROP DETACHED PART '{part_name}'" + f"ALTER TABLE {database}.alter_drop_detached DROP DETACHED PART '{part_name}'" ) - detached_parts_query = f"SELECT name FROM system.detached_parts WHERE database='alter_drop_detached_part' AND table='{table}'" + detached_parts_query = f"SELECT name FROM system.detached_parts WHERE database='{database}' AND table='alter_drop_detached'" assert main_node.query(detached_parts_query) == "" assert dummy_node.query(detached_parts_query) == f"{part_name}\n" - main_node.query("DROP DATABASE alter_drop_detached_part SYNC") - dummy_node.query("DROP DATABASE alter_drop_detached_part SYNC") + main_node.query(f"DROP DATABASE {database} SYNC") + dummy_node.query(f"DROP DATABASE {database} SYNC") @pytest.mark.parametrize("engine", ["MergeTree", "ReplicatedMergeTree"]) def test_alter_drop_partition(started_cluster, engine): + database = f"alter_drop_partition_{engine}" main_node.query( - "CREATE DATABASE alter_drop_partition ENGINE = Replicated('/test/alter_drop_partition', 'shard1', 'replica1');" + f"CREATE DATABASE {database} ENGINE = Replicated('/test/{database}', 'shard1', 'replica1');" ) dummy_node.query( - "CREATE DATABASE alter_drop_partition ENGINE = Replicated('/test/alter_drop_partition', 'shard1', 'replica2');" + f"CREATE DATABASE {database} ENGINE = Replicated('/test/{database}', 'shard1', 'replica2');" ) 
snapshotting_node.query( - "CREATE DATABASE alter_drop_partition ENGINE = Replicated('/test/alter_drop_partition', 'shard2', 'replica1');" + f"CREATE DATABASE {database} ENGINE = Replicated('/test/{database}', 'shard2', 'replica1');" ) - table = f"alter_drop_partition.alter_drop_{engine}" main_node.query( - f"CREATE TABLE {table} (CounterID UInt32) ENGINE = {engine} ORDER BY (CounterID)" + f"CREATE TABLE {database}.alter_drop (CounterID UInt32) ENGINE = {engine} ORDER BY (CounterID)" ) - main_node.query(f"INSERT INTO {table} VALUES (123)") + main_node.query(f"INSERT INTO {database}.alter_drop VALUES (123)") if engine == "MergeTree": - dummy_node.query(f"INSERT INTO {table} VALUES (456)") - snapshotting_node.query(f"INSERT INTO {table} VALUES (789)") + dummy_node.query(f"INSERT INTO {database}.alter_drop VALUES (456)") + snapshotting_node.query(f"INSERT INTO {database}.alter_drop VALUES (789)") main_node.query( - f"ALTER TABLE {table} ON CLUSTER alter_drop_partition DROP PARTITION ID 'all'", + f"ALTER TABLE {database}.alter_drop ON CLUSTER {database} DROP PARTITION ID 'all'", settings={"replication_alter_partitions_sync": 2}, ) assert ( main_node.query( - f"SELECT CounterID FROM clusterAllReplicas('alter_drop_partition', {table})" + f"SELECT CounterID FROM clusterAllReplicas('{database}', {database}.alter_drop)" ) == "" ) - assert dummy_node.query(f"SELECT CounterID FROM {table}") == "" - main_node.query("DROP DATABASE alter_drop_partition") - dummy_node.query("DROP DATABASE alter_drop_partition") - snapshotting_node.query("DROP DATABASE alter_drop_partition") + assert dummy_node.query(f"SELECT CounterID FROM {database}.alter_drop") == "" + main_node.query(f"DROP DATABASE {database}") + dummy_node.query(f"DROP DATABASE {database}") + snapshotting_node.query(f"DROP DATABASE {database}") def test_alter_fetch(started_cluster): From 8b0fc8283460f5678e733cef9803937ff9913177 Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Sat, 29 Apr 2023 21:23:55 +0000 Subject: [PATCH 1728/1997] test_for_basic_auth_registry - mock up --- .../runner/compose/docker_compose_kafka.yml | 10 +++++++++ tests/integration/helpers/cluster.py | 22 ++++++++++++------- .../secrets/password | 1 + .../secrets/schema_registry_jaas.conf | 5 +++++ .../test_format_avro_confluent/test.py | 18 ++++++++++----- 5 files changed, 43 insertions(+), 13 deletions(-) create mode 100644 tests/integration/test_format_avro_confluent/secrets/password create mode 100644 tests/integration/test_format_avro_confluent/secrets/schema_registry_jaas.conf diff --git a/docker/test/integration/runner/compose/docker_compose_kafka.yml b/docker/test/integration/runner/compose/docker_compose_kafka.yml index 7e34f4c114d..39247f1dd37 100644 --- a/docker/test/integration/runner/compose/docker_compose_kafka.yml +++ b/docker/test/integration/runner/compose/docker_compose_kafka.yml @@ -39,7 +39,17 @@ services: environment: SCHEMA_REGISTRY_HOST_NAME: schema-registry SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL: PLAINTEXT + # SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL: BASIC SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: PLAINTEXT://kafka1:19092 + SCHEMA_REGISTRY_AUTHENTICATION_METHOD: BASIC + # SCHEMA_REGISTRY_BASIC_AUTH_CREDENTIALS_SOURCE: USER_INFO + # SCHEMA_REGISTRY_BASIC_AUTH_USER_INFO: fred:letmein + # SCHEMA_REGISTRY_SCHEMA_REGISTRY_BASIC_AUTH_USER_INFO: fred:letmein + SCHEMA_REGISTRY_AUTHENTICATION_ROLES: schemaadmin, schemauser + SCHEMA_REGISTRY_AUTHENTICATION_REALM: RealmFooBar + SCHEMA_REGISTRY_OPTS: 
"-Djava.security.auth.login.config=/etc/schema-registry/secrets/schema_registry_jaas.conf" + volumes: + - ${SCHEMA_REGISTRY_DIR:-}/secrets:/etc/schema-registry/secrets depends_on: - kafka_zookeeper - kafka1 diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 0614cbf0e0d..9b5b33b1968 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -1159,10 +1159,13 @@ class ClickHouseCluster: ] return self.base_kerberized_hdfs_cmd - def setup_kafka_cmd(self, instance, env_variables, docker_compose_yml_dir): + def setup_kafka_cmd( + self, instance, env_variables, docker_compose_yml_dir + ): self.with_kafka = True env_variables["KAFKA_HOST"] = self.kafka_host env_variables["KAFKA_EXTERNAL_PORT"] = str(self.kafka_port) + env_variables["SCHEMA_REGISTRY_DIR"] = instance.path + "/" env_variables["SCHEMA_REGISTRY_EXTERNAL_PORT"] = str(self.schema_registry_port) env_variables["SCHEMA_REGISTRY_INTERNAL_PORT"] = "8081" self.base_cmd.extend( @@ -1498,6 +1501,7 @@ class ClickHouseCluster: with_kafka=False, with_kerberized_kafka=False, with_kerberos_kdc=False, + with_secrets=False, with_rabbitmq=False, with_nats=False, clickhouse_path_dir=None, @@ -1604,6 +1608,7 @@ class ClickHouseCluster: with_nats=with_nats, with_nginx=with_nginx, with_kerberized_hdfs=with_kerberized_hdfs, + with_secrets=with_secrets or with_kerberized_hdfs or with_kerberos_kdc or with_kerberized_kafka, with_mongo=with_mongo or with_mongo_secure, with_meili=with_meili, with_redis=with_redis, @@ -3135,6 +3140,7 @@ class ClickHouseInstance: with_nats, with_nginx, with_kerberized_hdfs, + with_secrets, with_mongo, with_meili, with_redis, @@ -3197,7 +3203,7 @@ class ClickHouseInstance: if clickhouse_path_dir else None ) - self.kerberos_secrets_dir = p.abspath(p.join(base_path, "secrets")) + self.secrets_dir = p.abspath(p.join(base_path, "secrets")) self.macros = macros if macros is not None else {} self.with_zookeeper = with_zookeeper self.zookeeper_config_path = zookeeper_config_path @@ -3220,6 +3226,7 @@ class ClickHouseInstance: self.with_nats = with_nats self.with_nginx = with_nginx self.with_kerberized_hdfs = with_kerberized_hdfs + self.with_secrets = with_secrets self.with_mongo = with_mongo self.with_meili = with_meili self.with_redis = with_redis @@ -4217,17 +4224,16 @@ class ClickHouseInstance: if self.with_zookeeper: shutil.copy(self.zookeeper_config_path, conf_d_dir) - if ( - self.with_kerberized_kafka - or self.with_kerberized_hdfs - or self.with_kerberos_kdc - ): + if self.with_secrets: if self.with_kerberos_kdc: base_secrets_dir = self.cluster.instances_dir else: base_secrets_dir = self.path + from_dir=self.secrets_dir + to_dir=p.abspath(p.join(base_secrets_dir, "secrets")) + logging.debug(f"Copy secret from {from_dir} to {to_dir}") shutil.copytree( - self.kerberos_secrets_dir, + self.secrets_dir, p.abspath(p.join(base_secrets_dir, "secrets")), dirs_exist_ok=True, ) diff --git a/tests/integration/test_format_avro_confluent/secrets/password b/tests/integration/test_format_avro_confluent/secrets/password new file mode 100644 index 00000000000..8903cf6edd6 --- /dev/null +++ b/tests/integration/test_format_avro_confluent/secrets/password @@ -0,0 +1 @@ +schemauser: MD5:0d107d09f5bbe40cade3de5c71e9e9b7,user diff --git a/tests/integration/test_format_avro_confluent/secrets/schema_registry_jaas.conf b/tests/integration/test_format_avro_confluent/secrets/schema_registry_jaas.conf new file mode 100644 index 00000000000..7d0e6e2bf35 --- /dev/null +++ 
b/tests/integration/test_format_avro_confluent/secrets/schema_registry_jaas.conf @@ -0,0 +1,5 @@ +RealmFooBar { + org.eclipse.jetty.jaas.spi.PropertyFileLoginModule required + file="/etc/schema-registry/secrets/password" + debug="true"; +}; \ No newline at end of file diff --git a/tests/integration/test_format_avro_confluent/test.py b/tests/integration/test_format_avro_confluent/test.py index 42b7ddce193..921fbdf5ef4 100644 --- a/tests/integration/test_format_avro_confluent/test.py +++ b/tests/integration/test_format_avro_confluent/test.py @@ -14,7 +14,7 @@ from helpers.cluster import ClickHouseCluster, ClickHouseInstance def started_cluster(): try: cluster = ClickHouseCluster(__file__) - cluster.add_instance("dummy", with_kafka=True) + cluster.add_instance("dummy", with_kafka=True, with_secrets=True) logging.info("Starting cluster...") cluster.start() logging.info("Cluster started") @@ -39,10 +39,13 @@ def run_query(instance, query, data=None, settings=None): def test_select(started_cluster): # type: (ClickHouseCluster) -> None + input("Cluster created, press any key to destroy...") - schema_registry_client = CachedSchemaRegistryClient( - "http://localhost:{}".format(started_cluster.schema_registry_port) - ) + reg_url="http://localhost:{}".format( + started_cluster.schema_registry_port) + arg={'url':reg_url,'basic.auth.credentials.source':'USER_INFO','basic.auth.user.info':'schemauser:letmein'} + + schema_registry_client = CachedSchemaRegistryClient(arg) serializer = MessageSerializer(schema_registry_client) schema = avro.schema.make_avsc_object( @@ -62,7 +65,12 @@ def test_select(started_cluster): data = buf.getvalue() instance = started_cluster.instances["dummy"] # type: ClickHouseInstance - schema_registry_url = "http://{}:{}".format( + # schema_registry_url = "http://{}:{}@{}:{}".format( + # 'schemauser', 'letmein', + # started_cluster.schema_registry_host, 8081 + # ) + schema_registry_url = "http://{}:{}@{}:{}".format( + 'schemauser', 'letmein', started_cluster.schema_registry_host, 8081 ) From 1564eace38072417bf2c188d7c0a0c0e55321626 Mon Sep 17 00:00:00 2001 From: dheerajathrey Date: Thu, 7 Jul 2022 13:48:25 +0530 Subject: [PATCH 1729/1997] enable url-encoded basic auth to fetch avro schema in kafka --- .../Formats/Impl/AvroRowInputFormat.cpp | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index 4cd73cb23b5..fe795608970 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -52,6 +52,8 @@ #include #include #include +#include +#include #include #include #include @@ -934,6 +936,29 @@ private: Poco::Net::HTTPRequest request(Poco::Net::HTTPRequest::HTTP_GET, url.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1); request.setHost(url.getHost()); + if (!url.getUserInfo().empty()) { + Poco::Net::HTTPCredentials http_credentials; + Poco::Net::HTTPBasicCredentials http_basic_credentials; + std::string decoded_username; + std::string decoded_password; + + http_credentials.fromUserInfo(url.getUserInfo()); + + if (!http_credentials.getPassword().empty()) { + Poco::URI::decode(http_credentials.getUsername(), decoded_username); + Poco::URI::decode(http_credentials.getPassword(), decoded_password); + + http_basic_credentials.setUsername(decoded_username); + http_basic_credentials.setPassword(decoded_password); + } + else { + Poco::URI::decode(http_credentials.getUsername(), decoded_username); + 
http_basic_credentials.setUsername(decoded_username); + } + + http_basic_credentials.authenticate(request); + } + auto session = makePooledHTTPSession(url, timeouts, 1); session->sendRequest(request); From 8e1de7897a0f950a44b9c67b5d7d97b47d380f25 Mon Sep 17 00:00:00 2001 From: dheerajathrey Date: Wed, 24 Aug 2022 19:19:09 +0530 Subject: [PATCH 1730/1997] indentation fix --- src/Processors/Formats/Impl/AvroRowInputFormat.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index fe795608970..318ba3cb443 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -936,7 +936,8 @@ private: Poco::Net::HTTPRequest request(Poco::Net::HTTPRequest::HTTP_GET, url.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1); request.setHost(url.getHost()); - if (!url.getUserInfo().empty()) { + if (!url.getUserInfo().empty()) + { Poco::Net::HTTPCredentials http_credentials; Poco::Net::HTTPBasicCredentials http_basic_credentials; std::string decoded_username; @@ -944,14 +945,16 @@ private: http_credentials.fromUserInfo(url.getUserInfo()); - if (!http_credentials.getPassword().empty()) { + if (!http_credentials.getPassword().empty()) + { Poco::URI::decode(http_credentials.getUsername(), decoded_username); Poco::URI::decode(http_credentials.getPassword(), decoded_password); http_basic_credentials.setUsername(decoded_username); http_basic_credentials.setPassword(decoded_password); } - else { + else + { Poco::URI::decode(http_credentials.getUsername(), decoded_username); http_basic_credentials.setUsername(decoded_username); } From e3523cb1a463931513cb7f3edc9937d64ae82331 Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Tue, 2 May 2023 12:31:00 +0000 Subject: [PATCH 1731/1997] test_for_basic_auth_registry - attempt to have two kafka instances --- .../runner/compose/docker_compose_kafka.yml | 44 +++++++++++-- tests/integration/helpers/cluster.py | 58 +++++++++++++++-- .../test_format_avro_confluent/test.py | 65 +++++++++++++++---- 3 files changed, 145 insertions(+), 22 deletions(-) diff --git a/docker/test/integration/runner/compose/docker_compose_kafka.yml b/docker/test/integration/runner/compose/docker_compose_kafka.yml index 39247f1dd37..fc476c09378 100644 --- a/docker/test/integration/runner/compose/docker_compose_kafka.yml +++ b/docker/test/integration/runner/compose/docker_compose_kafka.yml @@ -31,6 +31,28 @@ services: security_opt: - label:disable + kafka2: + image: confluentinc/cp-kafka:5.2.0 + hostname: kafka2 + ports: + - ${KAFKA2_EXTERNAL_PORT:-8082}:${KAFKA2_EXTERNAL_PORT:-8082} + environment: + # KAFKA_EXTERNAL_PORT: ${KAFKA2_EXTERNAL_PORT} + # KAFKA_HOST: ${KAFKA2_HOST} + KAFKA_ADVERTISED_LISTENERS: INSIDE://localhost:${KAFKA2_EXTERNAL_PORT},OUTSIDE://kafka2:19093 + KAFKA_ADVERTISED_HOST_NAME: kafka2 + KAFKA_LISTENERS: INSIDE://0.0.0.0:${KAFKA_EXTERNAL_PORT},OUTSIDE://0.0.0.0:19093 + KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: INSIDE:PLAINTEXT,OUTSIDE:PLAINTEXT + KAFKA_INTER_BROKER_LISTENER_NAME: INSIDE + KAFKA_BROKER_ID: 2 + KAFKA_ZOOKEEPER_CONNECT: "kafka_zookeeper:2181" + KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO" + KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 + depends_on: + - kafka_zookeeper + security_opt: + - label:disable + schema-registry: image: confluentinc/cp-schema-registry:5.2.0 hostname: schema-registry @@ -39,12 +61,24 @@ 
services: environment: SCHEMA_REGISTRY_HOST_NAME: schema-registry SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL: PLAINTEXT - # SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL: BASIC SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: PLAINTEXT://kafka1:19092 + depends_on: + - kafka_zookeeper + - kafka1 + security_opt: + - label:disable + + schema-registry-auth: + image: confluentinc/cp-schema-registry:5.2.0 + hostname: schema-registry-auth + ports: + - ${SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT:-12313}:${SCHEMA_REGISTRY_INTERNAL_PORT:-12313} + environment: + SCHEMA_REGISTRY_EXTERNAL_PORT: ${SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT} + SCHEMA_REGISTRY_HOST_NAME: schema-registry-auth + SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL: PLAINTEXT + SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: PLAINTEXT://kafka2:19093 SCHEMA_REGISTRY_AUTHENTICATION_METHOD: BASIC - # SCHEMA_REGISTRY_BASIC_AUTH_CREDENTIALS_SOURCE: USER_INFO - # SCHEMA_REGISTRY_BASIC_AUTH_USER_INFO: fred:letmein - # SCHEMA_REGISTRY_SCHEMA_REGISTRY_BASIC_AUTH_USER_INFO: fred:letmein SCHEMA_REGISTRY_AUTHENTICATION_ROLES: schemaadmin, schemauser SCHEMA_REGISTRY_AUTHENTICATION_REALM: RealmFooBar SCHEMA_REGISTRY_OPTS: "-Djava.security.auth.login.config=/etc/schema-registry/secrets/schema_registry_jaas.conf" @@ -52,6 +86,6 @@ services: - ${SCHEMA_REGISTRY_DIR:-}/secrets:/etc/schema-registry/secrets depends_on: - kafka_zookeeper - - kafka1 + - kafka2 security_opt: - label:disable diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 9b5b33b1968..e0286f6e5c9 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -481,12 +481,18 @@ class ClickHouseCluster: # available when with_kafka == True self.kafka_host = "kafka1" + self.kafka2_host = "kafka2" self.kafka_dir = os.path.join(self.instances_dir, "kafka") self._kafka_port = 0 + self._kafka2_port = 0 self.kafka_docker_id = None + self.kafka2_docker_id = None self.schema_registry_host = "schema-registry" self._schema_registry_port = 0 + self.schema_registry_auth_host = "schema-registry-auth" + self._schema_registry_auth_port = 0 self.kafka_docker_id = self.get_instance_docker_id(self.kafka_host) + self.kafka2_docker_id = self.get_instance_docker_id(self.kafka2_host) self.coredns_host = "coredns" @@ -650,6 +656,13 @@ class ClickHouseCluster: self._kafka_port = get_free_port() return self._kafka_port + @property + def kafka2_port(self): + if self._kafka2_port: + return self._kafka2_port + self._kafka2_port = get_free_port() + return self._kafka2_port + @property def schema_registry_port(self): if self._schema_registry_port: @@ -657,6 +670,13 @@ class ClickHouseCluster: self._schema_registry_port = get_free_port() return self._schema_registry_port + @property + def schema_registry_auth_port(self): + if self._schema_registry_auth_port: + return self._schema_registry_auth_port + self._schema_registry_auth_port = get_free_port() + return self._schema_registry_auth_port + @property def kerberized_kafka_port(self): if self._kerberized_kafka_port: @@ -1164,10 +1184,13 @@ class ClickHouseCluster: ): self.with_kafka = True env_variables["KAFKA_HOST"] = self.kafka_host + env_variables["KAFKA2_HOST"] = self.kafka2_host env_variables["KAFKA_EXTERNAL_PORT"] = str(self.kafka_port) + env_variables["KAFKA2_EXTERNAL_PORT"] = str(self.kafka2_port) env_variables["SCHEMA_REGISTRY_DIR"] = instance.path + "/" env_variables["SCHEMA_REGISTRY_EXTERNAL_PORT"] = str(self.schema_registry_port) env_variables["SCHEMA_REGISTRY_INTERNAL_PORT"] = "8081" + 
env_variables["SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT"] = str(self.schema_registry_auth_port) self.base_cmd.extend( ["--file", p.join(docker_compose_yml_dir, "docker_compose_kafka.yml")] ) @@ -2498,20 +2521,44 @@ class ClickHouseCluster: raise Exception("Can't wait Azurite to start") def wait_schema_registry_to_start(self, timeout=180): - sr_client = CachedSchemaRegistryClient( - {"url": "http://localhost:{}".format(self.schema_registry_port)} - ) + reg_url="http://localhost:{}".format(self.schema_registry_port) + arg={'url':reg_url} + sr_client = CachedSchemaRegistryClient(arg) + start = time.time() + sr_started = False + sr_auth_started = False while time.time() - start < timeout: try: sr_client._send_request(sr_client.url) logging.debug("Connected to SchemaRegistry") - return sr_client + sr_started = True + break except Exception as ex: logging.debug(("Can't connect to SchemaRegistry: %s", str(ex))) time.sleep(1) - raise Exception("Can't wait Schema Registry to start") + if not sr_started: + raise Exception("Can't wait Schema Registry to start") + + + auth_reg_url="http://localhost:{}".format(self.schema_registry_auth_port) + auth_arg={'url':auth_reg_url,'basic.auth.credentials.source':'USER_INFO','basic.auth.user.info':'schemauser:letmein'} + + + sr_auth_client = CachedSchemaRegistryClient(auth_arg) + while time.time() - start < timeout: + try: + sr_auth_client._send_request(sr_auth_client.url) + logging.debug("Connected to SchemaRegistry with auth") + sr_auth_started = True + break + except Exception as ex: + logging.debug(("Can't connect to SchemaRegistry with auth: %s", str(ex))) + time.sleep(1) + + if not sr_auth_started: + raise Exception("Can't wait Schema Registry with auth to start") def wait_cassandra_to_start(self, timeout=180): self.cassandra_ip = self.get_instance_ip(self.cassandra_host) @@ -2718,6 +2765,7 @@ class ClickHouseCluster: ) self.up_called = True self.wait_kafka_is_available(self.kafka_docker_id, self.kafka_port) + self.wait_kafka_is_available(self.kafka2_docker_id, self.kafka2_port) self.wait_schema_registry_to_start() if self.with_kerberized_kafka and self.base_kerberized_kafka_cmd: diff --git a/tests/integration/test_format_avro_confluent/test.py b/tests/integration/test_format_avro_confluent/test.py index 921fbdf5ef4..7261ce1b97d 100644 --- a/tests/integration/test_format_avro_confluent/test.py +++ b/tests/integration/test_format_avro_confluent/test.py @@ -37,15 +37,18 @@ def run_query(instance, query, data=None, settings=None): return result + + # reg_url="http://localhost:{}".format(started_cluster.schema_registry_port) + # arg={'url':reg_url} + # schema_registry_client = CachedSchemaRegistryClient(arg) + + def test_select(started_cluster): # type: (ClickHouseCluster) -> None - input("Cluster created, press any key to destroy...") - reg_url="http://localhost:{}".format( - started_cluster.schema_registry_port) - arg={'url':reg_url,'basic.auth.credentials.source':'USER_INFO','basic.auth.user.info':'schemauser:letmein'} - - schema_registry_client = CachedSchemaRegistryClient(arg) + schema_registry_client = CachedSchemaRegistryClient( + "http://localhost:{}".format(started_cluster.schema_registry_port) + ) serializer = MessageSerializer(schema_registry_client) schema = avro.schema.make_avsc_object( @@ -65,12 +68,7 @@ def test_select(started_cluster): data = buf.getvalue() instance = started_cluster.instances["dummy"] # type: ClickHouseInstance - # schema_registry_url = "http://{}:{}@{}:{}".format( - # 'schemauser', 'letmein', - # 
started_cluster.schema_registry_host, 8081 - # ) - schema_registry_url = "http://{}:{}@{}:{}".format( - 'schemauser', 'letmein', + schema_registry_url = "http://{}:{}".format( started_cluster.schema_registry_host, 8081 ) @@ -83,3 +81,46 @@ def test_select(started_cluster): ["1"], ["2"], ] + + +# def test_select_auth(started_cluster): +# # type: (ClickHouseCluster) -> None + +# reg_url="http://localhost:{}".format( +# started_cluster.schema_registry_auth_port) +# arg={'url':reg_url,'basic.auth.credentials.source':'USER_INFO','basic.auth.user.info':'schemauser:letmein'} + +# schema_registry_client = CachedSchemaRegistryClient(arg) +# serializer = MessageSerializer(schema_registry_client) + +# schema = avro.schema.make_avsc_object( +# { +# "name": "test_record", +# "type": "record", +# "fields": [{"name": "value", "type": "long"}], +# } +# ) + +# buf = io.BytesIO() +# for x in range(0, 3): +# message = serializer.encode_record_with_schema( +# "test_subject", schema, {"value": x} +# ) +# buf.write(message) +# data = buf.getvalue() + +# instance = started_cluster.instances["dummy"] # type: ClickHouseInstance +# schema_registry_url = "http://{}:{}@{}:{}".format( +# 'schemauser', 'letmein', +# started_cluster.schema_registry_auth_host, 8081 +# ) + +# run_query(instance, "create table avro_data_auth(value Int64) engine = Memory()") +# settings = {"format_avro_schema_registry_url": schema_registry_url} +# run_query(instance, "insert into avro_data_auth format AvroConfluent", data, settings) +# stdout = run_query(instance, "select * from avro_data_auth") +# assert list(map(str.split, stdout.splitlines())) == [ +# ["0"], +# ["1"], +# ["2"], +# ] From fb3a860d7f02ddf321875eefefeeaeb46b265bf9 Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Fri, 5 May 2023 10:56:35 +0000 Subject: [PATCH 1732/1997] test_for_basic_auth_registry - one kafka instance again --- .../runner/compose/docker_compose_kafka.yml | 49 ++++++++++--------- tests/integration/helpers/cluster.py | 2 +- 2 files changed, 27 insertions(+), 24 deletions(-) diff --git a/docker/test/integration/runner/compose/docker_compose_kafka.yml b/docker/test/integration/runner/compose/docker_compose_kafka.yml index fc476c09378..e0b58fee73d 100644 --- a/docker/test/integration/runner/compose/docker_compose_kafka.yml +++ b/docker/test/integration/runner/compose/docker_compose_kafka.yml @@ -31,27 +31,27 @@ services: security_opt: - label:disable - kafka2: - image: confluentinc/cp-kafka:5.2.0 - hostname: kafka2 - ports: - - ${KAFKA2_EXTERNAL_PORT:-8082}:${KAFKA2_EXTERNAL_PORT:-8082} - environment: - # KAFKA_EXTERNAL_PORT: ${KAFKA2_EXTERNAL_PORT} - # KAFKA_HOST: ${KAFKA2_HOST} - KAFKA_ADVERTISED_LISTENERS: INSIDE://localhost:${KAFKA2_EXTERNAL_PORT},OUTSIDE://kafka2:19093 - KAFKA_ADVERTISED_HOST_NAME: kafka2 - KAFKA_LISTENERS: INSIDE://0.0.0.0:${KAFKA_EXTERNAL_PORT},OUTSIDE://0.0.0.0:19093 - KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: INSIDE:PLAINTEXT,OUTSIDE:PLAINTEXT - KAFKA_INTER_BROKER_LISTENER_NAME: INSIDE - KAFKA_BROKER_ID: 2 - KAFKA_ZOOKEEPER_CONNECT: "kafka_zookeeper:2181" - KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO" - KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 - depends_on: - - kafka_zookeeper - security_opt: - - label:disable + # kafka2: + # image: confluentinc/cp-kafka:5.2.0 + # hostname: kafka2 + # ports: + # - ${KAFKA2_EXTERNAL_PORT:-8082}:${KAFKA2_EXTERNAL_PORT:-8082} + # environment: + # # KAFKA_EXTERNAL_PORT: ${KAFKA2_EXTERNAL_PORT} + # # KAFKA_HOST: ${KAFKA2_HOST} + # 
KAFKA_ADVERTISED_LISTENERS: INSIDE://localhost:${KAFKA2_EXTERNAL_PORT},OUTSIDE://kafka2:19093 + # KAFKA_ADVERTISED_HOST_NAME: kafka2 + # KAFKA_LISTENERS: INSIDE://0.0.0.0:${KAFKA_EXTERNAL_PORT},OUTSIDE://0.0.0.0:19093 + # KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: INSIDE:PLAINTEXT,OUTSIDE:PLAINTEXT + # KAFKA_INTER_BROKER_LISTENER_NAME: INSIDE + # KAFKA_BROKER_ID: 2 + # KAFKA_ZOOKEEPER_CONNECT: "kafka_zookeeper:2181" + # KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO" + # KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 + # depends_on: + # - kafka_zookeeper + # security_opt: + # - label:disable schema-registry: image: confluentinc/cp-schema-registry:5.2.0 @@ -77,15 +77,18 @@ services: SCHEMA_REGISTRY_EXTERNAL_PORT: ${SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT} SCHEMA_REGISTRY_HOST_NAME: schema-registry-auth SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL: PLAINTEXT - SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: PLAINTEXT://kafka2:19093 + SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: PLAINTEXT://kafka1:19092 SCHEMA_REGISTRY_AUTHENTICATION_METHOD: BASIC SCHEMA_REGISTRY_AUTHENTICATION_ROLES: schemaadmin, schemauser SCHEMA_REGISTRY_AUTHENTICATION_REALM: RealmFooBar SCHEMA_REGISTRY_OPTS: "-Djava.security.auth.login.config=/etc/schema-registry/secrets/schema_registry_jaas.conf" + SCHEMA_REGISTRY_GROUP_ID: auth + SCHEMA_REGISTRY_ZK_NAMESPACE: auth + SCHEMA_REGISTRY_KAFKASTORE_TOPIC: _schemaauth volumes: - ${SCHEMA_REGISTRY_DIR:-}/secrets:/etc/schema-registry/secrets depends_on: - kafka_zookeeper - - kafka2 + - kafka1 security_opt: - label:disable diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index e0286f6e5c9..e261364ab05 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -2765,7 +2765,7 @@ class ClickHouseCluster: ) self.up_called = True self.wait_kafka_is_available(self.kafka_docker_id, self.kafka_port) - self.wait_kafka_is_available(self.kafka2_docker_id, self.kafka2_port) + # self.wait_kafka_is_available(self.kafka2_docker_id, self.kafka2_port) self.wait_schema_registry_to_start() if self.with_kerberized_kafka and self.base_kerberized_kafka_cmd: From db8e96147a9deb92364c8276577dedf68b7653a5 Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Fri, 5 May 2023 17:52:15 +0000 Subject: [PATCH 1733/1997] test_for_basic_auth_registry - started, but only auth test works --- .../runner/compose/docker_compose_kafka.yml | 94 +++++++++---------- .../test_format_avro_confluent/test.py | 27 ++++-- 2 files changed, 60 insertions(+), 61 deletions(-) diff --git a/docker/test/integration/runner/compose/docker_compose_kafka.yml b/docker/test/integration/runner/compose/docker_compose_kafka.yml index e0b58fee73d..47e41812cf5 100644 --- a/docker/test/integration/runner/compose/docker_compose_kafka.yml +++ b/docker/test/integration/runner/compose/docker_compose_kafka.yml @@ -1,90 +1,80 @@ version: '2.3' services: + # kafka_zookeeper: + # image: zookeeper:3.4.9 + # hostname: kafka_zookeeper + # environment: + # ZOO_MY_ID: 1 + # ZOO_PORT: 2181 + # ZOO_SERVERS: server.1=kafka_zookeeper:2888:3888 + # security_opt: + # - label:disable kafka_zookeeper: - image: zookeeper:3.4.9 - hostname: kafka_zookeeper + image: confluentinc/cp-zookeeper + ports: + - 2181:2181 environment: - ZOO_MY_ID: 1 - ZOO_PORT: 2181 - ZOO_SERVERS: server.1=kafka_zookeeper:2888:3888 - security_opt: - - label:disable - + ZOOKEEPER_CLIENT_PORT: 2181 + # security_opt: + # - label:disable kafka1: - image: 
confluentinc/cp-kafka:5.2.0 + image: confluentinc/cp-kafka hostname: kafka1 ports: - - ${KAFKA_EXTERNAL_PORT:-8081}:${KAFKA_EXTERNAL_PORT:-8081} + - ${KAFKA_EXTERNAL_PORT}:${KAFKA_EXTERNAL_PORT} environment: - KAFKA_ADVERTISED_LISTENERS: INSIDE://localhost:${KAFKA_EXTERNAL_PORT},OUTSIDE://kafka1:19092 + KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT, PLAINTEXT_HOST:PLAINTEXT + # KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: INSIDE:PLAINTEXT,OUTSIDE:PLAINTEXT + KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka1:19092, PLAINTEXT_HOST://localhost:${KAFKA_EXTERNAL_PORT} KAFKA_ADVERTISED_HOST_NAME: kafka1 - KAFKA_LISTENERS: INSIDE://0.0.0.0:${KAFKA_EXTERNAL_PORT},OUTSIDE://0.0.0.0:19092 - KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: INSIDE:PLAINTEXT,OUTSIDE:PLAINTEXT - KAFKA_INTER_BROKER_LISTENER_NAME: INSIDE - KAFKA_BROKER_ID: 1 - KAFKA_ZOOKEEPER_CONNECT: "kafka_zookeeper:2181" - KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO" + # KAFKA_LISTENERS: INSIDE://0.0.0.0:${KAFKA_EXTERNAL_PORT},OUTSIDE://0.0.0.0:19092 + KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT + # KAFKA_BROKER_ID: 1 + KAFKA_ZOOKEEPER_CONNECT: kafka_zookeeper:2181 + # KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO" KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 depends_on: - kafka_zookeeper - security_opt: - - label:disable - - # kafka2: - # image: confluentinc/cp-kafka:5.2.0 - # hostname: kafka2 - # ports: - # - ${KAFKA2_EXTERNAL_PORT:-8082}:${KAFKA2_EXTERNAL_PORT:-8082} - # environment: - # # KAFKA_EXTERNAL_PORT: ${KAFKA2_EXTERNAL_PORT} - # # KAFKA_HOST: ${KAFKA2_HOST} - # KAFKA_ADVERTISED_LISTENERS: INSIDE://localhost:${KAFKA2_EXTERNAL_PORT},OUTSIDE://kafka2:19093 - # KAFKA_ADVERTISED_HOST_NAME: kafka2 - # KAFKA_LISTENERS: INSIDE://0.0.0.0:${KAFKA_EXTERNAL_PORT},OUTSIDE://0.0.0.0:19093 - # KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: INSIDE:PLAINTEXT,OUTSIDE:PLAINTEXT - # KAFKA_INTER_BROKER_LISTENER_NAME: INSIDE - # KAFKA_BROKER_ID: 2 - # KAFKA_ZOOKEEPER_CONNECT: "kafka_zookeeper:2181" - # KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO" - # KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 - # depends_on: - # - kafka_zookeeper - # security_opt: - # - label:disable + # security_opt: + # - label:disable schema-registry: image: confluentinc/cp-schema-registry:5.2.0 hostname: schema-registry ports: - - ${SCHEMA_REGISTRY_EXTERNAL_PORT:-12313}:${SCHEMA_REGISTRY_INTERNAL_PORT:-12313} + - ${SCHEMA_REGISTRY_EXTERNAL_PORT}:${SCHEMA_REGISTRY_EXTERNAL_PORT} environment: SCHEMA_REGISTRY_HOST_NAME: schema-registry - SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL: PLAINTEXT + # SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL: PLAINTEXT SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: PLAINTEXT://kafka1:19092 + SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL: kafka_zookeeper:2181 + SCHEMA_REGISTRY_LISTENERS: http://0.0.0.0:${SCHEMA_REGISTRY_EXTERNAL_PORT:-12313} depends_on: - kafka_zookeeper - kafka1 - security_opt: - - label:disable + # security_opt: + # - label:disable schema-registry-auth: image: confluentinc/cp-schema-registry:5.2.0 hostname: schema-registry-auth ports: - - ${SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT:-12313}:${SCHEMA_REGISTRY_INTERNAL_PORT:-12313} + - ${SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT}:${SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT} environment: - SCHEMA_REGISTRY_EXTERNAL_PORT: ${SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT} + # SCHEMA_REGISTRY_EXTERNAL_PORT: 
${SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT} SCHEMA_REGISTRY_HOST_NAME: schema-registry-auth - SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL: PLAINTEXT + SCHEMA_REGISTRY_LISTENERS: http://0.0.0.0:${SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT} + # SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL: PLAINTEXT SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: PLAINTEXT://kafka1:19092 + SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL: kafka_zookeeper:2181 SCHEMA_REGISTRY_AUTHENTICATION_METHOD: BASIC - SCHEMA_REGISTRY_AUTHENTICATION_ROLES: schemaadmin, schemauser + SCHEMA_REGISTRY_AUTHENTICATION_ROLES: user SCHEMA_REGISTRY_AUTHENTICATION_REALM: RealmFooBar SCHEMA_REGISTRY_OPTS: "-Djava.security.auth.login.config=/etc/schema-registry/secrets/schema_registry_jaas.conf" - SCHEMA_REGISTRY_GROUP_ID: auth - SCHEMA_REGISTRY_ZK_NAMESPACE: auth - SCHEMA_REGISTRY_KAFKASTORE_TOPIC: _schemaauth + # SCHEMA_REGISTRY_GROUP_ID: auth + SCHEMA_REGISTRY_ZK_NAMESPACE: schema_registry_auth + SCHEMA_REGISTRY_KAFKASTORE_TOPIC: _schemas2 volumes: - ${SCHEMA_REGISTRY_DIR:-}/secrets:/etc/schema-registry/secrets depends_on: diff --git a/tests/integration/test_format_avro_confluent/test.py b/tests/integration/test_format_avro_confluent/test.py index 7261ce1b97d..cd0906bedee 100644 --- a/tests/integration/test_format_avro_confluent/test.py +++ b/tests/integration/test_format_avro_confluent/test.py @@ -1,5 +1,6 @@ import io import logging +import time import avro.schema import pytest @@ -46,14 +47,21 @@ def run_query(instance, query, data=None, settings=None): def test_select(started_cluster): # type: (ClickHouseCluster) -> None - schema_registry_client = CachedSchemaRegistryClient( - "http://localhost:{}".format(started_cluster.schema_registry_port) - ) + time.sleep(3) + + # schema_registry_client = CachedSchemaRegistryClient( + # "http://localhost:{}".format(started_cluster.schema_registry_port) + # ) + reg_url="http://localhost:{}".format( + started_cluster.schema_registry_port) + arg={'url':reg_url} + + schema_registry_client = CachedSchemaRegistryClient(arg) serializer = MessageSerializer(schema_registry_client) schema = avro.schema.make_avsc_object( { - "name": "test_record", + "name": "test_record1", "type": "record", "fields": [{"name": "value", "type": "long"}], } @@ -62,14 +70,14 @@ def test_select(started_cluster): buf = io.BytesIO() for x in range(0, 3): message = serializer.encode_record_with_schema( - "test_subject", schema, {"value": x} + "test_subject1", schema, {"value": x} ) buf.write(message) data = buf.getvalue() instance = started_cluster.instances["dummy"] # type: ClickHouseInstance schema_registry_url = "http://{}:{}".format( - started_cluster.schema_registry_host, 8081 + started_cluster.schema_registry_host, started_cluster.schema_registry_port ) run_query(instance, "create table avro_data(value Int64) engine = Memory()") @@ -85,6 +93,7 @@ def test_select(started_cluster): # def test_select_auth(started_cluster): # # type: (ClickHouseCluster) -> None +# time.sleep(5) # reg_url="http://localhost:{}".format( # started_cluster.schema_registry_auth_port) @@ -95,7 +104,7 @@ def test_select(started_cluster): # schema = avro.schema.make_avsc_object( # { -# "name": "test_record", +# "name": "test_record_auth", # "type": "record", # "fields": [{"name": "value", "type": "long"}], # } @@ -104,7 +113,7 @@ def test_select(started_cluster): # buf = io.BytesIO() # for x in range(0, 3): # message = serializer.encode_record_with_schema( -# "test_subject", schema, {"value": x} +# "test_subject_auth", schema, {"value": x} # ) # buf.write(message) # 
data = buf.getvalue() @@ -112,7 +121,7 @@ def test_select(started_cluster): # instance = started_cluster.instances["dummy"] # type: ClickHouseInstance # schema_registry_url = "http://{}:{}@{}:{}".format( # 'schemauser', 'letmein', -# started_cluster.schema_registry_auth_host, 8081 +# started_cluster.schema_registry_auth_host, started_cluster.schema_registry_auth_port # ) # run_query(instance, "create table avro_data_auth(value Int64) engine = Memory()") From 66581d091af3eda08591e12af551e83a88a95520 Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Mon, 8 May 2023 07:08:04 +0000 Subject: [PATCH 1734/1997] test_for_basic_auth_registry - both tests works, simplifications --- .../runner/compose/docker_compose_kafka.yml | 46 +++---- tests/integration/helpers/cluster.py | 70 +++------- .../secrets/password | 1 + .../test_format_avro_confluent/test.py | 122 ++++++++++++------ 4 files changed, 116 insertions(+), 123 deletions(-) diff --git a/docker/test/integration/runner/compose/docker_compose_kafka.yml b/docker/test/integration/runner/compose/docker_compose_kafka.yml index 47e41812cf5..5e2e9d87c39 100644 --- a/docker/test/integration/runner/compose/docker_compose_kafka.yml +++ b/docker/test/integration/runner/compose/docker_compose_kafka.yml @@ -1,43 +1,33 @@ version: '2.3' services: - # kafka_zookeeper: - # image: zookeeper:3.4.9 - # hostname: kafka_zookeeper - # environment: - # ZOO_MY_ID: 1 - # ZOO_PORT: 2181 - # ZOO_SERVERS: server.1=kafka_zookeeper:2888:3888 - # security_opt: - # - label:disable kafka_zookeeper: - image: confluentinc/cp-zookeeper + image: zookeeper:3.4.9 + hostname: kafka_zookeeper ports: - 2181:2181 environment: - ZOOKEEPER_CLIENT_PORT: 2181 - # security_opt: - # - label:disable + ZOOKEEPER_CLIENT_PORT: 2181 + security_opt: + - label:disable + kafka1: - image: confluentinc/cp-kafka + image: confluentinc/cp-kafka:5.2.0 hostname: kafka1 ports: - ${KAFKA_EXTERNAL_PORT}:${KAFKA_EXTERNAL_PORT} environment: KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT, PLAINTEXT_HOST:PLAINTEXT - # KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: INSIDE:PLAINTEXT,OUTSIDE:PLAINTEXT KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka1:19092, PLAINTEXT_HOST://localhost:${KAFKA_EXTERNAL_PORT} KAFKA_ADVERTISED_HOST_NAME: kafka1 - # KAFKA_LISTENERS: INSIDE://0.0.0.0:${KAFKA_EXTERNAL_PORT},OUTSIDE://0.0.0.0:19092 KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT - # KAFKA_BROKER_ID: 1 KAFKA_ZOOKEEPER_CONNECT: kafka_zookeeper:2181 - # KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO" + KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO" KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 depends_on: - kafka_zookeeper - # security_opt: - # - label:disable + security_opt: + - label:disable schema-registry: image: confluentinc/cp-schema-registry:5.2.0 @@ -46,15 +36,14 @@ services: - ${SCHEMA_REGISTRY_EXTERNAL_PORT}:${SCHEMA_REGISTRY_EXTERNAL_PORT} environment: SCHEMA_REGISTRY_HOST_NAME: schema-registry - # SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL: PLAINTEXT SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: PLAINTEXT://kafka1:19092 - SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL: kafka_zookeeper:2181 - SCHEMA_REGISTRY_LISTENERS: http://0.0.0.0:${SCHEMA_REGISTRY_EXTERNAL_PORT:-12313} + SCHEMA_REGISTRY_LISTENERS: http://0.0.0.0:${SCHEMA_REGISTRY_EXTERNAL_PORT} + SCHEMA_REGISTRY_SCHEMA_REGISTRY_GROUP_ID: noauth depends_on: - kafka_zookeeper - kafka1 - # security_opt: - # - label:disable + security_opt: + 
- label:disable schema-registry-auth: image: confluentinc/cp-schema-registry:5.2.0 @@ -62,19 +51,14 @@ services: ports: - ${SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT}:${SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT} environment: - # SCHEMA_REGISTRY_EXTERNAL_PORT: ${SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT} SCHEMA_REGISTRY_HOST_NAME: schema-registry-auth SCHEMA_REGISTRY_LISTENERS: http://0.0.0.0:${SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT} - # SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL: PLAINTEXT SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: PLAINTEXT://kafka1:19092 - SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL: kafka_zookeeper:2181 SCHEMA_REGISTRY_AUTHENTICATION_METHOD: BASIC SCHEMA_REGISTRY_AUTHENTICATION_ROLES: user SCHEMA_REGISTRY_AUTHENTICATION_REALM: RealmFooBar SCHEMA_REGISTRY_OPTS: "-Djava.security.auth.login.config=/etc/schema-registry/secrets/schema_registry_jaas.conf" - # SCHEMA_REGISTRY_GROUP_ID: auth - SCHEMA_REGISTRY_ZK_NAMESPACE: schema_registry_auth - SCHEMA_REGISTRY_KAFKASTORE_TOPIC: _schemas2 + SCHEMA_REGISTRY_SCHEMA_REGISTRY_GROUP_ID: auth volumes: - ${SCHEMA_REGISTRY_DIR:-}/secrets:/etc/schema-registry/secrets depends_on: diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index e261364ab05..c51c97ee6c4 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -481,18 +481,14 @@ class ClickHouseCluster: # available when with_kafka == True self.kafka_host = "kafka1" - self.kafka2_host = "kafka2" self.kafka_dir = os.path.join(self.instances_dir, "kafka") self._kafka_port = 0 - self._kafka2_port = 0 self.kafka_docker_id = None - self.kafka2_docker_id = None self.schema_registry_host = "schema-registry" self._schema_registry_port = 0 self.schema_registry_auth_host = "schema-registry-auth" self._schema_registry_auth_port = 0 self.kafka_docker_id = self.get_instance_docker_id(self.kafka_host) - self.kafka2_docker_id = self.get_instance_docker_id(self.kafka2_host) self.coredns_host = "coredns" @@ -656,13 +652,6 @@ class ClickHouseCluster: self._kafka_port = get_free_port() return self._kafka_port - @property - def kafka2_port(self): - if self._kafka2_port: - return self._kafka2_port - self._kafka2_port = get_free_port() - return self._kafka2_port - @property def schema_registry_port(self): if self._schema_registry_port: @@ -1184,12 +1173,9 @@ class ClickHouseCluster: ): self.with_kafka = True env_variables["KAFKA_HOST"] = self.kafka_host - env_variables["KAFKA2_HOST"] = self.kafka2_host env_variables["KAFKA_EXTERNAL_PORT"] = str(self.kafka_port) - env_variables["KAFKA2_EXTERNAL_PORT"] = str(self.kafka2_port) env_variables["SCHEMA_REGISTRY_DIR"] = instance.path + "/" env_variables["SCHEMA_REGISTRY_EXTERNAL_PORT"] = str(self.schema_registry_port) - env_variables["SCHEMA_REGISTRY_INTERNAL_PORT"] = "8081" env_variables["SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT"] = str(self.schema_registry_auth_port) self.base_cmd.extend( ["--file", p.join(docker_compose_yml_dir, "docker_compose_kafka.yml")] @@ -2521,44 +2507,27 @@ class ClickHouseCluster: raise Exception("Can't wait Azurite to start") def wait_schema_registry_to_start(self, timeout=180): - reg_url="http://localhost:{}".format(self.schema_registry_port) - arg={'url':reg_url} - sr_client = CachedSchemaRegistryClient(arg) + for port in self.schema_registry_port, self.schema_registry_auth_port: + reg_url="http://localhost:{}".format(port) + arg={'url':reg_url} + sr_client = CachedSchemaRegistryClient(arg) - start = time.time() - sr_started = False - sr_auth_started = False - while time.time() - 
start < timeout: - try: - sr_client._send_request(sr_client.url) - logging.debug("Connected to SchemaRegistry") - sr_started = True - break - except Exception as ex: - logging.debug(("Can't connect to SchemaRegistry: %s", str(ex))) - time.sleep(1) + start = time.time() + sr_started = False + sr_auth_started = False + while time.time() - start < timeout: + try: + sr_client._send_request(sr_client.url) + logging.debug("Connected to SchemaRegistry") + # don't care about possible auth errors + sr_started = True + break + except Exception as ex: + logging.debug(("Can't connect to SchemaRegistry: %s", str(ex))) + time.sleep(1) - if not sr_started: - raise Exception("Can't wait Schema Registry to start") - - - auth_reg_url="http://localhost:{}".format(self.schema_registry_auth_port) - auth_arg={'url':auth_reg_url,'basic.auth.credentials.source':'USER_INFO','basic.auth.user.info':'schemauser:letmein'} - - - sr_auth_client = CachedSchemaRegistryClient(auth_arg) - while time.time() - start < timeout: - try: - sr_auth_client._send_request(sr_auth_client.url) - logging.debug("Connected to SchemaRegistry with auth") - sr_auth_started = True - break - except Exception as ex: - logging.debug(("Can't connect to SchemaRegistry with auth: %s", str(ex))) - time.sleep(1) - - if not sr_auth_started: - raise Exception("Can't wait Schema Registry with auth to start") + if not sr_started: + raise Exception("Can't wait Schema Registry to start") def wait_cassandra_to_start(self, timeout=180): self.cassandra_ip = self.get_instance_ip(self.cassandra_host) @@ -2765,7 +2734,6 @@ class ClickHouseCluster: ) self.up_called = True self.wait_kafka_is_available(self.kafka_docker_id, self.kafka_port) - # self.wait_kafka_is_available(self.kafka2_docker_id, self.kafka2_port) self.wait_schema_registry_to_start() if self.with_kerberized_kafka and self.base_kerberized_kafka_cmd: diff --git a/tests/integration/test_format_avro_confluent/secrets/password b/tests/integration/test_format_avro_confluent/secrets/password index 8903cf6edd6..7fde510bf5a 100644 --- a/tests/integration/test_format_avro_confluent/secrets/password +++ b/tests/integration/test_format_avro_confluent/secrets/password @@ -1 +1,2 @@ schemauser: MD5:0d107d09f5bbe40cade3de5c71e9e9b7,user +schemauser/slash: MD5:0d107d09f5bbe40cade3de5c71e9e9b7,user diff --git a/tests/integration/test_format_avro_confluent/test.py b/tests/integration/test_format_avro_confluent/test.py index cd0906bedee..d58f6d972d1 100644 --- a/tests/integration/test_format_avro_confluent/test.py +++ b/tests/integration/test_format_avro_confluent/test.py @@ -9,7 +9,7 @@ from confluent_kafka.avro.cached_schema_registry_client import ( ) from confluent_kafka.avro.serializer.message_serializer import MessageSerializer from helpers.cluster import ClickHouseCluster, ClickHouseInstance - +from urllib import parse @pytest.fixture(scope="module") def started_cluster(): @@ -47,11 +47,8 @@ def run_query(instance, query, data=None, settings=None): def test_select(started_cluster): # type: (ClickHouseCluster) -> None - time.sleep(3) + # input("Top of test_select, press any key") - # schema_registry_client = CachedSchemaRegistryClient( - # "http://localhost:{}".format(started_cluster.schema_registry_port) - # ) reg_url="http://localhost:{}".format( started_cluster.schema_registry_port) arg={'url':reg_url} @@ -91,45 +88,88 @@ def test_select(started_cluster): ] -# def test_select_auth(started_cluster): -# # type: (ClickHouseCluster) -> None -# time.sleep(5) +def test_select_auth(started_cluster): + # type: 
(ClickHouseCluster) -> None + time.sleep(5) -# reg_url="http://localhost:{}".format( -# started_cluster.schema_registry_auth_port) -# arg={'url':reg_url,'basic.auth.credentials.source':'USER_INFO','basic.auth.user.info':'schemauser:letmein'} + reg_url="http://localhost:{}".format( + started_cluster.schema_registry_auth_port) + arg={'url':reg_url,'basic.auth.credentials.source':'USER_INFO','basic.auth.user.info':'schemauser:letmein'} -# schema_registry_client = CachedSchemaRegistryClient(arg) -# serializer = MessageSerializer(schema_registry_client) + schema_registry_client = CachedSchemaRegistryClient(arg) + serializer = MessageSerializer(schema_registry_client) -# schema = avro.schema.make_avsc_object( -# { -# "name": "test_record_auth", -# "type": "record", -# "fields": [{"name": "value", "type": "long"}], -# } -# ) + schema = avro.schema.make_avsc_object( + { + "name": "test_record_auth", + "type": "record", + "fields": [{"name": "value", "type": "long"}], + } + ) -# buf = io.BytesIO() -# for x in range(0, 3): -# message = serializer.encode_record_with_schema( -# "test_subject_auth", schema, {"value": x} -# ) -# buf.write(message) -# data = buf.getvalue() + buf = io.BytesIO() + for x in range(0, 3): + message = serializer.encode_record_with_schema( + "test_subject_auth", schema, {"value": x} + ) + buf.write(message) + data = buf.getvalue() -# instance = started_cluster.instances["dummy"] # type: ClickHouseInstance -# schema_registry_url = "http://{}:{}@{}:{}".format( -# 'schemauser', 'letmein', -# started_cluster.schema_registry_auth_host, started_cluster.schema_registry_auth_port -# ) + instance = started_cluster.instances["dummy"] # type: ClickHouseInstance + schema_registry_url = "http://{}:{}@{}:{}".format( + 'schemauser', 'letmein', + started_cluster.schema_registry_auth_host, started_cluster.schema_registry_auth_port + ) -# run_query(instance, "create table avro_data_auth(value Int64) engine = Memory()") -# settings = {"format_avro_schema_registry_url": schema_registry_url} -# run_query(instance, "insert into avro_data_auth format AvroConfluent", data, settings) -# stdout = run_query(instance, "select * from avro_data_auth") -# assert list(map(str.split, stdout.splitlines())) == [ -# ["0"], -# ["1"], -# ["2"], -# ] + run_query(instance, "create table avro_data_auth(value Int64) engine = Memory()") + settings = {"format_avro_schema_registry_url": schema_registry_url} + run_query(instance, "insert into avro_data_auth format AvroConfluent", data, settings) + stdout = run_query(instance, "select * from avro_data_auth") + assert list(map(str.split, stdout.splitlines())) == [ + ["0"], + ["1"], + ["2"], + ] + +def test_select_auth_encoded(started_cluster): + # type: (ClickHouseCluster) -> None + time.sleep(5) + + reg_url="http://localhost:{}".format( + started_cluster.schema_registry_auth_port) + arg={'url':reg_url,'basic.auth.credentials.source':'USER_INFO','basic.auth.user.info':'schemauser:letmein'} + + schema_registry_client = CachedSchemaRegistryClient(arg) + serializer = MessageSerializer(schema_registry_client) + + schema = avro.schema.make_avsc_object( + { + "name": "test_record_auth_encoded", + "type": "record", + "fields": [{"name": "value", "type": "long"}], + } + ) + + buf = io.BytesIO() + for x in range(0, 3): + message = serializer.encode_record_with_schema( + "test_subject_auth_encoded", schema, {"value": x} + ) + buf.write(message) + data = buf.getvalue() + + instance = started_cluster.instances["dummy"] # type: ClickHouseInstance + schema_registry_url = 
"http://{}:{}@{}:{}".format( + parse.quote_plus('schemauser/slash'), parse.quote_plus('letmein'), + started_cluster.schema_registry_auth_host, started_cluster.schema_registry_auth_port + ) + + run_query(instance, "create table avro_data_auth_encoded(value Int64) engine = Memory()") + settings = {"format_avro_schema_registry_url": schema_registry_url} + run_query(instance, "insert into avro_data_auth_encoded format AvroConfluent", data, settings) + stdout = run_query(instance, "select * from avro_data_auth_encoded") + assert list(map(str.split, stdout.splitlines())) == [ + ["0"], + ["1"], + ["2"], + ] From 83569688cba16b80ed959c054fd2f36187c520d4 Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Mon, 8 May 2023 12:59:08 +0000 Subject: [PATCH 1735/1997] test_for_basic_auth_registry - UnknownTopicOrPartitionException --- docker/test/integration/runner/compose/docker_compose_kafka.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker/test/integration/runner/compose/docker_compose_kafka.yml b/docker/test/integration/runner/compose/docker_compose_kafka.yml index 5e2e9d87c39..d701af1d425 100644 --- a/docker/test/integration/runner/compose/docker_compose_kafka.yml +++ b/docker/test/integration/runner/compose/docker_compose_kafka.yml @@ -42,6 +42,7 @@ services: depends_on: - kafka_zookeeper - kafka1 + restart: always security_opt: - label:disable @@ -64,5 +65,6 @@ services: depends_on: - kafka_zookeeper - kafka1 + restart: always security_opt: - label:disable From c1c5ffa309c20899f81548bd3314233d84eb03e1 Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Mon, 8 May 2023 13:29:19 +0000 Subject: [PATCH 1736/1997] test_for_basic_auth_registry - cpp code small improvement --- .../Formats/Impl/AvroRowInputFormat.cpp | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index 318ba3cb443..a7efc823fbb 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -940,24 +940,19 @@ private: { Poco::Net::HTTPCredentials http_credentials; Poco::Net::HTTPBasicCredentials http_basic_credentials; - std::string decoded_username; - std::string decoded_password; http_credentials.fromUserInfo(url.getUserInfo()); + std::string decoded_username; + Poco::URI::decode(http_credentials.getUsername(), decoded_username); + http_basic_credentials.setUsername(decoded_username); + if (!http_credentials.getPassword().empty()) { - Poco::URI::decode(http_credentials.getUsername(), decoded_username); + std::string decoded_password; Poco::URI::decode(http_credentials.getPassword(), decoded_password); - - http_basic_credentials.setUsername(decoded_username); http_basic_credentials.setPassword(decoded_password); } - else - { - Poco::URI::decode(http_credentials.getUsername(), decoded_username); - http_basic_credentials.setUsername(decoded_username); - } http_basic_credentials.authenticate(request); } From c550a532e649bfde1382bc7f56cac6a38dee0dee Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Mon, 8 May 2023 20:49:43 +0000 Subject: [PATCH 1737/1997] test_for_basic_auth_registry - black formatter happy + some doc --- .../operations/settings/settings-formats.md | 11 ++++ tests/integration/helpers/cluster.py | 21 ++++---- .../test_format_avro_confluent/test.py | 54 ++++++++++++------- 3 files changed, 59 insertions(+), 27 deletions(-) diff --git a/docs/en/operations/settings/settings-formats.md 
b/docs/en/operations/settings/settings-formats.md index 0915c51806a..637ade17296 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -1325,6 +1325,17 @@ Default value: 0. Sets [Confluent Schema Registry](https://docs.confluent.io/current/schema-registry/index.html) URL to use with [AvroConfluent](../../interfaces/formats.md/#data-format-avro-confluent) format. +Format: +``` text +http://[user:password@]machine[:port]" +``` + +Examples: +``` text +http://registry.example.com:8081 +http://admin:secret@registry.example.com:8081 +``` + Default value: `Empty`. ### output_format_avro_codec {#output_format_avro_codec} diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index c51c97ee6c4..c52442ecb9c 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -1168,15 +1168,15 @@ class ClickHouseCluster: ] return self.base_kerberized_hdfs_cmd - def setup_kafka_cmd( - self, instance, env_variables, docker_compose_yml_dir - ): + def setup_kafka_cmd(self, instance, env_variables, docker_compose_yml_dir): self.with_kafka = True env_variables["KAFKA_HOST"] = self.kafka_host env_variables["KAFKA_EXTERNAL_PORT"] = str(self.kafka_port) env_variables["SCHEMA_REGISTRY_DIR"] = instance.path + "/" env_variables["SCHEMA_REGISTRY_EXTERNAL_PORT"] = str(self.schema_registry_port) - env_variables["SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT"] = str(self.schema_registry_auth_port) + env_variables["SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT"] = str( + self.schema_registry_auth_port + ) self.base_cmd.extend( ["--file", p.join(docker_compose_yml_dir, "docker_compose_kafka.yml")] ) @@ -1617,7 +1617,10 @@ class ClickHouseCluster: with_nats=with_nats, with_nginx=with_nginx, with_kerberized_hdfs=with_kerberized_hdfs, - with_secrets=with_secrets or with_kerberized_hdfs or with_kerberos_kdc or with_kerberized_kafka, + with_secrets=with_secrets + or with_kerberized_hdfs + or with_kerberos_kdc + or with_kerberized_kafka, with_mongo=with_mongo or with_mongo_secure, with_meili=with_meili, with_redis=with_redis, @@ -2508,8 +2511,8 @@ class ClickHouseCluster: def wait_schema_registry_to_start(self, timeout=180): for port in self.schema_registry_port, self.schema_registry_auth_port: - reg_url="http://localhost:{}".format(port) - arg={'url':reg_url} + reg_url = "http://localhost:{}".format(port) + arg = {"url": reg_url} sr_client = CachedSchemaRegistryClient(arg) start = time.time() @@ -4245,8 +4248,8 @@ class ClickHouseInstance: base_secrets_dir = self.cluster.instances_dir else: base_secrets_dir = self.path - from_dir=self.secrets_dir - to_dir=p.abspath(p.join(base_secrets_dir, "secrets")) + from_dir = self.secrets_dir + to_dir = p.abspath(p.join(base_secrets_dir, "secrets")) logging.debug(f"Copy secret from {from_dir} to {to_dir}") shutil.copytree( self.secrets_dir, diff --git a/tests/integration/test_format_avro_confluent/test.py b/tests/integration/test_format_avro_confluent/test.py index d58f6d972d1..61d839ee63e 100644 --- a/tests/integration/test_format_avro_confluent/test.py +++ b/tests/integration/test_format_avro_confluent/test.py @@ -11,6 +11,7 @@ from confluent_kafka.avro.serializer.message_serializer import MessageSerializer from helpers.cluster import ClickHouseCluster, ClickHouseInstance from urllib import parse + @pytest.fixture(scope="module") def started_cluster(): try: @@ -37,8 +38,6 @@ def run_query(instance, query, data=None, settings=None): return result - - # 
reg_url="http://localhost:{}".format(started_cluster.schema_registry_port) # arg={'url':reg_url} # schema_registry_client = CachedSchemaRegistryClient(arg) @@ -49,9 +48,8 @@ def test_select(started_cluster): # input("Top of test_select, press any key") - reg_url="http://localhost:{}".format( - started_cluster.schema_registry_port) - arg={'url':reg_url} + reg_url = "http://localhost:{}".format(started_cluster.schema_registry_port) + arg = {"url": reg_url} schema_registry_client = CachedSchemaRegistryClient(arg) serializer = MessageSerializer(schema_registry_client) @@ -92,9 +90,12 @@ def test_select_auth(started_cluster): # type: (ClickHouseCluster) -> None time.sleep(5) - reg_url="http://localhost:{}".format( - started_cluster.schema_registry_auth_port) - arg={'url':reg_url,'basic.auth.credentials.source':'USER_INFO','basic.auth.user.info':'schemauser:letmein'} + reg_url = "http://localhost:{}".format(started_cluster.schema_registry_auth_port) + arg = { + "url": reg_url, + "basic.auth.credentials.source": "USER_INFO", + "basic.auth.user.info": "schemauser:letmein", + } schema_registry_client = CachedSchemaRegistryClient(arg) serializer = MessageSerializer(schema_registry_client) @@ -117,13 +118,17 @@ def test_select_auth(started_cluster): instance = started_cluster.instances["dummy"] # type: ClickHouseInstance schema_registry_url = "http://{}:{}@{}:{}".format( - 'schemauser', 'letmein', - started_cluster.schema_registry_auth_host, started_cluster.schema_registry_auth_port + "schemauser", + "letmein", + started_cluster.schema_registry_auth_host, + started_cluster.schema_registry_auth_port, ) run_query(instance, "create table avro_data_auth(value Int64) engine = Memory()") settings = {"format_avro_schema_registry_url": schema_registry_url} - run_query(instance, "insert into avro_data_auth format AvroConfluent", data, settings) + run_query( + instance, "insert into avro_data_auth format AvroConfluent", data, settings + ) stdout = run_query(instance, "select * from avro_data_auth") assert list(map(str.split, stdout.splitlines())) == [ ["0"], @@ -131,13 +136,17 @@ def test_select_auth(started_cluster): ["2"], ] + def test_select_auth_encoded(started_cluster): # type: (ClickHouseCluster) -> None time.sleep(5) - reg_url="http://localhost:{}".format( - started_cluster.schema_registry_auth_port) - arg={'url':reg_url,'basic.auth.credentials.source':'USER_INFO','basic.auth.user.info':'schemauser:letmein'} + reg_url = "http://localhost:{}".format(started_cluster.schema_registry_auth_port) + arg = { + "url": reg_url, + "basic.auth.credentials.source": "USER_INFO", + "basic.auth.user.info": "schemauser:letmein", + } schema_registry_client = CachedSchemaRegistryClient(arg) serializer = MessageSerializer(schema_registry_client) @@ -160,13 +169,22 @@ def test_select_auth_encoded(started_cluster): instance = started_cluster.instances["dummy"] # type: ClickHouseInstance schema_registry_url = "http://{}:{}@{}:{}".format( - parse.quote_plus('schemauser/slash'), parse.quote_plus('letmein'), - started_cluster.schema_registry_auth_host, started_cluster.schema_registry_auth_port + parse.quote_plus("schemauser/slash"), + parse.quote_plus("letmein"), + started_cluster.schema_registry_auth_host, + started_cluster.schema_registry_auth_port, ) - run_query(instance, "create table avro_data_auth_encoded(value Int64) engine = Memory()") + run_query( + instance, "create table avro_data_auth_encoded(value Int64) engine = Memory()" + ) settings = {"format_avro_schema_registry_url": schema_registry_url} - run_query(instance, 
"insert into avro_data_auth_encoded format AvroConfluent", data, settings) + run_query( + instance, + "insert into avro_data_auth_encoded format AvroConfluent", + data, + settings, + ) stdout = run_query(instance, "select * from avro_data_auth_encoded") assert list(map(str.split, stdout.splitlines())) == [ ["0"], From aa2b7e248d719ed94386e5cf066ed03fe71fef12 Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Tue, 9 May 2023 08:37:46 +0000 Subject: [PATCH 1738/1997] test_for_basic_auth_registry - fix port in test_kafka_formats --- .../runner/compose/docker_compose_kafka.yml | 7 ++++--- tests/integration/test_storage_kafka/test.py | 14 +++++++------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/docker/test/integration/runner/compose/docker_compose_kafka.yml b/docker/test/integration/runner/compose/docker_compose_kafka.yml index d701af1d425..c0185afb7df 100644 --- a/docker/test/integration/runner/compose/docker_compose_kafka.yml +++ b/docker/test/integration/runner/compose/docker_compose_kafka.yml @@ -17,10 +17,11 @@ services: ports: - ${KAFKA_EXTERNAL_PORT}:${KAFKA_EXTERNAL_PORT} environment: - KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT, PLAINTEXT_HOST:PLAINTEXT - KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka1:19092, PLAINTEXT_HOST://localhost:${KAFKA_EXTERNAL_PORT} + KAFKA_ADVERTISED_LISTENERS: INSIDE://localhost:${KAFKA_EXTERNAL_PORT},OUTSIDE://kafka1:19092 KAFKA_ADVERTISED_HOST_NAME: kafka1 - KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT + KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: INSIDE:PLAINTEXT,OUTSIDE:PLAINTEXT + KAFKA_INTER_BROKER_LISTENER_NAME: INSIDE + KAFKA_BROKER_ID: 1 KAFKA_ZOOKEEPER_CONNECT: kafka_zookeeper:2181 KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO" KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py index 9a6d3e0513c..d0686c7c36f 100644 --- a/tests/integration/test_storage_kafka/test.py +++ b/tests/integration/test_storage_kafka/test.py @@ -762,7 +762,7 @@ def test_kafka_formats(kafka_cluster): ), ], "extra_settings": ", format_avro_schema_registry_url='http://{}:{}'".format( - kafka_cluster.schema_registry_host, 8081 + kafka_cluster.schema_registry_host, kafka_cluster.schema_registry_port ), "supports_empty_value": True, }, @@ -4339,7 +4339,7 @@ def test_row_based_formats(kafka_cluster): f""" DROP TABLE IF EXISTS test.view; DROP TABLE IF EXISTS test.kafka; - + CREATE TABLE test.kafka (key UInt64, value UInt64) ENGINE = Kafka SETTINGS kafka_broker_list = 'kafka1:19092', @@ -4347,10 +4347,10 @@ def test_row_based_formats(kafka_cluster): kafka_group_name = '{format_name}', kafka_format = '{format_name}', kafka_max_rows_per_message = 5; - + CREATE MATERIALIZED VIEW test.view Engine=Log AS SELECT key, value FROM test.kafka; - + INSERT INTO test.kafka SELECT number * 10 as key, number * 100 as value FROM numbers({num_rows}); """ ) @@ -4459,17 +4459,17 @@ def test_block_based_formats_2(kafka_cluster): f""" DROP TABLE IF EXISTS test.view; DROP TABLE IF EXISTS test.kafka; - + CREATE TABLE test.kafka (key UInt64, value UInt64) ENGINE = Kafka SETTINGS kafka_broker_list = 'kafka1:19092', kafka_topic_list = '{format_name}', kafka_group_name = '{format_name}', kafka_format = '{format_name}'; - + CREATE MATERIALIZED VIEW test.view Engine=Log AS SELECT key, value FROM test.kafka; - + INSERT INTO test.kafka SELECT number * 10 as key, number * 100 as value FROM numbers({num_rows}) settings 
max_block_size=12, optimize_trivial_insert_select=0; """ ) From 4259176f24b223decafd0d07bef430a30844e850 Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Tue, 9 May 2023 09:23:28 +0000 Subject: [PATCH 1739/1997] test_for_basic_auth_registry - original zk configuration restored --- .../test/integration/runner/compose/docker_compose_kafka.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docker/test/integration/runner/compose/docker_compose_kafka.yml b/docker/test/integration/runner/compose/docker_compose_kafka.yml index c0185afb7df..30d1b0bed3f 100644 --- a/docker/test/integration/runner/compose/docker_compose_kafka.yml +++ b/docker/test/integration/runner/compose/docker_compose_kafka.yml @@ -7,7 +7,9 @@ services: ports: - 2181:2181 environment: - ZOOKEEPER_CLIENT_PORT: 2181 + ZOO_MY_ID: 1 + ZOO_PORT: 2181 + ZOO_SERVERS: server.1=kafka_zookeeper:2888:3888 security_opt: - label:disable From f1ce1da00744f17c42d94f1736417474eba478fe Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Thu, 18 May 2023 23:21:29 +0000 Subject: [PATCH 1740/1997] test_for_basic_auth_registry - new test and cleanup per code review --- .../secrets/password | 1 + .../test_format_avro_confluent/test.py | 56 ++++++++++++++++++- 2 files changed, 55 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_format_avro_confluent/secrets/password b/tests/integration/test_format_avro_confluent/secrets/password index 7fde510bf5a..a367925c806 100644 --- a/tests/integration/test_format_avro_confluent/secrets/password +++ b/tests/integration/test_format_avro_confluent/secrets/password @@ -1,2 +1,3 @@ schemauser: MD5:0d107d09f5bbe40cade3de5c71e9e9b7,user schemauser/slash: MD5:0d107d09f5bbe40cade3de5c71e9e9b7,user +complexschemauser: MD5:fcaeda86837fcd37755044e7258edc5d,user diff --git a/tests/integration/test_format_avro_confluent/test.py b/tests/integration/test_format_avro_confluent/test.py index 61d839ee63e..2d78668f000 100644 --- a/tests/integration/test_format_avro_confluent/test.py +++ b/tests/integration/test_format_avro_confluent/test.py @@ -88,7 +88,6 @@ def test_select(started_cluster): def test_select_auth(started_cluster): # type: (ClickHouseCluster) -> None - time.sleep(5) reg_url = "http://localhost:{}".format(started_cluster.schema_registry_auth_port) arg = { @@ -139,7 +138,6 @@ def test_select_auth(started_cluster): def test_select_auth_encoded(started_cluster): # type: (ClickHouseCluster) -> None - time.sleep(5) reg_url = "http://localhost:{}".format(started_cluster.schema_registry_auth_port) arg = { @@ -191,3 +189,57 @@ def test_select_auth_encoded(started_cluster): ["1"], ["2"], ] + +def test_select_auth_encoded_complex(started_cluster): + # type: (ClickHouseCluster) -> None + + reg_url = "http://localhost:{}".format(started_cluster.schema_registry_auth_port) + arg = { + "url": reg_url, + "basic.auth.credentials.source": "USER_INFO", + "basic.auth.user.info": "schemauser:letmein", + } + + schema_registry_client = CachedSchemaRegistryClient(arg) + serializer = MessageSerializer(schema_registry_client) + + schema = avro.schema.make_avsc_object( + { + "name": "test_record_auth_encoded_complex", + "type": "record", + "fields": [{"name": "value", "type": "long"}], + } + ) + + buf = io.BytesIO() + for x in range(0, 3): + message = serializer.encode_record_with_schema( + "test_subject_auth_encoded_complex", schema, {"value": x} + ) + buf.write(message) + data = buf.getvalue() + + instance = started_cluster.instances["dummy"] # type: ClickHouseInstance + schema_registry_url = 
"http://{}:{}@{}:{}".format( + parse.quote_plus("complexschemauser"), + parse.quote_plus("letmein%@:/"), + started_cluster.schema_registry_auth_host, + started_cluster.schema_registry_auth_port, + ) + + run_query( + instance, "create table avro_data_auth_encoded_complex(value Int64) engine = Memory()" + ) + settings = {"format_avro_schema_registry_url": schema_registry_url} + run_query( + instance, + "insert into avro_data_auth_encoded_complex format AvroConfluent", + data, + settings, + ) + stdout = run_query(instance, "select * from avro_data_auth_encoded_complex") + assert list(map(str.split, stdout.splitlines())) == [ + ["0"], + ["1"], + ["2"], + ] From 9f6ab5e816378dce815957e396cf4389986256e4 Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Fri, 19 May 2023 09:31:24 +0000 Subject: [PATCH 1741/1997] test_for_basic_auth_registry - made black formatter happy --- tests/integration/test_format_avro_confluent/test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_format_avro_confluent/test.py b/tests/integration/test_format_avro_confluent/test.py index 2d78668f000..b27642c921b 100644 --- a/tests/integration/test_format_avro_confluent/test.py +++ b/tests/integration/test_format_avro_confluent/test.py @@ -190,6 +190,7 @@ def test_select_auth_encoded(started_cluster): ["2"], ] + def test_select_auth_encoded_complex(started_cluster): # type: (ClickHouseCluster) -> None @@ -228,7 +229,8 @@ def test_select_auth_encoded_complex(started_cluster): ) run_query( - instance, "create table avro_data_auth_encoded_complex(value Int64) engine = Memory()" + instance, + "create table avro_data_auth_encoded_complex(value Int64) engine = Memory()", ) settings = {"format_avro_schema_registry_url": schema_registry_url} run_query( From c8347bd31300bec4cdd3277680f398808d37533c Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Wed, 14 Jun 2023 15:55:44 +0000 Subject: [PATCH 1742/1997] test_for_basic_auth_registry: some comments removed per code review --- tests/integration/test_format_avro_confluent/test.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/integration/test_format_avro_confluent/test.py b/tests/integration/test_format_avro_confluent/test.py index b27642c921b..540f90ae05e 100644 --- a/tests/integration/test_format_avro_confluent/test.py +++ b/tests/integration/test_format_avro_confluent/test.py @@ -38,16 +38,10 @@ def run_query(instance, query, data=None, settings=None): return result - # reg_url="http://localhost:{}".format(started_cluster.schema_registry_port) - # arg={'url':reg_url} - # schema_registry_client = CachedSchemaRegistryClient(arg) - def test_select(started_cluster): # type: (ClickHouseCluster) -> None - # input("Top of test_select, press any key") - reg_url = "http://localhost:{}".format(started_cluster.schema_registry_port) arg = {"url": reg_url} From 2d46052d62bf8b7efd16aeb769e278b9df54971b Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 19 Jul 2023 08:35:46 +0000 Subject: [PATCH 1743/1997] Update description of events "QueryCacheHits/Misses" --- src/Common/ProfileEvents.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 0838e0366df..75d1e493873 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -57,8 +57,8 @@ M(TableFunctionExecute, "Number of table function calls.") \ M(MarkCacheHits, "Number of times an entry has been found in the mark cache, so we didn't have to load a mark file.") \ 
M(MarkCacheMisses, "Number of times an entry has not been found in the mark cache, so we had to load a mark file in memory, which is a costly operation, adding to query latency.") \ - M(QueryCacheHits, "Number of times a query result has been found in the query cache (and query computation was avoided).") \ - M(QueryCacheMisses, "Number of times a query result has not been found in the query cache (and required query computation).") \ + M(QueryCacheHits, "Number of times a query result has been found in the query cache (and query computation was avoided). Only updated for SELECT queries with SETTING use_query_cache = 1.") \ + M(QueryCacheMisses, "Number of times a query result has not been found in the query cache (and required query computation). Only updated for SELECT queries with SETTING use_query_cache = 1.") \ M(CreatedReadBufferOrdinary, "Number of times ordinary read buffer was created for reading data (while choosing among other read methods).") \ M(CreatedReadBufferDirectIO, "Number of times a read buffer with O_DIRECT was created for reading data (while choosing among other read methods).") \ M(CreatedReadBufferDirectIOFailed, "Number of times a read buffer with O_DIRECT was attempted to be created for reading data (while choosing among other read methods), but the OS did not allow it (due to lack of filesystem support or other reasons) and we fallen back to the ordinary reading method.") \ From aa888ad64a95ef801977844b2b253bb8162cfc1a Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Wed, 19 Jul 2023 08:46:57 +0000 Subject: [PATCH 1744/1997] Separate thread mutex, add test --- src/Common/SystemLogBase.cpp | 2 +- src/Common/SystemLogBase.h | 3 ++- src/Interpreters/SystemLog.cpp | 2 +- src/Interpreters/SystemLog.h | 1 + tests/queries/0_stateless/02813_starting_in_text_log.reference | 1 + tests/queries/0_stateless/02813_starting_in_text_log.sql | 2 ++ 6 files changed, 8 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/02813_starting_in_text_log.reference create mode 100755 tests/queries/0_stateless/02813_starting_in_text_log.sql diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp index baee7021c35..bed6d661db7 100644 --- a/src/Common/SystemLogBase.cpp +++ b/src/Common/SystemLogBase.cpp @@ -214,7 +214,7 @@ SystemLogBase::SystemLogBase( template void SystemLogBase::startup() { - std::lock_guard lock(queue->mutex); + std::lock_guard lock(thread_mutex); saving_thread = std::make_unique([this] { savingThreadFunction(); }); } diff --git a/src/Common/SystemLogBase.h b/src/Common/SystemLogBase.h index fa9f9b6f72e..0ac376769ad 100644 --- a/src/Common/SystemLogBase.h +++ b/src/Common/SystemLogBase.h @@ -67,6 +67,7 @@ public: virtual void savingThreadFunction() = 0; protected: + std::mutex thread_mutex; std::unique_ptr saving_thread; bool is_shutdown = false; @@ -93,10 +94,10 @@ public: Index pop(std::vector& output, bool& should_prepare_tables_anyway, bool& exit_this_thread); void confirm(Index to_flush_end); +private: /// Data shared between callers of add()/flush()/shutdown(), and the saving thread std::mutex mutex; -private: Poco::Logger * log; // Queue is bounded. But its size is quite large to not block in all normal cases. 
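
Note: the SystemLogBase hunks above (together with the SystemLog.cpp/SystemLog.h hunks that follow) split one lock into two. The queue keeps its own data mutex, now private to the queue class, while a new thread_mutex on ISystemLog guards only the saving thread's lifecycle. A minimal sketch of the resulting pattern (simplified names, not the actual ClickHouse classes):

#include <memory>
#include <mutex>
#include <thread>

class LogBase
{
public:
    void startup()
    {
        std::lock_guard lock(thread_mutex);   // lifecycle lock only, never taken by producers
        saving_thread = std::make_unique<std::thread>([this] { savingThreadFunction(); });
    }

    void stopThread()
    {
        std::unique_lock lock(thread_mutex);
        if (!saving_thread || !saving_thread->joinable())
            return;
        requestShutdown();                    // signal the thread to exit (the queue uses its own mutex)
        lock.unlock();                        // do not hold the lifecycle lock while joining
        saving_thread->join();
    }

    virtual ~LogBase() = default;

protected:
    virtual void savingThreadFunction() = 0;
    virtual void requestShutdown() = 0;

    std::mutex thread_mutex;                  // guards saving_thread only, not the queued records
    std::unique_ptr<std::thread> saving_thread;
};

With this split, producers calling add()/flush() contend only on the queue's mutex and can no longer be blocked by startup() or stopFlushThread(); the new 02813_starting_in_text_log test below checks that messages logged during startup still reach text_log.
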
diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index 674210cbaad..0b89b1dec26 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -358,7 +358,7 @@ template void SystemLog::stopFlushThread() { { - std::lock_guard lock(queue->mutex); + std::lock_guard lock(thread_mutex); if (!saving_thread || !saving_thread->joinable()) return; diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index 91fb7f49221..5d8bb30150d 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -124,6 +124,7 @@ protected: using ISystemLog::is_shutdown; using ISystemLog::saving_thread; + using ISystemLog::thread_mutex; using Base::queue; private: diff --git a/tests/queries/0_stateless/02813_starting_in_text_log.reference b/tests/queries/0_stateless/02813_starting_in_text_log.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02813_starting_in_text_log.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02813_starting_in_text_log.sql b/tests/queries/0_stateless/02813_starting_in_text_log.sql new file mode 100755 index 00000000000..8ef78945a72 --- /dev/null +++ b/tests/queries/0_stateless/02813_starting_in_text_log.sql @@ -0,0 +1,2 @@ +SYSTEM FLUSH LOGS; +SELECT count() > 0 FROM system.text_log WHERE event_date >= yesterday() AND message LIKE '%Application: Starting ClickHouse%'; From 70543e8ef9fe8523c5604d62fac3376da91c6d2c Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 19 Jul 2023 08:47:53 +0000 Subject: [PATCH 1745/1997] Automatic style fix --- .../test_replicated_database/test.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py index 17dd2adcde4..ed034a326da 100644 --- a/tests/integration/test_replicated_database/test.py +++ b/tests/integration/test_replicated_database/test.py @@ -302,12 +302,21 @@ def test_alter_attach(started_cluster, attachable_part, engine): ) main_node.query(f"ALTER TABLE {database}.alter_attach_test ATTACH PART 'all_1_1_0'") # On the main node, data is attached - assert main_node.query(f"SELECT CounterID FROM {database}.alter_attach_test") == "123\n" + assert ( + main_node.query(f"SELECT CounterID FROM {database}.alter_attach_test") + == "123\n" + ) # On the other node, data is replicated only if using a Replicated table engine if engine == "ReplicatedMergeTree": - assert dummy_node.query(f"SELECT CounterID FROM {database}.alter_attach_test") == "123\n" + assert ( + dummy_node.query(f"SELECT CounterID FROM {database}.alter_attach_test") + == "123\n" + ) else: - assert dummy_node.query(f"SELECT CounterID FROM {database}.alter_attach_test") == "" + assert ( + dummy_node.query(f"SELECT CounterID FROM {database}.alter_attach_test") + == "" + ) main_node.query(f"DROP DATABASE {database} SYNC") dummy_node.query(f"DROP DATABASE {database} SYNC") @@ -333,7 +342,9 @@ def test_alter_drop_part(started_cluster, engine): assert main_node.query(f"SELECT CounterID FROM {database}.alter_drop_part") == "" if engine == "ReplicatedMergeTree": # The DROP operation is still replicated at the table engine level - assert dummy_node.query(f"SELECT CounterID FROM {database}.alter_drop_part") == "" + assert ( + dummy_node.query(f"SELECT CounterID FROM {database}.alter_drop_part") == "" + ) else: assert ( dummy_node.query(f"SELECT CounterID FROM {database}.alter_drop_part") From 
96f048f7f8895507c6827f373699244f345730ec Mon Sep 17 00:00:00 2001
From: Salvatore Mesoraca
Date: Wed, 19 Jul 2023 10:46:02 +0200
Subject: [PATCH 1746/1997] Convert output UInt128 to FixedString even if input
 is empty

---
 src/Functions/FunctionsHashing.h              | 48 +++++++++----------
 .../0_stateless/02534_keyed_siphash.reference |  2 +
 .../0_stateless/02534_keyed_siphash.sql       |  3 ++
 3 files changed, 29 insertions(+), 24 deletions(-)

diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h
index 4965d1f7b49..82944630b10 100644
--- a/src/Functions/FunctionsHashing.h
+++ b/src/Functions/FunctionsHashing.h
@@ -1535,33 +1535,33 @@ public:
     {
         auto col_to = ColumnVector::create(input_rows_count);
 
-        if (input_rows_count == 0)
-            return col_to;
-
-        typename ColumnVector::Container & vec_to = col_to->getData();
-
-        /// If using a "keyed" algorithm, the first argument is the key and
-        /// the data starts from the second argument.
-        /// Otherwise there is no key and all arguments are interpreted as data.
-        constexpr size_t first_data_argument = Keyed;
-
-        if (arguments.size() <= first_data_argument)
+        if (input_rows_count != 0)
         {
-            /// Return a fixed random-looking magic number when input is empty
-            vec_to.assign(input_rows_count, static_cast(0xe28dbde7fe22e41c));
-        }
+            typename ColumnVector::Container & vec_to = col_to->getData();
 
-        KeyColumnsType key_cols{};
-        if constexpr (Keyed)
-            if (!arguments.empty())
-                key_cols = Impl::parseKeyColumns(arguments[0]);
+            /// If using a "keyed" algorithm, the first argument is the key and
+            /// the data starts from the second argument.
+            /// Otherwise there is no key and all arguments are interpreted as data.
+            constexpr size_t first_data_argument = Keyed;
 
-        /// The function supports arbitrary number of arguments of arbitrary types.
-        bool is_first_argument = true;
-        for (size_t i = first_data_argument; i < arguments.size(); ++i)
-        {
-            const auto & col = arguments[i];
-            executeForArgument(key_cols, col.type.get(), col.column.get(), vec_to, is_first_argument);
+            if (arguments.size() <= first_data_argument)
+            {
+                /// Return a fixed random-looking magic number when input is empty
+                vec_to.assign(input_rows_count, static_cast(0xe28dbde7fe22e41c));
+            }
+
+            KeyColumnsType key_cols{};
+            if constexpr (Keyed)
+                if (!arguments.empty())
+                    key_cols = Impl::parseKeyColumns(arguments[0]);
+
+            /// The function supports arbitrary number of arguments of arbitrary types.
+            bool is_first_argument = true;
+            for (size_t i = first_data_argument; i < arguments.size(); ++i)
+            {
+                const auto & col = arguments[i];
+                executeForArgument(key_cols, col.type.get(), col.column.get(), vec_to, is_first_argument);
+            }
         }
 
         if constexpr (std::is_same_v) /// backward-compatible
diff --git a/tests/queries/0_stateless/02534_keyed_siphash.reference b/tests/queries/0_stateless/02534_keyed_siphash.reference
index de783d7dddf..a9f724365a8 100644
--- a/tests/queries/0_stateless/02534_keyed_siphash.reference
+++ b/tests/queries/0_stateless/02534_keyed_siphash.reference
@@ -232,3 +232,5 @@ Check multiple keys as separate ints from a table with constant data
 9357996107237883963
 86AE90BB6A238D3F6221457630142C9B
 F6D93D8FEA6D7DECCDD95A7A0A2AA36D
+Check asan bug
+0
diff --git a/tests/queries/0_stateless/02534_keyed_siphash.sql b/tests/queries/0_stateless/02534_keyed_siphash.sql
index 14b422ac713..4f3ae7d62bd 100644
--- a/tests/queries/0_stateless/02534_keyed_siphash.sql
+++ b/tests/queries/0_stateless/02534_keyed_siphash.sql
@@ -331,3 +331,6 @@ INSERT INTO sipHashKeyed_keys VALUES (4, 4);
 SELECT sipHash64Keyed((key0, key1), 4::UInt64) FROM sipHashKeyed_keys ORDER by key0;
 SELECT hex(sipHash128Keyed((key0, key1), 4::UInt64)) FROM sipHashKeyed_keys ORDER by key0;
 DROP TABLE sipHashKeyed_keys;
+
+SELECT 'Check asan bug';
+SELECT sipHash128((toUInt64(9223372036854775806), 1)) = sipHash128(1) GROUP BY sipHash128(1::UInt8), toUInt64(9223372036854775806);

From f22452c78c36c2d2529444a137d4853749c04945 Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Wed, 19 Jul 2023 08:54:31 +0000
Subject: [PATCH 1747/1997] Beautify pretty-printing of the query string in
 SYSTEM.QUERY_CACHE.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Output of SYSTEM.QUERY_CACHE

- before this PR:

SELECT * FROM system.query_cache

Query id: 4989008b-b84c-4e57-bfe5-7fb551814812

Row 1:
──────
query: SELECT 1 SETTINGS
[...]

- after this PR:

SELECT * FROM system.query_cache

Query id: 4989008b-b84c-4e57-bfe5-7fb551814812

Row 1:
──────
query: SELECT 1 SETTINGS use_query_cache = 1
[...]
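
Note: the diff below implements the prettier output shown above by rendering the query string once, when the cache key is constructed, and storing the result in the key. Per the new comment in QueryCache.h, the printable form is kept explicitly because the query-cache SETTINGS are pruned out of the stored AST, which is what restores the full "SETTINGS use_query_cache = 1" text. A compact sketch of the idea with stand-in types (not the real IAST/Key API):

#include <string>
#include <utility>

struct StubAst { std::string text; };                // stands in for ASTPtr

static StubAst pruneCacheSettings(StubAst ast)
{
    // models removeQueryCacheSettings(); the actual pruning is elided in this sketch
    return ast;
}

struct CacheKey
{
    StubAst ast;                                     // pruned form, used for hashing and equality
    const std::string query_string;                  // full form, rendered once for display/logging

    explicit CacheKey(const StubAst & original)
        : ast(pruneCacheSettings(original))
        , query_string(original.text)                // captured before pruning
    {
    }
};

Callers then switch from the per-call key.queryStringFromAst() to the precomputed key.query_string, so every LOG_TRACE line and the SYSTEM.QUERY_CACHE view reuse one string.
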
---
 src/Interpreters/Cache/QueryCache.cpp         | 35 ++++++++++---------
 src/Interpreters/Cache/QueryCache.h           |  8 +++--
 .../System/StorageSystemQueryCache.cpp        |  2 +-
 .../02494_query_cache_secrets.reference       |  2 +-
 4 files changed, 26 insertions(+), 21 deletions(-)

diff --git a/src/Interpreters/Cache/QueryCache.cpp b/src/Interpreters/Cache/QueryCache.cpp
index f46a10ca51d..1d1543844a2 100644
--- a/src/Interpreters/Cache/QueryCache.cpp
+++ b/src/Interpreters/Cache/QueryCache.cpp
@@ -115,6 +115,15 @@ ASTPtr removeQueryCacheSettings(ASTPtr ast)
     return transformed_ast;
 }
 
+String queryStringFromAst(ASTPtr ast)
+{
+    WriteBufferFromOwnString buf;
+    IAST::FormatSettings format_settings(buf, /*one_line*/ true);
+    format_settings.show_secrets = false;
+    ast->format(format_settings);
+    return buf.str();
+}
+
 }
 
 QueryCache::Key::Key(
@@ -129,6 +138,7 @@ QueryCache::Key::Key(
     , is_shared(is_shared_)
     , expires_at(expires_at_)
     , is_compressed(is_compressed_)
+    , query_string(queryStringFromAst(ast_))
 {
 }
 
@@ -142,15 +152,6 @@ bool QueryCache::Key::operator==(const Key & other) const
     return ast->getTreeHash() == other.ast->getTreeHash();
 }
 
-String QueryCache::Key::queryStringFromAst() const
-{
-    WriteBufferFromOwnString buf;
-    IAST::FormatSettings format_settings(buf, /*one_line*/ true);
-    format_settings.show_secrets = false;
-    ast->format(format_settings);
-    return buf.str();
-}
-
 size_t QueryCache::KeyHasher::operator()(const Key & key) const
 {
     SipHash hash;
@@ -191,7 +192,7 @@ QueryCache::Writer::Writer(
     if (auto entry = cache.getWithKey(key); entry.has_value() && !IsStale()(entry->key))
     {
         skip_insert = true; /// Key already contained in cache and did not expire yet --> don't replace it
-        LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (non-stale entry found), query: {}", key.queryStringFromAst());
+        LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (non-stale entry found), query: {}", key.query_string);
     }
 }
 
@@ -263,14 +264,14 @@ void QueryCache::Writer::finalizeWrite()
 
     if (std::chrono::duration_cast(std::chrono::system_clock::now() - query_start_time) < min_query_runtime)
     {
-        LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (query not expensive enough), query: {}", key.queryStringFromAst());
+        LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (query not expensive enough), query: {}", key.query_string);
         return;
     }
 
     if (auto entry = cache.getWithKey(key); entry.has_value() && !IsStale()(entry->key))
     {
         /// Same check as in ctor because a parallel Writer could have inserted the current key in the meantime
-        LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (non-stale entry found), query: {}", key.queryStringFromAst());
+        LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (non-stale entry found), query: {}", key.query_string);
         return;
     }
 
@@ -353,7 +354,7 @@ void QueryCache::Writer::finalizeWrite()
 
     if ((new_entry_size_in_bytes > max_entry_size_in_bytes) || (new_entry_size_in_rows > max_entry_size_in_rows))
     {
-        LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (query result too big), new_entry_size_in_bytes: {} ({}), new_entry_size_in_rows: {} ({}), query: {}", new_entry_size_in_bytes, max_entry_size_in_bytes, new_entry_size_in_rows, max_entry_size_in_rows, key.queryStringFromAst());
+        LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (query result too big), new_entry_size_in_bytes: {} ({}), new_entry_size_in_rows: {} ({}), query: {}", new_entry_size_in_bytes, max_entry_size_in_bytes, new_entry_size_in_rows, max_entry_size_in_rows, key.query_string);
         return;
     }
 
@@ -388,7 +389,7 @@ QueryCache::Reader::Reader(Cache & cache_, const Key & key, const std::lock_guar
 
     if (!entry.has_value())
     {
-        LOG_TRACE(&Poco::Logger::get("QueryCache"), "No entry found for query {}", key.queryStringFromAst());
+        LOG_TRACE(&Poco::Logger::get("QueryCache"), "No entry found for query {}", key.query_string);
         return;
     }
 
@@ -397,13 +398,13 @@
     if (!entry_key.is_shared && entry_key.user_name != key.user_name)
     {
-        LOG_TRACE(&Poco::Logger::get("QueryCache"), "Inaccessible entry found for query {}", key.queryStringFromAst());
+        LOG_TRACE(&Poco::Logger::get("QueryCache"), "Inaccessible entry found for query {}", key.query_string);
         return;
     }
 
     if (IsStale()(entry_key))
     {
-        LOG_TRACE(&Poco::Logger::get("QueryCache"), "Stale entry found for query {}", key.queryStringFromAst());
+        LOG_TRACE(&Poco::Logger::get("QueryCache"), "Stale entry found for query {}", key.query_string);
         return;
     }
 
@@ -441,7 +442,7 @@ QueryCache::Reader::Reader(Cache & cache_, const Key & key, const std::lock_guar
         buildSourceFromChunks(entry_key.header, std::move(decompressed_chunks), entry_mapped->totals, entry_mapped->extremes);
     }
 
-    LOG_TRACE(&Poco::Logger::get("QueryCache"), "Entry found for query {}", key.queryStringFromAst());
+    LOG_TRACE(&Poco::Logger::get("QueryCache"), "Entry found for query {}", key.query_string);
 }
 
 bool QueryCache::Reader::hasCacheEntryForKey() const
diff --git a/src/Interpreters/Cache/QueryCache.h b/src/Interpreters/Cache/QueryCache.h
index 6ef7cc60918..a67adcc86c9 100644
--- a/src/Interpreters/Cache/QueryCache.h
+++ b/src/Interpreters/Cache/QueryCache.h
@@ -30,7 +30,7 @@ public:
         /// ----------------------------------------------------
         /// The actual key (data which gets hashed):
 
-        /// Unlike the query string, the AST is agnostic to lower/upper case (SELECT vs. select)
+        /// Unlike the query string, the AST is agnostic to lower/upper case (SELECT vs. select).
         const ASTPtr ast;
 
         /// Note: For a transactionally consistent cache, we would need to include the system settings in the cache key or invalidate the
@@ -58,6 +58,11 @@ public:
         /// (we could theoretically apply compression also to the totals and extremes but it's an obscure use case)
         const bool is_compressed;
 
+        /// The SELECT query as plain string, displayed in SYSTEM.QUERY_CACHE. Stored explicitly, i.e. not constructed from the AST, for the
+        /// sole reason that QueryCache-related SETTINGS are pruned from the AST (see removeQueryCacheSettings()) which will look ugly in
+        /// the SYSTEM.QUERY_CACHE.
+        const String query_string;
+
         /// Ctor to construct a Key for writing into query cache.
        Key(ASTPtr ast_,
            Block header_,
@@ -69,7 +74,6 @@ public:
         Key(ASTPtr ast_, const String & user_name_);
 
         bool operator==(const Key & other) const;
-        String queryStringFromAst() const;
     };
 
     struct Entry
diff --git a/src/Storages/System/StorageSystemQueryCache.cpp b/src/Storages/System/StorageSystemQueryCache.cpp
index 117fb4e8a5c..288e4fd52a0 100644
--- a/src/Storages/System/StorageSystemQueryCache.cpp
+++ b/src/Storages/System/StorageSystemQueryCache.cpp
@@ -44,7 +44,7 @@ void StorageSystemQueryCache::fillData(MutableColumns & res_columns, ContextPtr
         if (!key.is_shared && key.user_name != user_name)
             continue;
 
-        res_columns[0]->insert(key.queryStringFromAst()); /// approximates the original query string
+        res_columns[0]->insert(key.query_string); /// approximates the original query string
         res_columns[1]->insert(QueryCache::QueryCacheEntryWeight()(*query_result));
         res_columns[2]->insert(key.expires_at < std::chrono::system_clock::now());
         res_columns[3]->insert(key.is_shared);
diff --git a/tests/queries/0_stateless/02494_query_cache_secrets.reference b/tests/queries/0_stateless/02494_query_cache_secrets.reference
index dd6341262bc..306374eed4b 100644
--- a/tests/queries/0_stateless/02494_query_cache_secrets.reference
+++ b/tests/queries/0_stateless/02494_query_cache_secrets.reference
@@ -1,2 +1,2 @@
 A2193552DCF8A9F99AC35F86BC4D2FFD
-SELECT hex(encrypt(\'aes-128-ecb\', \'[HIDDEN]\')) SETTINGS
+SELECT hex(encrypt(\'aes-128-ecb\', \'[HIDDEN]\')) SETTINGS use_query_cache = 1

From d3663c356f4a8cab8b77f36fcc9b8251e6ddd02e Mon Sep 17 00:00:00 2001
From: Alexander Gololobov <440544+davenger@users.noreply.github.com>
Date: Wed, 19 Jul 2023 12:11:57 +0200
Subject: [PATCH 1748/1997] Wait for KILL MUTATION to finish

---
 .../00834_kill_mutation_replicated_zookeeper.sh | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tests/queries/0_stateless/00834_kill_mutation_replicated_zookeeper.sh b/tests/queries/0_stateless/00834_kill_mutation_replicated_zookeeper.sh
index 2e917f67fe8..16ad08deeb2 100755
--- a/tests/queries/0_stateless/00834_kill_mutation_replicated_zookeeper.sh
+++ b/tests/queries/0_stateless/00834_kill_mutation_replicated_zookeeper.sh
@@ -57,6 +57,14 @@ $CLICKHOUSE_CLIENT --query="SELECT count() FROM system.mutations WHERE database
 
 ${CLICKHOUSE_CLIENT} --query="KILL MUTATION WHERE database = '$CLICKHOUSE_DATABASE' AND table = 'kill_mutation_r1' AND mutation_id = '0000000001'"
 
+# Wait for the 1st mutation to be actually killed and the 2nd to finish
+query_result=$($CLICKHOUSE_CLIENT --query="$check_query1" 2>&1)
+while [ "$query_result" != "0" ]
+do
+    query_result=$($CLICKHOUSE_CLIENT --query="$check_query1" 2>&1)
+    sleep 0.5
+done
+
 ${CLICKHOUSE_CLIENT} --query="SYSTEM SYNC REPLICA kill_mutation_r1"
 ${CLICKHOUSE_CLIENT} --query="SYSTEM SYNC REPLICA kill_mutation_r2"

From 95424177d5de5bd7973823ffdaaacafce442e8ba Mon Sep 17 00:00:00 2001
From: kevinyhzou
Date: Wed, 19 Jul 2023 18:26:54 +0800
Subject: [PATCH 1749/1997] review fix

---
 docs/en/interfaces/formats.md                 |  3 +-
 .../operations/settings/settings-formats.md   | 12 +--
 src/Core/Settings.h                           |  3 +-
 src/Formats/FormatFactory.cpp                 |  3 +-
 src/Formats/FormatSettings.h                  |  3 +-
 .../Formats/Impl/CSVRowInputFormat.cpp        | 78 ++++++++++---------
 .../Formats/Impl/CSVRowInputFormat.h          |  2 +
 ...11_csv_input_field_type_mismatch.reference |  8 +-
 .../02811_csv_input_field_type_mismatch.sh    |  4 +-
 .../data_csv/csv_with_bad_field_values.csv    |  5 ++
 .../data_csv/csv_with_diff_field_types.csv    |  3 -
 11 files changed, 62 insertions(+), 62 deletions(-)
 create mode 100644
tests/queries/0_stateless/data_csv/csv_with_bad_field_values.csv delete mode 100644 tests/queries/0_stateless/data_csv/csv_with_diff_field_types.csv diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index f45c55a9734..c20f304c346 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -472,8 +472,7 @@ The CSV format supports the output of totals and extremes the same way as `TabSe - [input_format_csv_trim_whitespaces](/docs/en/operations/settings/settings-formats.md/#input_format_csv_trim_whitespaces) - trim spaces and tabs in non-quoted CSV strings. Default value - `true`. - [input_format_csv_allow_whitespace_or_tab_as_delimiter](/docs/en/operations/settings/settings-formats.md/# input_format_csv_allow_whitespace_or_tab_as_delimiter) - Allow to use whitespace or tab as field delimiter in CSV strings. Default value - `false`. - [input_format_csv_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_variable_number_of_columns) - ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values. Default value - `false`. -- [input_format_csv_allow_check_field_deserialization](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_check_field_deserialization) - Allow to check whether the csv input field can be successful deserialized. Default value - `false`. -- [input_format_csv_set_default_if_deserialization_failed](/docs/en/operations/settings/settings-formats.md/#input_format_csv_set_default_if_deserialization_failed) - Set default value to column if the csv input field deserialization failed. Default value - `false`. +- [input_format_csv_use_default_on_bad_values](/docs/en/operations/settings/settings-formats.md/#input_format_csv_use_default_on_bad_values) - Allow to set default value to column when CSV field deserialize failed on bad value. Default value - `false`. ## CSVWithNames {#csvwithnames} diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index 409ac4bd58a..5fac8df02d7 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -969,15 +969,9 @@ Result a b ``` -### input_format_csv_allow_check_field_deserialization {#input_format_csv_allow_check_field_deserialization} +### input_format_csv_use_default_on_bad_values {#input_format_csv_use_default_on_bad_values} -Allow to use whitespace or tab as field delimiter in CSV strings. - -Default value: `false`. - -### input_format_csv_set_default_if_deserialization_failed {#input_format_csv_set_default_if_deserialization_failed} - -Allow to set default value to column if the csv input field's deserialization failed +Allow to set default value to column when CSV field deserialize failed on bad value Default value: `false`. 
@@ -988,7 +982,7 @@ Query ```bash echo 'a,b,c' > 1.txt ./clickhouse local -q "create table test_tbl (x String, y UInt32, z Date) engine=MergeTree order by x" -cat 1.txt | ./clickhouse local -q "INSERT INTO test_tbl SETTINGS input_format_csv_allow_check_field_deserialization=true, input_format_csv_set_default_if_deserialization_failed=true FORMAT CSV" +cat 1.txt | ./clickhouse local -q "INSERT INTO test_tbl SETTINGS input_format_csv_use_default_on_bad_values=true FORMAT CSV" ./clickhouse local -q "select * from test_tbl" ``` diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 9d93ba9ad2c..311813fb38f 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -872,8 +872,7 @@ class IColumn; M(Bool, input_format_csv_detect_header, true, "Automatically detect header with names and types in CSV format", 0) \ M(Bool, input_format_csv_allow_whitespace_or_tab_as_delimiter, false, "Allow to use spaces and tabs(\\t) as field delimiter in the CSV strings", 0) \ M(Bool, input_format_csv_trim_whitespaces, true, "Trims spaces and tabs (\\t) characters at the beginning and end in CSV strings", 0) \ - M(Bool, input_format_csv_allow_check_field_deserialization, false, "Allow to check the csv input field deserialization whether success or not.", 0) \ - M(Bool, input_format_csv_set_default_if_deserialization_failed, false, "All to set column default value if the input field's deserialization failed.", 0) \ + M(Bool, input_format_csv_use_default_on_bad_values, false, "Allow to set default value to column when CSV field deserialize failed on bad value", 0) \ M(Bool, input_format_tsv_detect_header, true, "Automatically detect header with names and types in TSV format", 0) \ M(Bool, input_format_custom_detect_header, true, "Automatically detect header with names and types in CustomSeparated format", 0) \ M(Bool, input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for format Parquet", 0) \ diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index b3b9609f9fe..3df2ca7d2e0 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -73,8 +73,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.csv.trim_whitespaces = settings.input_format_csv_trim_whitespaces; format_settings.csv.allow_whitespace_or_tab_as_delimiter = settings.input_format_csv_allow_whitespace_or_tab_as_delimiter; format_settings.csv.allow_variable_number_of_columns = settings.input_format_csv_allow_variable_number_of_columns; - format_settings.csv.allow_check_field_deserialization = settings.input_format_csv_allow_check_field_deserialization; - format_settings.csv.set_default_if_deserialization_failed = settings.input_format_csv_set_default_if_deserialization_failed; + format_settings.csv.use_default_on_bad_values = settings.input_format_csv_use_default_on_bad_values; format_settings.hive_text.fields_delimiter = settings.input_format_hive_text_fields_delimiter; format_settings.hive_text.collection_items_delimiter = settings.input_format_hive_text_collection_items_delimiter; format_settings.hive_text.map_keys_delimiter = settings.input_format_hive_text_map_keys_delimiter; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 4d4eb926992..4e49d338e43 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -141,8 +141,7 @@ struct FormatSettings bool trim_whitespaces = true; bool 
allow_whitespace_or_tab_as_delimiter = false; bool allow_variable_number_of_columns = false; - bool allow_check_field_deserialization=false; - bool set_default_if_deserialization_failed=false; + bool use_default_on_bad_values = false; } csv; struct HiveText diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index e1be6b21610..34d5b589591 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -316,49 +317,52 @@ bool CSVFormatReader::readField( return false; } - BufferBase::Position pos_start = buf->position(); + if (format_settings.csv.use_default_on_bad_values) + return readFieldOrDefault(column, type, serialization); + return readFieldImpl(*buf, column, type, serialization); +} + +bool CSVFormatReader::readFieldImpl(ReadBuffer & istr, DB::IColumn & column, const DB::DataTypePtr & type, const DB::SerializationPtr & serialization) +{ + if (format_settings.null_as_default && !isNullableOrLowCardinalityNullable(type)) + { + /// If value is null but type is not nullable then use default value instead. + return SerializationNullable::deserializeTextCSVImpl(column, istr, format_settings, serialization); + } + + /// Read the column normally. + serialization->deserializeTextCSV(column, istr, format_settings); + return true; +} + +bool CSVFormatReader::readFieldOrDefault(DB::IColumn & column, const DB::DataTypePtr & type, const DB::SerializationPtr & serialization) +{ + String field; + readCSVField(field, *buf, format_settings.csv); + ReadBufferFromString tmp_buf(field); + bool is_bad_value = false; + bool res = false; + size_t col_size = column.size(); try { - if (format_settings.csv.allow_check_field_deserialization) - { - std::string field; - readCSVField(field, *buf, format_settings.csv); - ReadBufferFromMemory tmp(field); - if (format_settings.null_as_default && !isNullableOrLowCardinalityNullable(type)) - SerializationNullable::deserializeTextCSVImpl(column, tmp, format_settings, serialization); - else - serialization->deserializeTextCSV(column, tmp, format_settings); - if (column.size() == col_size + 1 && field.size() > 0 && !tmp.eof()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Text CSV deserialize field bytes logical error."); - } - else - { - if (format_settings.null_as_default && !isNullableOrLowCardinalityNullable(type)) - { - /// If value is null but type is not nullable then use default value instead. - return SerializationNullable::deserializeTextCSVImpl(column, *buf, format_settings, serialization); - } - /// Read the column normally. - serialization->deserializeTextCSV(column, *buf, format_settings); - } + res = readFieldImpl(tmp_buf, column, type, serialization); + /// Check if we parsed the whole field successfully. + if (!field.empty() && !tmp_buf.eof()) + is_bad_value = true; } - catch (Exception & e) + catch (const Exception &) { - LOG_DEBUG(&Poco::Logger::get("CSVRowInputFormat"), "Failed to deserialize CSV column, exception message:{}", e.what()); - if (format_settings.csv.set_default_if_deserialization_failed) - { - // Reset the column and buffer position, then skip the field and set column default value. 
- if (column.size() == col_size + 1) - column.popBack(1); - buf->position() = pos_start; - skipField(); - column.insertDefault(); - } - else - throw; + is_bad_value = true; } - return true; + + if (!is_bad_value) + return res; + + if (column.size() == col_size + 1) + column.popBack(1); + column.insertDefault(); + return false; } void CSVFormatReader::skipPrefixBeforeHeader() diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.h b/src/Processors/Formats/Impl/CSVRowInputFormat.h index 8ccf04feed3..7b1a1fc433d 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.h +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.h @@ -89,6 +89,8 @@ public: void setReadBuffer(ReadBuffer & in_) override; FormatSettings::EscapingRule getEscapingRule() const override { return FormatSettings::EscapingRule::CSV; } + bool readFieldImpl(ReadBuffer & istr, DB::IColumn & column, const DB::DataTypePtr & type, const DB::SerializationPtr & serialization); + bool readFieldOrDefault(DB::IColumn & column, const DB::DataTypePtr & type, const DB::SerializationPtr & serialization); protected: PeekableReadBuffer * buf; diff --git a/tests/queries/0_stateless/02811_csv_input_field_type_mismatch.reference b/tests/queries/0_stateless/02811_csv_input_field_type_mismatch.reference index c5ee611a230..19c7956ba84 100644 --- a/tests/queries/0_stateless/02811_csv_input_field_type_mismatch.reference +++ b/tests/queries/0_stateless/02811_csv_input_field_type_mismatch.reference @@ -1,3 +1,5 @@ -a 1 2023-03-14 -a 0 1970-01-01 -c 1 1970-01-01 +0 111 1970-01-01 2023-03-24 00:00:00 false +1 abc 2023-03-14 2023-03-14 11:22:33 true +2 c 1970-01-01 1970-01-01 08:00:00 false +4 888 2023-03-14 1970-06-03 14:43:53 false +5 bks 1970-01-01 2023-07-19 18:17:59 false diff --git a/tests/queries/0_stateless/02811_csv_input_field_type_mismatch.sh b/tests/queries/0_stateless/02811_csv_input_field_type_mismatch.sh index df736ea6792..3961664b9b3 100644 --- a/tests/queries/0_stateless/02811_csv_input_field_type_mismatch.sh +++ b/tests/queries/0_stateless/02811_csv_input_field_type_mismatch.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
"$CURDIR"/../shell_config.sh $CLICKHOUSE_CLIENT -q "drop table if exists test_tbl" -$CLICKHOUSE_CLIENT -q "create table test_tbl (x String, y UInt32, z Date) engine=MergeTree order by x" -cat $CURDIR/data_csv/csv_with_diff_field_types.csv | ${CLICKHOUSE_CLIENT} -q "INSERT INTO test_tbl SETTINGS input_format_csv_allow_check_deserialize=true, input_format_csv_set_default_if_deserialize_failed=true FORMAT CSV" +$CLICKHOUSE_CLIENT -q "create table test_tbl (a Int32, b String, c Date, d DateTime, e Boolean) engine=MergeTree order by a" +cat $CURDIR/data_csv/csv_with_bad_field_values.csv | ${CLICKHOUSE_CLIENT} -q "INSERT INTO test_tbl SETTINGS input_format_csv_use_default_on_bad_values=true FORMAT CSV" $CLICKHOUSE_CLIENT -q "select * from test_tbl" $CLICKHOUSE_CLIENT -q "drop table test_tbl" \ No newline at end of file diff --git a/tests/queries/0_stateless/data_csv/csv_with_bad_field_values.csv b/tests/queries/0_stateless/data_csv/csv_with_bad_field_values.csv new file mode 100644 index 00000000000..faedd9b6705 --- /dev/null +++ b/tests/queries/0_stateless/data_csv/csv_with_bad_field_values.csv @@ -0,0 +1,5 @@ +1,abc,2023-03-14,2023-03-14 11:22:33,true +2,c,ab,2023,false +bc,111,ab,2023-03-24,ban +4,888,2023-03-14,13243433,false +5,bks,2023-03,1689761879,abdd \ No newline at end of file diff --git a/tests/queries/0_stateless/data_csv/csv_with_diff_field_types.csv b/tests/queries/0_stateless/data_csv/csv_with_diff_field_types.csv deleted file mode 100644 index 464172c515c..00000000000 --- a/tests/queries/0_stateless/data_csv/csv_with_diff_field_types.csv +++ /dev/null @@ -1,3 +0,0 @@ -a,1,2023-03-14 -a,b,c -c,1,a \ No newline at end of file From 380b4ffe2be4107ae3965cba19c5b697e7108128 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 19 Jul 2023 12:29:39 +0200 Subject: [PATCH 1750/1997] Reduce dependencies for skim by avoid using default features By default skim requires cli -> clap -> termcolor -> winapi-util Signed-off-by: Azat Khuzhin --- rust/skim/Cargo.lock | 204 ++++++++++++------------------------------- rust/skim/Cargo.toml | 2 +- 2 files changed, 58 insertions(+), 148 deletions(-) diff --git a/rust/skim/Cargo.lock b/rust/skim/Cargo.lock index 9f948ee1c38..f55ea8a84b0 100644 --- a/rust/skim/Cargo.lock +++ b/rust/skim/Cargo.lock @@ -42,17 +42,6 @@ version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" -[[package]] -name = "atty" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" -dependencies = [ - "hermit-abi 0.1.19", - "libc", - "winapi", -] - [[package]] name = "autocfg" version = "1.1.0" @@ -104,31 +93,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "clap" -version = "3.2.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123" -dependencies = [ - "atty", - "bitflags", - "clap_lex", - "indexmap", - "once_cell", - "strsim", - "termcolor", - "textwrap", -] - -[[package]] -name = "clap_lex" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5" -dependencies = [ - "os_str_bytes", -] - [[package]] name = "codespan-reporting" version = "0.11.1" @@ -214,9 +178,9 @@ dependencies = [ [[package]] name = "cxx" -version = "1.0.97" +version = "1.0.101" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "e88abab2f5abbe4c56e8f1fb431b784d710b709888f35755a160e62e33fe38e8" +checksum = "5032837c1384de3708043de9d4e97bb91290faca6c16529a28aa340592a78166" dependencies = [ "cc", "cxxbridge-flags", @@ -226,9 +190,9 @@ dependencies = [ [[package]] name = "cxx-build" -version = "1.0.97" +version = "1.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c0c11acd0e63bae27dcd2afced407063312771212b7a823b4fd72d633be30fb" +checksum = "51368b3d0dbf356e10fcbfd455a038503a105ee556f7ee79b6bb8c53a7247456" dependencies = [ "cc", "codespan-reporting", @@ -236,24 +200,24 @@ dependencies = [ "proc-macro2", "quote", "scratch", - "syn 2.0.23", + "syn 2.0.26", ] [[package]] name = "cxxbridge-flags" -version = "1.0.97" +version = "1.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d3816ed957c008ccd4728485511e3d9aaf7db419aa321e3d2c5a2f3411e36c8" +checksum = "0d9062157072e4aafc8e56ceaf8325ce850c5ae37578c852a0d4de2cecdded13" [[package]] name = "cxxbridge-macro" -version = "1.0.97" +version = "1.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a26acccf6f445af85ea056362561a24ef56cdc15fcc685f03aec50b9c702cb6d" +checksum = "cf01e8a540f5a4e0f284595834f81cf88572f244b768f051724537afa99a2545" dependencies = [ "proc-macro2", "quote", - "syn 2.0.23", + "syn 2.0.26", ] [[package]] @@ -359,19 +323,6 @@ version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" -[[package]] -name = "env_logger" -version = "0.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a12e6657c4c97ebab115a42dcee77225f7f482cdd841cf7088c657a42e9e00e7" -dependencies = [ - "atty", - "humantime", - "log", - "regex", - "termcolor", -] - [[package]] name = "fnv" version = "1.0.7" @@ -398,32 +349,11 @@ dependencies = [ "wasi 0.11.0+wasi-snapshot-preview1", ] -[[package]] -name = "hashbrown" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" - [[package]] name = "hermit-abi" -version = "0.1.19" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" -dependencies = [ - "libc", -] - -[[package]] -name = "hermit-abi" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" - -[[package]] -name = "humantime" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" +checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b" [[package]] name = "iana-time-zone" @@ -454,16 +384,6 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" -[[package]] -name = "indexmap" -version = "1.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" -dependencies = [ - "autocfg", - "hashbrown", -] - [[package]] name = "js-sys" version = "0.3.64" @@ -487,9 +407,9 @@ checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" 
[[package]] name = "link-cplusplus" -version = "1.0.8" +version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ecd207c9c713c34f95a097a5b029ac2ce6010530c7b49d7fea24d977dede04f5" +checksum = "9d240c6f7e1ba3a28b0249f774e6a9dd0175054b52dfbb61b16eb8505c3785c9" dependencies = [ "cc", ] @@ -564,7 +484,7 @@ version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" dependencies = [ - "hermit-abi 0.3.1", + "hermit-abi", "libc", ] @@ -574,12 +494,6 @@ version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" -[[package]] -name = "os_str_bytes" -version = "6.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d5d9eb14b174ee9aa2ef96dc2b94637a2d4b6e7cb873c7e171f0c20c6cf3eac" - [[package]] name = "pin-utils" version = "0.1.0" @@ -588,18 +502,18 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "proc-macro2" -version = "1.0.63" +version = "1.0.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b368fba921b0dce7e60f5e04ec15e565b3303972b42bcfde1d0713b881959eb" +checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.29" +version = "1.0.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "573015e8ab27661678357f27dc26460738fd2b6c86e46f386fde94cb5d913105" +checksum = "5fe8a65d69dd0808184ebb5f836ab526bb259db23c657efa38711b1072ee47f0" dependencies = [ "proc-macro2", ] @@ -648,9 +562,21 @@ dependencies = [ [[package]] name = "regex" -version = "1.8.4" +version = "1.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0ab3ca65655bb1e41f2a8c8cd662eb4fb035e67c3f78da1d61dffe89d07300f" +checksum = "b2eae68fc220f7cf2532e4494aded17545fce192d59cd996e0fe7887f4ceb575" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39354c10dd07468c2e73926b23bb9c2caca74c5501e38a35da70406f1d923310" dependencies = [ "aho-corasick", "memchr", @@ -659,39 +585,33 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.7.2" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "436b050e76ed2903236f032a59761c1eb99e1b0aead2c257922771dab1fc8c78" +checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2" [[package]] name = "rustversion" -version = "1.0.12" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f3208ce4d8448b3f3e7d168a73f5e0c43a61e32930de3bceeccedb388b6bf06" +checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" [[package]] name = "scopeguard" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "scratch" -version = "1.0.5" +version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1792db035ce95be60c3f8853017b3999209281c24e2ba5bc8e59bf97a0c590c1" 
+checksum = "a3cf7c11c38cb994f3d40e8a8cde3bbd1f72a435e4c49e85d6553d8312306152" [[package]] name = "serde" -version = "1.0.164" +version = "1.0.171" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e8c8cf938e98f769bc164923b06dce91cea1751522f46f8466461af04c9027d" - -[[package]] -name = "shlex" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3" +checksum = "30e27d1e4fd7659406c492fd6cfaf2066ba8773de45ca75e855590f856dc34a9" [[package]] name = "skim" @@ -699,23 +619,19 @@ version = "0.10.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5d28de0a6cb2cdd83a076f1de9d965b973ae08b244df1aa70b432946dda0f32" dependencies = [ - "atty", "beef", "bitflags", "chrono", - "clap", "crossbeam", "defer-drop", "derive_builder", - "env_logger", "fuzzy-matcher", "lazy_static", "log", "nix 0.25.1", "rayon", "regex", - "shlex", - "time 0.3.22", + "time 0.3.23", "timer", "tuikit", "unicode-width", @@ -741,9 +657,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.23" +version = "2.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59fb7d6d8281a51045d62b8eb3a7d1ce347b76f312af50cd3dc0af39c87c1737" +checksum = "45c3457aacde3c65315de5031ec191ce46604304d2446e803d71ade03308d970" dependencies = [ "proc-macro2", "quote", @@ -770,30 +686,24 @@ dependencies = [ "winapi-util", ] -[[package]] -name = "textwrap" -version = "0.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d" - [[package]] name = "thiserror" -version = "1.0.40" +version = "1.0.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "978c9a314bd8dc99be594bc3c175faaa9794be04a5a5e153caba6915336cebac" +checksum = "a35fc5b8971143ca348fa6df4f024d4d55264f3468c71ad1c2f365b0a4d58c42" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.40" +version = "1.0.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" +checksum = "463fe12d7993d3b327787537ce8dd4dfa058de32fc2b195ef3cde03dc4771e8f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.23", + "syn 2.0.26", ] [[package]] @@ -819,9 +729,9 @@ dependencies = [ [[package]] name = "time" -version = "0.3.22" +version = "0.3.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea9e1b3cf1243ae005d9e74085d4d542f3125458f3a81af210d901dcd7411efd" +checksum = "59e399c068f43a5d116fedaf73b203fa4f9c519f17e2b34f63221d3792f81446" dependencies = [ "serde", "time-core", @@ -858,9 +768,9 @@ dependencies = [ [[package]] name = "unicode-ident" -version = "1.0.9" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b15811caf2415fb889178633e7724bad2509101cde276048e013b9def5e51fa0" +checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" [[package]] name = "unicode-width" @@ -928,7 +838,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.23", + "syn 2.0.26", "wasm-bindgen-shared", ] @@ -950,7 +860,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.23", + "syn 2.0.26", "wasm-bindgen-backend", "wasm-bindgen-shared", ] diff --git a/rust/skim/Cargo.toml b/rust/skim/Cargo.toml index 
e5801a26f77..0381ad81619 100644 --- a/rust/skim/Cargo.toml +++ b/rust/skim/Cargo.toml @@ -6,7 +6,7 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -skim = "0.10.2" +skim = { version = "0.10.2", default-features = false } cxx = "1.0.83" term = "0.7.0" From af6361e2a0c78f45500a37bc67f563bd74412076 Mon Sep 17 00:00:00 2001 From: Alexander Sapin Date: Wed, 19 Jul 2023 12:35:52 +0200 Subject: [PATCH 1751/1997] Fix 02725_memory-for-merges --- tests/queries/0_stateless/02725_memory-for-merges.sql | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/02725_memory-for-merges.sql b/tests/queries/0_stateless/02725_memory-for-merges.sql index 347c8b2a8d3..1a8402dff4b 100644 --- a/tests/queries/0_stateless/02725_memory-for-merges.sql +++ b/tests/queries/0_stateless/02725_memory-for-merges.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-s3-storage, no-random-merge-tree-settings -- We allocate a lot of memory for buffers when reading or writing to S3 DROP TABLE IF EXISTS 02725_memory_for_merges SYNC; @@ -21,7 +21,6 @@ OPTIMIZE TABLE 02725_memory_for_merges FINAL; SYSTEM FLUSH LOGS; -WITH (SELECT uuid FROM system.tables WHERE table='02725_memory_for_merges' and database=currentDatabase()) as uuid -SELECT (sum(peak_memory_usage) < 1024 * 1024 * 200 AS x) ? x : sum(peak_memory_usage) from system.part_log where table_uuid=uuid and event_type='MergeParts'; +SELECT (sum(peak_memory_usage) < 1024 * 1024 * 200 AS x) ? x : sum(peak_memory_usage) from system.part_log where database=currentDatabase() and table='02725_memory_for_merges' and event_type='MergeParts'; DROP TABLE IF EXISTS 02725_memory_for_merges SYNC; From 08409059cc198873ffbf11060bfdabaa0c74f07f Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Wed, 19 Jul 2023 18:46:20 +0800 Subject: [PATCH 1752/1997] support alias for new analyzer --- src/Analyzer/Passes/UniqToCountPass.cpp | 108 +++++++++++++----- .../test_rewrite_uniq_to_count/test.py | 16 +-- 2 files changed, 90 insertions(+), 34 deletions(-) diff --git a/src/Analyzer/Passes/UniqToCountPass.cpp b/src/Analyzer/Passes/UniqToCountPass.cpp index ae7952051e7..7533a99107b 100644 --- a/src/Analyzer/Passes/UniqToCountPass.cpp +++ b/src/Analyzer/Passes/UniqToCountPass.cpp @@ -21,36 +21,82 @@ bool matchFnUniq(String func_name) || name == "uniqCombined64"; } -bool nodeEquals(const QueryTreeNodePtr & lhs, const QueryTreeNodePtr & rhs) +/// Extract the corresponding projection columns for group by node list. 
+/// For example: +/// SELECT a as aa, any(b) FROM table group by a; -> aa(ColumnNode) +NamesAndTypes extractProjectionColumnsForGroupBy(const QueryNode * query_node) { - auto * lhs_node = lhs->as(); - auto * rhs_node = rhs->as(); + if (!query_node->hasGroupBy()) + return {}; - if (lhs_node && rhs_node && lhs_node->getColumn() == rhs_node->getColumn()) - return true; - return false; + NamesAndTypes result; + for (const auto & group_by_ele : query_node->getGroupByNode()->getChildren()) + { + const auto & projection_columns = query_node->getProjectionColumns(); + const auto & projection_nodes = query_node->getProjection().getNodes(); + + assert(projection_columns.size() == projection_nodes.size()); + + for (size_t i = 0; i < projection_columns.size(); i++) + { + if (projection_nodes[i]->isEqual(*group_by_ele)) + result.push_back(projection_columns[i]); + } + } + return result; } -bool nodeListEquals(const QueryTreeNodes & lhs, const QueryTreeNodes & rhs) +/// Whether query_columns equals subquery_columns. +/// query_columns: query columns from query +/// subquery_columns: projection columns from subquery +bool nodeListEquals(const QueryTreeNodes & query_columns, const NamesAndTypes & subquery_columns) { - if (lhs.size() != rhs.size()) + if (query_columns.size() != subquery_columns.size()) return false; - for (size_t i = 0; i < lhs.size(); i++) + + for (const auto & query_column : query_columns) { - if (!nodeEquals(lhs[i], rhs[i])) + auto find = std::find_if( + subquery_columns.begin(), + subquery_columns.end(), + [&](const auto & subquery_column) -> bool + { + if (auto * column_node = query_column->as()) + { + return subquery_column == column_node->getColumn(); + } + return false; + }); + + if (find == subquery_columns.end()) return false; } return true; } -bool nodeListContainsAll(const QueryTreeNodes & lhs, const QueryTreeNodes & rhs) +/// Whether subquery_columns contains all columns in subquery_columns. 
+/// query_columns: query columns from query +/// subquery_columns: projection columns from subquery +bool nodeListContainsAll(const QueryTreeNodes & query_columns, const NamesAndTypes & subquery_columns) { - if (lhs.size() < rhs.size()) + if (query_columns.size() > subquery_columns.size()) return false; - for (const auto & re : rhs) + + for (const auto & query_column : query_columns) { - auto predicate = [&](const QueryTreeNodePtr & le) { return nodeEquals(le, re); }; - if (std::find_if(lhs.begin(), lhs.end(), predicate) == lhs.end()) + auto find = std::find_if( + subquery_columns.begin(), + subquery_columns.end(), + [&](const auto & subquery_column) -> bool + { + if (auto * column_node = query_column->as()) + { + return subquery_column == column_node->getColumn(); + } + return false; + }); + + if (find == subquery_columns.end()) return false; } return true; @@ -58,17 +104,14 @@ bool nodeListContainsAll(const QueryTreeNodes & lhs, const QueryTreeNodes & rhs) } -class UniqToCountVisitor : public InDepthQueryTreeVisitorWithContext +class UniqToCountVisitor : public InDepthQueryTreeVisitor { public: - using Base = InDepthQueryTreeVisitorWithContext; + using Base = InDepthQueryTreeVisitor; using Base::Base; void visitImpl(QueryTreeNodePtr & node) { - if (!getSettings().optimize_uniq_to_count) - return; - auto * query_node = node->as(); if (!query_node) return; @@ -100,9 +143,11 @@ public: { if (!subquery_node->isDistinct()) return false; - /// uniq expression list == subquery group by expression list - if (!nodeListEquals(uniq_arguments_nodes, subquery_node->getProjection().getNodes())) + + /// uniq expression list == subquery projection columns + if (!nodeListEquals(uniq_arguments_nodes, subquery_node->getProjectionColumns())) return false; + return true; }; @@ -111,12 +156,17 @@ public: { if (!subquery_node->hasGroupBy()) return false; + /// uniq argument node list == subquery group by node list - if (!nodeListEquals(uniq_arguments_nodes, subquery_node->getGroupByNode()->getChildren())) + auto group_by_columns = extractProjectionColumnsForGroupBy(subquery_node); + + if (!nodeListEquals(uniq_arguments_nodes, group_by_columns)) return false; - /// subquery select node list must contain all columns in uniq argument node list - if (!nodeListContainsAll(subquery_node->getProjection().getNodes(), uniq_arguments_nodes)) + + /// subquery projection columns must contain all columns in uniq argument node list + if (!nodeListContainsAll(uniq_arguments_nodes, subquery_node->getProjectionColumns())) return false; + return true; }; @@ -125,8 +175,11 @@ public: { AggregateFunctionProperties properties; auto aggregate_function = AggregateFunctionFactory::instance().get("count", {}, {}, properties); + function_node->resolveAsAggregateFunction(std::move(aggregate_function)); function_node->getArguments().getNodes().clear(); + + /// Update projection columns query_node->resolveProjectionColumns({{"count()", function_node->getResultType()}}); } } @@ -135,7 +188,10 @@ public: void UniqToCountPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) { - UniqToCountVisitor visitor(std::move(context)); + if (!context->getSettings().optimize_uniq_to_count) + return; + + UniqToCountVisitor visitor; visitor.visit(query_tree_node); } diff --git a/tests/integration/test_rewrite_uniq_to_count/test.py b/tests/integration/test_rewrite_uniq_to_count/test.py index d7fa9f39441..e38e57f5cee 100644 --- a/tests/integration/test_rewrite_uniq_to_count/test.py +++ b/tests/integration/test_rewrite_uniq_to_count/test.py @@ 
-83,13 +83,13 @@ def test_rewrite_distinct(started_cluster): ) # test select expression alias - check_by_old_analyzer( - "SELECT uniq(a) FROM (SELECT DISTINCT test_rewrite_uniq_to_count.a as alias_of_a FROM test_rewrite_uniq_to_count) t", + check( + "SELECT uniq(alias_of_a) FROM (SELECT DISTINCT test_rewrite_uniq_to_count.a as alias_of_a FROM test_rewrite_uniq_to_count) t", 3, ) # test select expression alias - check_by_old_analyzer( + check( "SELECT uniq(alias_of_a) FROM (SELECT DISTINCT a as alias_of_a FROM test_rewrite_uniq_to_count) t", 3, ) @@ -109,19 +109,19 @@ def test_rewrite_group_by(started_cluster): ) # test select expression alias - check_by_old_analyzer( + check( "SELECT uniq(t.alias_of_a) FROM (SELECT a as alias_of_a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY a) t", 3, ) # test select expression alias - check_by_old_analyzer( - "SELECT uniq(t.a) FROM (SELECT a as alias_of_a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY alias_of_a) t", + check( + "SELECT uniq(t.alias_of_a) FROM (SELECT a as alias_of_a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY alias_of_a) t", 3, ) # test select expression alias - check_by_old_analyzer( - "SELECT uniq(t.alias_of_a) FROM (SELECT a as alias_of_a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY alias_of_a) t", + check( + "SELECT uniq(t.alias_of_a) FROM (SELECT a as alias_of_a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY a) t", 3, ) From 2ebbbf0000ce7f5767d754b0aee777a4255ab7b3 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Wed, 19 Jul 2023 19:13:45 +0800 Subject: [PATCH 1753/1997] Also need to fix aggregate projections --- .../QueryPlan/Optimizations/optimizeTree.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp index 01d192bb1f3..b13dda9a8f0 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp @@ -114,6 +114,10 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s while (!stack.empty()) { + /// NOTE: optimizePrewhere can modify the stack. + optimizePrewhere(stack, nodes); + optimizePrimaryKeyCondition(stack); + { /// NOTE: frame cannot be safely used after stack was modified. auto & frame = stack.back(); @@ -125,6 +129,7 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s if (optimization_settings.read_in_order) optimizeReadInOrder(*frame.node, nodes); + /// Projection optimization relies on PK optimization if (optimization_settings.optimize_projection) num_applied_projection += optimizeUseAggregateProjections(*frame.node, nodes, optimization_settings.optimize_use_implicit_projections); @@ -146,13 +151,9 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s } } - /// NOTE: optimizePrewhere can modify the stack. 
- optimizePrewhere(stack, nodes); - optimizePrimaryKeyCondition(stack); - if (optimization_settings.optimize_projection) { - /// Normal projection optimization relies on PK optimization + /// Projection optimization relies on PK optimization if (optimizeUseNormalProjections(stack, nodes)) { ++num_applied_projection; From 94796f28adcd5b304b9fbc8a715462f4cfb1c1fd Mon Sep 17 00:00:00 2001 From: kevinyhzou Date: Wed, 19 Jul 2023 19:24:16 +0800 Subject: [PATCH 1754/1997] ci fix --- docs/en/interfaces/formats.md | 2 +- docs/en/operations/settings/settings-formats.md | 2 +- src/Core/Settings.h | 2 +- .../queries/0_stateless/02811_csv_input_field_type_mismatch.sh | 0 4 files changed, 3 insertions(+), 3 deletions(-) mode change 100644 => 100755 tests/queries/0_stateless/02811_csv_input_field_type_mismatch.sh diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index c20f304c346..ddf4ab3f78e 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -472,7 +472,7 @@ The CSV format supports the output of totals and extremes the same way as `TabSe - [input_format_csv_trim_whitespaces](/docs/en/operations/settings/settings-formats.md/#input_format_csv_trim_whitespaces) - trim spaces and tabs in non-quoted CSV strings. Default value - `true`. - [input_format_csv_allow_whitespace_or_tab_as_delimiter](/docs/en/operations/settings/settings-formats.md/# input_format_csv_allow_whitespace_or_tab_as_delimiter) - Allow to use whitespace or tab as field delimiter in CSV strings. Default value - `false`. - [input_format_csv_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_variable_number_of_columns) - ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values. Default value - `false`. -- [input_format_csv_use_default_on_bad_values](/docs/en/operations/settings/settings-formats.md/#input_format_csv_use_default_on_bad_values) - Allow to set default value to column when CSV field deserialize failed on bad value. Default value - `false`. +- [input_format_csv_use_default_on_bad_values](/docs/en/operations/settings/settings-formats.md/#input_format_csv_use_default_on_bad_values) - Allow to set default value to column when CSV field deserialization failed on bad value. Default value - `false`. ## CSVWithNames {#csvwithnames} diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index 5fac8df02d7..fb04ac23d3a 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -971,7 +971,7 @@ a b ### input_format_csv_use_default_on_bad_values {#input_format_csv_use_default_on_bad_values} -Allow to set default value to column when CSV field deserialize failed on bad value +Allow to set default value to column when CSV field deserialization failed on bad value Default value: `false`. 
diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 311813fb38f..309dfe0d2ec 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -872,7 +872,7 @@ class IColumn; M(Bool, input_format_csv_detect_header, true, "Automatically detect header with names and types in CSV format", 0) \ M(Bool, input_format_csv_allow_whitespace_or_tab_as_delimiter, false, "Allow to use spaces and tabs(\\t) as field delimiter in the CSV strings", 0) \ M(Bool, input_format_csv_trim_whitespaces, true, "Trims spaces and tabs (\\t) characters at the beginning and end in CSV strings", 0) \ - M(Bool, input_format_csv_use_default_on_bad_values, false, "Allow to set default value to column when CSV field deserialize failed on bad value", 0) \ + M(Bool, input_format_csv_use_default_on_bad_values, false, "Allow to set default value to column when CSV field deserialization failed on bad value", 0) \ M(Bool, input_format_tsv_detect_header, true, "Automatically detect header with names and types in TSV format", 0) \ M(Bool, input_format_custom_detect_header, true, "Automatically detect header with names and types in CustomSeparated format", 0) \ M(Bool, input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for format Parquet", 0) \ diff --git a/tests/queries/0_stateless/02811_csv_input_field_type_mismatch.sh b/tests/queries/0_stateless/02811_csv_input_field_type_mismatch.sh old mode 100644 new mode 100755 From 7837559dbfdc194f28681dda808bc06b6609dd8b Mon Sep 17 00:00:00 2001 From: Song Liyong Date: Wed, 12 Jul 2023 17:13:04 +0200 Subject: [PATCH 1755/1997] MaterializedMySQL: Support CREATE TABLE AS SELECT --- src/Core/MySQL/MySQLReplication.cpp | 11 +++++++ .../materialized_with_ddl.py | 29 +++++++++++++++++++ .../test_materialized_mysql_database/test.py | 6 ++++ 3 files changed, 46 insertions(+) diff --git a/src/Core/MySQL/MySQLReplication.cpp b/src/Core/MySQL/MySQLReplication.cpp index 1ee027b7185..ab4a37d2466 100644 --- a/src/Core/MySQL/MySQLReplication.cpp +++ b/src/Core/MySQL/MySQLReplication.cpp @@ -121,6 +121,17 @@ namespace MySQLReplication { typ = QUERY_SAVEPOINT; } + + // https://dev.mysql.com/worklog/task/?id=13355 + // When doing query "CREATE TABLE xx AS SELECT", the binlog will be + // "CREATE TABLE ... 
START TRANSACTION", the DDL will be failed + // so, just ignore the "START TRANSACTION" suffix + if (query.ends_with("START TRANSACTION")) + { + auto pos = query.rfind("START TRANSACTION"); + if (pos > 0) + query.resize(pos); + } } void QueryEvent::dump(WriteBuffer & out) const diff --git a/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py b/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py index 8cf9e67bf63..60326e422c9 100644 --- a/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py +++ b/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py @@ -2336,3 +2336,32 @@ def named_collections(clickhouse_node, mysql_node, service_name): ) clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}") mysql_node.query(f"DROP DATABASE IF EXISTS {db}") + + +def create_table_as_select(clickhouse_node, mysql_node, service_name): + db = "create_table_as_select" + mysql_node.query(f"DROP DATABASE IF EXISTS {db}") + clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}") + mysql_node.query(f"CREATE DATABASE {db}") + clickhouse_node.query( + f"CREATE DATABASE {db} ENGINE = MaterializeMySQL('{service_name}:3306', '{db}', 'root', 'clickhouse')" + ) + mysql_node.query( + f"CREATE TABLE {db}.t1(a INT NOT NULL PRIMARY KEY) ENGINE = InnoDB" + ) + mysql_node.query(f"INSERT INTO {db}.t1 VALUES (1)") + check_query( + clickhouse_node, + f"SHOW TABLES FROM {db} FORMAT TSV", + "t1\n", + ) + + mysql_node.query(f"CREATE TABLE {db}.t2(PRIMARY KEY(a)) AS SELECT * FROM {db}.t1") + check_query( + clickhouse_node, + f"SHOW TABLES FROM {db} FORMAT TSV", + "t1\nt2\n", + ) + + clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}") + mysql_node.query(f"DROP DATABASE IF EXISTS {db}") diff --git a/tests/integration/test_materialized_mysql_database/test.py b/tests/integration/test_materialized_mysql_database/test.py index 21316d1a474..f227c19e6b8 100644 --- a/tests/integration/test_materialized_mysql_database/test.py +++ b/tests/integration/test_materialized_mysql_database/test.py @@ -529,3 +529,9 @@ def test_named_collections(started_cluster, started_mysql_8_0, clickhouse_node): materialized_with_ddl.named_collections( clickhouse_node, started_mysql_8_0, "mysql80" ) + + +def test_create_table_as_select(started_cluster, started_mysql_8_0, clickhouse_node): + materialized_with_ddl.create_table_as_select( + clickhouse_node, started_mysql_8_0, "mysql80" + ) From dcf7ba25348f88bda0ef144ce068cc9005cb3ada Mon Sep 17 00:00:00 2001 From: kevinyhzou Date: Wed, 19 Jul 2023 19:36:19 +0800 Subject: [PATCH 1756/1997] remove unuseful code --- docs/en/operations/settings/settings-formats.md | 3 +-- src/Processors/Formats/Impl/CSVRowInputFormat.cpp | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index fb04ac23d3a..b3bc3afafd3 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -980,9 +980,8 @@ Default value: `false`. 
Query ```bash -echo 'a,b,c' > 1.txt ./clickhouse local -q "create table test_tbl (x String, y UInt32, z Date) engine=MergeTree order by x" -cat 1.txt | ./clickhouse local -q "INSERT INTO test_tbl SETTINGS input_format_csv_use_default_on_bad_values=true FORMAT CSV" +echo 'a,b,c' | ./clickhouse local -q "INSERT INTO test_tbl SETTINGS input_format_csv_use_default_on_bad_values=true FORMAT CSV" ./clickhouse local -q "select * from test_tbl" ``` diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index 34d5b589591..244b906549e 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -12,7 +12,6 @@ #include #include #include -#include namespace DB From cc9da46efa2af4dfd4f8dfdfa84327f5f14a8630 Mon Sep 17 00:00:00 2001 From: kevinyhzou Date: Wed, 19 Jul 2023 20:11:03 +0800 Subject: [PATCH 1757/1997] ci fix --- .../02811_csv_input_field_type_mismatch.reference | 10 +++++----- .../0_stateless/02811_csv_input_field_type_mismatch.sh | 2 +- .../0_stateless/data_csv/csv_with_bad_field_values.csv | 10 +++++----- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/queries/0_stateless/02811_csv_input_field_type_mismatch.reference b/tests/queries/0_stateless/02811_csv_input_field_type_mismatch.reference index 19c7956ba84..6abcc56bacc 100644 --- a/tests/queries/0_stateless/02811_csv_input_field_type_mismatch.reference +++ b/tests/queries/0_stateless/02811_csv_input_field_type_mismatch.reference @@ -1,5 +1,5 @@ -0 111 1970-01-01 2023-03-24 00:00:00 false -1 abc 2023-03-14 2023-03-14 11:22:33 true -2 c 1970-01-01 1970-01-01 08:00:00 false -4 888 2023-03-14 1970-06-03 14:43:53 false -5 bks 1970-01-01 2023-07-19 18:17:59 false +0 111 1970-01-01 false +1 abc 2023-03-14 true +2 c 1970-01-01 false +4 888 2023-03-14 false +5 bks 1970-01-01 false diff --git a/tests/queries/0_stateless/02811_csv_input_field_type_mismatch.sh b/tests/queries/0_stateless/02811_csv_input_field_type_mismatch.sh index 3961664b9b3..30223329eca 100755 --- a/tests/queries/0_stateless/02811_csv_input_field_type_mismatch.sh +++ b/tests/queries/0_stateless/02811_csv_input_field_type_mismatch.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
"$CURDIR"/../shell_config.sh $CLICKHOUSE_CLIENT -q "drop table if exists test_tbl" -$CLICKHOUSE_CLIENT -q "create table test_tbl (a Int32, b String, c Date, d DateTime, e Boolean) engine=MergeTree order by a" +$CLICKHOUSE_CLIENT -q "create table test_tbl (a Int32, b String, c Date, e Boolean) engine=MergeTree order by a" cat $CURDIR/data_csv/csv_with_bad_field_values.csv | ${CLICKHOUSE_CLIENT} -q "INSERT INTO test_tbl SETTINGS input_format_csv_use_default_on_bad_values=true FORMAT CSV" $CLICKHOUSE_CLIENT -q "select * from test_tbl" $CLICKHOUSE_CLIENT -q "drop table test_tbl" \ No newline at end of file diff --git a/tests/queries/0_stateless/data_csv/csv_with_bad_field_values.csv b/tests/queries/0_stateless/data_csv/csv_with_bad_field_values.csv index faedd9b6705..e829cc0106a 100644 --- a/tests/queries/0_stateless/data_csv/csv_with_bad_field_values.csv +++ b/tests/queries/0_stateless/data_csv/csv_with_bad_field_values.csv @@ -1,5 +1,5 @@ -1,abc,2023-03-14,2023-03-14 11:22:33,true -2,c,ab,2023,false -bc,111,ab,2023-03-24,ban -4,888,2023-03-14,13243433,false -5,bks,2023-03,1689761879,abdd \ No newline at end of file +1,abc,2023-03-14,true +2,c,ab,false +bc,111,ab,ban +4,888,2023-03-14,false +5,bks,2023-03,abdd \ No newline at end of file From 0c86df519ffa8921b6c546b304705366838dfa21 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 19 Jul 2023 12:41:25 +0000 Subject: [PATCH 1758/1997] Fix unspported disks in Keeper --- src/Coordination/KeeperContext.cpp | 31 ++++++++++++++++++- src/Disks/DiskSelector.cpp | 5 ++- src/Disks/DiskSelector.h | 3 +- .../configs/enable_keeper.xml | 4 +++ tests/integration/test_keeper_disks/test.py | 12 ++++++- 5 files changed, 51 insertions(+), 4 deletions(-) diff --git a/src/Coordination/KeeperContext.cpp b/src/Coordination/KeeperContext.cpp index 408344ee67f..32f8b98a7ed 100644 --- a/src/Coordination/KeeperContext.cpp +++ b/src/Coordination/KeeperContext.cpp @@ -41,9 +41,38 @@ void KeeperContext::initialize(const Poco::Util::AbstractConfiguration & config) initializeDisks(config); } +namespace +{ + +bool diskValidator(const Poco::Util::AbstractConfiguration & config, const std::string & disk_config_prefix) +{ + const auto disk_type = config.getString(disk_config_prefix + ".type", "local"); + + using namespace std::literals; + static constexpr std::array supported_disk_types + { + "s3"sv, + "s3_plain"sv, + "local"sv + }; + + if (std::all_of( + supported_disk_types.begin(), + supported_disk_types.end(), + [&](const auto supported_type) { return disk_type != supported_type; })) + { + LOG_INFO(&Poco::Logger::get("KeeperContext"), "Disk type '{}' is not supported for Keeper", disk_type); + return false; + } + + return true; +} + +} + void KeeperContext::initializeDisks(const Poco::Util::AbstractConfiguration & config) { - disk_selector->initialize(config, "storage_configuration.disks", Context::getGlobalContextInstance()); + disk_selector->initialize(config, "storage_configuration.disks", Context::getGlobalContextInstance(), diskValidator); log_storage = getLogsPathFromConfig(config); diff --git a/src/Disks/DiskSelector.cpp b/src/Disks/DiskSelector.cpp index e51f79867b5..415e10a55fc 100644 --- a/src/Disks/DiskSelector.cpp +++ b/src/Disks/DiskSelector.cpp @@ -27,7 +27,7 @@ void DiskSelector::assertInitialized() const } -void DiskSelector::initialize(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) +void DiskSelector::initialize(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, 
ContextPtr context, DiskValidator disk_validator) { Poco::Util::AbstractConfiguration::Keys keys; config.keys(config_prefix, keys); @@ -46,6 +46,9 @@ void DiskSelector::initialize(const Poco::Util::AbstractConfiguration & config, auto disk_config_prefix = config_prefix + "." + disk_name; + if (disk_validator && !disk_validator(config, disk_config_prefix)) + continue; + disks.emplace(disk_name, factory.create(disk_name, config, disk_config_prefix, context, disks)); } if (!has_default_disk) diff --git a/src/Disks/DiskSelector.h b/src/Disks/DiskSelector.h index 58adeb953db..c91c3acb3bd 100644 --- a/src/Disks/DiskSelector.h +++ b/src/Disks/DiskSelector.h @@ -23,7 +23,8 @@ public: DiskSelector() = default; DiskSelector(const DiskSelector & from) = default; - void initialize(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context); + using DiskValidator = std::function; + void initialize(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context, DiskValidator disk_validator = {}); DiskSelectorPtr updateFromConfig( const Poco::Util::AbstractConfiguration & config, diff --git a/tests/integration/test_keeper_disks/configs/enable_keeper.xml b/tests/integration/test_keeper_disks/configs/enable_keeper.xml index 5814979229c..50d0329637a 100644 --- a/tests/integration/test_keeper_disks/configs/enable_keeper.xml +++ b/tests/integration/test_keeper_disks/configs/enable_keeper.xml @@ -1,6 +1,10 @@ + + hdfs + hdfs://hdfs1:9000/ + local /var/lib/clickhouse/coordination/logs/ diff --git a/tests/integration/test_keeper_disks/test.py b/tests/integration/test_keeper_disks/test.py index 11bb215be54..86682bcde01 100644 --- a/tests/integration/test_keeper_disks/test.py +++ b/tests/integration/test_keeper_disks/test.py @@ -9,7 +9,11 @@ import os CURRENT_TEST_DIR = os.path.dirname(os.path.abspath(__file__)) cluster = ClickHouseCluster(__file__) node = cluster.add_instance( - "node", main_configs=["configs/enable_keeper.xml"], stay_alive=True, with_minio=True + "node", + main_configs=["configs/enable_keeper.xml"], + stay_alive=True, + with_minio=True, + with_hdfs=True, ) from kazoo.client import KazooClient, KazooState @@ -117,6 +121,12 @@ def get_local_snapshots(): return get_local_files("/var/lib/clickhouse/coordination/snapshots") +def test_supported_disk_types(started_cluster): + node.stop_clickhouse() + node.start_clickhouse() + assert node.contains_in_log("Disk type 'hdfs' is not supported for Keeper") + + def test_logs_with_disks(started_cluster): setup_local_storage(started_cluster) From 7b3564f96aa44bde8aa33914930ca3bbf1c5f52e Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 19 Jul 2023 14:44:59 +0200 Subject: [PATCH 1759/1997] Revert "Improve CSVInputFormat to check and set default value to column if deserialize failed" --- docs/en/interfaces/formats.md | 1 - .../operations/settings/settings-formats.md | 22 ---------- src/Core/Settings.h | 1 - src/Formats/FormatFactory.cpp | 1 - src/Formats/FormatSettings.h | 1 - .../Formats/Impl/CSVRowInputFormat.cpp | 42 +------------------ .../Formats/Impl/CSVRowInputFormat.h | 2 - ...11_csv_input_field_type_mismatch.reference | 5 --- .../02811_csv_input_field_type_mismatch.sh | 13 ------ .../data_csv/csv_with_bad_field_values.csv | 5 --- 10 files changed, 2 insertions(+), 91 deletions(-) delete mode 100644
tests/queries/0_stateless/02811_csv_input_field_type_mismatch.sh delete mode 100644 tests/queries/0_stateless/data_csv/csv_with_bad_field_values.csv diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index ddf4ab3f78e..ed2f010a632 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -472,7 +472,6 @@ The CSV format supports the output of totals and extremes the same way as `TabSe - [input_format_csv_trim_whitespaces](/docs/en/operations/settings/settings-formats.md/#input_format_csv_trim_whitespaces) - trim spaces and tabs in non-quoted CSV strings. Default value - `true`. - [input_format_csv_allow_whitespace_or_tab_as_delimiter](/docs/en/operations/settings/settings-formats.md/# input_format_csv_allow_whitespace_or_tab_as_delimiter) - Allow to use whitespace or tab as field delimiter in CSV strings. Default value - `false`. - [input_format_csv_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_variable_number_of_columns) - ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values. Default value - `false`. -- [input_format_csv_use_default_on_bad_values](/docs/en/operations/settings/settings-formats.md/#input_format_csv_use_default_on_bad_values) - Allow to set default value to column when CSV field deserialization failed on bad value. Default value - `false`. ## CSVWithNames {#csvwithnames} diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index c8adc83d3ad..0915c51806a 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -989,28 +989,6 @@ Result a b ``` -### input_format_csv_use_default_on_bad_values {#input_format_csv_use_default_on_bad_values} - -Allow to set default value to column when CSV field deserialization failed on bad value - -Default value: `false`. 
- -**Examples** - -Query - -```bash -./clickhouse local -q "create table test_tbl (x String, y UInt32, z Date) engine=MergeTree order by x" -echo 'a,b,c' | ./clickhouse local -q "INSERT INTO test_tbl SETTINGS input_format_csv_use_default_on_bad_values=true FORMAT CSV" -./clickhouse local -q "select * from test_tbl" -``` - -Result - -```text -a 0 1971-01-01 -``` - ## Values format settings {#values-format-settings} ### input_format_values_interpret_expressions {#input_format_values_interpret_expressions} diff --git a/src/Core/Settings.h b/src/Core/Settings.h index a4cb0c2dbd9..730b6ab80ed 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -874,7 +874,6 @@ class IColumn; M(Bool, input_format_csv_detect_header, true, "Automatically detect header with names and types in CSV format", 0) \ M(Bool, input_format_csv_allow_whitespace_or_tab_as_delimiter, false, "Allow to use spaces and tabs(\\t) as field delimiter in the CSV strings", 0) \ M(Bool, input_format_csv_trim_whitespaces, true, "Trims spaces and tabs (\\t) characters at the beginning and end in CSV strings", 0) \ - M(Bool, input_format_csv_use_default_on_bad_values, false, "Allow to set default value to column when CSV field deserialization failed on bad value", 0) \ M(Bool, input_format_tsv_detect_header, true, "Automatically detect header with names and types in TSV format", 0) \ M(Bool, input_format_custom_detect_header, true, "Automatically detect header with names and types in CustomSeparated format", 0) \ M(Bool, input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for format Parquet", 0) \ diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 6e3e086859b..8eacc7acc97 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -73,7 +73,6 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.csv.trim_whitespaces = settings.input_format_csv_trim_whitespaces; format_settings.csv.allow_whitespace_or_tab_as_delimiter = settings.input_format_csv_allow_whitespace_or_tab_as_delimiter; format_settings.csv.allow_variable_number_of_columns = settings.input_format_csv_allow_variable_number_of_columns; - format_settings.csv.use_default_on_bad_values = settings.input_format_csv_use_default_on_bad_values; format_settings.hive_text.fields_delimiter = settings.input_format_hive_text_fields_delimiter; format_settings.hive_text.collection_items_delimiter = settings.input_format_hive_text_collection_items_delimiter; format_settings.hive_text.map_keys_delimiter = settings.input_format_hive_text_map_keys_delimiter; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index e321e5264ca..af90e4462dd 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -152,7 +152,6 @@ struct FormatSettings bool trim_whitespaces = true; bool allow_whitespace_or_tab_as_delimiter = false; bool allow_variable_number_of_columns = false; - bool use_default_on_bad_values = false; } csv; struct HiveText diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index 244b906549e..79ce2549b4d 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -1,5 +1,4 @@ #include -#include #include #include @@ -316,54 +315,17 @@ bool CSVFormatReader::readField( return false; } - if (format_settings.csv.use_default_on_bad_values) - 
return readFieldOrDefault(column, type, serialization); - return readFieldImpl(*buf, column, type, serialization); -} - -bool CSVFormatReader::readFieldImpl(ReadBuffer & istr, DB::IColumn & column, const DB::DataTypePtr & type, const DB::SerializationPtr & serialization) -{ if (format_settings.null_as_default && !isNullableOrLowCardinalityNullable(type)) { /// If value is null but type is not nullable then use default value instead. - return SerializationNullable::deserializeTextCSVImpl(column, istr, format_settings, serialization); + return SerializationNullable::deserializeTextCSVImpl(column, *buf, format_settings, serialization); } /// Read the column normally. - serialization->deserializeTextCSV(column, istr, format_settings); + serialization->deserializeTextCSV(column, *buf, format_settings); return true; } -bool CSVFormatReader::readFieldOrDefault(DB::IColumn & column, const DB::DataTypePtr & type, const DB::SerializationPtr & serialization) -{ - String field; - readCSVField(field, *buf, format_settings.csv); - ReadBufferFromString tmp_buf(field); - bool is_bad_value = false; - bool res = false; - - size_t col_size = column.size(); - try - { - res = readFieldImpl(tmp_buf, column, type, serialization); - /// Check if we parsed the whole field successfully. - if (!field.empty() && !tmp_buf.eof()) - is_bad_value = true; - } - catch (const Exception &) - { - is_bad_value = true; - } - - if (!is_bad_value) - return res; - - if (column.size() == col_size + 1) - column.popBack(1); - column.insertDefault(); - return false; -} - void CSVFormatReader::skipPrefixBeforeHeader() { for (size_t i = 0; i != format_settings.csv.skip_first_lines; ++i) diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.h b/src/Processors/Formats/Impl/CSVRowInputFormat.h index 7b1a1fc433d..8ccf04feed3 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.h +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.h @@ -89,8 +89,6 @@ public: void setReadBuffer(ReadBuffer & in_) override; FormatSettings::EscapingRule getEscapingRule() const override { return FormatSettings::EscapingRule::CSV; } - bool readFieldImpl(ReadBuffer & istr, DB::IColumn & column, const DB::DataTypePtr & type, const DB::SerializationPtr & serialization); - bool readFieldOrDefault(DB::IColumn & column, const DB::DataTypePtr & type, const DB::SerializationPtr & serialization); protected: PeekableReadBuffer * buf; diff --git a/tests/queries/0_stateless/02811_csv_input_field_type_mismatch.reference b/tests/queries/0_stateless/02811_csv_input_field_type_mismatch.reference deleted file mode 100644 index 6abcc56bacc..00000000000 --- a/tests/queries/0_stateless/02811_csv_input_field_type_mismatch.reference +++ /dev/null @@ -1,5 +0,0 @@ -0 111 1970-01-01 false -1 abc 2023-03-14 true -2 c 1970-01-01 false -4 888 2023-03-14 false -5 bks 1970-01-01 false diff --git a/tests/queries/0_stateless/02811_csv_input_field_type_mismatch.sh b/tests/queries/0_stateless/02811_csv_input_field_type_mismatch.sh deleted file mode 100755 index 30223329eca..00000000000 --- a/tests/queries/0_stateless/02811_csv_input_field_type_mismatch.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env bash - -# NOTE: this sh wrapper is required because of shell_config - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. 
"$CURDIR"/../shell_config.sh - -$CLICKHOUSE_CLIENT -q "drop table if exists test_tbl" -$CLICKHOUSE_CLIENT -q "create table test_tbl (a Int32, b String, c Date, e Boolean) engine=MergeTree order by a" -cat $CURDIR/data_csv/csv_with_bad_field_values.csv | ${CLICKHOUSE_CLIENT} -q "INSERT INTO test_tbl SETTINGS input_format_csv_use_default_on_bad_values=true FORMAT CSV" -$CLICKHOUSE_CLIENT -q "select * from test_tbl" -$CLICKHOUSE_CLIENT -q "drop table test_tbl" \ No newline at end of file diff --git a/tests/queries/0_stateless/data_csv/csv_with_bad_field_values.csv b/tests/queries/0_stateless/data_csv/csv_with_bad_field_values.csv deleted file mode 100644 index e829cc0106a..00000000000 --- a/tests/queries/0_stateless/data_csv/csv_with_bad_field_values.csv +++ /dev/null @@ -1,5 +0,0 @@ -1,abc,2023-03-14,true -2,c,ab,false -bc,111,ab,ban -4,888,2023-03-14,false -5,bks,2023-03,abdd \ No newline at end of file From f0026af1893772a7a14f21d11cc88307ba07500a Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 19 Jul 2023 14:51:11 +0200 Subject: [PATCH 1760/1997] Revert "Revert "Improve CSVInputFormat to check and set default value to column if deserialize failed"" --- docs/en/interfaces/formats.md | 1 + .../operations/settings/settings-formats.md | 22 ++++++++++ src/Core/Settings.h | 1 + src/Formats/FormatFactory.cpp | 1 + src/Formats/FormatSettings.h | 1 + .../Formats/Impl/CSVRowInputFormat.cpp | 42 ++++++++++++++++++- .../Formats/Impl/CSVRowInputFormat.h | 2 + ...11_csv_input_field_type_mismatch.reference | 5 +++ .../02811_csv_input_field_type_mismatch.sh | 13 ++++++ .../data_csv/csv_with_bad_field_values.csv | 5 +++ 10 files changed, 91 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/02811_csv_input_field_type_mismatch.reference create mode 100755 tests/queries/0_stateless/02811_csv_input_field_type_mismatch.sh create mode 100644 tests/queries/0_stateless/data_csv/csv_with_bad_field_values.csv diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index ed2f010a632..ddf4ab3f78e 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -472,6 +472,7 @@ The CSV format supports the output of totals and extremes the same way as `TabSe - [input_format_csv_trim_whitespaces](/docs/en/operations/settings/settings-formats.md/#input_format_csv_trim_whitespaces) - trim spaces and tabs in non-quoted CSV strings. Default value - `true`. - [input_format_csv_allow_whitespace_or_tab_as_delimiter](/docs/en/operations/settings/settings-formats.md/# input_format_csv_allow_whitespace_or_tab_as_delimiter) - Allow to use whitespace or tab as field delimiter in CSV strings. Default value - `false`. - [input_format_csv_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_variable_number_of_columns) - ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values. Default value - `false`. +- [input_format_csv_use_default_on_bad_values](/docs/en/operations/settings/settings-formats.md/#input_format_csv_use_default_on_bad_values) - Allow to set default value to column when CSV field deserialization failed on bad value. Default value - `false`. 
## CSVWithNames {#csvwithnames} diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index 0915c51806a..c8adc83d3ad 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -989,6 +989,28 @@ Result a b ``` +### input_format_csv_use_default_on_bad_values {#input_format_csv_use_default_on_bad_values} + +Allow to set default value to column when CSV field deserialization failed on bad value + +Default value: `false`. + +**Examples** + +Query + +```bash +./clickhouse local -q "create table test_tbl (x String, y UInt32, z Date) engine=MergeTree order by x" +echo 'a,b,c' | ./clickhouse local -q "INSERT INTO test_tbl SETTINGS input_format_csv_use_default_on_bad_values=true FORMAT CSV" +./clickhouse local -q "select * from test_tbl" +``` + +Result + +```text +a 0 1971-01-01 +``` + ## Values format settings {#values-format-settings} ### input_format_values_interpret_expressions {#input_format_values_interpret_expressions} diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 730b6ab80ed..a4cb0c2dbd9 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -874,6 +874,7 @@ class IColumn; M(Bool, input_format_csv_detect_header, true, "Automatically detect header with names and types in CSV format", 0) \ M(Bool, input_format_csv_allow_whitespace_or_tab_as_delimiter, false, "Allow to use spaces and tabs(\\t) as field delimiter in the CSV strings", 0) \ M(Bool, input_format_csv_trim_whitespaces, true, "Trims spaces and tabs (\\t) characters at the beginning and end in CSV strings", 0) \ + M(Bool, input_format_csv_use_default_on_bad_values, false, "Allow to set default value to column when CSV field deserialization failed on bad value", 0) \ M(Bool, input_format_tsv_detect_header, true, "Automatically detect header with names and types in TSV format", 0) \ M(Bool, input_format_custom_detect_header, true, "Automatically detect header with names and types in CustomSeparated format", 0) \ M(Bool, input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for format Parquet", 0) \ diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 8eacc7acc97..6e3e086859b 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -73,6 +73,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.csv.trim_whitespaces = settings.input_format_csv_trim_whitespaces; format_settings.csv.allow_whitespace_or_tab_as_delimiter = settings.input_format_csv_allow_whitespace_or_tab_as_delimiter; format_settings.csv.allow_variable_number_of_columns = settings.input_format_csv_allow_variable_number_of_columns; + format_settings.csv.use_default_on_bad_values = settings.input_format_csv_use_default_on_bad_values; format_settings.hive_text.fields_delimiter = settings.input_format_hive_text_fields_delimiter; format_settings.hive_text.collection_items_delimiter = settings.input_format_hive_text_collection_items_delimiter; format_settings.hive_text.map_keys_delimiter = settings.input_format_hive_text_map_keys_delimiter; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index af90e4462dd..e321e5264ca 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -152,6 +152,7 @@ struct FormatSettings bool trim_whitespaces = true; bool allow_whitespace_or_tab_as_delimiter = false; bool 
allow_variable_number_of_columns = false; + bool use_default_on_bad_values = false; } csv; struct HiveText diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index 79ce2549b4d..244b906549e 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -315,17 +316,54 @@ bool CSVFormatReader::readField( return false; } + if (format_settings.csv.use_default_on_bad_values) + return readFieldOrDefault(column, type, serialization); + return readFieldImpl(*buf, column, type, serialization); +} + +bool CSVFormatReader::readFieldImpl(ReadBuffer & istr, DB::IColumn & column, const DB::DataTypePtr & type, const DB::SerializationPtr & serialization) +{ if (format_settings.null_as_default && !isNullableOrLowCardinalityNullable(type)) { /// If value is null but type is not nullable then use default value instead. - return SerializationNullable::deserializeTextCSVImpl(column, *buf, format_settings, serialization); + return SerializationNullable::deserializeTextCSVImpl(column, istr, format_settings, serialization); } /// Read the column normally. - serialization->deserializeTextCSV(column, *buf, format_settings); + serialization->deserializeTextCSV(column, istr, format_settings); return true; } +bool CSVFormatReader::readFieldOrDefault(DB::IColumn & column, const DB::DataTypePtr & type, const DB::SerializationPtr & serialization) +{ + String field; + readCSVField(field, *buf, format_settings.csv); + ReadBufferFromString tmp_buf(field); + bool is_bad_value = false; + bool res = false; + + size_t col_size = column.size(); + try + { + res = readFieldImpl(tmp_buf, column, type, serialization); + /// Check if we parsed the whole field successfully. 
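+        /// (Leftover bytes in tmp_buf mean the text had trailing garbage after a
+        /// parseable prefix, e.g. "123abc" for a numeric column, so the value is
+        /// treated as bad and replaced by the column default below.)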
+ if (!field.empty() && !tmp_buf.eof()) + is_bad_value = true; + } + catch (const Exception &) + { + is_bad_value = true; + } + + if (!is_bad_value) + return res; + + if (column.size() == col_size + 1) + column.popBack(1); + column.insertDefault(); + return false; +} + void CSVFormatReader::skipPrefixBeforeHeader() { for (size_t i = 0; i != format_settings.csv.skip_first_lines; ++i) diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.h b/src/Processors/Formats/Impl/CSVRowInputFormat.h index 8ccf04feed3..7b1a1fc433d 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.h +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.h @@ -89,6 +89,8 @@ public: void setReadBuffer(ReadBuffer & in_) override; FormatSettings::EscapingRule getEscapingRule() const override { return FormatSettings::EscapingRule::CSV; } + bool readFieldImpl(ReadBuffer & istr, DB::IColumn & column, const DB::DataTypePtr & type, const DB::SerializationPtr & serialization); + bool readFieldOrDefault(DB::IColumn & column, const DB::DataTypePtr & type, const DB::SerializationPtr & serialization); protected: PeekableReadBuffer * buf; diff --git a/tests/queries/0_stateless/02811_csv_input_field_type_mismatch.reference b/tests/queries/0_stateless/02811_csv_input_field_type_mismatch.reference new file mode 100644 index 00000000000..6abcc56bacc --- /dev/null +++ b/tests/queries/0_stateless/02811_csv_input_field_type_mismatch.reference @@ -0,0 +1,5 @@ +0 111 1970-01-01 false +1 abc 2023-03-14 true +2 c 1970-01-01 false +4 888 2023-03-14 false +5 bks 1970-01-01 false diff --git a/tests/queries/0_stateless/02811_csv_input_field_type_mismatch.sh b/tests/queries/0_stateless/02811_csv_input_field_type_mismatch.sh new file mode 100755 index 00000000000..30223329eca --- /dev/null +++ b/tests/queries/0_stateless/02811_csv_input_field_type_mismatch.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash + +# NOTE: this sh wrapper is required because of shell_config + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "drop table if exists test_tbl" +$CLICKHOUSE_CLIENT -q "create table test_tbl (a Int32, b String, c Date, e Boolean) engine=MergeTree order by a" +cat $CURDIR/data_csv/csv_with_bad_field_values.csv | ${CLICKHOUSE_CLIENT} -q "INSERT INTO test_tbl SETTINGS input_format_csv_use_default_on_bad_values=true FORMAT CSV" +$CLICKHOUSE_CLIENT -q "select * from test_tbl" +$CLICKHOUSE_CLIENT -q "drop table test_tbl" \ No newline at end of file diff --git a/tests/queries/0_stateless/data_csv/csv_with_bad_field_values.csv b/tests/queries/0_stateless/data_csv/csv_with_bad_field_values.csv new file mode 100644 index 00000000000..e829cc0106a --- /dev/null +++ b/tests/queries/0_stateless/data_csv/csv_with_bad_field_values.csv @@ -0,0 +1,5 @@ +1,abc,2023-03-14,true +2,c,ab,false +bc,111,ab,ban +4,888,2023-03-14,false +5,bks,2023-03,abdd \ No newline at end of file From bdaf82922a599ab5abb123b4bbd1f41249543ec7 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 19 Jul 2023 12:54:25 +0000 Subject: [PATCH 1761/1997] Use formatAST() --- src/Interpreters/Cache/QueryCache.cpp | 9 ++++----- src/Parsers/formatAST.cpp | 4 ++-- src/Parsers/formatAST.h | 2 +- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/Interpreters/Cache/QueryCache.cpp b/src/Interpreters/Cache/QueryCache.cpp index 1d1543844a2..a6c509e8bb1 100644 --- a/src/Interpreters/Cache/QueryCache.cpp +++ b/src/Interpreters/Cache/QueryCache.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -115,12 +116,10 @@ ASTPtr removeQueryCacheSettings(ASTPtr ast) return transformed_ast; } -String queryStringFromAst(ASTPtr ast) +String queryStringFromAST(ASTPtr ast) { WriteBufferFromOwnString buf; - IAST::FormatSettings format_settings(buf, /*one_line*/ true); - format_settings.show_secrets = false; - ast->format(format_settings); + formatAST(*ast, buf, /*hilite*/ false, /*one_line*/ true, /*show_secrets*/ false); return buf.str(); } @@ -138,7 +137,7 @@ QueryCache::Key::Key( , is_shared(is_shared_) , expires_at(expires_at_) , is_compressed(is_compressed_) - , query_string(queryStringFromAst(ast_)) + , query_string(queryStringFromAST(ast_)) { } diff --git a/src/Parsers/formatAST.cpp b/src/Parsers/formatAST.cpp index fca8ea0aa35..aa1afe17c75 100644 --- a/src/Parsers/formatAST.cpp +++ b/src/Parsers/formatAST.cpp @@ -4,9 +4,9 @@ namespace DB { -void formatAST(const IAST & ast, WriteBuffer & buf, bool hilite, bool one_line) +void formatAST(const IAST & ast, WriteBuffer & buf, bool hilite, bool one_line, bool show_secrets) { - IAST::FormatSettings settings(buf, one_line); + IAST::FormatSettings settings(buf, one_line, show_secrets); settings.hilite = hilite; ast.format(settings); diff --git a/src/Parsers/formatAST.h b/src/Parsers/formatAST.h index 28af2400a4c..ebd284fc18a 100644 --- a/src/Parsers/formatAST.h +++ b/src/Parsers/formatAST.h @@ -11,7 +11,7 @@ class WriteBuffer; /** Takes a syntax tree and turns it back into text. * In case of INSERT query, the data will be missing. */ -void formatAST(const IAST & ast, WriteBuffer & buf, bool hilite = true, bool one_line = false); +void formatAST(const IAST & ast, WriteBuffer & buf, bool hilite = true, bool one_line = false, bool show_secrets = true); String serializeAST(const IAST & ast, bool one_line = true); From 9c4005b33fc74ab4ec3b68ebc877fdda499e8932 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Wed, 19 Jul 2023 15:12:47 +0200 Subject: [PATCH 1762/1997] Add logging about all found workflows for merge_pr.py --- tests/ci/merge_pr.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/ci/merge_pr.py b/tests/ci/merge_pr.py index 14844ed9b25..35b0614b01f 100644 --- a/tests/ci/merge_pr.py +++ b/tests/ci/merge_pr.py @@ -246,6 +246,12 @@ def main(): if args.check_running_workflows: workflows = get_workflows_for_head(repo, pr.head.sha) + logging.info( + "The PR #%s has following workflows:\n%s", + pr.number, + "\n".join(f"{wf.html_url}: status is {wf.status}" for wf in workflows), + ) + workflows_in_progress = [wf for wf in workflows if wf.status != "completed"] # At most one workflow in progress is fine. We check that there no # cases like, e.g. PullRequestCI and DocksCheck in progress at once From 53818dde8cef7dd573217fa049d01b233a076ac2 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 19 Jul 2023 15:22:25 +0200 Subject: [PATCH 1763/1997] MergeTree/ReplicatedMergeTree should use server timezone for log entries Otherwise session_timezone/use_client_time_zone will break things Signed-off-by: Azat Khuzhin --- src/Storages/MergeTree/MergeTreeMutationEntry.cpp | 2 +- src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp | 2 +- src/Storages/MergeTree/ReplicatedMergeTreeMutationEntry.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeMutationEntry.cpp b/src/Storages/MergeTree/MergeTreeMutationEntry.cpp index cac26c5ac23..4dbccb91620 100644 --- a/src/Storages/MergeTree/MergeTreeMutationEntry.cpp +++ b/src/Storages/MergeTree/MergeTreeMutationEntry.cpp @@ -61,7 +61,7 @@ MergeTreeMutationEntry::MergeTreeMutationEntry(MutationCommands commands_, DiskP { auto out = disk->writeFile(std::filesystem::path(path_prefix) / file_name, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, settings); *out << "format version: 1\n" - << "create time: " << LocalDateTime(create_time) << "\n"; + << "create time: " << LocalDateTime(create_time, DateLUT::serverTimezoneInstance()) << "\n"; *out << "commands: "; commands.writeText(*out, /* with_pure_metadata_commands = */ false); *out << "\n"; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp index ac956433eab..9eb8b6ce24c 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp @@ -48,7 +48,7 @@ void ReplicatedMergeTreeLogEntryData::writeText(WriteBuffer & out) const format_version = std::max(format_version, FORMAT_WITH_LOG_ENTRY_ID); out << "format version: " << format_version << "\n" - << "create_time: " << LocalDateTime(create_time ? create_time : time(nullptr)) << "\n" + << "create_time: " << LocalDateTime(create_time ? create_time : time(nullptr), DateLUT::serverTimezoneInstance()) << "\n" << "source replica: " << source_replica << '\n' << "block_id: " << escape << block_id << '\n'; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeMutationEntry.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeMutationEntry.cpp index 1bbb246338c..e2c23ecfe85 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeMutationEntry.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeMutationEntry.cpp @@ -12,7 +12,7 @@ namespace DB void ReplicatedMergeTreeMutationEntry::writeText(WriteBuffer & out) const { out << "format version: 1\n" - << "create time: " << LocalDateTime(create_time ? 
create_time : time(nullptr)) << "\n" + << "create time: " << LocalDateTime(create_time ? create_time : time(nullptr), DateLUT::serverTimezoneInstance()) << "\n" << "source replica: " << source_replica << "\n" << "block numbers count: " << block_numbers.size() << "\n"; From 7ad399cc6d97aaa356cbbe50f697b563d0a2f995 Mon Sep 17 00:00:00 2001 From: Rory Crispin Date: Wed, 19 Jul 2023 14:25:36 +0100 Subject: [PATCH 1764/1997] Rephrase ALL supported note I interpreted the note as saying that `ALL` is only supported before 23.4. I think this reordering makes it clearer. --- docs/en/operations/backup.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/backup.md b/docs/en/operations/backup.md index c3ddee07d0b..61c8a73673f 100644 --- a/docs/en/operations/backup.md +++ b/docs/en/operations/backup.md @@ -30,7 +30,7 @@ description: In order to effectively mitigate possible human errors, you should ``` :::note ALL -`ALL` is only applicable to the `RESTORE` command prior to version 23.4 of Clickhouse. +Prior to version 23.4 of Clickhouse, `ALL` was only applicable to the `RESTORE` command. ::: ## Background From c47f19303afc3e4f18ceddbfe7b8f7e7cc4622b3 Mon Sep 17 00:00:00 2001 From: Rory Crispin Date: Wed, 19 Jul 2023 14:27:21 +0100 Subject: [PATCH 1765/1997] Correct CH capitalisation --- docs/en/operations/backup.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/backup.md b/docs/en/operations/backup.md index 61c8a73673f..62f931a76b4 100644 --- a/docs/en/operations/backup.md +++ b/docs/en/operations/backup.md @@ -30,7 +30,7 @@ description: In order to effectively mitigate possible human errors, you should ``` :::note ALL -Prior to version 23.4 of Clickhouse, `ALL` was only applicable to the `RESTORE` command. +Prior to version 23.4 of ClickHouse, `ALL` was only applicable to the `RESTORE` command. ::: ## Background From 688b55b6ff80ee333ab9ef318d42937d5b5d3064 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Wed, 19 Jul 2023 13:29:07 +0000 Subject: [PATCH 1766/1997] Try to fix test, rename arg --- src/Common/SystemLogBase.cpp | 9 +++++---- src/Common/SystemLogBase.h | 4 ++-- src/Loggers/Loggers.cpp | 6 +++--- tests/queries/0_stateless/02813_starting_in_text_log.sql | 2 +- 4 files changed, 11 insertions(+), 10 deletions(-) mode change 100755 => 100644 tests/queries/0_stateless/02813_starting_in_text_log.sql diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp index bed6d661db7..8cf8103e1c7 100644 --- a/src/Common/SystemLogBase.cpp +++ b/src/Common/SystemLogBase.cpp @@ -41,9 +41,9 @@ ISystemLog::~ISystemLog() = default; template SystemLogQueue::SystemLogQueue( - const String & name_, + const String & table_name_, size_t flush_interval_milliseconds_) - : log(&Poco::Logger::get(name_)) + : log(&Poco::Logger::get("SystemLogQueue (" + table_name_ + ")")) , flush_interval_milliseconds(flush_interval_milliseconds_) {} @@ -120,6 +120,7 @@ void SystemLogQueue::push(const LogElement & element) template uint64_t SystemLogQueue::notifyFlush(bool should_prepare_tables_anyway) { + uint64_t this_thread_requested_offset; { @@ -204,10 +205,10 @@ void SystemLogQueue::shutdown() template SystemLogBase::SystemLogBase( - const String& name, + const String& table_name_, size_t flush_interval_milliseconds_, std::shared_ptr> queue_) - : queue(queue_ ?
queue_ : std::make_shared>(table_name_, flush_interval_milliseconds_)) { } diff --git a/src/Common/SystemLogBase.h b/src/Common/SystemLogBase.h index 0ac376769ad..3716584be24 100644 --- a/src/Common/SystemLogBase.h +++ b/src/Common/SystemLogBase.h @@ -80,7 +80,7 @@ class SystemLogQueue public: SystemLogQueue( - const String & name_, + const String & table_name_, size_t flush_interval_milliseconds_); void shutdown(); @@ -130,7 +130,7 @@ public: using Self = SystemLogBase; SystemLogBase( - const String& name, + const String& table_name_, size_t flush_interval_milliseconds_, std::shared_ptr> queue_ = nullptr); diff --git a/src/Loggers/Loggers.cpp b/src/Loggers/Loggers.cpp index 4cc74902ee1..85a8152602f 100644 --- a/src/Loggers/Loggers.cpp +++ b/src/Loggers/Loggers.cpp @@ -255,10 +255,10 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log #ifndef WITHOUT_TEXT_LOG if (config.has("text_log")) { - String text_log_level_str = config.getString("text_log.level", ""); - int text_log_level = text_log_level_str.empty() ? INT_MAX : Poco::Logger::parseLevel(text_log_level_str); + String text_log_level_str = config.getString("text_log.level", "trace"); + int text_log_level = Poco::Logger::parseLevel(text_log_level_str); size_t flush_interval_milliseconds = config.getUInt64("text_log.flush_interval_milliseconds", - DEFAULT_SYSTEM_LOG_FLUSH_INTERVAL_MILLISECONDS); + DEFAULT_SYSTEM_LOG_FLUSH_INTERVAL_MILLISECONDS); split->addTextLog(DB::TextLog::getLogQueue(flush_interval_milliseconds), text_log_level); } #endif diff --git a/tests/queries/0_stateless/02813_starting_in_text_log.sql b/tests/queries/0_stateless/02813_starting_in_text_log.sql old mode 100755 new mode 100644 index 8ef78945a72..e007f58189e --- a/tests/queries/0_stateless/02813_starting_in_text_log.sql +++ b/tests/queries/0_stateless/02813_starting_in_text_log.sql @@ -1,2 +1,2 @@ SYSTEM FLUSH LOGS; -SELECT count() > 0 FROM system.text_log WHERE event_date >= yesterday() AND message LIKE '%Application: Starting ClickHouse%'; +SELECT count() > 0 FROM system.text_log WHERE event_date >= yesterday() AND message LIKE '%Starting ClickHouse%'; From 9f7e40e8e57cc5e8c997dff16b5c6645283ffcb3 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Wed, 19 Jul 2023 13:43:22 +0000 Subject: [PATCH 1767/1997] Remove empty line --- src/Common/SystemLogBase.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp index 8cf8103e1c7..294ba09e375 100644 --- a/src/Common/SystemLogBase.cpp +++ b/src/Common/SystemLogBase.cpp @@ -120,7 +120,6 @@ void SystemLogQueue::push(const LogElement & element) template uint64_t SystemLogQueue::notifyFlush(bool should_prepare_tables_anyway) { - uint64_t this_thread_requested_offset; { From 777026e42e00311c4a751e8899fac407d6c8b874 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Wed, 19 Jul 2023 13:43:40 +0000 Subject: [PATCH 1768/1997] Fix test --- src/Functions/s2RectAdd.cpp | 8 ++++---- src/Functions/s2RectContains.cpp | 8 ++++---- src/Functions/s2RectIntersection.cpp | 4 ++-- src/Functions/s2RectUnion.cpp | 6 +++--- tests/queries/0_stateless/01849_geoToS2.reference | 1 - tests/queries/0_stateless/01849_geoToS2.sql | 2 +- .../0_stateless/02224_s2_test_const_columns.reference | 2 +- tests/queries/0_stateless/02224_s2_test_const_columns.sql | 2 +- 8 files changed, 16 insertions(+), 17 deletions(-) diff --git a/src/Functions/s2RectAdd.cpp b/src/Functions/s2RectAdd.cpp index 9266f4ae1a7..0be304234cb 100644 --- a/src/Functions/s2RectAdd.cpp +++ 
b/src/Functions/s2RectAdd.cpp @@ -118,14 +118,14 @@ public: if (!point.is_valid()) throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Point is invalid. For valid point the latitude is between -90 and 90 degrees inclusive" + "Point is invalid. For valid point the latitude is between -90 and 90 degrees inclusive " "and the longitude is between -180 and 180 degrees inclusive."); if (!rect.is_valid()) throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Rectangle is invalid. For valid rectangles the latitude bounds do not exceed" - "Pi/2 in absolute value and the longitude bounds do not exceed Pi in absolute value." - "Also, if either the latitude or longitude bound is empty then both must be."); + "Rectangle is invalid. For valid rectangles the latitude bounds do not exceed " + "Pi/2 in absolute value and the longitude bounds do not exceed Pi in absolute value. " + "Also, if either the latitude or longitude bound is empty then both must be. "); rect.AddPoint(point.ToPoint()); diff --git a/src/Functions/s2RectContains.cpp b/src/Functions/s2RectContains.cpp index aed123ce8ee..898e12a6466 100644 --- a/src/Functions/s2RectContains.cpp +++ b/src/Functions/s2RectContains.cpp @@ -111,14 +111,14 @@ public: if (!point.is_valid()) throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Point is invalid. For valid point the latitude is between -90 and 90 degrees inclusive" + "Point is invalid. For valid point the latitude is between -90 and 90 degrees inclusive " "and the longitude is between -180 and 180 degrees inclusive."); if (!rect.is_valid()) throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Rectangle is invalid. For valid rectangles the latitude bounds do not exceed" - "Pi/2 in absolute value and the longitude bounds do not exceed Pi in absolute value." - "Also, if either the latitude or longitude bound is empty then both must be."); + "Rectangle is invalid. For valid rectangles the latitude bounds do not exceed " + "Pi/2 in absolute value and the longitude bounds do not exceed Pi in absolute value. " + "Also, if either the latitude or longitude bound is empty then both must be. "); dst_data.emplace_back(rect.Contains(point.ToLatLng())); } diff --git a/src/Functions/s2RectIntersection.cpp b/src/Functions/s2RectIntersection.cpp index ffe26d171d0..f0cc02de9d9 100644 --- a/src/Functions/s2RectIntersection.cpp +++ b/src/Functions/s2RectIntersection.cpp @@ -133,8 +133,8 @@ public: if (!rect1.is_valid() || !rect2.is_valid()) throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Rectangle is invalid. For valid rectangles the latitude bounds do not exceed" - "Pi/2 in absolute value and the longitude bounds do not exceed Pi in absolute value." + "Rectangle is invalid. For valid rectangles the latitude bounds do not exceed " + "Pi/2 in absolute value and the longitude bounds do not exceed Pi in absolute value. " "Also, if either the latitude or longitude bound is empty then both must be."); S2LatLngRect rect_intersection = rect1.Intersection(rect2); diff --git a/src/Functions/s2RectUnion.cpp b/src/Functions/s2RectUnion.cpp index 472b30c2d55..a5cedd35812 100644 --- a/src/Functions/s2RectUnion.cpp +++ b/src/Functions/s2RectUnion.cpp @@ -131,9 +131,9 @@ public: if (!rect1.is_valid() || !rect2.is_valid()) throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Rectangle is invalid. For valid rectangles the latitude bounds do not exceed" - "Pi/2 in absolute value and the longitude bounds do not exceed Pi in absolute value." - "Also, if either the latitude or longitude bound is empty then both must be."); + "Rectangle is invalid. 
For valid rectangles the latitude bounds do not exceed " + "Pi/2 in absolute value and the longitude bounds do not exceed Pi in absolute value. " + "Also, if either the latitude or longitude bound is empty then both must be. "); S2LatLngRect rect_union = rect1.Union(rect2); diff --git a/tests/queries/0_stateless/01849_geoToS2.reference b/tests/queries/0_stateless/01849_geoToS2.reference index 08d76978791..a8196994361 100644 --- a/tests/queries/0_stateless/01849_geoToS2.reference +++ b/tests/queries/0_stateless/01849_geoToS2.reference @@ -39,4 +39,3 @@ Checking s2 index generation. (74.0061,-68.32124) (74.0061,-68.32124) ok (10.61077,-64.1841) (10.61077,-64.1841) ok (-89.81096,-57.01398) (-89.81096,-57.01398) ok -4864204703484167331 diff --git a/tests/queries/0_stateless/01849_geoToS2.sql b/tests/queries/0_stateless/01849_geoToS2.sql index abd084a2b19..e997fec14e5 100644 --- a/tests/queries/0_stateless/01849_geoToS2.sql +++ b/tests/queries/0_stateless/01849_geoToS2.sql @@ -44,7 +44,7 @@ SELECT first, second, result FROM ( SELECT s2ToGeo(toUInt64(-1)); -- { serverError 36 } SELECT s2ToGeo(nan); -- { serverError 43 } -SELECT geoToS2(toFloat64(toUInt64(-1)), toFloat64(toUInt64(-1))); +SELECT geoToS2(toFloat64(toUInt64(-1)), toFloat64(toUInt64(-1))); -- { serverError BAD_ARGUMENTS } SELECT geoToS2(nan, nan); -- { serverError 43 } SELECT geoToS2(-inf, 1.1754943508222875e-38); -- { serverError 43 } diff --git a/tests/queries/0_stateless/02224_s2_test_const_columns.reference b/tests/queries/0_stateless/02224_s2_test_const_columns.reference index 9982596f097..20f32ec6be7 100644 --- a/tests/queries/0_stateless/02224_s2_test_const_columns.reference +++ b/tests/queries/0_stateless/02224_s2_test_const_columns.reference @@ -16,4 +16,4 @@ (5179062030687166815,5177056748191934217) (5179062030687166815,5177057445452335297) (5178914411069187297,5177056748191934217) -(6304347505408739331,8070450532247928833) +(5178914411069187297,5177912432982045463) diff --git a/tests/queries/0_stateless/02224_s2_test_const_columns.sql b/tests/queries/0_stateless/02224_s2_test_const_columns.sql index f33a7f2b696..1d3e51065b5 100644 --- a/tests/queries/0_stateless/02224_s2_test_const_columns.sql +++ b/tests/queries/0_stateless/02224_s2_test_const_columns.sql @@ -9,4 +9,4 @@ SELECT s2CapUnion(3814912406305146967, toFloat64(1), 1157347770437378819, toFloa SELECT s2RectAdd(5178914411069187297, 5177056748191934217, arrayJoin([5179056748191934217,5177914411069187297])); SELECT s2RectContains(5179062030687166815, 5177056748191934217, arrayJoin([5177914411069187297, 5177914411069187297])); SELECT s2RectUnion(5178914411069187297, 5177056748191934217, 5179062030687166815, arrayJoin([5177056748191934217, 5177914411069187297])); -SELECT s2RectIntersection(5178914411069187297, 5177056748191934217, 5179062030687166815, arrayJoin([5177056748191934217,1157347770437378819])); +SELECT s2RectIntersection(5178914411069187297, 5177056748191934217, 5179062030687166815, arrayJoin([5177056748191934217,5177914411069187297])); From 53500be941bc1d63ef85c3b5afb6bcc01103fb85 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Wed, 19 Jul 2023 17:03:00 +0200 Subject: [PATCH 1769/1997] Updated fix for multiple usage in parameterized view to support CTE by not adding a column which was previously added --- src/Interpreters/ActionsVisitor.cpp | 22 +++++++------------ ...zed_view_with_cte_multiple_usage.reference | 2 ++ ...meterized_view_with_cte_multiple_usage.sql | 16 ++++++++++++++ 3 files changed, 26 insertions(+), 14 deletions(-) create mode 100644
tests/queries/0_stateless/02818_parameterized_view_with_cte_multiple_usage.reference create mode 100755 tests/queries/0_stateless/02818_parameterized_view_with_cte_multiple_usage.sql diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index efab11003f5..8b10df516dc 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -1202,22 +1202,16 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & else if (data.is_create_parameterized_view && query_parameter) { const auto data_type = DataTypeFactory::instance().get(query_parameter->type); - /// Use getUniqueName() to allow multiple use of query parameter in the query: - /// - /// CREATE VIEW view AS - /// SELECT * - /// FROM system.one - /// WHERE dummy = {k1:Int}+1 OR dummy = {k1:Int}+2 - /// ^^ ^^ - /// - /// NOTE: query in the VIEW will not be modified this is needed - /// only during analysis for CREATE VIEW to avoid duplicated - /// column names. - ColumnWithTypeAndName column(data_type, data.getUniqueName("__" + query_parameter->getColumnName())); - data.addColumn(column); + /// During analysis for CREATE VIEW of a parameterized view, if parameter is + /// used multiple times, column is only added once + if (!data.hasColumn(query_parameter->name)) + { + ColumnWithTypeAndName column(data_type, query_parameter->name); + data.addColumn(column); + } argument_types.push_back(data_type); - argument_names.push_back(column.name); + argument_names.push_back(query_parameter->name); } else { diff --git a/tests/queries/0_stateless/02818_parameterized_view_with_cte_multiple_usage.reference b/tests/queries/0_stateless/02818_parameterized_view_with_cte_multiple_usage.reference new file mode 100644 index 00000000000..004d27bacad --- /dev/null +++ b/tests/queries/0_stateless/02818_parameterized_view_with_cte_multiple_usage.reference @@ -0,0 +1,2 @@ +3 2 +3 2 3 diff --git a/tests/queries/0_stateless/02818_parameterized_view_with_cte_multiple_usage.sql b/tests/queries/0_stateless/02818_parameterized_view_with_cte_multiple_usage.sql new file mode 100755 index 00000000000..d56d9c4e181 --- /dev/null +++ b/tests/queries/0_stateless/02818_parameterized_view_with_cte_multiple_usage.sql @@ -0,0 +1,16 @@ +create view test_param_view as +with {param_test_val:UInt8} as param_test_val +select param_test_val, + arrayCount((a)->(a < param_test_val), t.arr) as cnt1 +from (select [1,2,3,4,5] as arr) t; + +select * from test_param_view(param_test_val = 3); + +create view test_param_view2 as +with {param_test_val:UInt8} as param_test_val +select param_test_val, + arrayCount((a)->(a < param_test_val), t.arr) as cnt1, + arrayCount((a)->(a < param_test_val+1), t.arr) as cnt2 +from (select [1,2,3,4,5] as arr) t; + +select * from test_param_view2(param_test_val = 3); \ No newline at end of file From b34655e74310dba07c25b06cac817168b6012907 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 19 Jul 2023 17:05:03 +0200 Subject: [PATCH 1770/1997] Update src/Storages/StorageReplicatedMergeTree.cpp Co-authored-by: Alexander Tokmakov --- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 53fac578fca..04799a08e37 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -4114,7 +4114,7 @@ std::set StorageReplicatedMergeTree::findReplicaUniqueParts(c } if (!our_parts.empty() && 
our_unique_parts.empty()) - LOG_TRACE(log_, "All parts found on replica"); + LOG_TRACE(log_, "All parts found on replicas"); return our_unique_parts; } From 544081163d751a62dcdfc21e5841c9cb53877cb0 Mon Sep 17 00:00:00 2001 From: Alexander Sapin Date: Wed, 19 Jul 2023 17:21:01 +0200 Subject: [PATCH 1771/1997] Remove redundant deactivate --- src/Storages/StorageReplicatedMergeTree.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 04799a08e37..06f5330f6d9 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -4903,7 +4903,6 @@ void StorageReplicatedMergeTree::flushAndPrepareForShutdown() fetcher.blocker.cancelForever(); merger_mutator.merges_blocker.cancelForever(); parts_mover.moves_blocker.cancelForever(); - mutations_finalizing_task->deactivate(); stopBeingLeader(); if (attach_thread) From 13d1e21da820dd97ddb624eb7671ca2fee86d530 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Wed, 19 Jul 2023 18:26:26 +0200 Subject: [PATCH 1772/1997] Fixed test file permissions --- .../02818_parameterized_view_with_cte_multiple_usage.sql | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 tests/queries/0_stateless/02818_parameterized_view_with_cte_multiple_usage.sql diff --git a/tests/queries/0_stateless/02818_parameterized_view_with_cte_multiple_usage.sql b/tests/queries/0_stateless/02818_parameterized_view_with_cte_multiple_usage.sql old mode 100755 new mode 100644 From 90be5e6160f2342121dfe2d014f4d247a4efa39c Mon Sep 17 00:00:00 2001 From: Samuel Colvin Date: Wed, 19 Jul 2023 20:04:43 +0100 Subject: [PATCH 1773/1997] Remove reference to `TIMEOUT` in live views documentation Temporary live views were removed in #42173, but the documentation was not fully updated to reflect the change. --- docs/en/sql-reference/statements/create/view.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index 10b15638152..11026340a0f 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -97,7 +97,7 @@ This is an experimental feature that may change in backwards-incompatible ways i ::: ```sql -CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ... +CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH REFRESH [value_in_sec]] AS SELECT ... ``` Live views store result of the corresponding [SELECT](../../../sql-reference/statements/select/index.md) query and are updated any time the result of the query changes. Query result as well as partial result needed to combine with new data are stored in memory providing increased performance for repeated queries. Live views can provide push notifications when query result changes using the [WATCH](../../../sql-reference/statements/watch.md) query. 
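For reference, the periodic-refresh form that remains valid after dropping `TIMEOUT` looks like this (a minimal sketch; the view name and query are illustrative only):

```sql
SET allow_experimental_live_view = 1;
-- Re-evaluate the stored result every 5 seconds; no TIMEOUT clause is accepted anymore.
CREATE LIVE VIEW lv WITH REFRESH 5 AS SELECT now();
```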
From 02fe735b768e2d171191091c402f4732ace4669e Mon Sep 17 00:00:00 2001 From: Samuel Colvin Date: Wed, 19 Jul 2023 20:12:35 +0100 Subject: [PATCH 1774/1997] fix in other languages --- docs/ru/sql-reference/statements/create/view.md | 2 +- docs/zh/sql-reference/statements/create/view.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/ru/sql-reference/statements/create/view.md b/docs/ru/sql-reference/statements/create/view.md index d3846aac289..1a60dc0716c 100644 --- a/docs/ru/sql-reference/statements/create/view.md +++ b/docs/ru/sql-reference/statements/create/view.md @@ -73,7 +73,7 @@ CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]na Чтобы использовать `LIVE VIEW` и запросы `WATCH`, включите настройку [allow_experimental_live_view](../../../operations/settings/settings.md#allow-experimental-live-view). ::: ```sql -CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ... +CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH REFRESH [value_in_sec]] AS SELECT ... ``` `LIVE VIEW` хранит результат запроса [SELECT](../../../sql-reference/statements/select/index.md), указанного при создании, и обновляется сразу же при изменении этого результата. Конечный результат запроса и промежуточные данные, из которых формируется результат, хранятся в оперативной памяти, и это обеспечивает высокую скорость обработки для повторяющихся запросов. LIVE-представления могут отправлять push-уведомления при изменении результата исходного запроса `SELECT`. Для этого используйте запрос [WATCH](../../../sql-reference/statements/watch.md). diff --git a/docs/zh/sql-reference/statements/create/view.md b/docs/zh/sql-reference/statements/create/view.md index 8ce2d20a10c..bce0994ecd2 100644 --- a/docs/zh/sql-reference/statements/create/view.md +++ b/docs/zh/sql-reference/statements/create/view.md @@ -72,7 +72,7 @@ ClickHouse 中的物化视图更像是插入触发器。 如果视图查询中 使用[allow_experimental_live_view](../../../operations/settings/settings.md#allow-experimental-live-view)设置启用实时视图和`WATCH`查询的使用。 输入命令`set allow_experimental_live_view = 1`。 ```sql -CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ... +CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH REFRESH [value_in_sec]] AS SELECT ...
``` 实时视图存储相应[SELECT](../../../sql-reference/statements/select/index.md)查询的结果,并在查询结果更改时随时更新。 查询结果以及与新数据结合所需的部分结果存储在内存中,为重复查询提供更高的性能。当使用[WATCH](../../../sql-reference/statements/watch.md)查询更改查询结果时,实时视图可以提供推送通知。 From dbdac5d823d431fb34405649f7125e76c88f1f05 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Wed, 19 Jul 2023 19:34:49 +0000 Subject: [PATCH 1775/1997] Add query with UNION --- .../02500_remove_redundant_distinct.reference | 29 +++++++++++++++++++ .../02500_remove_redundant_distinct.sh | 12 ++++++++ ...move_redundant_distinct_analyzer.reference | 29 +++++++++++++++++++ 3 files changed, 70 insertions(+) diff --git a/tests/queries/0_stateless/02500_remove_redundant_distinct.reference b/tests/queries/0_stateless/02500_remove_redundant_distinct.reference index 2e049dbc936..763a7cc4286 100644 --- a/tests/queries/0_stateless/02500_remove_redundant_distinct.reference +++ b/tests/queries/0_stateless/02500_remove_redundant_distinct.reference @@ -477,3 +477,32 @@ Expression (Projection) ReadFromStorage (SystemNumbers) -- execute 1 +-- UNION ALL with DISTINCT => do _not_ remove DISTINCT +-- query +SELECT DISTINCT number +FROM +( + SELECT DISTINCT number + FROM numbers(1) + UNION ALL + SELECT DISTINCT number + FROM numbers(2) +) +-- explain +Expression (Projection) + Distinct + Distinct (Preliminary DISTINCT) + Union + Expression ((Before ORDER BY + Projection)) + Distinct + Distinct (Preliminary DISTINCT) + Expression (Before ORDER BY) + ReadFromStorage (SystemNumbers) + Expression (( + Projection)) + Distinct + Distinct (Preliminary DISTINCT) + Expression (Before ORDER BY) + ReadFromStorage (SystemNumbers) +-- execute +0 +1 diff --git a/tests/queries/0_stateless/02500_remove_redundant_distinct.sh b/tests/queries/0_stateless/02500_remove_redundant_distinct.sh index 41744cc59f9..f07cdca4b5a 100755 --- a/tests/queries/0_stateless/02500_remove_redundant_distinct.sh +++ b/tests/queries/0_stateless/02500_remove_redundant_distinct.sh @@ -264,3 +264,15 @@ run_query "$query" echo "-- DISTINCT COUNT() with GROUP BY => do _not_ remove DISTINCT" query="select distinct count() from numbers(10) group by number" run_query "$query" + +echo "-- UNION ALL with DISTINCT => do _not_ remove DISTINCT" +query="SELECT DISTINCT number +FROM +( + SELECT DISTINCT number + FROM numbers(1) + UNION ALL + SELECT DISTINCT number + FROM numbers(2) +)" +run_query "$query" diff --git a/tests/queries/0_stateless/02500_remove_redundant_distinct_analyzer.reference b/tests/queries/0_stateless/02500_remove_redundant_distinct_analyzer.reference index c9301c1f0a3..50ca5981cf1 100644 --- a/tests/queries/0_stateless/02500_remove_redundant_distinct_analyzer.reference +++ b/tests/queries/0_stateless/02500_remove_redundant_distinct_analyzer.reference @@ -479,3 +479,32 @@ Expression (Project names) ReadFromStorage (SystemNumbers) -- execute 1 +-- UNION ALL with DISTINCT => do _not_ remove DISTINCT +-- query +SELECT DISTINCT number +FROM +( + SELECT DISTINCT number + FROM numbers(1) + UNION ALL + SELECT DISTINCT number + FROM numbers(2) +) +-- explain +Expression (Project names) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Union + Expression ((Projection + (Change column names to column identifiers + Project names))) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Expression ((Projection + Change column names to column identifiers)) + ReadFromStorage (SystemNumbers) + Expression (( + ( + Project names))) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Expression ((Projection + Change column names to column 
identifiers)) + ReadFromStorage (SystemNumbers) +-- execute +0 +1 From 2b8e4ebd4c3df56c2d3e445321cedb157c7956f7 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 19 Jul 2023 19:48:39 +0000 Subject: [PATCH 1776/1997] Allow to disable decoding/encoding path in uri in URL engine --- base/poco/Foundation/include/Poco/URI.h | 6 +++- base/poco/Foundation/src/URI.cpp | 39 ++++++++++++++++++------- docs/en/operations/settings/settings.md | 6 ++++ src/Core/Settings.h | 1 + src/IO/ReadWriteBufferFromHTTP.cpp | 6 ++-- src/Storages/StorageURL.cpp | 2 +- 6 files changed, 45 insertions(+), 15 deletions(-) diff --git a/base/poco/Foundation/include/Poco/URI.h b/base/poco/Foundation/include/Poco/URI.h index 1880af4ccd2..5e6e7efd938 100644 --- a/base/poco/Foundation/include/Poco/URI.h +++ b/base/poco/Foundation/include/Poco/URI.h @@ -57,7 +57,7 @@ public: URI(); /// Creates an empty URI. - explicit URI(const std::string & uri); + explicit URI(const std::string & uri, bool decode_and_encode_path = true); /// Parses an URI from the given string. Throws a /// SyntaxException if the uri is not valid. @@ -350,6 +350,8 @@ protected: static const std::string ILLEGAL; private: + void encodePath(std::string & encodedStr) const; + std::string _scheme; std::string _userInfo; std::string _host; @@ -357,6 +359,8 @@ private: std::string _path; std::string _query; std::string _fragment; + + bool _decode_and_encode_path = true; }; diff --git a/base/poco/Foundation/src/URI.cpp b/base/poco/Foundation/src/URI.cpp index 5543e02b279..91a82868dcf 100644 --- a/base/poco/Foundation/src/URI.cpp +++ b/base/poco/Foundation/src/URI.cpp @@ -36,8 +36,8 @@ URI::URI(): } -URI::URI(const std::string& uri): - _port(0) +URI::URI(const std::string& uri, bool decode_and_encode_path): + _port(0), _decode_and_encode_path(decode_and_encode_path) { parse(uri); } @@ -107,7 +107,8 @@ URI::URI(const URI& uri): _port(uri._port), _path(uri._path), _query(uri._query), - _fragment(uri._fragment) + _fragment(uri._fragment), + _decode_and_encode_path(uri._decode_and_encode_path) { } @@ -119,7 +120,8 @@ URI::URI(const URI& baseURI, const std::string& relativeURI): _port(baseURI._port), _path(baseURI._path), _query(baseURI._query), - _fragment(baseURI._fragment) + _fragment(baseURI._fragment), + _decode_and_encode_path(baseURI._decode_and_encode_path) { resolve(relativeURI); } @@ -151,6 +153,7 @@ URI& URI::operator = (const URI& uri) _path = uri._path; _query = uri._query; _fragment = uri._fragment; + _decode_and_encode_path = uri._decode_and_encode_path; } return *this; } @@ -181,6 +184,7 @@ void URI::swap(URI& uri) std::swap(_path, uri._path); std::swap(_query, uri._query); std::swap(_fragment, uri._fragment); + std::swap(_decode_and_encode_path, uri._decode_and_encode_path); } @@ -201,7 +205,7 @@ std::string URI::toString() const std::string uri; if (isRelative()) { - encode(_path, RESERVED_PATH, uri); + encodePath(uri); } else { @@ -217,7 +221,7 @@ std::string URI::toString() const { if (!auth.empty() && _path[0] != '/') uri += '/'; - encode(_path, RESERVED_PATH, uri); + encodePath(uri); } else if (!_query.empty() || !_fragment.empty()) { @@ -313,7 +317,10 @@ void URI::setAuthority(const std::string& authority) void URI::setPath(const std::string& path) { _path.clear(); - decode(path, _path); + if (_decode_and_encode_path) + decode(path, _path); + else + _path = path; } @@ -418,7 +425,7 @@ void URI::setPathEtc(const std::string& pathEtc) std::string URI::getPathEtc() const { std::string pathEtc; - encode(_path, RESERVED_PATH, pathEtc); + 
encodePath(pathEtc); if (!_query.empty()) { pathEtc += '?'; @@ -436,7 +443,7 @@ std::string URI::getPathEtc() const std::string URI::getPathAndQuery() const { std::string pathAndQuery; - encode(_path, RESERVED_PATH, pathAndQuery); + encodePath(pathAndQuery); if (!_query.empty()) { pathAndQuery += '?'; @@ -626,6 +633,8 @@ void URI::encode(const std::string& str, const std::string& reserved, std::strin for (std::string::const_iterator it = str.begin(); it != str.end(); ++it) { char c = *it; + if (c == '%') + throw std::runtime_error("WTF"); if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || @@ -681,6 +690,13 @@ void URI::decode(const std::string& str, std::string& decodedStr, bool plusAsSpa } } +void URI::encodePath(std::string & encodedStr) const +{ + if (_decode_and_encode_path) + encode(_path, RESERVED_PATH, encodedStr); + else + encodedStr = _path; +} bool URI::isWellKnownPort() const { @@ -820,7 +836,10 @@ void URI::parsePath(std::string::const_iterator& it, const std::string::const_it { std::string path; while (it != end && *it != '?' && *it != '#') path += *it++; - decode(path, _path); + if (_decode_and_encode_path) + decode(path, _path); + else + _path = path; } diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 8b969f87a4d..db5d1a2f5d9 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -3466,6 +3466,12 @@ Possible values: Default value: `0`. +## decode_and_encode_path_in_url {#decode_and_encode_path_in_url} + +Enables or disables decoding/encoding path in uri in [URL](../../engines/table-engines/special/url.md) engine tables. + +Enabled by default. + ## database_atomic_wait_for_drop_and_detach_synchronously {#database_atomic_wait_for_drop_and_detach_synchronously} Adds a modifier `SYNC` to all `DROP` and `DETACH` queries. diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 8f304f0aab6..ffa72d841be 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -621,6 +621,7 @@ class IColumn; M(Bool, engine_file_allow_create_multiple_files, false, "Enables or disables creating a new file on each insert in file engine tables if format has suffix.", 0) \ M(Bool, engine_file_skip_empty_files, false, "Allows to skip empty files in file table engine", 0) \ M(Bool, engine_url_skip_empty_files, false, "Allows to skip empty files in url table engine", 0) \ + M(Bool, decode_and_encode_path_in_url, true, "Enables or disables decoding/encoding path in uri in URL table engine", 0) \ M(Bool, allow_experimental_database_replicated, false, "Allow to create databases with Replicated engine", 0) \ M(UInt64, database_replicated_initial_query_timeout_sec, 300, "How long initial DDL query should wait for Replicated database to precess previous DDL queue entries", 0) \ M(Bool, database_replicated_enforce_synchronous_settings, false, "Enforces synchronous waiting for some queries (see also database_atomic_wait_for_drop_and_detach_synchronously, mutation_sync, alter_sync). 
Not recommended to enable these settings.", 0) \ diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp index 6d1c0f7aafa..eea801ce65e 100644 --- a/src/IO/ReadWriteBufferFromHTTP.cpp +++ b/src/IO/ReadWriteBufferFromHTTP.cpp @@ -305,12 +305,12 @@ void ReadWriteBufferFromHTTPBase::callWithRedirects(Poco::N current_session = session; call(current_session, response, method_, throw_on_all_errors, for_object_info); - Poco::URI prev_uri = uri; + saved_uri_redirect = uri; while (isRedirect(response.getStatus())) { - Poco::URI uri_redirect = getUriAfterRedirect(prev_uri, response); - prev_uri = uri_redirect; + Poco::URI uri_redirect = getUriAfterRedirect(*saved_uri_redirect, response); + saved_uri_redirect = uri_redirect; if (remote_host_filter) remote_host_filter->checkURL(uri_redirect); diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index e6953afe68e..4cfefbc5527 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -389,7 +389,7 @@ std::pair> StorageURLSource: for (; option != end; ++option) { bool skip_url_not_found_error = glob_url && read_settings.http_skip_not_found_url_for_globs && option == std::prev(end); - auto request_uri = Poco::URI(*option); + auto request_uri = Poco::URI(*option, context->getSettingsRef().decode_and_encode_path_in_url); for (const auto & [param, value] : params) request_uri.addQueryParameter(param, value); From 483ddb53ebfa01c02deda76a39bc44cc08df4f00 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 19 Jul 2023 19:51:58 +0000 Subject: [PATCH 1777/1997] Fixes --- base/poco/Foundation/src/URI.cpp | 2 -- docs/en/engines/table-engines/special/url.md | 1 + docs/en/sql-reference/table-functions/url.md | 3 ++- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/base/poco/Foundation/src/URI.cpp b/base/poco/Foundation/src/URI.cpp index 91a82868dcf..9bad1b39a87 100644 --- a/base/poco/Foundation/src/URI.cpp +++ b/base/poco/Foundation/src/URI.cpp @@ -633,8 +633,6 @@ void URI::encode(const std::string& str, const std::string& reserved, std::strin for (std::string::const_iterator it = str.begin(); it != str.end(); ++it) { char c = *it; - if (c == '%') - throw std::runtime_error("WTF"); if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || diff --git a/docs/en/engines/table-engines/special/url.md b/docs/en/engines/table-engines/special/url.md index 26d4975954f..9f2bf177c96 100644 --- a/docs/en/engines/table-engines/special/url.md +++ b/docs/en/engines/table-engines/special/url.md @@ -106,3 +106,4 @@ For partitioning by month, use the `toYYYYMM(date_column)` expression, where `da ## Storage Settings {#storage-settings} - [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default. +- [decode_and_encode_path_in_url](/docs/en/operations/settings/settings.md#decode_and_encode_path_in_url) - enables or disables decoding/encoding path in uri. Enabled by default. diff --git a/docs/en/sql-reference/table-functions/url.md b/docs/en/sql-reference/table-functions/url.md index 2ab43f1b895..96f36f03949 100644 --- a/docs/en/sql-reference/table-functions/url.md +++ b/docs/en/sql-reference/table-functions/url.md @@ -56,7 +56,8 @@ Character `|` inside patterns is used to specify failover addresses. 
They are it ## Storage Settings {#storage-settings} - [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default. +- [decode_and_encode_path_in_url](/docs/en/operations/settings/settings.md#decode_and_encode_path_in_url) - enables or disables decoding/encoding path in uri. Enabled by default. -**See Also** +- **See Also** - [Virtual columns](/docs/en/engines/table-engines/index.md#table_engines-virtual_columns) From e7b8767585e748d91796e669b871d40546c40bc8 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 19 Jul 2023 22:14:05 +0200 Subject: [PATCH 1778/1997] Mark test 02125_many_mutations_2 as no-parallel to avoid flakiness --- tests/queries/0_stateless/02125_many_mutations_2.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02125_many_mutations_2.sh b/tests/queries/0_stateless/02125_many_mutations_2.sh index 5b779c1b276..819ac8c9524 100755 --- a/tests/queries/0_stateless/02125_many_mutations_2.sh +++ b/tests/queries/0_stateless/02125_many_mutations_2.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-tsan, no-debug, no-asan, no-msan, no-ubsan +# Tags: long, no-tsan, no-debug, no-asan, no-msan, no-ubsan, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From ff235e0f3078f6c27a9a1ab1383a91378313ab77 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Thu, 20 Jul 2023 05:41:39 +0000 Subject: [PATCH 1779/1997] Turn off log in queue, fix data race --- src/Common/SystemLogBase.cpp | 9 +++++++-- src/Common/SystemLogBase.h | 3 ++- src/Interpreters/TextLog.h | 6 ++++-- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp index 294ba09e375..d1845a292b9 100644 --- a/src/Common/SystemLogBase.cpp +++ b/src/Common/SystemLogBase.cpp @@ -42,10 +42,14 @@ ISystemLog::~ISystemLog() = default; template SystemLogQueue::SystemLogQueue( const String & table_name_, - size_t flush_interval_milliseconds_) + size_t flush_interval_milliseconds_, + bool turn_off_logger_) : log(&Poco::Logger::get("SystemLogQueue (" + table_name_ + ")")) , flush_interval_milliseconds(flush_interval_milliseconds_) -{} +{ + if (turn_off_logger_) + log->setLevel(0); +} static thread_local bool recursive_push_call = false; @@ -197,6 +201,7 @@ SystemLogQueue::Index SystemLogQueue::pop(std::vector void SystemLogQueue::shutdown() { + std::unique_lock lock(mutex); is_shutdown = true; /// Tell thread to shutdown. 
flush_event.notify_all(); diff --git a/src/Common/SystemLogBase.h b/src/Common/SystemLogBase.h index 3716584be24..f6e4a579edf 100644 --- a/src/Common/SystemLogBase.h +++ b/src/Common/SystemLogBase.h @@ -81,7 +81,8 @@ class SystemLogQueue public: SystemLogQueue( const String & table_name_, - size_t flush_interval_milliseconds_); + size_t flush_interval_milliseconds_, + bool turn_off_logger_ = false); void shutdown(); diff --git a/src/Interpreters/TextLog.h b/src/Interpreters/TextLog.h index 0febce03abc..60ca11632aa 100644 --- a/src/Interpreters/TextLog.h +++ b/src/Interpreters/TextLog.h @@ -40,6 +40,8 @@ struct TextLogElement class TextLog : public SystemLog { public: + using Queue = SystemLogQueue; + TextLog( ContextPtr context_, const String & database_name_, @@ -47,9 +49,9 @@ public: const String & storage_def_, size_t flush_interval_milliseconds_); - static std::shared_ptr> getLogQueue(size_t flush_interval_milliseconds) + static std::shared_ptr getLogQueue(size_t flush_interval_milliseconds) { - static std::shared_ptr> queue = std::make_shared>("text_log", flush_interval_milliseconds); + static std::shared_ptr queue = std::make_shared("text_log", flush_interval_milliseconds, true); return queue; } }; From 27ca367b2cb349391946c45d7e3d22fe6d543c42 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Thu, 20 Jul 2023 10:01:07 +0200 Subject: [PATCH 1780/1997] ZooKeeperRetriesControl rethrows with original callstack. --- src/Storages/MergeTree/ZooKeeperRetries.h | 44 ++++++++++++++++++----- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/src/Storages/MergeTree/ZooKeeperRetries.h b/src/Storages/MergeTree/ZooKeeperRetries.h index e55b04c27b3..512c0800de7 100644 --- a/src/Storages/MergeTree/ZooKeeperRetries.h +++ b/src/Storages/MergeTree/ZooKeeperRetries.h @@ -72,7 +72,7 @@ public: if (!Coordination::isHardwareError(e.code)) throw; - setKeeperError(e.code, e.message()); + setKeeperError(std::current_exception(), e.code, e.message()); } catch (...) { @@ -91,16 +91,16 @@ public: } catch (const zkutil::KeeperException & e) { - setKeeperError(e.code, e.message()); + setKeeperError(std::current_exception(), e.code, e.message()); } catch (const Exception & e) { - setUserError(e.code(), e.what()); + setUserError(std::current_exception(), e.code(), e.what()); } return false; } - void setUserError(int code, std::string message) + void setUserError(std::exception_ptr exception, int code, std::string message) { if (retries_info.logger) LOG_TRACE( @@ -113,16 +113,28 @@ public: iteration_succeeded = false; user_error.code = code; user_error.message = std::move(message); + user_error.exception = exception; keeper_error = KeeperError{}; } + template + void setUserError(std::exception_ptr exception, int code, fmt::format_string fmt, Args &&... args) + { + setUserError(exception, code, fmt::format(fmt, std::forward(args)...)); + } + + void setUserError(int code, std::string message) + { + setUserError(std::make_exception_ptr(Exception::createDeprecated(message, code)), code, message); + } + template void setUserError(int code, fmt::format_string fmt, Args &&... 
args)
    {
        setUserError(code, fmt::format(fmt, std::forward(args)...));
    }

-    void setKeeperError(Coordination::Error code, std::string message)
+    void setKeeperError(std::exception_ptr exception, Coordination::Error code, std::string message)
     {
         if (retries_info.logger)
             LOG_TRACE(
@@ -135,9 +147,21 @@ public:
         iteration_succeeded = false;
         keeper_error.code = code;
         keeper_error.message = std::move(message);
+        keeper_error.exception = exception;
         user_error = UserError{};
     }

+    template
+    void setKeeperError(std::exception_ptr exception, Coordination::Error code, fmt::format_string fmt, Args &&... args)
+    {
+        setKeeperError(exception, code, fmt::format(fmt, std::forward(args)...));
+    }
+
+    void setKeeperError(Coordination::Error code, std::string message)
+    {
+        setKeeperError(std::make_exception_ptr(zkutil::KeeperException(message, code)), code, message);
+    }
+
     template
     void setKeeperError(Coordination::Error code, fmt::format_string fmt, Args &&... args)
     {
@@ -163,12 +187,14 @@ private:
         using Code = Coordination::Error;
         Code code = Code::ZOK;
         std::string message;
+        std::exception_ptr exception;
     };

     struct UserError
     {
         int code = ErrorCodes::OK;
         std::string message;
+        std::exception_ptr exception;
     };

     bool canTry()
@@ -232,11 +258,11 @@ private:
     void throwIfError() const
     {
-        if (user_error.code != ErrorCodes::OK)
-            throw Exception::createDeprecated(user_error.message, user_error.code);
+        if (user_error.exception)
+            std::rethrow_exception(user_error.exception);

-        if (keeper_error.code != KeeperError::Code::ZOK)
-            throw zkutil::KeeperException(keeper_error.message, keeper_error.code);
+        if (keeper_error.exception)
+            std::rethrow_exception(keeper_error.exception);
     }

     void logLastError(std::string_view header)

From 6a21995b2097e747a28a23333e651208c25f0224 Mon Sep 17 00:00:00 2001
From: Smita Kulkarni
Date: Thu, 20 Jul 2023 10:42:19 +0200
Subject: [PATCH 1781/1997] Added test to analyzer_tech_debt.txt

---
 tests/analyzer_tech_debt.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt
index e0f259306aa..9a9412e55db 100644
--- a/tests/analyzer_tech_debt.txt
+++ b/tests/analyzer_tech_debt.txt
@@ -130,3 +130,4 @@
 02581_share_big_sets_between_mutation_tasks_long
 02581_share_big_sets_between_multiple_mutations_tasks_long
 00992_system_parts_race_condition_zookeeper_long
+02818_parameterized_view_with_cte_multiple_usage

From c7a85d565cb17c068528bdbf38a74d0ab29a1450 Mon Sep 17 00:00:00 2001
From: Dmitry Kardymon
Date: Thu, 20 Jul 2023 08:55:22 +0000
Subject: [PATCH 1782/1997] Move code to try to make the diff simpler

---
 src/Common/SystemLogBase.cpp | 60 ++++++++++++++++++------------------
 1 file changed, 30 insertions(+), 30 deletions(-)

diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp
index d1845a292b9..ed5ffd78a7b 100644
--- a/src/Common/SystemLogBase.cpp
+++ b/src/Common/SystemLogBase.cpp
@@ -121,6 +121,36 @@ void SystemLogQueue::push(const LogElement & element)
         LOG_INFO(log, "Queue is half full for system log '{}'.", demangle(typeid(*this).name()));
 }

+template
+void SystemLogBase::flush(bool force)
+{
+    uint64_t this_thread_requested_offset = queue->notifyFlush(force);
+    if (this_thread_requested_offset == uint64_t(-1))
+        return;
+
+    queue->waitFlush(this_thread_requested_offset);
+}
+
+template
+void SystemLogQueue::waitFlush(uint64_t expected_flushed_up_to)
+{
+    // Use an arbitrary timeout to avoid endless waiting.
60s proved to be + // too fast for our parallel functional tests, probably because they + // heavily load the disk. + const int timeout_seconds = 180; + std::unique_lock lock(mutex); + bool result = flush_event.wait_for(lock, std::chrono::seconds(timeout_seconds), [&] + { + return flushed_up_to >= expected_flushed_up_to && !is_force_prepare_tables; + }); + + if (!result) + { + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Timeout exceeded ({} s) while flushing system log '{}'.", + toString(timeout_seconds), demangle(typeid(*this).name())); + } +} + template uint64_t SystemLogQueue::notifyFlush(bool should_prepare_tables_anyway) { @@ -145,26 +175,6 @@ uint64_t SystemLogQueue::notifyFlush(bool should_prepare_tables_anyw return this_thread_requested_offset; } -template -void SystemLogQueue::waitFlush(uint64_t expected_flushed_up_to) -{ - // Use an arbitrary timeout to avoid endless waiting. 60s proved to be - // too fast for our parallel functional tests, probably because they - // heavily load the disk. - const int timeout_seconds = 180; - std::unique_lock lock(mutex); - bool result = flush_event.wait_for(lock, std::chrono::seconds(timeout_seconds), [&] - { - return flushed_up_to >= expected_flushed_up_to && !is_force_prepare_tables; - }); - - if (!result) - { - throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Timeout exceeded ({} s) while flushing system log '{}'.", - toString(timeout_seconds), demangle(typeid(*this).name())); - } -} - template void SystemLogQueue::confirm(uint64_t to_flush_end) { @@ -229,16 +239,6 @@ void SystemLogBase::add(const LogElement & element) queue->push(element); } -template -void SystemLogBase::flush(bool force) -{ - uint64_t this_thread_requested_offset = queue->notifyFlush(force); - if (this_thread_requested_offset == uint64_t(-1)) - return; - - queue->waitFlush(this_thread_requested_offset); -} - template void SystemLogBase::notifyFlush(bool force) { queue->notifyFlush(force); } From 067e3caa2c43ed981a7c598c45668f37b0ac32c6 Mon Sep 17 00:00:00 2001 From: chen768959 <934103231@qq.com> Date: Thu, 20 Jul 2023 18:13:19 +0800 Subject: [PATCH 1783/1997] Remove constants from description_sorted_. --- src/Processors/Transforms/FinishSortingTransform.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/Processors/Transforms/FinishSortingTransform.cpp b/src/Processors/Transforms/FinishSortingTransform.cpp index 066928446f2..744d035d0ee 100644 --- a/src/Processors/Transforms/FinishSortingTransform.cpp +++ b/src/Processors/Transforms/FinishSortingTransform.cpp @@ -38,15 +38,11 @@ FinishSortingTransform::FinishSortingTransform( /// Remove constants from description_sorted_. 
    SortDescription description_sorted_without_constants;
     description_sorted_without_constants.reserve(description_sorted_.size());
-    size_t num_columns = header.columns();
-    ColumnNumbers map(num_columns, num_columns);
     for (const auto & column_description : description_sorted_)
     {
-        auto old_pos = header.getPositionByName(column_description.column_name);
-        auto new_pos = map[old_pos];
+        auto pos = header.getPositionByName(column_description.column_name);

-        if (new_pos < num_columns)
-        {
+        if (!const_columns_to_remove[pos]) {
             description_sorted_without_constants.push_back(column_description);
         }
     }

From 0ba97eeea597ad027c375cf292419dd555a9cb73 Mon Sep 17 00:00:00 2001
From: lgbo-ustc
Date: Thu, 15 Jun 2023 08:05:47 +0800
Subject: [PATCH 1784/1997] wip: grace hash join support full & right join

---
 docs/en/operations/settings/settings.md       |  2 +
 src/Interpreters/GraceHashJoin.cpp            | 31 +++++++--
 src/Interpreters/GraceHashJoin.h              |  3 +-
 .../Transforms/JoiningTransform.cpp           | 65 +++++++++++++++++--
 src/Processors/Transforms/JoiningTransform.h  | 24 ++++++-
 src/QueryPipeline/QueryPipelineBuilder.cpp    |  2 +-
 ...01721_join_implicit_cast_long.reference.j2 | 40 ------------
 .../01721_join_implicit_cast_long.sql.j2      |  1 -
 .../02273_full_sort_join.reference.j2         | 18 +----
 .../0_stateless/02273_full_sort_join.sql.j2   |  4 +-
 ...274_full_sort_join_nodistinct.reference.j2 | 34 +---------
 .../02274_full_sort_join_nodistinct.sql.j2    |  6 +-
 .../02275_full_sort_join_long.reference       | 24 ++++++-
 .../02275_full_sort_join_long.sql.j2          |  9 +--
 14 files changed, 138 insertions(+), 125 deletions(-)

diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 22aeecf4335..580b51a984d 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -537,6 +537,8 @@ Possible values:

   The first phase of a grace join reads the right table and splits it into N buckets depending on the hash value of key columns (initially, N is `grace_hash_join_initial_buckets`). This is done in a way to ensure that each bucket can be processed independently. Rows from the first bucket are added to an in-memory hash table while the others are saved to disk. If the hash table grows beyond the memory limit (e.g., as set by [`max_bytes_in_join`](/docs/en/operations/settings/query-complexity.md/#settings-max_bytes_in_join)), the number of buckets is increased and the assigned bucket for each row is recalculated. Any rows which don’t belong to the current bucket are flushed and reassigned.

+  Supports `INNER/LEFT/RIGHT/FULL ALL/ANY JOIN`.
+
 - hash

   [Hash join algorithm](https://en.wikipedia.org/wiki/Hash_join) is used. The most generic implementation that supports all combinations of kind and strictness and multiple join keys that are combined with `OR` in the `JOIN` section.
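The bucket-scattering step described above is easy to demonstrate in isolation. Below is a minimal, self-contained sketch of the first phase: hashing rows into N buckets and doubling N when the in-memory bucket outgrows a budget. All names are illustrative and this is not the ClickHouse implementation; in particular, the real code scatters whole blocks and spills non-current buckets to disk rather than keeping them in vectors.

    #include <cstdint>
    #include <functional>
    #include <vector>

    struct Row { uint64_t key; };

    // Scatter rows into num_buckets buckets by key hash (num_buckets is a power of two).
    std::vector<std::vector<Row>> scatterByHash(const std::vector<Row> & rows, size_t num_buckets)
    {
        std::vector<std::vector<Row>> buckets(num_buckets);
        for (const auto & row : rows)
            buckets[std::hash<uint64_t>{}(row.key) & (num_buckets - 1)].push_back(row);
        return buckets;
    }

    // When bucket `current` exceeds the memory budget, double the bucket count and
    // re-scatter only the current bucket: with a power-of-two count, a row in bucket i
    // can only move to i or i + old_count, so the other buckets stay valid.
    void growAndRescatter(std::vector<std::vector<Row>> & buckets, size_t current)
    {
        size_t old_count = buckets.size();
        buckets.resize(old_count * 2);
        std::vector<Row> rows = std::move(buckets[current]);
        buckets[current].clear();
        for (const auto & row : rows)
            buckets[std::hash<uint64_t>{}(row.key) & (buckets.size() - 1)].push_back(row);
    }

This is also why, in the patch below, left-side blocks read back from disk are scattered with `JoinCommon::scatterBlockByHash` again: left and right rows must land in the same bucket to be joinable.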
diff --git a/src/Interpreters/GraceHashJoin.cpp b/src/Interpreters/GraceHashJoin.cpp
index edf604bc0b4..f94453293f6 100644
--- a/src/Interpreters/GraceHashJoin.cpp
+++ b/src/Interpreters/GraceHashJoin.cpp
@@ -301,8 +301,10 @@ void GraceHashJoin::initBuckets()

 bool GraceHashJoin::isSupported(const std::shared_ptr & table_join)
 {
+
     bool is_asof = (table_join->strictness() == JoinStrictness::Asof);
-    return !is_asof && isInnerOrLeft(table_join->kind()) && table_join->oneDisjunct();
+    auto kind = table_join->kind();
+    return !is_asof && (isInner(kind) || isLeft(kind) || isRight(kind) || isFull(kind)) && table_join->oneDisjunct();
 }

 GraceHashJoin::~GraceHashJoin() = default;
@@ -322,7 +324,6 @@ bool GraceHashJoin::hasMemoryOverflow(size_t total_rows, size_t total_bytes) const
     /// One row can't be split, avoid loop
     if (total_rows < 2)
         return false;
-
     bool has_overflow = !table_join->sizeLimits().softCheck(total_rows, total_bytes);

     if (has_overflow)
@@ -494,17 +495,30 @@ bool GraceHashJoin::alwaysReturnsEmptySet() const
     return hash_join_is_empty;
 }

-IBlocksStreamPtr GraceHashJoin::getNonJoinedBlocks(const Block &, const Block &, UInt64) const
+/// Each bucket is handled by the following steps:
+/// 1. build a hash_join from the right-side blocks,
+/// 2. join the left side with the hash_join,
+/// 3. read right non-joined blocks from the hash_join.
+/// Buckets are handled one by one; a hash_join is not released before its right non-joined blocks are emitted.
+///
+/// There is a finished counter in JoiningTransform/DelayedJoinedBlocksWorkerTransform;
+/// only one processor may take the non-joined blocks from the right stream, which ensures all rows from
+/// the left stream have been emitted before that.
+IBlocksStreamPtr
+GraceHashJoin::getNonJoinedBlocks(const Block & left_sample_block_, const Block & result_sample_block_, UInt64 max_block_size_) const
 {
-    /// We do no support returning non joined blocks here.
-    /// TODO: They _should_ be reported by getDelayedBlocks instead
-    return nullptr;
+    return hash_join->getNonJoinedBlocks(left_sample_block_, result_sample_block_, max_block_size_);
 }

 class GraceHashJoin::DelayedBlocks : public IBlocksStream
 {
 public:
-    explicit DelayedBlocks(size_t current_bucket_, Buckets buckets_, InMemoryJoinPtr hash_join_, const Names & left_key_names_, const Names & right_key_names_)
+    explicit DelayedBlocks(
+        size_t current_bucket_,
+        Buckets buckets_,
+        InMemoryJoinPtr hash_join_,
+        const Names & left_key_names_,
+        const Names & right_key_names_)
         : current_bucket(current_bucket_)
         , buckets(std::move(buckets_))
         , hash_join(std::move(hash_join_))
@@ -522,12 +536,15 @@ public:

         do
         {
+            // One DelayedBlocks is shared among multiple DelayedJoinedBlocksWorkerTransforms.
+            // There is a lock inside left_reader.read().
             block = left_reader.read();
             if (!block)
             {
                 return {};
             }

+            // The block comes from left_reader; join it with the right table to get the result.
            Blocks blocks = JoinCommon::scatterBlockByHash(left_key_names, block, num_buckets);
             block = std::move(blocks[current_idx]);

diff --git a/src/Interpreters/GraceHashJoin.h b/src/Interpreters/GraceHashJoin.h
index bce04ee6b04..ce519892b0e 100644
--- a/src/Interpreters/GraceHashJoin.h
+++ b/src/Interpreters/GraceHashJoin.h
@@ -13,7 +13,6 @@

 namespace DB
 {
-
 class TableJoin;
 class HashJoin;

@@ -79,7 +78,7 @@ public:
     bool supportTotals() const override { return false; }

     IBlocksStreamPtr
-    getNonJoinedBlocks(const Block & left_sample_block, const Block & result_sample_block, UInt64 max_block_size) const override;
+    getNonJoinedBlocks(const Block & left_sample_block_, const Block & result_sample_block_, UInt64 max_block_size) const override;

     /// Open iterator over joined blocks.
     /// Must be called after all @joinBlock calls.

diff --git a/src/Processors/Transforms/JoiningTransform.cpp b/src/Processors/Transforms/JoiningTransform.cpp
index 49b90d04b81..f1ceefbf229 100644
--- a/src/Processors/Transforms/JoiningTransform.cpp
+++ b/src/Processors/Transforms/JoiningTransform.cpp
@@ -189,7 +189,6 @@ void JoiningTransform::transform(Chunk & chunk)
     }
     else
         block = readExecute(chunk);
-
     auto num_rows = block.rows();
     chunk.setColumns(block.getColumns(), num_rows);
 }
@@ -311,8 +310,16 @@ void FillingRightJoinSideTransform::work()
 }


-DelayedJoinedBlocksWorkerTransform::DelayedJoinedBlocksWorkerTransform(Block output_header)
-    : IProcessor(InputPorts{Block()}, OutputPorts{output_header})
+DelayedJoinedBlocksWorkerTransform::DelayedJoinedBlocksWorkerTransform(
+    Block left_header_,
+    Block output_header_,
+    size_t max_block_size_,
+    JoinPtr join_)
+    : IProcessor(InputPorts{Block()}, OutputPorts{output_header_})
+    , left_header(left_header_)
+    , output_header(output_header_)
+    , max_block_size(max_block_size_)
+    , join(join_)
 {
 }

@@ -365,6 +372,7 @@ IProcessor::Status DelayedJoinedBlocksWorkerTransform::prepare()
         if (!data.chunk.hasChunkInfo())
             throw Exception(ErrorCodes::LOGICAL_ERROR, "DelayedJoinedBlocksWorkerTransform must have chunk info");
+
         task = std::dynamic_pointer_cast(data.chunk.getChunkInfo());
     }
     else
@@ -387,11 +395,24 @@ void DelayedJoinedBlocksWorkerTransform::work()
     if (!task)
         return;

-    Block block = task->delayed_blocks->next();
+    Block block;
+    if (!left_delayed_stream_finished)
+    {
+        block = task->delayed_blocks->next();
+        if (!block)
+        {
+            left_delayed_stream_finished = true;
+            block = nextNonJoinedBlock();
+        }
+    }
+    else
+    {
+        block = nextNonJoinedBlock();
+    }
     if (!block)
     {
-        task.reset();
+        resetTask();
         return;
     }

@@ -400,6 +421,31 @@ void DelayedJoinedBlocksWorkerTransform::work()
     output_chunk.setColumns(block.getColumns(), rows);
 }

+void DelayedJoinedBlocksWorkerTransform::resetTask()
+{
+    task.reset();
+    left_delayed_stream_finished = false;
+    non_joined_delayed_stream = nullptr;
+}
+
+Block DelayedJoinedBlocksWorkerTransform::nextNonJoinedBlock()
+{
+    // Before reading from the non-joined stream, all blocks in the left file reader must have been joined.
+    // For example, in HashJoin, it may return invalid mismatch rows from the non-joined stream before
+    // all blocks in the left file reader have been finished, since the used flags are incomplete.
+    // Letting only one processor read from the non-joined stream seems to be an easy way to ensure this.
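+    // left_delayed_stream_finish_counter is a shared countdown among all workers: each one
+    // calls isLast() once its left stream is exhausted, and only the final caller gets `true`.
+    // A minimal sketch of such a counter (illustrative only; see JoiningTransform::FinishCounter
+    // for the real definition):
+    //
+    //   struct FinishCounter
+    //   {
+    //       explicit FinishCounter(size_t total_) : total(total_) {}
+    //       bool isLast() { return finished.fetch_add(1) + 1 >= total; }
+    //       const size_t total;
+    //       std::atomic_size_t finished{0};
+    //   };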
+    if (!non_joined_delayed_stream && task && task->left_delayed_stream_finish_counter->isLast())
+    {
+        non_joined_delayed_stream = join->getNonJoinedBlocks(left_header, output_header, max_block_size);
+    }
+
+    if (non_joined_delayed_stream)
+    {
+        return non_joined_delayed_stream->next();
+    }
+    return {};
+}

 DelayedJoinedBlocksTransform::DelayedJoinedBlocksTransform(size_t num_streams, JoinPtr join_)
     : IProcessor(InputPorts{}, OutputPorts(num_streams, Block()))
     , join(std::move(join_))
@@ -433,6 +479,9 @@ IProcessor::Status DelayedJoinedBlocksTransform::prepare()

     if (finished)
     {
+        // Since there is a memory limit, we cannot process all buckets in parallel with different
+        // DelayedJoinedBlocksWorkerTransforms, so the same task is sent to all outputs.
+        // Wait for every DelayedJoinedBlocksWorkerTransform to be idle before getting the next bucket.
         for (auto & output : outputs)
         {
             if (output.isFinished())
@@ -448,10 +497,14 @@ IProcessor::Status DelayedJoinedBlocksTransform::prepare()

     if (delayed_blocks)
     {
+        // This counter is used to ensure that only the last DelayedJoinedBlocksWorkerTransform
+        // can read right non-joined blocks from the join.
+        auto left_delayed_stream_finished_counter = std::make_shared(outputs.size());
         for (auto & output : outputs)
         {
             Chunk chunk;
-            chunk.setChunkInfo(std::make_shared(delayed_blocks));
+            auto task = std::make_shared(delayed_blocks, left_delayed_stream_finished_counter);
+            chunk.setChunkInfo(task);
             output.push(std::move(chunk));
         }
         delayed_blocks = nullptr;

diff --git a/src/Processors/Transforms/JoiningTransform.h b/src/Processors/Transforms/JoiningTransform.h
index e7edff40c56..10b413ed4e5 100644
--- a/src/Processors/Transforms/JoiningTransform.h
+++ b/src/Processors/Transforms/JoiningTransform.h
@@ -116,9 +116,14 @@ class DelayedBlocksTask : public ChunkInfo
 {
 public:

     explicit DelayedBlocksTask() : finished(true) {}
-    explicit DelayedBlocksTask(IBlocksStreamPtr delayed_blocks_) : delayed_blocks(std::move(delayed_blocks_)) {}
+    explicit DelayedBlocksTask(IBlocksStreamPtr delayed_blocks_, JoiningTransform::FinishCounterPtr left_delayed_stream_finish_counter_)
+        : delayed_blocks(std::move(delayed_blocks_))
+        , left_delayed_stream_finish_counter(left_delayed_stream_finish_counter_)
+    {
+    }

     IBlocksStreamPtr delayed_blocks = nullptr;
+    JoiningTransform::FinishCounterPtr left_delayed_stream_finish_counter = nullptr;

     bool finished = false;
 };
@@ -147,7 +152,11 @@ private:
 class DelayedJoinedBlocksWorkerTransform : public IProcessor
 {
 public:
-    explicit DelayedJoinedBlocksWorkerTransform(Block output_header);
+    explicit DelayedJoinedBlocksWorkerTransform(
+        Block left_header_,
+        Block output_header_,
+        size_t max_block_size_,
+        JoinPtr join_);

     String getName() const override { return "DelayedJoinedBlocksWorkerTransform"; }

@@ -155,10 +164,19 @@ public:
     void work() override;

 private:
+    Block left_header;
+    Block output_header;
+    size_t max_block_size;
+    JoinPtr join;
     DelayedBlocksTaskPtr task;
     Chunk output_chunk;
-    bool finished = false;
+    /// All joined and non-joined rows from the left stream are emitted; only right non-joined rows are left
+    bool left_delayed_stream_finished = false;
+    IBlocksStreamPtr non_joined_delayed_stream = nullptr;
+
+    void resetTask();
+    Block nextNonJoinedBlock();
 };

 }

diff --git a/src/QueryPipeline/QueryPipelineBuilder.cpp b/src/QueryPipeline/QueryPipelineBuilder.cpp
index dedf85e409c..ba98d725532 100644
--- a/src/QueryPipeline/QueryPipelineBuilder.cpp
+++ b/src/QueryPipeline/QueryPipelineBuilder.cpp
@@ -491,7 +491,7 @@ std::unique_ptr
QueryPipelineBuilder::joinPipelinesRightLe if (delayed_root) { // Process delayed joined blocks when all JoiningTransform are finished. - auto delayed = std::make_shared(joined_header); + auto delayed = std::make_shared(left_header, joined_header, max_block_size, join); if (delayed->getInputs().size() != 1 || delayed->getOutputs().size() != 1) throw Exception(ErrorCodes::LOGICAL_ERROR, "DelayedJoinedBlocksWorkerTransform should have one input and one output"); diff --git a/tests/queries/0_stateless/01721_join_implicit_cast_long.reference.j2 b/tests/queries/0_stateless/01721_join_implicit_cast_long.reference.j2 index e9f32087439..ae43aa7195c 100644 --- a/tests/queries/0_stateless/01721_join_implicit_cast_long.reference.j2 +++ b/tests/queries/0_stateless/01721_join_implicit_cast_long.reference.j2 @@ -1,7 +1,6 @@ {% for join_algorithm in ['hash', 'partial_merge', 'auto', 'full_sorting_merge', 'grace_hash'] -%} === {{ join_algorithm }} === = full = -{% if join_algorithm not in ['grace_hash'] -%} -4 0 196 -3 0 197 -2 0 198 @@ -17,7 +16,6 @@ 8 108 \N 9 109 \N 10 110 \N -{% endif -%} = left = 1 101 201 2 102 202 @@ -30,7 +28,6 @@ 9 109 \N 10 110 \N = right = -{% if join_algorithm not in ['grace_hash'] -%} -4 0 196 -3 0 197 -2 0 198 @@ -41,7 +38,6 @@ 3 103 203 4 104 204 5 105 205 -{% endif -%} = inner = 1 101 201 2 102 202 @@ -49,7 +45,6 @@ 4 104 204 5 105 205 = full = -{% if join_algorithm not in ['grace_hash'] -%} 0 0 -4 0 0 -3 0 0 -2 @@ -65,7 +60,6 @@ 8 8 0 9 9 0 10 10 0 -{% endif -%} = left = 1 1 1 2 2 2 @@ -78,7 +72,6 @@ 9 9 0 10 10 0 = right = -{% if join_algorithm not in ['grace_hash'] -%} 0 0 -4 0 0 -3 0 0 -2 @@ -89,7 +82,6 @@ 3 3 3 4 4 4 5 5 5 -{% endif -%} = inner = 1 1 1 2 2 2 @@ -98,7 +90,6 @@ 5 5 5 = join on = = full = -{% if join_algorithm not in ['grace_hash'] -%} 0 0 -4 196 0 0 -3 197 0 0 -2 198 @@ -114,7 +105,6 @@ 8 108 0 \N 9 109 0 \N 10 110 0 \N -{% endif -%} = left = 1 101 1 201 2 102 2 202 @@ -127,7 +117,6 @@ 9 109 0 \N 10 110 0 \N = right = -{% if join_algorithm not in ['grace_hash'] -%} 0 0 -4 196 0 0 -3 197 0 0 -2 198 @@ -138,7 +127,6 @@ 3 103 3 203 4 104 4 204 5 105 5 205 -{% endif -%} = inner = 1 101 1 201 2 102 2 202 @@ -146,7 +134,6 @@ 4 104 4 204 5 105 5 205 = full = -{% if join_algorithm not in ['grace_hash'] -%} 0 0 -4 196 0 0 -3 197 0 0 -2 198 @@ -162,7 +149,6 @@ 8 108 0 \N 9 109 0 \N 10 110 0 \N -{% endif -%} = left = 1 101 1 201 2 102 2 202 @@ -175,7 +161,6 @@ 9 109 0 \N 10 110 0 \N = right = -{% if join_algorithm not in ['grace_hash'] -%} 0 0 -4 196 0 0 -3 197 0 0 -2 198 @@ -186,7 +171,6 @@ 3 103 3 203 4 104 4 204 5 105 5 205 -{% endif -%} = inner = 1 101 1 201 2 102 2 202 @@ -196,7 +180,6 @@ = agg = 1 1 -{% if join_algorithm not in ['grace_hash'] -%} 1 1 1 @@ -205,13 +188,11 @@ 1 55 1055 0 0 -10 0 990 1 55 15 1055 1015 -{% endif -%} = types = 1 1 1 1 -{% if join_algorithm not in ['grace_hash'] -%} 1 1 1 @@ -219,11 +200,9 @@ 1 1 1 -{% endif -%} {% if join_algorithm not in ['full_sorting_merge'] -%} === join use nulls === = full = -{% if join_algorithm not in ['grace_hash'] -%} -4 \N 196 -3 \N 197 -2 \N 198 @@ -239,7 +218,6 @@ 8 108 \N 9 109 \N 10 110 \N -{% endif -%} = left = 1 101 201 2 102 202 @@ -252,7 +230,6 @@ 9 109 \N 10 110 \N = right = -{% if join_algorithm not in ['grace_hash'] -%} -4 \N 196 -3 \N 197 -2 \N 198 @@ -263,7 +240,6 @@ 3 103 203 4 104 204 5 105 205 -{% endif -%} = inner = 1 101 201 2 102 202 @@ -271,7 +247,6 @@ 4 104 204 5 105 205 = full = -{% if join_algorithm not in ['grace_hash'] -%} 1 1 1 2 2 2 3 3 3 @@ -287,7 +262,6 @@ \N \N -2 \N \N -1 
\N \N 0 -{% endif -%} = left = 1 1 1 2 2 2 @@ -300,7 +274,6 @@ 9 9 \N 10 10 \N = right = -{% if join_algorithm not in ['grace_hash'] -%} 1 1 1 2 2 2 3 3 3 @@ -311,7 +284,6 @@ \N \N -2 \N \N -1 \N \N 0 -{% endif -%} = inner = 1 1 1 2 2 2 @@ -320,7 +292,6 @@ 5 5 5 = join on = = full = -{% if join_algorithm not in ['grace_hash'] -%} 1 101 1 201 2 102 2 202 3 103 3 203 @@ -336,7 +307,6 @@ \N \N -2 198 \N \N -1 199 \N \N 0 200 -{% endif -%} = left = 1 101 1 201 2 102 2 202 @@ -349,7 +319,6 @@ 9 109 \N \N 10 110 \N \N = right = -{% if join_algorithm not in ['grace_hash'] -%} 1 101 1 201 2 102 2 202 3 103 3 203 @@ -360,7 +329,6 @@ \N \N -2 198 \N \N -1 199 \N \N 0 200 -{% endif -%} = inner = 1 101 1 201 2 102 2 202 @@ -368,7 +336,6 @@ 4 104 4 204 5 105 5 205 = full = -{% if join_algorithm not in ['grace_hash'] -%} 1 101 1 201 2 102 2 202 3 103 3 203 @@ -384,7 +351,6 @@ \N \N -2 198 \N \N -1 199 \N \N 0 200 -{% endif -%} = left = 1 101 1 201 2 102 2 202 @@ -397,7 +363,6 @@ 9 109 \N \N 10 110 \N \N = right = -{% if join_algorithm not in ['grace_hash'] -%} 1 101 1 201 2 102 2 202 3 103 3 203 @@ -408,7 +373,6 @@ \N \N -2 198 \N \N -1 199 \N \N 0 200 -{% endif -%} = inner = 1 101 1 201 2 102 2 202 @@ -418,7 +382,6 @@ = agg = 1 1 -{% if join_algorithm not in ['grace_hash'] -%} 1 1 1 @@ -427,13 +390,11 @@ 1 55 1055 1 55 15 1055 1015 \N \N -10 \N 990 -{% endif -%} = types = 1 1 1 1 -{% if join_algorithm not in ['grace_hash'] -%} 1 1 1 @@ -442,5 +403,4 @@ 1 1 {% endif -%} -{% endif -%} {% endfor -%} diff --git a/tests/queries/0_stateless/01721_join_implicit_cast_long.sql.j2 b/tests/queries/0_stateless/01721_join_implicit_cast_long.sql.j2 index f5321939f28..38f71f4c5ec 100644 --- a/tests/queries/0_stateless/01721_join_implicit_cast_long.sql.j2 +++ b/tests/queries/0_stateless/01721_join_implicit_cast_long.sql.j2 @@ -10,7 +10,6 @@ INSERT INTO t1 SELECT number as a, 100 + number as b FROM system.numbers LIMIT 1 INSERT INTO t2 SELECT number - 5 as a, 200 + number - 5 as b FROM system.numbers LIMIT 1, 10; {% macro is_implemented(join_algorithm) -%} -{% if join_algorithm == 'grace_hash' %} -- { serverError NOT_IMPLEMENTED } {% endif %} {% endmacro -%} {% for join_algorithm in ['hash', 'partial_merge', 'auto', 'full_sorting_merge', 'grace_hash'] -%} diff --git a/tests/queries/0_stateless/02273_full_sort_join.reference.j2 b/tests/queries/0_stateless/02273_full_sort_join.reference.j2 index 98bfd9d9b2b..0af4158e971 100644 --- a/tests/queries/0_stateless/02273_full_sort_join.reference.j2 +++ b/tests/queries/0_stateless/02273_full_sort_join.reference.j2 @@ -1,7 +1,7 @@ {% set table_size = 15 -%} {% for join_algorithm in ['default', 'full_sorting_merge', 'grace_hash'] -%} -- {{ join_algorithm }} -- -{% for block_size in range(1, table_size + 1) -%} +{% for block_size in range(1, table_size + 1, 4) -%} ALL INNER USING | bs = {{ block_size }} 4 0 0 5 0 0 @@ -50,7 +50,6 @@ ALL LEFT | bs = {{ block_size }} 14 14 val9 0 14 14 val9 0 ALL RIGHT | bs = {{ block_size }} -{% if join_algorithm != 'grace_hash' -%} 4 4 0 val10 5 5 0 val6 6 6 0 val8 @@ -64,7 +63,6 @@ ALL RIGHT | bs = {{ block_size }} 13 13 0 val9 14 14 0 val3 14 14 0 val7 -{% endif -%} ALL INNER | bs = {{ block_size }} | copmosite key 2 2 2 2 2 2 0 0 2 2 2 2 2 2 0 0 @@ -85,7 +83,6 @@ ALL LEFT | bs = {{ block_size }} | copmosite key 2 2 2 2 2 2 val12 0 2 2 2 2 2 2 val9 0 ALL RIGHT | bs = {{ block_size }} | copmosite key -{% if join_algorithm != 'grace_hash' -%} 0 \N 0 1 1 1 1 val2 0 \N 0 1 1 1 1 val7 0 \N 0 1 1 2 1 val5 @@ -99,7 +96,6 @@ ALL RIGHT | bs = {{ 
block_size }} | copmosite key 0 \N 0 2 2 \N 1 val9 2 2 2 2 2 2 0 val4 2 2 2 2 2 2 0 val4 -{% endif -%} ANY INNER USING | bs = {{ block_size }} 4 0 0 5 0 0 @@ -137,7 +133,6 @@ ANY LEFT | bs = {{ block_size }} 13 13 val13 0 14 14 val9 0 ANY RIGHT | bs = {{ block_size }} -{% if join_algorithm != 'grace_hash' -%} 4 4 0 val10 5 5 0 val6 6 6 0 val8 @@ -150,7 +145,6 @@ ANY RIGHT | bs = {{ block_size }} 13 13 0 val9 14 14 0 val3 14 14 0 val7 -{% endif -%} ANY INNER | bs = {{ block_size }} | copmosite key 2 2 2 2 2 2 0 0 ANY LEFT | bs = {{ block_size }} | copmosite key @@ -170,7 +164,6 @@ ANY LEFT | bs = {{ block_size }} | copmosite key 2 2 2 2 2 2 val12 0 2 2 2 2 2 2 val9 0 ANY RIGHT | bs = {{ block_size }} | copmosite key -{% if join_algorithm != 'grace_hash' -%} 0 \N 0 1 1 1 1 val2 0 \N 0 1 1 1 1 val7 0 \N 0 1 1 2 1 val5 @@ -183,7 +176,6 @@ ANY RIGHT | bs = {{ block_size }} | copmosite key 0 \N 0 2 1 \N 1 val3 0 \N 0 2 2 \N 1 val9 2 2 2 2 2 2 0 val4 -{% endif -%} {% endfor -%} ALL INNER | join_use_nulls = 1 4 4 0 0 @@ -219,7 +211,6 @@ ALL LEFT | join_use_nulls = 1 14 14 val9 0 14 14 val9 0 ALL RIGHT | join_use_nulls = 1 -{% if join_algorithm != 'grace_hash' -%} 4 4 0 val10 5 5 0 val6 6 6 0 val8 @@ -233,7 +224,6 @@ ALL RIGHT | join_use_nulls = 1 13 13 0 val9 14 14 0 val3 14 14 0 val7 -{% endif -%} ALL INNER | join_use_nulls = 1 | copmosite key 2 2 2 2 2 2 0 0 2 2 2 2 2 2 0 0 @@ -254,7 +244,6 @@ ALL LEFT | join_use_nulls = 1 | copmosite key 2 2 2 2 2 2 val12 0 2 2 2 2 2 2 val9 0 ALL RIGHT | join_use_nulls = 1 | copmosite key -{% if join_algorithm != 'grace_hash' -%} 2 2 2 2 2 2 0 val4 2 2 2 2 2 2 0 val4 \N \N \N 1 1 1 \N val2 @@ -268,7 +257,6 @@ ALL RIGHT | join_use_nulls = 1 | copmosite key \N \N \N 2 1 2 \N val8 \N \N \N 2 1 \N \N val3 \N \N \N 2 2 \N \N val9 -{% endif -%} ANY INNER | join_use_nulls = 1 4 4 0 0 5 5 0 0 @@ -296,7 +284,6 @@ ANY LEFT | join_use_nulls = 1 13 13 val13 0 14 14 val9 0 ANY RIGHT | join_use_nulls = 1 -{% if join_algorithm != 'grace_hash' -%} 4 4 0 val10 5 5 0 val6 6 6 0 val8 @@ -309,7 +296,6 @@ ANY RIGHT | join_use_nulls = 1 13 13 0 val9 14 14 0 val3 14 14 0 val7 -{% endif -%} ANY INNER | join_use_nulls = 1 | copmosite key 2 2 2 2 2 2 0 0 ANY LEFT | join_use_nulls = 1 | copmosite key @@ -329,7 +315,6 @@ ANY LEFT | join_use_nulls = 1 | copmosite key 2 2 2 2 2 2 val12 0 2 2 2 2 2 2 val9 0 ANY RIGHT | join_use_nulls = 1 | copmosite key -{% if join_algorithm != 'grace_hash' -%} 2 2 2 2 2 2 0 val4 \N \N \N 1 1 1 \N val2 \N \N \N 1 1 1 \N val7 @@ -342,5 +327,4 @@ ANY RIGHT | join_use_nulls = 1 | copmosite key \N \N \N 2 1 2 \N val8 \N \N \N 2 1 \N \N val3 \N \N \N 2 2 \N \N val9 -{% endif -%} {% endfor -%} diff --git a/tests/queries/0_stateless/02273_full_sort_join.sql.j2 b/tests/queries/0_stateless/02273_full_sort_join.sql.j2 index 43f7354017c..6b6aa53836e 100644 --- a/tests/queries/0_stateless/02273_full_sort_join.sql.j2 +++ b/tests/queries/0_stateless/02273_full_sort_join.sql.j2 @@ -28,9 +28,7 @@ INSERT INTO t2 'val' || toString(number) as s FROM numbers_mt({{ table_size - 3 }}); - {% macro is_implemented(join_algorithm) -%} -{% if join_algorithm == 'grace_hash' %} -- { serverError NOT_IMPLEMENTED } {% endif %} {% endmacro -%} {% for join_algorithm in ['default', 'full_sorting_merge', 'grace_hash'] -%} @@ -40,7 +38,7 @@ SET max_bytes_in_join = '{% if join_algorithm == 'grace_hash' %}10K{% else %}0{% SELECT '-- {{ join_algorithm }} --'; SET join_algorithm = '{{ join_algorithm }}'; -{% for block_size in range(1, table_size + 1) -%} +{% for block_size in range(1, table_size + 
1, 4) -%} {% for kind in ['ALL', 'ANY'] -%} SET max_block_size = {{ block_size }}; diff --git a/tests/queries/0_stateless/02274_full_sort_join_nodistinct.reference.j2 b/tests/queries/0_stateless/02274_full_sort_join_nodistinct.reference.j2 index 2cc6c6e85d6..df968e86e8d 100644 --- a/tests/queries/0_stateless/02274_full_sort_join_nodistinct.reference.j2 +++ b/tests/queries/0_stateless/02274_full_sort_join_nodistinct.reference.j2 @@ -1,6 +1,6 @@ {% for join_algorithm in ['full_sorting_merge', 'grace_hash'] -%} --- {{ join_algorithm }} --- -{% for block_size in range(1, 11) -%} +{% for block_size in range(1, 11, 4) -%} t1 ALL INNER JOIN t2 | bs = {{ block_size }} 1 1 4 5 1 1 4 5 @@ -108,7 +108,6 @@ t1 ALL LEFT JOIN t2 | bs = {{ block_size }} 2 2 val27 5 3 3 val3 4 t1 ALL RIGHT JOIN t2 | bs = {{ block_size }} -{% if join_algorithm != 'grace_hash' -%} 1 1 4 val11 1 1 4 val12 2 2 5 val22 @@ -161,7 +160,6 @@ t1 ALL RIGHT JOIN t2 | bs = {{ block_size }} 2 2 5 val28 2 2 5 val28 3 3 4 val3 -{% endif -%} t1 ANY INNER JOIN t2 | bs = {{ block_size }} 1 1 4 5 2 2 5 5 @@ -177,7 +175,6 @@ t1 ANY LEFT JOIN t2 | bs = {{ block_size }} 2 2 val27 5 3 3 val3 4 t1 ANY RIGHT JOIN t2 | bs = {{ block_size }} -{% if join_algorithm != 'grace_hash' -%} 1 1 4 val11 1 1 4 val12 2 2 5 val22 @@ -188,9 +185,7 @@ t1 ANY RIGHT JOIN t2 | bs = {{ block_size }} 2 2 5 val27 2 2 5 val28 3 3 4 val3 -{% endif -%} t1 ALL FULL JOIN t2 | bs = {{ block_size }} -{% if join_algorithm != 'grace_hash' -%} 1 1 4 5 1 1 4 5 2 2 5 5 @@ -243,9 +238,7 @@ t1 ALL FULL JOIN t2 | bs = {{ block_size }} 2 2 5 5 2 2 5 5 3 3 4 4 -{% endif -%} t1 ALL FULL JOIN USING t2 | bs = {{ block_size }} -{% if join_algorithm != 'grace_hash' -%} 1 4 5 1 4 5 2 5 5 @@ -298,7 +291,6 @@ t1 ALL FULL JOIN USING t2 | bs = {{ block_size }} 2 5 5 2 5 5 3 4 4 -{% endif -%} t1 ALL INNER JOIN tn2 | bs = {{ block_size }} 1 1 4 5 1 1 4 5 @@ -315,7 +307,6 @@ t1 ALL LEFT JOIN tn2 | bs = {{ block_size }} 2 \N val27 0 3 3 val3 4 t1 ALL RIGHT JOIN tn2 | bs = {{ block_size }} -{% if join_algorithm != 'grace_hash' -%} 0 \N 0 val22 0 \N 0 val23 0 \N 0 val24 @@ -326,7 +317,6 @@ t1 ALL RIGHT JOIN tn2 | bs = {{ block_size }} 1 1 4 val11 1 1 4 val12 3 3 4 val3 -{% endif -%} t1 ANY INNER JOIN tn2 | bs = {{ block_size }} 1 1 4 5 3 3 4 4 @@ -341,7 +331,6 @@ t1 ANY LEFT JOIN tn2 | bs = {{ block_size }} 2 \N val27 0 3 3 val3 4 t1 ANY RIGHT JOIN tn2 | bs = {{ block_size }} -{% if join_algorithm != 'grace_hash' -%} 0 \N 0 val22 0 \N 0 val23 0 \N 0 val24 @@ -352,9 +341,7 @@ t1 ANY RIGHT JOIN tn2 | bs = {{ block_size }} 1 1 4 val11 1 1 4 val12 3 3 4 val3 -{% endif -%} t1 ALL FULL JOIN tn2 | bs = {{ block_size }} -{% if join_algorithm != 'grace_hash' -%} 0 \N 0 5 0 \N 0 5 0 \N 0 5 @@ -372,9 +359,8 @@ t1 ALL FULL JOIN tn2 | bs = {{ block_size }} 2 \N 5 0 2 \N 5 0 3 3 4 4 -{% endif -%} -t1 ALL FULL JOIN USING tn2 | bs = {{ block_size }} {% if join_algorithm != 'grace_hash' -%} +t1 ALL FULL JOIN USING tn2 | bs = {{ block_size }} 1 4 5 1 4 5 2 5 0 @@ -409,7 +395,6 @@ tn1 ALL LEFT JOIN t2 | bs = {{ block_size }} \N 0 val26 0 \N 0 val27 0 tn1 ALL RIGHT JOIN t2 | bs = {{ block_size }} -{% if join_algorithm != 'grace_hash' -%} 1 1 4 val11 1 1 4 val12 3 3 4 val3 @@ -420,7 +405,6 @@ tn1 ALL RIGHT JOIN t2 | bs = {{ block_size }} \N 2 0 val26 \N 2 0 val27 \N 2 0 val28 -{% endif -%} tn1 ANY INNER JOIN t2 | bs = {{ block_size }} 1 1 4 5 3 3 4 4 @@ -435,7 +419,6 @@ tn1 ANY LEFT JOIN t2 | bs = {{ block_size }} \N 0 val26 0 \N 0 val27 0 tn1 ANY RIGHT JOIN t2 | bs = {{ block_size }} -{% if join_algorithm != 
'grace_hash' -%} 1 1 4 val11 1 1 4 val12 3 3 4 val3 @@ -446,9 +429,7 @@ tn1 ANY RIGHT JOIN t2 | bs = {{ block_size }} \N 2 0 val26 \N 2 0 val27 \N 2 0 val28 -{% endif -%} tn1 ALL FULL JOIN t2 | bs = {{ block_size }} -{% if join_algorithm != 'grace_hash' -%} 1 1 4 5 1 1 4 5 3 3 4 4 @@ -466,9 +447,7 @@ tn1 ALL FULL JOIN t2 | bs = {{ block_size }} \N 2 0 5 \N 2 0 5 \N 2 0 5 -{% endif -%} tn1 ALL FULL JOIN USING t2 | bs = {{ block_size }} -{% if join_algorithm != 'grace_hash' -%} 1 4 5 1 4 5 2 0 5 @@ -486,7 +465,6 @@ tn1 ALL FULL JOIN USING t2 | bs = {{ block_size }} \N 5 0 \N 5 0 \N 5 0 -{% endif -%} tn1 ALL INNER JOIN tn2 | bs = {{ block_size }} 1 1 4 5 1 1 4 5 @@ -503,7 +481,6 @@ tn1 ALL LEFT JOIN tn2 | bs = {{ block_size }} \N \N val26 0 \N \N val27 0 tn1 ALL RIGHT JOIN tn2 | bs = {{ block_size }} -{% if join_algorithm != 'grace_hash' -%} 1 1 4 val11 1 1 4 val12 3 3 4 val3 @@ -514,7 +491,6 @@ tn1 ALL RIGHT JOIN tn2 | bs = {{ block_size }} \N \N 0 val26 \N \N 0 val27 \N \N 0 val28 -{% endif -%} tn1 ANY INNER JOIN tn2 | bs = {{ block_size }} 1 1 4 5 3 3 4 4 @@ -529,7 +505,6 @@ tn1 ANY LEFT JOIN tn2 | bs = {{ block_size }} \N \N val26 0 \N \N val27 0 tn1 ANY RIGHT JOIN tn2 | bs = {{ block_size }} -{% if join_algorithm != 'grace_hash' -%} 1 1 4 val11 1 1 4 val12 3 3 4 val3 @@ -540,9 +515,7 @@ tn1 ANY RIGHT JOIN tn2 | bs = {{ block_size }} \N \N 0 val26 \N \N 0 val27 \N \N 0 val28 -{% endif -%} tn1 ALL FULL JOIN tn2 | bs = {{ block_size }} -{% if join_algorithm != 'grace_hash' -%} 1 1 4 5 1 1 4 5 3 3 4 4 @@ -560,9 +533,8 @@ tn1 ALL FULL JOIN tn2 | bs = {{ block_size }} \N \N 5 0 \N \N 5 0 \N \N 5 0 -{% endif -%} -tn1 ALL FULL JOIN USING tn2 | bs = {{ block_size }} {% if join_algorithm != 'grace_hash' -%} +tn1 ALL FULL JOIN USING tn2 | bs = {{ block_size }} 1 4 5 1 4 5 3 4 4 diff --git a/tests/queries/0_stateless/02274_full_sort_join_nodistinct.sql.j2 b/tests/queries/0_stateless/02274_full_sort_join_nodistinct.sql.j2 index 613da65421e..f8eb4b1a53e 100644 --- a/tests/queries/0_stateless/02274_full_sort_join_nodistinct.sql.j2 +++ b/tests/queries/0_stateless/02274_full_sort_join_nodistinct.sql.j2 @@ -16,7 +16,6 @@ INSERT INTO t2 VALUES (1, 'val11'), (1, 'val12'), (2, 'val22'), (2, 'val23'), (2 INSERT INTO tn2 VALUES (1, 'val11'), (1, 'val12'), (NULL, 'val22'), (NULL, 'val23'), (NULL, 'val24'), (NULL, 'val25'), (NULL, 'val26'), (NULL, 'val27'), (NULL, 'val28'), (3, 'val3'); {% macro is_implemented(join_algorithm) -%} -{% if join_algorithm == 'grace_hash' %} -- { serverError NOT_IMPLEMENTED } {% endif %} {% endmacro -%} {% for join_algorithm in ['full_sorting_merge', 'grace_hash'] -%} @@ -27,7 +26,7 @@ SET join_algorithm = '{{ join_algorithm }}'; SELECT '--- {{ join_algorithm }} ---'; -{% for block_size in range(1, 11) -%} +{% for block_size in range(1, 11, 4) -%} SET max_block_size = {{ block_size }}; {% for t1, t2 in [('t1', 't2'), ('t1', 'tn2'), ('tn1', 't2'), ('tn1', 'tn2')] -%} @@ -47,9 +46,10 @@ SELECT t1.key, t2.key, length(t1.s), t2.s FROM {{ t1 }} AS t1 {{ kind }} RIGHT J SELECT '{{ t1 }} ALL FULL JOIN {{ t2 }} | bs = {{ block_size }}'; SELECT t1.key, t2.key, length(t1.s), length(t2.s) FROM {{ t1 }} AS t1 {{ kind }} FULL JOIN {{ t2 }} AS t2 ON t1.key == t2.key ORDER BY t1.key, t2.key, length(t1.s), length(t2.s); {{ is_implemented(join_algorithm) }} +{% if join_algorithm == 'full_sorting_merge' or t2 != 'tn2' -%} SELECT '{{ t1 }} ALL FULL JOIN USING {{ t2 }} | bs = {{ block_size }}'; SELECT key, length(t1.s), length(t2.s) FROM {{ t1 }} AS t1 ALL FULL JOIN {{ t2 }} AS t2 USING (key) ORDER BY 
key, length(t1.s), length(t2.s); {{ is_implemented(join_algorithm) }} - +{% endif -%} {% endfor -%} {% endfor -%} SET max_bytes_in_join = 0; diff --git a/tests/queries/0_stateless/02275_full_sort_join_long.reference b/tests/queries/0_stateless/02275_full_sort_join_long.reference index 9ec06aea3e6..73482358d12 100644 --- a/tests/queries/0_stateless/02275_full_sort_join_long.reference +++ b/tests/queries/0_stateless/02275_full_sort_join_long.reference @@ -41,16 +41,34 @@ ALL INNER ALL LEFT 50195752660639 500353531835 10369589 10369589 1000342 ALL RIGHT -skipped +500353531835 684008812186 1367170 1000342 1367170 ALL INNER 500353531835 500353531835 1000342 1000342 1000342 ALL LEFT 50195752660639 500353531835 10369589 10369589 1000342 ALL RIGHT -skipped +500353531835 684008812186 1367170 1000342 1367170 ALL INNER 500353531835 500353531835 1000342 1000342 1000342 ALL LEFT 50195752660639 500353531835 10369589 10369589 1000342 ALL RIGHT -skipped +500353531835 684008812186 1367170 1000342 1367170 +ANY INNER +199622811843 199622811843 399458 399458 399458 +ANY LEFT +50010619420459 315220291655 10000000 10000000 630753 +ANY RIGHT +316611844056 500267124407 1000000 633172 1000000 +ANY INNER +199622811843 199622811843 399458 399458 399458 +ANY LEFT +50010619420459 315220291655 10000000 10000000 630753 +ANY RIGHT +316611844056 500267124407 1000000 633172 1000000 +ANY INNER +199622811843 199622811843 399458 399458 399458 +ANY LEFT +50010619420459 315220291655 10000000 10000000 630753 +ANY RIGHT +316611844056 500267124407 1000000 633172 1000000 diff --git a/tests/queries/0_stateless/02275_full_sort_join_long.sql.j2 b/tests/queries/0_stateless/02275_full_sort_join_long.sql.j2 index 7276e77dc16..621352f9c25 100644 --- a/tests/queries/0_stateless/02275_full_sort_join_long.sql.j2 +++ b/tests/queries/0_stateless/02275_full_sort_join_long.sql.j2 @@ -22,11 +22,6 @@ INSERT INTO t2 FROM numbers_mt({{ rtable_size }}) ; -{% macro is_implemented(join_algorithm) -%} -{% if join_algorithm == 'grace_hash' %} -- { serverError NOT_IMPLEMENTED } -SELECT 'skipped'; -{% endif -%} -{% endmacro -%} {% for join_algorithm in ['full_sorting_merge', 'grace_hash'] -%} @@ -40,7 +35,6 @@ SET join_algorithm = '{{ join_algorithm }}'; SET max_block_size = {{ block_size }}; -{% if not (kind == 'ANY' and join_algorithm == 'grace_hash') -%} SELECT '{{ kind }} INNER'; SELECT sum(t1.key), sum(t2.key), count(), countIf(t1.key != 0), countIf(t2.key != 0) FROM t1 @@ -58,9 +52,8 @@ SELECT '{{ kind }} RIGHT'; SELECT sum(t1.key), sum(t2.key), count(), countIf(t1.key != 0), countIf(t2.key != 0) FROM t1 {{ kind }} RIGHT JOIN t2 ON t1.key == t2.key -; {{ is_implemented(join_algorithm) }} +; -{% endif -%} {% endfor -%} {% endfor -%} From 91dc6a35e17417a44de46d76c0f0214911615244 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Wed, 19 Jul 2023 09:18:16 +0800 Subject: [PATCH 1785/1997] update --- src/Interpreters/GraceHashJoin.cpp | 1 - .../Transforms/JoiningTransform.cpp | 19 +++++-------------- src/Processors/Transforms/JoiningTransform.h | 14 ++++---------- src/QueryPipeline/QueryPipelineBuilder.cpp | 5 ++++- 4 files changed, 13 insertions(+), 26 deletions(-) diff --git a/src/Interpreters/GraceHashJoin.cpp b/src/Interpreters/GraceHashJoin.cpp index f94453293f6..5d72cf20740 100644 --- a/src/Interpreters/GraceHashJoin.cpp +++ b/src/Interpreters/GraceHashJoin.cpp @@ -301,7 +301,6 @@ void GraceHashJoin::initBuckets() bool GraceHashJoin::isSupported(const std::shared_ptr & table_join) { - bool is_asof = (table_join->strictness() == JoinStrictness::Asof); 
    auto kind = table_join->kind();
     return !is_asof && (isInner(kind) || isLeft(kind) || isRight(kind) || isFull(kind)) && table_join->oneDisjunct();

diff --git a/src/Processors/Transforms/JoiningTransform.cpp b/src/Processors/Transforms/JoiningTransform.cpp
index f1ceefbf229..5480fea27a4 100644
--- a/src/Processors/Transforms/JoiningTransform.cpp
+++ b/src/Processors/Transforms/JoiningTransform.cpp
@@ -311,15 +311,10 @@ void FillingRightJoinSideTransform::work()


 DelayedJoinedBlocksWorkerTransform::DelayedJoinedBlocksWorkerTransform(
-    Block left_header_,
     Block output_header_,
-    size_t max_block_size_,
-    JoinPtr join_)
+    NonJoinedStreamBuilder non_joined_stream_builder_)
     : IProcessor(InputPorts{Block()}, OutputPorts{output_header_})
-    , left_header(left_header_)
-    , output_header(output_header_)
-    , max_block_size(max_block_size_)
-    , join(join_)
+    , non_joined_stream_builder(std::move(non_joined_stream_builder_))
 {
 }

@@ -396,15 +391,12 @@ void DelayedJoinedBlocksWorkerTransform::work()
         return;

     Block block;
-    if (!left_delayed_stream_finished)
+    /// All joined and non-joined rows from the left stream are emitted; only right non-joined rows are left
+    if (!task->delayed_blocks->isFinished())
     {
         block = task->delayed_blocks->next();
-        if (!block)
-        {
-            left_delayed_stream_finished = true;
             block = nextNonJoinedBlock();
-        }
     }
     else
     {
@@ -424,7 +416,6 @@ void DelayedJoinedBlocksWorkerTransform::work()
 void DelayedJoinedBlocksWorkerTransform::resetTask()
 {
     task.reset();
-    left_delayed_stream_finished = false;
     non_joined_delayed_stream = nullptr;
 }

@@ -436,7 +427,7 @@ Block DelayedJoinedBlocksWorkerTransform::nextNonJoinedBlock()
     // Letting only one processor read from the non-joined stream seems to be an easy way to ensure this.
     if (!non_joined_delayed_stream && task && task->left_delayed_stream_finish_counter->isLast())
     {
-        non_joined_delayed_stream = join->getNonJoinedBlocks(left_header, output_header, max_block_size);
+        non_joined_delayed_stream = non_joined_stream_builder();
     }

diff --git a/src/Processors/Transforms/JoiningTransform.h b/src/Processors/Transforms/JoiningTransform.h
index 10b413ed4e5..5e7403dbbdb 100644
--- a/src/Processors/Transforms/JoiningTransform.h
+++ b/src/Processors/Transforms/JoiningTransform.h
@@ -152,11 +152,10 @@ private:
 class DelayedJoinedBlocksWorkerTransform : public IProcessor
 {
 public:
+    using NonJoinedStreamBuilder = std::function;
     explicit DelayedJoinedBlocksWorkerTransform(
-        Block left_header_,
         Block output_header_,
-        size_t max_block_size_,
-        JoinPtr join_);
+        NonJoinedStreamBuilder non_joined_stream_builder_);

     String getName() const override { return "DelayedJoinedBlocksWorkerTransform"; }

@@ -164,15 +163,10 @@ public:
     void work() override;

 private:
-    Block left_header;
-    Block output_header;
-    size_t max_block_size;
-    JoinPtr join;
     DelayedBlocksTaskPtr task;
     Chunk output_chunk;
-
-    /// All joined and non-joined rows from the left stream are emitted; only right non-joined rows are left
-    bool left_delayed_stream_finished = false;
+    /// For building a block stream to access the non-joined rows.
+ NonJoinedStreamBuilder non_joined_stream_builder; IBlocksStreamPtr non_joined_delayed_stream = nullptr; void resetTask(); diff --git a/src/QueryPipeline/QueryPipelineBuilder.cpp b/src/QueryPipeline/QueryPipelineBuilder.cpp index ba98d725532..553b18dd57b 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.cpp +++ b/src/QueryPipeline/QueryPipelineBuilder.cpp @@ -491,7 +491,10 @@ std::unique_ptr QueryPipelineBuilder::joinPipelinesRightLe if (delayed_root) { // Process delayed joined blocks when all JoiningTransform are finished. - auto delayed = std::make_shared(left_header, joined_header, max_block_size, join); + auto delayed = std::make_shared( + joined_header, + [left_header, joined_header, max_block_size, join]() + { return join->getNonJoinedBlocks(left_header, joined_header, max_block_size); }); if (delayed->getInputs().size() != 1 || delayed->getOutputs().size() != 1) throw Exception(ErrorCodes::LOGICAL_ERROR, "DelayedJoinedBlocksWorkerTransform should have one input and one output"); From 7a0de384d498497fd026283a8232fcb8ed8ea5e6 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 19 Jul 2023 16:46:51 +0000 Subject: [PATCH 1786/1997] Cosmetics --- src/Parsers/IAST.h | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/Parsers/IAST.h b/src/Parsers/IAST.h index 7a8ab36518d..f6b7f91fec8 100644 --- a/src/Parsers/IAST.h +++ b/src/Parsers/IAST.h @@ -196,20 +196,23 @@ public: bool always_quote_identifiers = false; IdentifierQuotingStyle identifier_quoting_style = IdentifierQuotingStyle::Backticks; bool show_secrets = true; /// Show secret parts of the AST (e.g. passwords, encryption keys). - - // Newline or whitespace. - char nl_or_ws; + char nl_or_ws; /// Newline or whitespace. FormatSettings(WriteBuffer & ostr_, bool one_line_, bool show_secrets_ = true) - : ostr(ostr_), one_line(one_line_), show_secrets(show_secrets_) + : ostr(ostr_) + , one_line(one_line_) + , show_secrets(show_secrets_) { nl_or_ws = one_line ? ' ' : '\n'; } FormatSettings(WriteBuffer & ostr_, const FormatSettings & other) - : ostr(ostr_), hilite(other.hilite), one_line(other.one_line), - always_quote_identifiers(other.always_quote_identifiers), identifier_quoting_style(other.identifier_quoting_style), - show_secrets(other.show_secrets) + : ostr(ostr_) + , hilite(other.hilite) + , one_line(other.one_line) + , always_quote_identifiers(other.always_quote_identifiers) + , identifier_quoting_style(other.identifier_quoting_style) + , show_secrets(other.show_secrets) { nl_or_ws = one_line ? 
' ' : '\n'; } From 35a4fabc2d66ea28b3de3d77df4cfea4b91df870 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 19 Jul 2023 17:03:04 +0000 Subject: [PATCH 1787/1997] Make IAST::FormatSettings more regular --- src/Parsers/IAST.cpp | 4 +++- src/Parsers/IAST.h | 3 +-- src/Parsers/formatAST.cpp | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/Parsers/IAST.cpp b/src/Parsers/IAST.cpp index 0138372ce89..bf4d6fc9dec 100644 --- a/src/Parsers/IAST.cpp +++ b/src/Parsers/IAST.cpp @@ -170,7 +170,9 @@ size_t IAST::checkDepthImpl(size_t max_depth) const String IAST::formatWithPossiblyHidingSensitiveData(size_t max_length, bool one_line, bool show_secrets) const { WriteBufferFromOwnString buf; - format({buf, one_line, show_secrets}); + FormatSettings settings(buf, one_line); + settings.show_secrets = show_secrets; + format(settings); return wipeSensitiveDataAndCutToLength(buf.str(), max_length); } diff --git a/src/Parsers/IAST.h b/src/Parsers/IAST.h index f6b7f91fec8..13b2e5d9867 100644 --- a/src/Parsers/IAST.h +++ b/src/Parsers/IAST.h @@ -198,10 +198,9 @@ public: bool show_secrets = true; /// Show secret parts of the AST (e.g. passwords, encryption keys). char nl_or_ws; /// Newline or whitespace. - FormatSettings(WriteBuffer & ostr_, bool one_line_, bool show_secrets_ = true) + FormatSettings(WriteBuffer & ostr_, bool one_line_) : ostr(ostr_) , one_line(one_line_) - , show_secrets(show_secrets_) { nl_or_ws = one_line ? ' ' : '\n'; } diff --git a/src/Parsers/formatAST.cpp b/src/Parsers/formatAST.cpp index aa1afe17c75..bc7faf4bd1d 100644 --- a/src/Parsers/formatAST.cpp +++ b/src/Parsers/formatAST.cpp @@ -6,9 +6,9 @@ namespace DB void formatAST(const IAST & ast, WriteBuffer & buf, bool hilite, bool one_line, bool show_secrets) { - IAST::FormatSettings settings(buf, one_line, show_secrets); + IAST::FormatSettings settings(buf, one_line); settings.hilite = hilite; - + settings.show_secrets = show_secrets; ast.format(settings); } From e5ec6a1523529db3d1b9d7f137997076c8c2adde Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 19 Jul 2023 17:21:03 +0000 Subject: [PATCH 1788/1997] Make IAST::FormatSettings more regular, pt. II --- src/Parsers/IAST.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Parsers/IAST.h b/src/Parsers/IAST.h index 13b2e5d9867..8e2971d0355 100644 --- a/src/Parsers/IAST.h +++ b/src/Parsers/IAST.h @@ -191,8 +191,8 @@ public: struct FormatSettings { WriteBuffer & ostr; - bool hilite = false; bool one_line; + bool hilite = false; bool always_quote_identifiers = false; IdentifierQuotingStyle identifier_quoting_style = IdentifierQuotingStyle::Backticks; bool show_secrets = true; /// Show secret parts of the AST (e.g. passwords, encryption keys). @@ -207,13 +207,13 @@ public: FormatSettings(WriteBuffer & ostr_, const FormatSettings & other) : ostr(ostr_) - , hilite(other.hilite) , one_line(other.one_line) + , hilite(other.hilite) , always_quote_identifiers(other.always_quote_identifiers) , identifier_quoting_style(other.identifier_quoting_style) , show_secrets(other.show_secrets) + , nl_or_ws(other.nl_or_ws) { - nl_or_ws = one_line ? ' ' : '\n'; } void writeIdentifier(const String & name) const; From 25ddcc256b04de71e84935cb60a53190c114a494 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 19 Jul 2023 17:31:53 +0000 Subject: [PATCH 1789/1997] Make IAST::FormatSettings more regular, pt. 
III --- src/Parsers/IAST.h | 22 ++++++++++++++----- src/Parsers/formatAST.cpp | 3 +-- src/Parsers/getInsertQuery.cpp | 4 +--- src/Parsers/tests/gtest_format_hiliting.cpp | 3 +-- src/Processors/QueryPlan/ReadFromRemote.cpp | 4 +--- .../MeiliSearch/StorageMeiliSearch.cpp | 7 +++--- src/Storages/StorageDistributed.cpp | 6 ++--- src/Storages/StorageReplicatedMergeTree.cpp | 3 +-- .../transformQueryForExternalDatabase.cpp | 7 +++--- 9 files changed, 31 insertions(+), 28 deletions(-) diff --git a/src/Parsers/IAST.h b/src/Parsers/IAST.h index 8e2971d0355..d217876459f 100644 --- a/src/Parsers/IAST.h +++ b/src/Parsers/IAST.h @@ -192,17 +192,27 @@ public: { WriteBuffer & ostr; bool one_line; - bool hilite = false; - bool always_quote_identifiers = false; - IdentifierQuotingStyle identifier_quoting_style = IdentifierQuotingStyle::Backticks; - bool show_secrets = true; /// Show secret parts of the AST (e.g. passwords, encryption keys). + bool hilite; + bool always_quote_identifiers; + IdentifierQuotingStyle identifier_quoting_style; + bool show_secrets; /// Show secret parts of the AST (e.g. passwords, encryption keys). char nl_or_ws; /// Newline or whitespace. - FormatSettings(WriteBuffer & ostr_, bool one_line_) + explicit FormatSettings( + WriteBuffer & ostr_, + bool one_line_, + bool hilite_ = false, + bool always_quote_identifiers_ = false, + IdentifierQuotingStyle identifier_quoting_style_ = IdentifierQuotingStyle::Backticks, + bool show_secrets_ = true) : ostr(ostr_) , one_line(one_line_) + , hilite(hilite_) + , always_quote_identifiers(always_quote_identifiers_) + , identifier_quoting_style(identifier_quoting_style_) + , show_secrets(show_secrets_) + , nl_or_ws(one_line ? ' ' : '\n') { - nl_or_ws = one_line ? ' ' : '\n'; } FormatSettings(WriteBuffer & ostr_, const FormatSettings & other) diff --git a/src/Parsers/formatAST.cpp b/src/Parsers/formatAST.cpp index bc7faf4bd1d..ae2c4a6fcad 100644 --- a/src/Parsers/formatAST.cpp +++ b/src/Parsers/formatAST.cpp @@ -6,8 +6,7 @@ namespace DB void formatAST(const IAST & ast, WriteBuffer & buf, bool hilite, bool one_line, bool show_secrets) { - IAST::FormatSettings settings(buf, one_line); - settings.hilite = hilite; + IAST::FormatSettings settings(buf, one_line, hilite); settings.show_secrets = show_secrets; ast.format(settings); } diff --git a/src/Parsers/getInsertQuery.cpp b/src/Parsers/getInsertQuery.cpp index 6f52056dfe2..9d111b147bd 100644 --- a/src/Parsers/getInsertQuery.cpp +++ b/src/Parsers/getInsertQuery.cpp @@ -19,9 +19,7 @@ std::string getInsertQuery(const std::string & db_name, const std::string & tabl query.columns->children.emplace_back(std::make_shared(column.name)); WriteBufferFromOwnString buf; - IAST::FormatSettings settings(buf, true); - settings.always_quote_identifiers = true; - settings.identifier_quoting_style = quoting; + IAST::FormatSettings settings(buf, /*one_line*/ true, /*hilite*/ false, /*always_quote_identifiers*/ true, /*identifier_quoting_style*/ quoting); query.IAST::format(settings); return buf.str(); } diff --git a/src/Parsers/tests/gtest_format_hiliting.cpp b/src/Parsers/tests/gtest_format_hiliting.cpp index d0ce8f2c897..a4c3ed86182 100644 --- a/src/Parsers/tests/gtest_format_hiliting.cpp +++ b/src/Parsers/tests/gtest_format_hiliting.cpp @@ -51,8 +51,7 @@ void compare(const String & expected, const String & query) ASTPtr ast = parseQuery(parser, query, 0, 0); WriteBufferFromOwnString write_buffer; - IAST::FormatSettings settings(write_buffer, true); - settings.hilite = true; + IAST::FormatSettings 
settings(write_buffer, true, true); ast->format(settings); ASSERT_PRED2(HiliteComparator::are_equal_with_hilites_removed, expected, write_buffer.str()); diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index ed740e3e242..5cc13f45df4 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -86,9 +86,7 @@ static String formattedAST(const ASTPtr & ast) return {}; WriteBufferFromOwnString buf; - IAST::FormatSettings ast_format_settings(buf, /*one_line*/ true); - ast_format_settings.hilite = false; - ast_format_settings.always_quote_identifiers = true; + IAST::FormatSettings ast_format_settings(buf, /*one_line*/ true, /*hilite*/ false, /*always_quote_identifiers*/ true); ast->format(ast_format_settings); return buf.str(); } diff --git a/src/Storages/MeiliSearch/StorageMeiliSearch.cpp b/src/Storages/MeiliSearch/StorageMeiliSearch.cpp index 5d77fc080a4..aa8b437263a 100644 --- a/src/Storages/MeiliSearch/StorageMeiliSearch.cpp +++ b/src/Storages/MeiliSearch/StorageMeiliSearch.cpp @@ -62,9 +62,10 @@ ColumnsDescription StorageMeiliSearch::getTableStructureFromData(const MeiliSear String convertASTtoStr(ASTPtr ptr) { WriteBufferFromOwnString out; - IAST::FormatSettings settings(out, true); - settings.identifier_quoting_style = IdentifierQuotingStyle::BackticksMySQL; - settings.always_quote_identifiers = IdentifierQuotingStyle::BackticksMySQL != IdentifierQuotingStyle::None; + IAST::FormatSettings settings( + out, /*one_line*/ true, /*hilite*/ false, + /*always_quote_identifiers*/ IdentifierQuotingStyle::BackticksMySQL != IdentifierQuotingStyle::None, + /*identifier_quoting_style*/ IdentifierQuotingStyle::BackticksMySQL); ptr->format(settings); return out.str(); } diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index c46192ab43b..e02d7f32b98 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -906,8 +906,7 @@ std::optional StorageDistributed::distributedWriteBetweenDistribu String new_query_str; { WriteBufferFromOwnString buf; - IAST::FormatSettings ast_format_settings(buf, /*one_line*/ true); - ast_format_settings.always_quote_identifiers = true; + IAST::FormatSettings ast_format_settings(buf, /*one_line*/ true, /*hilite*/ false, /*always_quote_identifiers_=*/ true); new_query->IAST::format(ast_format_settings); new_query_str = buf.str(); } @@ -968,8 +967,7 @@ std::optional StorageDistributed::distributedWriteFromClusterStor String new_query_str; { WriteBufferFromOwnString buf; - IAST::FormatSettings ast_format_settings(buf, /*one_line*/ true); - ast_format_settings.always_quote_identifiers = true; + IAST::FormatSettings ast_format_settings(buf, /*one_line*/ true, /*hilite*/ false, /*always_quote_identifiers*/ true); new_query->IAST::format(ast_format_settings); new_query_str = buf.str(); } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 5f20c497cb8..52f478d7729 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -5074,8 +5074,7 @@ std::optional StorageReplicatedMergeTree::distributedWriteFromClu String query_str; { WriteBufferFromOwnString buf; - IAST::FormatSettings ast_format_settings(buf, /*one_line*/ true); - ast_format_settings.always_quote_identifiers = true; + IAST::FormatSettings ast_format_settings(buf, /*one_line*/ true, /*hilite*/ false, /*always_quote_identifiers*/ true); 
query.IAST::format(ast_format_settings); query_str = buf.str(); } diff --git a/src/Storages/transformQueryForExternalDatabase.cpp b/src/Storages/transformQueryForExternalDatabase.cpp index 548b55749d7..375510e62bf 100644 --- a/src/Storages/transformQueryForExternalDatabase.cpp +++ b/src/Storages/transformQueryForExternalDatabase.cpp @@ -334,9 +334,10 @@ String transformQueryForExternalDatabaseImpl( dropAliases(select_ptr); WriteBufferFromOwnString out; - IAST::FormatSettings settings(out, true); - settings.identifier_quoting_style = identifier_quoting_style; - settings.always_quote_identifiers = identifier_quoting_style != IdentifierQuotingStyle::None; + IAST::FormatSettings settings( + out, /*one_line*/ true, /*hilite*/ false, + /*always_quote_identifiers*/ identifier_quoting_style != IdentifierQuotingStyle::None, + /*identifier_quoting_style*/ identifier_quoting_style); select->format(settings); From bd761c365a95e97f1a92638f145353d54a4f2db5 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 19 Jul 2023 18:02:09 +0000 Subject: [PATCH 1790/1997] Make serializeAST() more regular --- src/Disks/getOrCreateDiskFromAST.cpp | 2 +- src/Interpreters/Cache/QueryCache.h | 2 +- src/Interpreters/ThreadStatusExt.cpp | 2 +- src/Parsers/formatAST.cpp | 4 ++-- src/Parsers/formatAST.h | 9 +++++---- src/Parsers/tests/gtest_Parser.cpp | 10 ++++++++-- src/Parsers/tests/gtest_dictionary_parser.cpp | 10 +++++----- .../Transforms/CheckConstraintsTransform.cpp | 4 ++-- src/Storages/ConstraintsDescription.cpp | 2 +- src/Storages/IndicesDescription.cpp | 2 +- src/Storages/ProjectionsDescription.cpp | 2 +- 11 files changed, 28 insertions(+), 21 deletions(-) diff --git a/src/Disks/getOrCreateDiskFromAST.cpp b/src/Disks/getOrCreateDiskFromAST.cpp index 81d5b7372f3..a9a0e972bd1 100644 --- a/src/Disks/getOrCreateDiskFromAST.cpp +++ b/src/Disks/getOrCreateDiskFromAST.cpp @@ -32,7 +32,7 @@ namespace /// We need a unique name for a created custom disk, but it needs to be the same /// after table is reattached or server is restarted, so take a hash of the disk /// configuration serialized ast as a disk name suffix. - auto disk_setting_string = serializeAST(function, true); + auto disk_setting_string = serializeAST(function); disk_name = DiskSelector::TMP_INTERNAL_DISK_PREFIX + toString(sipHash128(disk_setting_string.data(), disk_setting_string.size())); } diff --git a/src/Interpreters/Cache/QueryCache.h b/src/Interpreters/Cache/QueryCache.h index a67adcc86c9..c24b09c8e46 100644 --- a/src/Interpreters/Cache/QueryCache.h +++ b/src/Interpreters/Cache/QueryCache.h @@ -60,7 +60,7 @@ public: /// The SELECT query as plain string, displayed in SYSTEM.QUERY_CACHE. Stored explicitly, i.e. not constructed from the AST, for the /// sole reason that QueryCache-related SETTINGS are pruned from the AST (see removeQueryCacheSettings()) which will look ugly in - /// the SYSTEM.QUERY_CACHE. + /// SYSTEM.QUERY_CACHE. const String query_string; /// Ctor to construct a Key for writing into query cache. 
diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index 5acfe500b1d..398bea26b87 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -518,7 +518,7 @@ void ThreadStatus::logToQueryThreadLog(QueryThreadLog & thread_log, const String static String getCleanQueryAst(const ASTPtr q, ContextPtr context) { - String res = serializeAST(*q, true); + String res = serializeAST(*q); if (auto * masker = SensitiveDataMasker::getInstance()) masker->wipeSensitiveData(res); diff --git a/src/Parsers/formatAST.cpp b/src/Parsers/formatAST.cpp index ae2c4a6fcad..9315279eae6 100644 --- a/src/Parsers/formatAST.cpp +++ b/src/Parsers/formatAST.cpp @@ -11,10 +11,10 @@ void formatAST(const IAST & ast, WriteBuffer & buf, bool hilite, bool one_line, ast.format(settings); } -String serializeAST(const IAST & ast, bool one_line) +String serializeAST(const IAST & ast) { WriteBufferFromOwnString buf; - formatAST(ast, buf, false, one_line); + formatAST(ast, buf, false, true); return buf.str(); } diff --git a/src/Parsers/formatAST.h b/src/Parsers/formatAST.h index ebd284fc18a..dd72a59b4a2 100644 --- a/src/Parsers/formatAST.h +++ b/src/Parsers/formatAST.h @@ -8,12 +8,13 @@ namespace DB class WriteBuffer; -/** Takes a syntax tree and turns it back into text. - * In case of INSERT query, the data will be missing. - */ +/// Takes a syntax tree and turns it into text. +/// Intended for pretty-printing (multi-line + hiliting). +/// In case of INSERT query, the data will be missing. void formatAST(const IAST & ast, WriteBuffer & buf, bool hilite = true, bool one_line = false, bool show_secrets = true); -String serializeAST(const IAST & ast, bool one_line = true); +/// Like formatAST() but intended for serialization w/o pretty-printing (single-line, no hiliting). 
+String serializeAST(const IAST & ast); inline WriteBuffer & operator<<(WriteBuffer & buf, const IAST & ast) { diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 2795de64b1d..a53de155355 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -64,7 +64,10 @@ TEST_P(ParserTest, parseQuery) if (std::string("CREATE USER or ALTER USER query") != parser->getName() && std::string("ATTACH access entity query") != parser->getName()) { - EXPECT_EQ(expected_ast, serializeAST(*ast->clone(), false)); + WriteBufferFromOwnString buf; + formatAST(*ast->clone(), buf, false, false); + String formatted_ast = buf.str(); + EXPECT_EQ(expected_ast, formatted_ast); } else { @@ -75,7 +78,10 @@ TEST_P(ParserTest, parseQuery) } else { - EXPECT_TRUE(std::regex_match(serializeAST(*ast->clone(), false), std::regex(expected_ast))); + WriteBufferFromOwnString buf; + formatAST(*ast->clone(), buf, false, false); + String formatted_ast = buf.str(); + EXPECT_TRUE(std::regex_match(formatted_ast, std::regex(expected_ast))); } } } diff --git a/src/Parsers/tests/gtest_dictionary_parser.cpp b/src/Parsers/tests/gtest_dictionary_parser.cpp index 22484727ea2..c0a975f7a38 100644 --- a/src/Parsers/tests/gtest_dictionary_parser.cpp +++ b/src/Parsers/tests/gtest_dictionary_parser.cpp @@ -155,7 +155,7 @@ TEST(ParserDictionaryDDL, AttributesWithMultipleProperties) EXPECT_EQ(attributes_children[0]->as()->expression, nullptr); EXPECT_EQ(attributes_children[1]->as()->expression, nullptr); - EXPECT_EQ(serializeAST(*attributes_children[2]->as()->expression, true), "(rand() % 100) * 77"); + EXPECT_EQ(serializeAST(*attributes_children[2]->as()->expression), "(rand() % 100) * 77"); EXPECT_EQ(attributes_children[0]->as()->hierarchical, false); EXPECT_EQ(attributes_children[1]->as()->hierarchical, true); @@ -201,7 +201,7 @@ TEST(ParserDictionaryDDL, CustomAttributePropertiesOrder) EXPECT_EQ(attributes_children[0]->as()->expression, nullptr); EXPECT_EQ(attributes_children[1]->as()->expression, nullptr); - EXPECT_EQ(serializeAST(*attributes_children[2]->as()->expression, true), "(rand() % 100) * 77"); + EXPECT_EQ(serializeAST(*attributes_children[2]->as()->expression), "(rand() % 100) * 77"); EXPECT_EQ(attributes_children[0]->as()->hierarchical, false); EXPECT_EQ(attributes_children[1]->as()->hierarchical, true); @@ -288,7 +288,7 @@ TEST(ParserDictionaryDDL, Formatting) ParserCreateDictionaryQuery parser; ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0); ASTCreateQuery * create = ast->as(); - auto str = serializeAST(*create, true); + auto str = serializeAST(*create); EXPECT_EQ(str, "CREATE DICTIONARY test.dict5 (`key_column1` UInt64 DEFAULT 1 HIERARCHICAL INJECTIVE, `key_column2` String DEFAULT '', `second_column` UInt8 EXPRESSION intDiv(50, rand() % 1000), `third_column` UInt8) PRIMARY KEY key_column1, key_column2 SOURCE(MYSQL(HOST 'localhost' PORT 9000 USER 'default' REPLICA (HOST '127.0.0.1' PRIORITY 1) PASSWORD '')) LIFETIME(MIN 1 MAX 10) LAYOUT(CACHE(SIZE_IN_CELLS 50)) RANGE(MIN second_column MAX third_column)"); } @@ -303,7 +303,7 @@ TEST(ParserDictionaryDDL, ParseDropQuery) EXPECT_TRUE(drop1->is_dictionary); EXPECT_EQ(drop1->getDatabase(), "test"); EXPECT_EQ(drop1->getTable(), "dict1"); - auto str1 = serializeAST(*drop1, true); + auto str1 = serializeAST(*drop1); EXPECT_EQ(input1, str1); String input2 = "DROP DICTIONARY IF EXISTS dict2"; @@ -314,7 +314,7 @@ TEST(ParserDictionaryDDL, ParseDropQuery) EXPECT_TRUE(drop2->is_dictionary); 
EXPECT_EQ(drop2->getDatabase(), ""); EXPECT_EQ(drop2->getTable(), "dict2"); - auto str2 = serializeAST(*drop2, true); + auto str2 = serializeAST(*drop2); EXPECT_EQ(input2, str2); } diff --git a/src/Processors/Transforms/CheckConstraintsTransform.cpp b/src/Processors/Transforms/CheckConstraintsTransform.cpp index 88f02a3926f..3a6595ea4fb 100644 --- a/src/Processors/Transforms/CheckConstraintsTransform.cpp +++ b/src/Processors/Transforms/CheckConstraintsTransform.cpp @@ -73,7 +73,7 @@ void CheckConstraintsTransform::onConsume(Chunk chunk) "Constraint expression returns nullable column that contains null value", backQuote(constraint_ptr->name), table_id.getNameForLogs(), - serializeAST(*(constraint_ptr->expr), true)); + serializeAST(*(constraint_ptr->expr))); result_column = nested_column; } @@ -116,7 +116,7 @@ void CheckConstraintsTransform::onConsume(Chunk chunk) backQuote(constraint_ptr->name), table_id.getNameForLogs(), rows_written + row_idx + 1, - serializeAST(*(constraint_ptr->expr), true), + serializeAST(*(constraint_ptr->expr)), column_values_msg); } } diff --git a/src/Storages/ConstraintsDescription.cpp b/src/Storages/ConstraintsDescription.cpp index db37ac7c4c3..249ed8be428 100644 --- a/src/Storages/ConstraintsDescription.cpp +++ b/src/Storages/ConstraintsDescription.cpp @@ -35,7 +35,7 @@ String ConstraintsDescription::toString() const for (const auto & constraint : constraints) list.children.push_back(constraint); - return serializeAST(list, true); + return serializeAST(list); } ConstraintsDescription ConstraintsDescription::parse(const String & str) diff --git a/src/Storages/IndicesDescription.cpp b/src/Storages/IndicesDescription.cpp index c7aeaf8e4ef..06518a52c61 100644 --- a/src/Storages/IndicesDescription.cpp +++ b/src/Storages/IndicesDescription.cpp @@ -151,7 +151,7 @@ String IndicesDescription::toString() const for (const auto & index : *this) list.children.push_back(index.definition_ast); - return serializeAST(list, true); + return serializeAST(list); } diff --git a/src/Storages/ProjectionsDescription.cpp b/src/Storages/ProjectionsDescription.cpp index 73fb279d51c..aecf0ac6d00 100644 --- a/src/Storages/ProjectionsDescription.cpp +++ b/src/Storages/ProjectionsDescription.cpp @@ -324,7 +324,7 @@ String ProjectionsDescription::toString() const for (const auto & projection : projections) list.children.push_back(projection.definition_ast); - return serializeAST(list, true); + return serializeAST(list); } ProjectionsDescription ProjectionsDescription::parse(const String & str, const ColumnsDescription & columns, ContextPtr query_context) From a24bf14450bdb1dad881330ca168566bf7e1f82f Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 20 Jul 2023 10:44:44 +0000 Subject: [PATCH 1791/1997] Use correct ZXID --- src/Coordination/KeeperStateMachine.cpp | 2 +- utils/keeper-data-dumper/main.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index 5c84f23fc60..a89b608aa69 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -390,7 +390,7 @@ bool KeeperStateMachine::apply_snapshot(nuraft::snapshot & s) /// maybe some logs were preprocessed with log idx larger than the snapshot idx /// we have to apply them to the new storage - storage->applyUncommittedState(*snapshot_deserialization_result.storage, s.get_last_log_idx()); + storage->applyUncommittedState(*snapshot_deserialization_result.storage, 
snapshot_deserialization_result.storage->getZXID());
     storage = std::move(snapshot_deserialization_result.storage);
     latest_snapshot_meta = snapshot_deserialization_result.snapshot_meta;
     cluster_config = snapshot_deserialization_result.cluster_config;
diff --git a/utils/keeper-data-dumper/main.cpp b/utils/keeper-data-dumper/main.cpp
index 51a09b676dc..39d9200f913 100644
--- a/utils/keeper-data-dumper/main.cpp
+++ b/utils/keeper-data-dumper/main.cpp
@@ -65,7 +65,7 @@ int main(int argc, char *argv[])
     CoordinationSettingsPtr settings = std::make_shared();
     KeeperContextPtr keeper_context = std::make_shared(true);
     keeper_context->setLogDisk(std::make_shared("LogDisk", argv[2]));
-    keeper_context->setSnapshotDisk(std::make_shared("LogDisk", argv[1]));
+    keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", argv[1]));
     auto state_machine = std::make_shared(queue, snapshots_queue, settings, keeper_context, nullptr);
     state_machine->init();

From 27921a5d8f4218a92dafb6fdc145bf3891710e3a Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Thu, 20 Jul 2023 10:48:39 +0000
Subject: [PATCH 1792/1997] Docs: Add another reason for integer promotion
 rules in ClickHouse

---
 docs/en/sql-reference/functions/arithmetic-functions.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/sql-reference/functions/arithmetic-functions.md b/docs/en/sql-reference/functions/arithmetic-functions.md
index 64fae0e82f0..054c59d5778 100644
--- a/docs/en/sql-reference/functions/arithmetic-functions.md
+++ b/docs/en/sql-reference/functions/arithmetic-functions.md
@@ -6,7 +6,7 @@ sidebar_label: Arithmetic

 # Arithmetic Functions

-The result type of all arithmetic functions is the smallest type which can represent all possible results. Size promotion happens for integers up to 32 bit, e.g. `UInt8 + UInt16 = UInt32`. If one of the integers has 64 or more bits, the result is of the same type as the bigger of the input integers, e.g. `UInt16 + UInt128 = UInt128`. While this introduces a risk of overflows around the value range boundary, it ensures that calculations are performed quickly using the maximum native integer width of 64 bit.
+The result type of all arithmetic functions is the smallest type which can represent all possible results. Size promotion happens for integers up to 32 bit, e.g. `UInt8 + UInt16 = UInt32`. If one of the integers has 64 or more bits, the result is of the same type as the bigger of the input integers, e.g. `UInt16 + UInt128 = UInt128`. While this introduces a risk of overflows around the value range boundary, it ensures that calculations are performed quickly using the maximum native integer width of 64 bit. Also, this behavior guarantees compatibility with many other databases which provide 64 bit integers (BIGINT) as the biggest integer type.

 The result of addition or multiplication of two integers is unsigned unless one of the integers is signed.
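
As a quick aside on the promotion rule described in the docs change above: it can be modeled in a few lines. The following standalone C++ sketch is an illustration written for this note, not ClickHouse code; `result_width` and its bit-width encoding are invented for the example.

    // Toy model of the documented rule: inputs up to 32 bits promote to the
    // next width; a 64-bit-or-wider input keeps the larger input's width.
    #include <algorithm>
    #include <cstdint>
    #include <iostream>

    uint32_t result_width(uint32_t lhs_bits, uint32_t rhs_bits)
    {
        uint32_t max_bits = std::max(lhs_bits, rhs_bits);
        return max_bits >= 64 ? max_bits : max_bits * 2;
    }

    int main()
    {
        std::cout << result_width(8, 16) << '\n';   // 32, matching UInt8 + UInt16 = UInt32
        std::cout << result_width(16, 128) << '\n'; // 128, matching UInt16 + UInt128 = UInt128
    }

For the cases the doc paragraph lists, this toy model reproduces the documented result widths.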
From e74acda53ec3a7f8a536eb56e4a939935d10f8e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Thu, 20 Jul 2023 12:54:42 +0200 Subject: [PATCH 1793/1997] PRQL integration (#50686) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Added prql-lib * Add PRQL parser * Extend stateless tests * Add unit tests for `ParserPRQL` --------- Co-authored-by: Ubuntu Co-authored-by: Ubuntu Co-authored-by: Александр Нам <47687537+seshWCS@users.noreply.github.com> --- rust/CMakeLists.txt | 1 + rust/prql/CMakeLists.txt | 3 + rust/prql/Cargo.lock | 569 ++++++++++++++++++ rust/prql/Cargo.toml | 20 + rust/prql/include/prql.h | 18 + rust/prql/src/lib.rs | 56 ++ src/Client/ClientBase.cpp | 4 + src/Common/config.h.in | 1 + src/Core/SettingsEnums.cpp | 4 +- src/Core/SettingsEnums.h | 1 + src/Interpreters/executeQuery.cpp | 7 +- src/Parsers/CMakeLists.txt | 4 + src/Parsers/PRQL/ParserPRQLQuery.cpp | 86 +++ src/Parsers/PRQL/ParserPRQLQuery.h | 27 + src/Parsers/tests/gtest_Parser.cpp | 20 + src/configure_config.cmake | 3 + .../queries/0_stateless/02766_prql.reference | 19 + tests/queries/0_stateless/02766_prql.sh | 58 ++ 18 files changed, 899 insertions(+), 2 deletions(-) create mode 100644 rust/prql/CMakeLists.txt create mode 100644 rust/prql/Cargo.lock create mode 100644 rust/prql/Cargo.toml create mode 100644 rust/prql/include/prql.h create mode 100644 rust/prql/src/lib.rs create mode 100644 src/Parsers/PRQL/ParserPRQLQuery.cpp create mode 100644 src/Parsers/PRQL/ParserPRQLQuery.h create mode 100644 tests/queries/0_stateless/02766_prql.reference create mode 100755 tests/queries/0_stateless/02766_prql.sh diff --git a/rust/CMakeLists.txt b/rust/CMakeLists.txt index 6700ead9786..41451fe0a1e 100644 --- a/rust/CMakeLists.txt +++ b/rust/CMakeLists.txt @@ -88,3 +88,4 @@ endfunction() add_rust_subdirectory (BLAKE3) add_rust_subdirectory (skim) +add_rust_subdirectory (prql) diff --git a/rust/prql/CMakeLists.txt b/rust/prql/CMakeLists.txt new file mode 100644 index 00000000000..65109d19a81 --- /dev/null +++ b/rust/prql/CMakeLists.txt @@ -0,0 +1,3 @@ +clickhouse_import_crate(MANIFEST_PATH Cargo.toml) +target_include_directories(_ch_rust_prql INTERFACE include) +add_library(ch_rust::prql ALIAS _ch_rust_prql) diff --git a/rust/prql/Cargo.lock b/rust/prql/Cargo.lock new file mode 100644 index 00000000000..da94e4ca852 --- /dev/null +++ b/rust/prql/Cargo.lock @@ -0,0 +1,569 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "_ch_rust_prql" +version = "0.1.0" +dependencies = [ + "prql-compiler", + "serde_json", +] + +[[package]] +name = "addr2line" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4fa78e18c64fce05e902adecd7a5eed15a5e0a3439f7b0e169f0252214865e3" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "ahash" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" +dependencies = [ + "getrandom", + "once_cell", + "version_check", +] + +[[package]] +name = "aho-corasick" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41" +dependencies = [ + "memchr", +] + +[[package]] +name = "anyhow" +version = "1.0.71" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c7d0618f0e0b7e8ff11427422b64564d5fb0be1940354bfe2e0529b18a9d9b8" +dependencies = [ + "backtrace", +] + +[[package]] +name = "ariadne" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "367fd0ad87307588d087544707bc5fbf4805ded96c7db922b70d368fa1cb5702" +dependencies = [ + "unicode-width", + "yansi", +] + +[[package]] +name = "backtrace" +version = "0.3.68" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4319208da049c43661739c5fade2ba182f09d1dc2299b32298d3a31692b17e12" +dependencies = [ + "addr2line", + "cc", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", +] + +[[package]] +name = "cc" +version = "1.0.79" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "chumsky" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23170228b96236b5a7299057ac284a321457700bc8c41a4476052f0f4ba5349d" +dependencies = [ + "hashbrown 0.12.3", + "stacker", +] + +[[package]] +name = "csv" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "626ae34994d3d8d668f4269922248239db4ae42d538b14c398b74a52208e8086" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" +dependencies = [ + "memchr", +] + +[[package]] +name = "either" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" + +[[package]] +name = "enum-as-inner" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9720bba047d567ffc8a3cba48bf19126600e249ab7f128e9233e6376976a116" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "equivalent" +version = "1.0.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "88bffebc5d80432c9b140ee17875ff173a8ab62faad5b257da912bd2f6c1c0a1" + +[[package]] +name = "getrandom" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "gimli" +version = "0.27.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c80984affa11d98d1b88b66ac8853f143217b399d3c74116778ff8fdb4ed2e" + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +dependencies = [ + "ahash", +] + +[[package]] +name = "hashbrown" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" + +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + +[[package]] +name = "indexmap" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d" +dependencies = [ + "equivalent", + "hashbrown 0.14.0", +] + +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b02a5381cc465bd3041d84623d0fa3b66738b52b8e2fc3bab8ad63ab032f4a" + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.147" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" + +[[package]] +name = "log" +version = "0.4.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4" + +[[package]] +name = "memchr" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" + +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "miniz_oxide" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" +dependencies = [ + "adler", +] + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "object" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8bda667d9f2b5051b8833f59f3bf748b28ef54f850f4fcb389a252aa383866d1" +dependencies = [ + "memchr", +] + +[[package]] +name = "once_cell" +version = "1.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" + +[[package]] +name = "proc-macro2" +version = "1.0.63" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b368fba921b0dce7e60f5e04ec15e565b3303972b42bcfde1d0713b881959eb" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "prql-compiler" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c99b52154002ac7f286dd2293c2f8d4e30526c1d396b14deef5ada1deef3c9ff" +dependencies = [ + "anyhow", + "ariadne", + "chumsky", + "csv", + "enum-as-inner", + "itertools", + "lazy_static", + "log", + "once_cell", + "regex", + "semver", + "serde", + "serde_json", + "serde_yaml", + "sqlformat", + "sqlparser", + "strum", + "strum_macros", +] + +[[package]] +name = "psm" +version = "0.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874" +dependencies = [ + "cc", +] + +[[package]] +name = "quote" +version = "1.0.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "573015e8ab27661678357f27dc26460738fd2b6c86e46f386fde94cb5d913105" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89089e897c013b3deb627116ae56a6955a72b8bed395c9526af31c9fe528b484" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa250384981ea14565685dea16a9ccc4d1c541a13f82b9c168572264d1df8c56" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ab07dc67230e4a4718e70fd5c20055a4334b121f1f9db8fe63ef39ce9b8c846" + +[[package]] +name = "rustc-demangle" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" + +[[package]] +name = "rustversion" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc31bd9b61a32c31f9650d18add92aa83a49ba979c143eefd27fe7177b05bd5f" + +[[package]] +name = "ryu" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe232bdf6be8c8de797b22184ee71118d63780ea42ac85b61d1baa6d3b782ae9" + +[[package]] +name = "semver" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bebd363326d05ec3e2f532ab7660680f3b02130d780c299bca73469d521bc0ed" +dependencies = [ + "serde", +] + +[[package]] +name = "serde" +version = "1.0.166" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d01b7404f9d441d3ad40e6a636a7782c377d2abdbe4fa2440e2edcc2f4f10db8" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.166" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5dd83d6dde2b6b2d466e14d9d1acce8816dedee94f735eac6395808b3483c6d6" +dependencies = [ + "proc-macro2", + "quote", + "syn 
2.0.23", +] + +[[package]] +name = "serde_json" +version = "1.0.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f1e14e89be7aa4c4b78bdbdc9eb5bf8517829a600ae8eaa39a6e1d960b5185c" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "serde_yaml" +version = "0.9.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "452e67b9c20c37fa79df53201dc03839651086ed9bbe92b3ca585ca9fdaa7d85" +dependencies = [ + "indexmap", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] + +[[package]] +name = "sqlformat" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c12bc9199d1db8234678b7051747c07f517cdcf019262d1847b94ec8b1aee3e" +dependencies = [ + "itertools", + "nom", + "unicode_categories", +] + +[[package]] +name = "sqlparser" +version = "0.33.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "355dc4d4b6207ca8a3434fc587db0a8016130a574dbcdbfb93d7f7b5bc5b211a" +dependencies = [ + "log", + "serde", +] + +[[package]] +name = "stacker" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "winapi", +] + +[[package]] +name = "strum" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f" +dependencies = [ + "strum_macros", +] + +[[package]] +name = "strum_macros" +version = "0.24.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "rustversion", + "syn 1.0.109", +] + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59fb7d6d8281a51045d62b8eb3a7d1ce347b76f312af50cd3dc0af39c87c1737" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22049a19f4a68748a168c0fc439f9516686aa045927ff767eca0a85101fb6e73" + +[[package]] +name = "unicode-width" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" + +[[package]] +name = "unicode_categories" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" + +[[package]] +name = "unsafe-libyaml" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1865806a559042e51ab5414598446a5871b561d21b6764f2eabb0dd481d880a6" + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "yansi" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" diff --git a/rust/prql/Cargo.toml b/rust/prql/Cargo.toml new file mode 100644 index 00000000000..314d1b52391 --- /dev/null +++ b/rust/prql/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "_ch_rust_prql" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +prql-compiler = "0.8.1" +serde_json = "1.0" + +[lib] +crate-type = ["staticlib"] + +[profile.release] +debug = true + +[profile.release-thinlto] +inherits = "release" +lto = true diff --git a/rust/prql/include/prql.h b/rust/prql/include/prql.h new file mode 100644 index 00000000000..29158d7f30d --- /dev/null +++ b/rust/prql/include/prql.h @@ -0,0 +1,18 @@ +#pragma once + +#include + +extern "C" { + +/// Converts a PRQL query to an SQL query. +/// @param query is a pointer to the beginning of the PRQL query. +/// @param size is the size of the PRQL query. +/// @param out is a pointer to a uint8_t pointer which will be set to the beginning of the null terminated SQL query or the error message. +/// @param out_size is the size of the string pointed by `out`. +/// @returns zero in case of success, non-zero in case of failure. +int64_t prql_to_sql(const uint8_t * query, uint64_t size, uint8_t ** out, uint64_t * out_size); + +/// Frees the passed in pointer which's memory was allocated by Rust allocators previously. 
+void prql_free_pointer(uint8_t * ptr_to_free); + +} // extern "C" diff --git a/rust/prql/src/lib.rs b/rust/prql/src/lib.rs new file mode 100644 index 00000000000..fb71d62d527 --- /dev/null +++ b/rust/prql/src/lib.rs @@ -0,0 +1,56 @@ +use prql_compiler::sql::Dialect; +use prql_compiler::{Options, Target}; +use std::ffi::{c_char, CString}; +use std::slice; + +fn set_output(result: String, out: *mut *mut u8, out_size: *mut u64) { + assert!(!out_size.is_null()); + let out_size_ptr = unsafe { &mut *out_size }; + *out_size_ptr = (result.len() + 1).try_into().unwrap(); + + assert!(!out.is_null()); + let out_ptr = unsafe { &mut *out }; + *out_ptr = CString::new(result).unwrap().into_raw() as *mut u8; +} + +#[no_mangle] +pub unsafe extern "C" fn prql_to_sql( + query: *const u8, + size: u64, + out: *mut *mut u8, + out_size: *mut u64, +) -> i64 { + let query_vec = unsafe { slice::from_raw_parts(query, size.try_into().unwrap()) }.to_vec(); + let maybe_prql_query = String::from_utf8(query_vec); + if maybe_prql_query.is_err() { + set_output( + String::from("The PRQL query must be UTF-8 encoded!"), + out, + out_size, + ); + return 1; + } + let prql_query = maybe_prql_query.unwrap(); + let opts = &Options { + format: true, + target: Target::Sql(Some(Dialect::ClickHouse)), + signature_comment: false, + color: false, + }; + let (is_err, res) = match prql_compiler::compile(&prql_query, &opts) { + Ok(sql_str) => (false, sql_str), + Err(err) => (true, err.to_string()), + }; + + set_output(res, out, out_size); + + match is_err { + true => 1, + false => 0, + } +} + +#[no_mangle] +pub unsafe extern "C" fn prql_free_pointer(ptr_to_free: *mut u8) { + std::mem::drop(CString::from_raw(ptr_to_free as *mut c_char)); +} diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 509dfe2e232..f5390037e6b 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -72,6 +73,7 @@ #include #include #include +#include #include #include "config_version.h" @@ -338,6 +340,8 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu if (dialect == Dialect::kusto) parser = std::make_unique(end, global_context->getSettings().allow_settings_after_format_in_insert); + else if (dialect == Dialect::prql) + parser = std::make_unique(max_length, settings.max_parser_depth); else parser = std::make_unique(end, global_context->getSettings().allow_settings_after_format_in_insert); diff --git a/src/Common/config.h.in b/src/Common/config.h.in index 1cb13d3ae3e..a2c18fc330f 100644 --- a/src/Common/config.h.in +++ b/src/Common/config.h.in @@ -54,6 +54,7 @@ #cmakedefine01 USE_BORINGSSL #cmakedefine01 USE_BLAKE3 #cmakedefine01 USE_SKIM +#cmakedefine01 USE_PRQL #cmakedefine01 USE_OPENSSL_INTREE #cmakedefine01 USE_ULID #cmakedefine01 FIU_ENABLE diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 1e2cbce9309..86400954e2f 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -138,7 +138,9 @@ IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation, ErrorCodes::BAD_ARGUMENTS, IMPLEMENT_SETTING_ENUM(Dialect, ErrorCodes::BAD_ARGUMENTS, {{"clickhouse", Dialect::clickhouse}, - {"kusto", Dialect::kusto}}) + {"kusto", Dialect::kusto}, + {"kusto", Dialect::kusto}, + {"prql", Dialect::prql}}) // FIXME: do not add 'kusto_auto' to the list. Maybe remove it from code completely? 
IMPLEMENT_SETTING_ENUM(ParallelReplicasCustomKeyFilterType, ErrorCodes::BAD_ARGUMENTS, diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index c2783447441..c61afbd2bbf 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -207,6 +207,7 @@ enum class Dialect clickhouse, kusto, kusto_auto, + prql, }; DECLARE_SETTING_ENUM(Dialect) diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 4b76d20f31d..66bc0bcb757 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -75,6 +75,7 @@ #include #include +#include namespace ProfileEvents { @@ -702,10 +703,14 @@ static std::tuple executeQueryImpl( /// TODO: parser should fail early when max_query_size limit is reached. ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); } + else if (settings.dialect == Dialect::prql && !internal) + { + ParserPRQLQuery parser(max_query_size, settings.max_parser_depth); + ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + } else { ParserQuery parser(end, settings.allow_settings_after_format_in_insert); - /// TODO: parser should fail early when max_query_size limit is reached. ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); } diff --git a/src/Parsers/CMakeLists.txt b/src/Parsers/CMakeLists.txt index d5cf2bd4784..d74137f8a91 100644 --- a/src/Parsers/CMakeLists.txt +++ b/src/Parsers/CMakeLists.txt @@ -4,8 +4,12 @@ add_headers_and_sources(clickhouse_parsers .) add_headers_and_sources(clickhouse_parsers ./Access) add_headers_and_sources(clickhouse_parsers ./MySQL) add_headers_and_sources(clickhouse_parsers ./Kusto) +add_headers_and_sources(clickhouse_parsers ./PRQL) add_library(clickhouse_parsers ${clickhouse_parsers_headers} ${clickhouse_parsers_sources}) target_link_libraries(clickhouse_parsers PUBLIC clickhouse_common_io clickhouse_common_access string_utils) +if (TARGET ch_rust::prql) + target_link_libraries(clickhouse_parsers PRIVATE ch_rust::prql) +endif () if (USE_DEBUG_HELPERS) # CMake generator expression will do insane quoting when it encounters special character like quotes, spaces, etc. diff --git a/src/Parsers/PRQL/ParserPRQLQuery.cpp b/src/Parsers/PRQL/ParserPRQLQuery.cpp new file mode 100644 index 00000000000..b3733b727dc --- /dev/null +++ b/src/Parsers/PRQL/ParserPRQLQuery.cpp @@ -0,0 +1,86 @@ +#include +#include + +#include "Parsers/Lexer.h" +#include "config.h" + +#if USE_PRQL +# include +#endif + +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int SYNTAX_ERROR; + extern const int SUPPORT_IS_DISABLED; +} + +bool ParserPRQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserSetQuery set_p; + + if (set_p.parse(pos, node, expected)) + return true; + +#if !USE_PRQL + throw Exception( + ErrorCodes::SUPPORT_IS_DISABLED, "PRQL is not available. Rust code or PRQL itself may be disabled. 
Use another dialect!"); +#else + const auto * begin = pos->begin; + + // The same parsers are used in the client and the server, so the parser have to detect the end of a single query in case of multiquery queries + while (!pos->isEnd() && pos->type != TokenType::Semicolon) + ++pos; + + const auto * end = pos->begin; + + uint8_t * sql_query_ptr{nullptr}; + uint64_t sql_query_size{0}; + + const auto res + = prql_to_sql(reinterpret_cast(begin), static_cast(end - begin), &sql_query_ptr, &sql_query_size); + + SCOPE_EXIT({ prql_free_pointer(sql_query_ptr); }); + + const auto * sql_query_char_ptr = reinterpret_cast(sql_query_ptr); + const auto * const original_sql_query_ptr = sql_query_char_ptr; + + if (res != 0) + { + throw Exception(ErrorCodes::SYNTAX_ERROR, "PRQL syntax error: '{}'", sql_query_char_ptr); + } + chassert(sql_query_size > 0); + + ParserQuery query_p(end, false); + String error_message; + node = tryParseQuery( + query_p, + sql_query_char_ptr, + sql_query_char_ptr + sql_query_size - 1, + error_message, + false, + "", + false, + max_query_size, + max_parser_depth); + + if (!node) + throw Exception( + ErrorCodes::SYNTAX_ERROR, + "Error while parsing the SQL query generated from PRQL query :'{}'.\nPRQL Query:'{}'\nSQL query: '{}'", + error_message, + std::string_view{begin, end}, + std::string_view(original_sql_query_ptr, original_sql_query_ptr + sql_query_size)); + + + return true; +#endif +} +} diff --git a/src/Parsers/PRQL/ParserPRQLQuery.h b/src/Parsers/PRQL/ParserPRQLQuery.h new file mode 100644 index 00000000000..4fc450df6b6 --- /dev/null +++ b/src/Parsers/PRQL/ParserPRQLQuery.h @@ -0,0 +1,27 @@ +#pragma once + +#include + +namespace DB +{ +// Even when PRQL is disabled, it is not possible to exclude this parser because changing the dialect via `SET dialect = '...'` queries should succeed. +// Another solution would be disabling setting the dialect to PRQL, but it requires a lot of finicky conditional compiling around the Dialect setting enum. +// Therefore the decision, for now, is to use this parser even when PRQL is disabled to enable users to switch to another dialect. +class ParserPRQLQuery final : public IParserBase +{ +private: + // These fields are not used when PRQL is disabled at build time. 
+ [[maybe_unused]] size_t max_query_size; + [[maybe_unused]] size_t max_parser_depth; + +public: + ParserPRQLQuery(size_t max_query_size_, size_t max_parser_depth_) : max_query_size{max_query_size_}, max_parser_depth{max_parser_depth_} + { + } + + const char * getName() const override { return "PRQL Statement"; } + +protected: + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; +} diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 2795de64b1d..ef4ef05e35e 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -476,3 +477,22 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE 'pet%')" } }))); + +static constexpr size_t kDummyMaxQuerySize = 256 * 1024; +static constexpr size_t kDummyMaxParserDepth = 256; + +INSTANTIATE_TEST_SUITE_P( + ParserPRQL, + ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared(kDummyMaxQuerySize, kDummyMaxParserDepth)), + ::testing::ValuesIn(std::initializer_list{ + { + "from albums\ngroup [author_id] (\n aggregate [first_pushlied = min published]\n)\njoin a=author side:left [==author_id]\njoin p=purchases side:right [==author_id]\ngroup [a.id, p.purchase_id] (\n aggregate [avg_sell = min first_pushlied]\n)", + "WITH table_1 AS\n (\n SELECT\n MIN(published) AS _expr_0,\n author_id\n FROM albums\n GROUP BY author_id\n )\nSELECT\n a.id,\n p.purchase_id,\n MIN(table_0._expr_0) AS avg_sell\nFROM table_1 AS table_0\nLEFT JOIN author AS a ON table_0.author_id = a.author_id\nRIGHT JOIN purchases AS p ON table_0.author_id = p.author_id\nGROUP BY\n a.id,\n p.purchase_id", + }, + { + "from matches\nfilter start_date > @2023-05-30 # Some comment here\nderive [\n some_derived_value_1 = a + (b ?? 0), # And there\n some_derived_value_2 = c + some_derived_value\n]\nfilter some_derived_value_2 > 0\ngroup [country, city] (\n aggregate [\n average some_derived_value_2,\n aggr = max some_derived_value_2,\n ]\n)\nderive place = f\"{city} in {country}\"\nderive country_code = s\"LEFT(country, 2)\"\nsort [aggr, -country]\ntake 1..20", + "WITH\n table_3 AS\n (\n SELECT\n country,\n city,\n c + some_derived_value AS _expr_1\n FROM matches\n WHERE start_date > toDate('2023-05-30')\n ),\n table_1 AS\n (\n SELECT\n country,\n city,\n AVG(_expr_1) AS _expr_0,\n MAX(_expr_1) AS aggr\n FROM table_3 AS table_2\n WHERE _expr_1 > 0\n GROUP BY\n country,\n city\n )\nSELECT\n country,\n city,\n _expr_0,\n aggr,\n CONCAT(city, ' in ', country) AS place,\n LEFT(country, 2) AS country_code\nFROM table_1 AS table_0\nORDER BY\n aggr ASC,\n country DESC\nLIMIT 20", + }, + }))); diff --git a/src/configure_config.cmake b/src/configure_config.cmake index c11a19b36ea..ae6305705c2 100644 --- a/src/configure_config.cmake +++ b/src/configure_config.cmake @@ -25,6 +25,9 @@ endif() if (TARGET ch_rust::skim) set(USE_SKIM 1) endif() +if (TARGET ch_rust::prql) + set(USE_PRQL 1) +endif() if (TARGET OpenSSL::SSL) set(USE_SSL 1) endif() diff --git a/tests/queries/0_stateless/02766_prql.reference b/tests/queries/0_stateless/02766_prql.reference new file mode 100644 index 00000000000..90e0b26cee6 --- /dev/null +++ b/tests/queries/0_stateless/02766_prql.reference @@ -0,0 +1,19 @@ +101 Hello, ClickHouse! 
2 He +101 Granules are the smallest chunks of data read 2 Gr +102 Insert a lot of rows per batch 2 In +102 Sort your data based on your commonly-used queries 2 So +103 This is an awesome message 2 Th +103 42 +102 4.132209897041321 +--- +101 Hello, ClickHouse! 2019-01-01 00:00:00.000 -1 +101 Granules are the smallest chunks of data read 2019-05-01 00:00:00.000 3.14159 +102 Insert a lot of rows per batch 2019-02-01 00:00:00.000 1.41421 +102 Sort your data based on your commonly-used queries 2019-03-01 00:00:00.000 2.718 +103 This is an awesome message 2019-04-01 00:00:00.000 42 +--- +101 Hello, ClickHouse! 2019-01-01 00:00:00.000 -1 +101 Granules are the smallest chunks of data read 2019-05-01 00:00:00.000 3.14159 +102 Insert a lot of rows per batch 2019-02-01 00:00:00.000 1.41421 +102 Sort your data based on your commonly-used queries 2019-03-01 00:00:00.000 2.718 +103 This is an awesome message 2019-04-01 00:00:00.000 42 diff --git a/tests/queries/0_stateless/02766_prql.sh b/tests/queries/0_stateless/02766_prql.sh new file mode 100755 index 00000000000..f8bbd72af4e --- /dev/null +++ b/tests/queries/0_stateless/02766_prql.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-random-settings + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -n -q " +CREATE TEMPORARY TABLE IF NOT EXISTS aboba +( + user_id UInt32, + message String, + creation_date DateTime64, + metric Float32 +) +ENGINE = MergeTree +ORDER BY user_id; + +INSERT INTO aboba (user_id, message, creation_date, metric) VALUES (101, 'Hello, ClickHouse!', toDateTime('2019-01-01 00:00:00', 3, 'Europe/Amsterdam'), -1.0), (102, 'Insert a lot of rows per batch', toDateTime('2019-02-01 00:00:00', 3, 'Europe/Amsterdam'), 1.41421 ), (102, 'Sort your data based on your commonly-used queries', toDateTime('2019-03-01 00:00:00', 3, 'Europe/Amsterdam'), 2.718), (101, 'Granules are the smallest chunks of data read', toDateTime('2019-05-01 00:00:00', 3, 'Europe/Amsterdam'), 3.14159), (103, 'This is an awesome message', toDateTime('2019-04-01 00:00:00', 3, 'Europe/Amsterdam'), 42); + +SET dialect = 'prql'; + +from aboba +derive [ + a = 2, + b = s\"LEFT(message, 2)\" +] +select [ user_id, message, a, b ]; + +from aboba +filter user_id > 101 +group user_id ( + aggregate [ + metrics = sum metric + ] +); + +SET dialect = 'clickhouse'; + +SELECT '---'; +SELECT + user_id, + message, + toTimeZone(creation_date, 'Europe/Amsterdam') as creation_date, + metric +FROM aboba; +SELECT '---'; + +SET dialect = 'prql'; + +from aboba +select [ user_id, message, metric ] +derive creation_date = s\"toTimeZone(creation_date, 'Europe/Amsterdam')\" +select [ user_id, message, creation_date, metric]; + +from s\"SELECT * FROM system.users\" | select non_existent_column; # {serverError UNKNOWN_IDENTIFIER} +from non_existent_table; # {serverError UNKNOWN_TABLE} +" \ No newline at end of file From 84f6a7336c2d7ac547ad7030c389d4961f4ab8e4 Mon Sep 17 00:00:00 2001 From: chen768959 <934103231@qq.com> Date: Thu, 20 Jul 2023 19:03:42 +0800 Subject: [PATCH 1794/1997] Prevent going beyond the index of const_columns_to_remove. 
--- src/Processors/Transforms/FinishSortingTransform.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Processors/Transforms/FinishSortingTransform.cpp b/src/Processors/Transforms/FinishSortingTransform.cpp index 744d035d0ee..baf898481ab 100644 --- a/src/Processors/Transforms/FinishSortingTransform.cpp +++ b/src/Processors/Transforms/FinishSortingTransform.cpp @@ -38,11 +38,12 @@ FinishSortingTransform::FinishSortingTransform( /// Remove constants from description_sorted_. SortDescription description_sorted_without_constants; description_sorted_without_constants.reserve(description_sorted_.size()); + size_t num_columns = const_columns_to_remove.size(); for (const auto & column_description : description_sorted_) { auto pos = header.getPositionByName(column_description.column_name); - if (!const_columns_to_remove[pos]){ + if (pos < num_columns && !const_columns_to_remove[pos]){ description_sorted_without_constants.push_back(column_description); } } From cbcd48979cab1a3a4a0f0d5baaf8077164887cf5 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 20 Jul 2023 13:04:43 +0200 Subject: [PATCH 1795/1997] Fix race one more time --- programs/server/Server.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index d1c1a1d200f..774c3f223a6 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -739,9 +739,10 @@ try [&]() -> std::vector { std::vector metrics; - metrics.reserve(servers_to_start_before_tables.size() + servers.size()); std::lock_guard lock(servers_lock); + metrics.reserve(servers_to_start_before_tables.size() + servers.size()); + for (const auto & server : servers_to_start_before_tables) metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads()}); From f2d184cf1b002d18be152880ee2d82e57fed3b26 Mon Sep 17 00:00:00 2001 From: chen768959 <934103231@qq.com> Date: Thu, 20 Jul 2023 19:11:08 +0800 Subject: [PATCH 1796/1997] Consistent style for if statements. --- src/Processors/Transforms/FinishSortingTransform.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Processors/Transforms/FinishSortingTransform.cpp b/src/Processors/Transforms/FinishSortingTransform.cpp index baf898481ab..63a9c3924a2 100644 --- a/src/Processors/Transforms/FinishSortingTransform.cpp +++ b/src/Processors/Transforms/FinishSortingTransform.cpp @@ -43,9 +43,8 @@ FinishSortingTransform::FinishSortingTransform( { auto pos = header.getPositionByName(column_description.column_name); - if (pos < num_columns && !const_columns_to_remove[pos]){ + if (pos < num_columns && !const_columns_to_remove[pos]) description_sorted_without_constants.push_back(column_description); - } } /// The target description is modified in SortingTransform constructor. /// To avoid doing the same actions with description_sorted just copy it from prefix of target description. 
From db1b53d1bb8ed6aa71f47010c81a7f3ebb0ae65d Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 20 Jul 2023 14:18:48 +0300 Subject: [PATCH 1797/1997] Update 01606_git_import.sh --- tests/queries/0_stateless/01606_git_import.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01606_git_import.sh b/tests/queries/0_stateless/01606_git_import.sh index c9aa2c7d82e..48558d79f93 100755 --- a/tests/queries/0_stateless/01606_git_import.sh +++ b/tests/queries/0_stateless/01606_git_import.sh @@ -13,7 +13,7 @@ cd $CLICKHOUSE_TMP || exit # Protection for network errors for _ in {1..10}; do rm -rf ./clickhouse-odbc - git clone --quiet https://github.com/ClickHouse/clickhouse-odbc.git && pushd clickhouse-odbc > /dev/null && git checkout --quiet 5d84ec591c53cbb272593f024230a052690fdf69 && break + git clone --quiet https://github.com/ClickHouse/clickhouse-odbc.git && pushd clickhouse-odbc 2> /dev/null > /dev/null && git checkout --quiet 5d84ec591c53cbb272593f024230a052690fdf69 && break sleep 1 done From 2b29e3dc83d9ed6747acb4a249c9e1aca9616f21 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 20 Jul 2023 14:22:22 +0300 Subject: [PATCH 1798/1997] Update MergeTreeBackgroundExecutor.cpp (#52261) --- src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp index 6eab4337162..e497a799274 100644 --- a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp @@ -269,7 +269,7 @@ void MergeTreeBackgroundExecutor::routine(TaskRuntimeDataPtr item) try { ALLOW_ALLOCATIONS_IN_SCOPE; - item->task->getQueryId(); + query_id = item->task->getQueryId(); need_execute_again = item->task->executeStep(); } catch (...) From f53ff5d4f2228b7016af5742ea1ae8f70ef772df Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 20 Jul 2023 14:51:01 +0300 Subject: [PATCH 1799/1997] more fair queue for drop table sync (#52276) --- src/Interpreters/DatabaseCatalog.cpp | 17 ++++++++++++++++- src/Interpreters/DatabaseCatalog.h | 1 + 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 23a67f4bc2f..0e2e30eefee 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -697,6 +697,7 @@ DatabaseCatalog::DatabaseCatalog(ContextMutablePtr global_context_) , loading_dependencies{"LoadingDeps"} , view_dependencies{"ViewDeps"} , log(&Poco::Logger::get("DatabaseCatalog")) + , first_async_drop_in_queue(tables_marked_dropped.end()) { } @@ -959,9 +960,17 @@ void DatabaseCatalog::enqueueDroppedTableCleanup(StorageID table_id, StoragePtr std::lock_guard lock(tables_marked_dropped_mutex); if (ignore_delay) - tables_marked_dropped.push_front({table_id, table, dropped_metadata_path, drop_time}); + { + /// Insert it before first_async_drop_in_queue, so sync drop queries will have priority over async ones, + /// but the queue will remain fair for multiple sync drop queries. 
+ tables_marked_dropped.emplace(first_async_drop_in_queue, TableMarkedAsDropped{table_id, table, dropped_metadata_path, drop_time}); + } else + { tables_marked_dropped.push_back({table_id, table, dropped_metadata_path, drop_time + drop_delay_sec}); + if (first_async_drop_in_queue == tables_marked_dropped.end()) + --first_async_drop_in_queue; + } tables_marked_dropped_ids.insert(table_id.uuid); CurrentMetrics::add(CurrentMetrics::TablesToDropQueueSize, 1); @@ -1012,6 +1021,8 @@ void DatabaseCatalog::dequeueDroppedTableCleanup(StorageID table_id) /// This maybe throw exception. renameNoReplace(latest_metadata_dropped_path, table_metadata_path); + if (first_async_drop_in_queue == it_dropped_table) + ++first_async_drop_in_queue; tables_marked_dropped.erase(it_dropped_table); [[maybe_unused]] auto removed = tables_marked_dropped_ids.erase(dropped_table.table_id.uuid); assert(removed); @@ -1074,6 +1085,8 @@ void DatabaseCatalog::dropTableDataTask() table = std::move(*it); LOG_INFO(log, "Have {} tables in drop queue ({} of them are in use), will try drop {}", tables_marked_dropped.size(), tables_in_use_count, table.table_id.getNameForLogs()); + if (first_async_drop_in_queue == it) + ++first_async_drop_in_queue; tables_marked_dropped.erase(it); /// Schedule the task as soon as possible, while there are suitable tables to drop. schedule_after_ms = 0; @@ -1110,6 +1123,8 @@ void DatabaseCatalog::dropTableDataTask() table.drop_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()) + drop_error_cooldown_sec; std::lock_guard lock(tables_marked_dropped_mutex); tables_marked_dropped.emplace_back(std::move(table)); + if (first_async_drop_in_queue == tables_marked_dropped.end()) + --first_async_drop_in_queue; /// If list of dropped tables was empty, schedule a task to retry deletion. 
if (tables_marked_dropped.size() == 1) { diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index d502505027f..805d7786569 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -323,6 +323,7 @@ private: mutable std::mutex ddl_guards_mutex; TablesMarkedAsDropped tables_marked_dropped TSA_GUARDED_BY(tables_marked_dropped_mutex); + TablesMarkedAsDropped::iterator first_async_drop_in_queue TSA_GUARDED_BY(tables_marked_dropped_mutex); std::unordered_set tables_marked_dropped_ids TSA_GUARDED_BY(tables_marked_dropped_mutex); mutable std::mutex tables_marked_dropped_mutex; From d16d4449432999cdee3393b1f47b4a7d7c5314a6 Mon Sep 17 00:00:00 2001 From: Val Doroshchuk Date: Thu, 20 Jul 2023 12:24:52 +0200 Subject: [PATCH 1800/1997] MaterializedMySQL: Add support of double quoted comments --- src/Parsers/ExpressionElementParsers.cpp | 33 +++++++++++++++++ src/Parsers/ExpressionElementParsers.h | 15 ++++++++ src/Parsers/MySQL/ASTDeclareColumn.cpp | 2 +- .../materialized_with_ddl.py | 35 +++++++++++++++++++ .../test_materialized_mysql_database/test.py | 6 ++++ 5 files changed, 90 insertions(+), 1 deletion(-) diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 3a7e8790bb4..0149526da79 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -1900,6 +1900,39 @@ bool ParserSubstitution::parseImpl(Pos & pos, ASTPtr & node, Expected & expected } +bool ParserMySQLComment::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (pos->type != TokenType::QuotedIdentifier && pos->type != TokenType::StringLiteral) + return false; + String s; + ReadBufferFromMemory in(pos->begin, pos->size()); + try + { + if (pos->type == TokenType::StringLiteral) + readQuotedStringWithSQLStyle(s, in); + else + readDoubleQuotedStringWithSQLStyle(s, in); + } + catch (const Exception &) + { + expected.add(pos, "string literal or double quoted string"); + return false; + } + + if (in.count() != pos->size()) + { + expected.add(pos, "string literal or double quoted string"); + return false; + } + + auto literal = std::make_shared(s); + literal->begin = pos; + literal->end = ++pos; + node = literal; + return true; +} + + bool ParserMySQLGlobalVariable::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { if (pos->type != TokenType::DoubleAt) diff --git a/src/Parsers/ExpressionElementParsers.h b/src/Parsers/ExpressionElementParsers.h index cc88faf2653..f33f2d99f71 100644 --- a/src/Parsers/ExpressionElementParsers.h +++ b/src/Parsers/ExpressionElementParsers.h @@ -367,6 +367,21 @@ protected: }; +/** MySQL comment: + * CREATE TABLE t ( + * i INT PRIMARY KEY, + * first_name VARCHAR(255) COMMENT 'FIRST_NAME', + * last_name VARCHAR(255) COMMENT "LAST_NAME" + * ) + */ +class ParserMySQLComment : public IParserBase +{ +protected: + const char * getName() const override { return "MySQL comment parser"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + + /** MySQL-style global variable: @@var */ class ParserMySQLGlobalVariable : public IParserBase diff --git a/src/Parsers/MySQL/ASTDeclareColumn.cpp b/src/Parsers/MySQL/ASTDeclareColumn.cpp index e585dcb670c..e5f2b7870e2 100644 --- a/src/Parsers/MySQL/ASTDeclareColumn.cpp +++ b/src/Parsers/MySQL/ASTDeclareColumn.cpp @@ -50,7 +50,7 @@ static inline bool parseColumnDeclareOptions(IParser::Pos & pos, ASTPtr & node, OptionDescribe("PRIMARY KEY", "primary_key", std::make_unique()), 
OptionDescribe("UNIQUE", "unique_key", std::make_unique()), OptionDescribe("KEY", "primary_key", std::make_unique()), - OptionDescribe("COMMENT", "comment", std::make_unique()), + OptionDescribe("COMMENT", "comment", std::make_unique()), OptionDescribe("CHARACTER SET", "charset_name", std::make_unique()), OptionDescribe("CHARSET", "charset", std::make_unique()), OptionDescribe("COLLATE", "collate", std::make_unique()), diff --git a/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py b/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py index 8cf9e67bf63..f7a930ec00b 100644 --- a/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py +++ b/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py @@ -1617,6 +1617,41 @@ def materialized_with_column_comments_test(clickhouse_node, mysql_node, service_ mysql_node.query("DROP DATABASE materialized_with_column_comments_test") +def double_quoted_comment(clickhouse_node, mysql_node, service_name): + db = "comment_db" + mysql_node.query(f"DROP DATABASE IF EXISTS {db}") + clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}") + mysql_node.query(f"CREATE DATABASE {db}") + mysql_node.query( + f'CREATE TABLE {db}.t1 (i INT PRIMARY KEY, id VARCHAR(255) COMMENT "ID")' + ) + mysql_node.query( + f"CREATE TABLE {db}.t2 (i INT PRIMARY KEY, id VARCHAR(255) COMMENT 'ID')" + ) + clickhouse_node.query( + f"CREATE DATABASE {db} ENGINE = MaterializedMySQL('{service_name}:3306', '{db}', 'root', 'clickhouse')" + ) + check_query( + clickhouse_node, + f"SHOW TABLES FROM {db} FORMAT TSV", + "t1\nt2\n", + ) + + # incremental + mysql_node.query( + f'CREATE TABLE {db}.t3 (i INT PRIMARY KEY, id VARCHAR(255) COMMENT "ID")' + ) + mysql_node.query( + f"CREATE TABLE {db}.t4 (i INT PRIMARY KEY, id VARCHAR(255) COMMENT 'ID')" + ) + check_query( + clickhouse_node, f"SHOW TABLES FROM {db} FORMAT TSV", "t1\nt2\nt3\nt4\n" + ) + + clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}") + mysql_node.query(f"DROP DATABASE IF EXISTS {db}") + + def materialized_with_enum8_test(clickhouse_node, mysql_node, service_name): mysql_node.query("DROP DATABASE IF EXISTS materialized_with_enum8_test") clickhouse_node.query("DROP DATABASE IF EXISTS materialized_with_enum8_test") diff --git a/tests/integration/test_materialized_mysql_database/test.py b/tests/integration/test_materialized_mysql_database/test.py index 21316d1a474..0166f7d1d33 100644 --- a/tests/integration/test_materialized_mysql_database/test.py +++ b/tests/integration/test_materialized_mysql_database/test.py @@ -416,6 +416,12 @@ def test_materialized_with_column_comments( ) +def test_double_quoted_comment(started_cluster, started_mysql_8_0, clickhouse_node): + materialized_with_ddl.double_quoted_comment( + clickhouse_node, started_mysql_8_0, "mysql80" + ) + + def test_materialized_with_enum( started_cluster, started_mysql_8_0, started_mysql_5_7, clickhouse_node ): From fe934d3059936cd203952cfe5881ff7243001ae9 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 20 Jul 2023 12:38:41 +0000 Subject: [PATCH 1801/1997] Make better --- docs/en/engines/table-engines/special/url.md | 2 +- docs/en/operations/settings/settings.md | 6 +++--- docs/en/sql-reference/table-functions/url.md | 4 ++-- src/Core/Settings.h | 2 +- src/Storages/StorageURL.cpp | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/en/engines/table-engines/special/url.md b/docs/en/engines/table-engines/special/url.md index 9f2bf177c96..f556df0a088 100644 --- 
a/docs/en/engines/table-engines/special/url.md +++ b/docs/en/engines/table-engines/special/url.md @@ -106,4 +106,4 @@ For partitioning by month, use the `toYYYYMM(date_column)` expression, where `da ## Storage Settings {#storage-settings} - [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default. -- [decode_and_encode_path_in_url](/docs/en/operations/settings/settings.md#decode_and_encode_path_in_url) - enables or disables decoding/encoding path in uri. Enabled by default. +- [disable_url_encoding](/docs/en/operations/settings/settings.md#disable_url_encoding) - allows to disable decoding/encoding path in uri. Disabled by default. diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index db5d1a2f5d9..d138b07d3ae 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -3466,11 +3466,11 @@ Possible values: Default value: `0`. -## decode_and_encode_path_in_url {#decode_and_encode_path_in_url} +## disable_url_encoding {#disable_url_encoding} -Enables or disables decoding/encoding path in uri in [URL](../../engines/table-engines/special/url.md) engine tables. +Allows to disable decoding/encoding path in uri in [URL](../../engines/table-engines/special/url.md) engine tables. -Enabled by default. +Disabled by default. ## database_atomic_wait_for_drop_and_detach_synchronously {#database_atomic_wait_for_drop_and_detach_synchronously} diff --git a/docs/en/sql-reference/table-functions/url.md b/docs/en/sql-reference/table-functions/url.md index 96f36f03949..677ed011960 100644 --- a/docs/en/sql-reference/table-functions/url.md +++ b/docs/en/sql-reference/table-functions/url.md @@ -56,8 +56,8 @@ Character `|` inside patterns is used to specify failover addresses. They are it ## Storage Settings {#storage-settings} - [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default. -- [decode_and_encode_path_in_url](/docs/en/operations/settings/settings.md#decode_and_encode_path_in_url) - enables or disables decoding/encoding path in uri. Enabled by default. +- [disable_url_encoding](/docs/en/operations/settings/settings.md#disable_url_encoding) - allows to disable decoding/encoding path in uri. Disabled by default.
-- **See Also** +**See Also** - [Virtual columns](/docs/en/engines/table-engines/index.md#table_engines-virtual_columns) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index ffa72d841be..5dc40494115 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -621,7 +621,7 @@ class IColumn; M(Bool, engine_file_allow_create_multiple_files, false, "Enables or disables creating a new file on each insert in file engine tables if format has suffix.", 0) \ M(Bool, engine_file_skip_empty_files, false, "Allows to skip empty files in file table engine", 0) \ M(Bool, engine_url_skip_empty_files, false, "Allows to skip empty files in url table engine", 0) \ - M(Bool, decode_and_encode_path_in_url, true, "Enables or disables decoding/encoding path in uri in URL table engine", 0) \ + M(Bool, disable_url_encoding, false, " Allows to disable decoding/encoding path in uri in URL table engine", 0) \ M(Bool, allow_experimental_database_replicated, false, "Allow to create databases with Replicated engine", 0) \ M(UInt64, database_replicated_initial_query_timeout_sec, 300, "How long initial DDL query should wait for Replicated database to precess previous DDL queue entries", 0) \ M(Bool, database_replicated_enforce_synchronous_settings, false, "Enforces synchronous waiting for some queries (see also database_atomic_wait_for_drop_and_detach_synchronously, mutation_sync, alter_sync). Not recommended to enable these settings.", 0) \ diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 4cfefbc5527..0c915f54cff 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -389,7 +389,7 @@ std::pair> StorageURLSource: for (; option != end; ++option) { bool skip_url_not_found_error = glob_url && read_settings.http_skip_not_found_url_for_globs && option == std::prev(end); - auto request_uri = Poco::URI(*option, context->getSettingsRef().decode_and_encode_path_in_url); + auto request_uri = Poco::URI(*option, context->getSettingsRef().disable_url_encoding); for (const auto & [param, value] : params) request_uri.addQueryParameter(param, value); From f6a44f8eedce98bd50ceee72e5fdc4da1a82a43a Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 20 Jul 2023 12:40:41 +0000 Subject: [PATCH 1802/1997] Better --- base/poco/Foundation/include/Poco/URI.h | 6 +++-- base/poco/Foundation/src/URI.cpp | 34 +++++++++++++------------ 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/base/poco/Foundation/include/Poco/URI.h b/base/poco/Foundation/include/Poco/URI.h index 5e6e7efd938..f4505147ced 100644 --- a/base/poco/Foundation/include/Poco/URI.h +++ b/base/poco/Foundation/include/Poco/URI.h @@ -57,7 +57,7 @@ public: URI(); /// Creates an empty URI. - explicit URI(const std::string & uri, bool decode_and_encode_path = true); + explicit URI(const std::string & uri, bool disable_url_encoding = true); /// Parses an URI from the given string. Throws a /// SyntaxException if the uri is not valid. 
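Why a pass-through mode is needed at all: decoding a path and encoding it again is not an identity transform, because an escape like `%2F` decodes to a path separator, and a separator is never re-escaped on the way out. The hunks below implement pass-through by skipping `decode`/`encode` whenever `_disable_url_encoding` is set; the effect is easy to reproduce with Python's standard library:

```python
from urllib.parse import quote, unquote

raw = "/bucket/a%2Fb/file.csv"   # one path segment whose name contains '/'

decoded = unquote(raw)           # '/bucket/a/b/file.csv'
reencoded = quote(decoded)       # '/' counts as safe, so it stays unescaped

print(reencoded)                 # /bucket/a/b/file.csv
print(raw == reencoded)          # False: the round trip renamed the resource
```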
@@ -351,6 +351,8 @@ protected: private: void encodePath(std::string & encodedStr) const; + void decodePath(const std::string & encodedStr); + std::string _scheme; std::string _userInfo; @@ -360,7 +362,7 @@ private: std::string _query; std::string _fragment; - bool _decode_and_encode_path = true; + bool _disable_url_encoding = true; }; diff --git a/base/poco/Foundation/src/URI.cpp b/base/poco/Foundation/src/URI.cpp index 9bad1b39a87..3354c69d188 100644 --- a/base/poco/Foundation/src/URI.cpp +++ b/base/poco/Foundation/src/URI.cpp @@ -37,7 +37,7 @@ URI::URI(): URI::URI(const std::string& uri, bool decode_and_encode_path): - _port(0), _decode_and_encode_path(decode_and_encode_path) + _port(0), _disable_url_encoding(decode_and_encode_path) { parse(uri); } @@ -108,7 +108,7 @@ URI::URI(const URI& uri): _path(uri._path), _query(uri._query), _fragment(uri._fragment), - _decode_and_encode_path(uri._decode_and_encode_path) + _disable_url_encoding(uri._disable_url_encoding) { } @@ -121,7 +121,7 @@ URI::URI(const URI& baseURI, const std::string& relativeURI): _path(baseURI._path), _query(baseURI._query), _fragment(baseURI._fragment), - _decode_and_encode_path(baseURI._decode_and_encode_path) + _disable_url_encoding(baseURI._disable_url_encoding) { resolve(relativeURI); } @@ -153,7 +153,7 @@ URI& URI::operator = (const URI& uri) _path = uri._path; _query = uri._query; _fragment = uri._fragment; - _decode_and_encode_path = uri._decode_and_encode_path; + _disable_url_encoding = uri._disable_url_encoding; } return *this; } @@ -184,7 +184,7 @@ void URI::swap(URI& uri) std::swap(_path, uri._path); std::swap(_query, uri._query); std::swap(_fragment, uri._fragment); - std::swap(_decode_and_encode_path, uri._decode_and_encode_path); + std::swap(_disable_url_encoding, uri._disable_url_encoding); } @@ -317,10 +317,7 @@ void URI::setAuthority(const std::string& authority) void URI::setPath(const std::string& path) { _path.clear(); - if (_decode_and_encode_path) - decode(path, _path); - else - _path = path; + decodePath(path); } @@ -690,10 +687,18 @@ void URI::decode(const std::string& str, std::string& decodedStr, bool plusAsSpa void URI::encodePath(std::string & encodedStr) const { - if (_decode_and_encode_path) - encode(_path, RESERVED_PATH, encodedStr); - else + if (_disable_url_encoding) encodedStr = _path; + else + encode(_path, RESERVED_PATH, encodedStr); +} + +void URI::decodePath(const std::string & encodedStr) +{ + if (_disable_url_encoding) + _path = encodedStr; + else + decode(encodedStr, _path); } bool URI::isWellKnownPort() const @@ -834,10 +839,7 @@ void URI::parsePath(std::string::const_iterator& it, const std::string::const_it { std::string path; while (it != end && *it != '?' 
&& *it != '#') path += *it++; - if (_decode_and_encode_path) - decode(path, _path); - else - _path = path; + decodePath(path); } From 3c9e46b557a882085fdcdce5d74ad12329457db3 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 20 Jul 2023 16:19:12 +0300 Subject: [PATCH 1803/1997] Update ci-slack-bot.py --- utils/ci-slack-bot/ci-slack-bot.py | 43 ++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 5 deletions(-) diff --git a/utils/ci-slack-bot/ci-slack-bot.py b/utils/ci-slack-bot/ci-slack-bot.py index 6e694b4fdbd..0fb12e89ce9 100755 --- a/utils/ci-slack-bot/ci-slack-bot.py +++ b/utils/ci-slack-bot/ci-slack-bot.py @@ -26,10 +26,11 @@ else: DRY_RUN_MARK = "" -MAX_FAILURES_DEFAULT = 40 +MAX_FAILURES_DEFAULT = 30 SLACK_URL_DEFAULT = DRY_RUN_MARK -FLAKY_ALERT_PROBABILITY = 0.20 +FLAKY_ALERT_PROBABILITY = 0.50 +REPORT_NO_FAILURES_PROBABILITY = 0.99 MAX_TESTS_TO_REPORT = 4 @@ -89,6 +90,22 @@ WHERE 1 AND check_name ILIKE check_name_pattern """ +# Returns percentage of failed checks (once per day, at noon) +FAILED_CHECKS_PERCENTAGE_QUERY = """ +SELECT if(toHour(now('Europe/Amsterdam')) = 12, v, 0) +FROM +( + SELECT + countDistinctIf((commit_sha, check_name), (test_status LIKE 'F%') AND (check_status != 'success')) + / countDistinct((commit_sha, check_name)) AS v + FROM checks + WHERE 1 + AND (pull_request_number = 0) + AND (test_status != 'SKIPPED') + AND (check_start_time > (now() - toIntervalDay(1))) +) +""" + # It shows all recent failures of the specified test (helps to find when it started) ALL_RECENT_FAILURES_QUERY = """ WITH @@ -202,9 +219,9 @@ def get_too_many_failures_message_impl(failures_count): curr_failures = int(failures_count[0][0]) prev_failures = int(failures_count[0][1]) if curr_failures == 0 and prev_failures != 0: - return ( - "Looks like CI is completely broken: there are *no failures* at all... 0_o" - ) + if random.random() < REPORT_NO_FAILURES_PROBABILITY: + return None + return "Wow, there are *no failures* at all... 
0_o" if curr_failures < MAX_FAILURES: return None if prev_failures < MAX_FAILURES: @@ -227,6 +244,19 @@ def get_too_many_failures_message(failures_count): return msg +def get_failed_checks_percentage_message(percentage): + p = percentage[0][0] * 100 + + # Always report more than 1% of failed checks + # For <= 1%: higher percentage of failures == higher probability + if p <= random.random(): + return None + + msg = ":alert: " if p > 1 else "Only " if p < 0.5 else "" + msg += "*{0:.2f}%* of all checks in master have failed yesterday".format(p) + return msg + + def split_slack_message(long_message): lines = long_message.split("\n") messages = [] @@ -280,6 +310,9 @@ def query_and_alert_if_needed(query, get_message_func): def check_and_alert(): query_and_alert_if_needed(NEW_BROKEN_TESTS_QUERY, get_new_broken_tests_message) query_and_alert_if_needed(COUNT_FAILURES_QUERY, get_too_many_failures_message) + query_and_alert_if_needed( + FAILED_CHECKS_PERCENTAGE_QUERY, get_failed_checks_percentage_message + ) def lambda_handler(event, context): From 8649c84461f3c27bdf9fcab4db1884b21603dc2e Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Thu, 20 Jul 2023 13:28:37 +0000 Subject: [PATCH 1804/1997] Remove conditional linking --- utils/config-processor/CMakeLists.txt | 6 +----- utils/keeper-bench/CMakeLists.txt | 6 +----- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/utils/config-processor/CMakeLists.txt b/utils/config-processor/CMakeLists.txt index 4394083a1c3..80c3535ef4e 100644 --- a/utils/config-processor/CMakeLists.txt +++ b/utils/config-processor/CMakeLists.txt @@ -1,6 +1,2 @@ clickhouse_add_executable (config-processor config-processor.cpp) -if (ENABLE_SSL) - target_link_libraries(config-processor PRIVATE dbms) -else () - target_link_libraries(config-processor PRIVATE clickhouse_common_config_no_zookeeper_log) -endif () +target_link_libraries(config-processor PRIVATE dbms) diff --git a/utils/keeper-bench/CMakeLists.txt b/utils/keeper-bench/CMakeLists.txt index e8daec9e164..5514c34f4ef 100644 --- a/utils/keeper-bench/CMakeLists.txt +++ b/utils/keeper-bench/CMakeLists.txt @@ -4,9 +4,5 @@ if (NOT TARGET ch_contrib::rapidjson) endif () clickhouse_add_executable(keeper-bench Generator.cpp Runner.cpp Stats.cpp main.cpp) -if (ENABLE_SSL) - target_link_libraries(keeper-bench PRIVATE dbms) -else () - target_link_libraries(keeper-bench PRIVATE clickhouse_common_config_no_zookeeper_log) -endif () +target_link_libraries(keeper-bench PRIVATE dbms) target_link_libraries(keeper-bench PRIVATE ch_contrib::rapidjson) From f997adfe27e1bd3bb772857fb11fae962c373b9e Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 20 Jul 2023 14:02:55 +0000 Subject: [PATCH 1805/1997] Retry if sessions not closed because missing leader --- src/Coordination/KeeperDispatcher.cpp | 37 +++++++++------- tests/integration/test_keeper_session/test.py | 42 ++++++++++++++----- 2 files changed, 54 insertions(+), 25 deletions(-) diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index 9d9df5c7f30..dfb621eb0ad 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -473,23 +473,30 @@ void KeeperDispatcher::shutdown() session_to_response_callback.clear(); } - // if there is no leader, there is no reason to do CLOSE because it's a write request - if (server && hasLeader() && !close_requests.empty()) + if (server && !close_requests.empty()) { - LOG_INFO(log, "Trying to close {} session(s)", close_requests.size()); - const auto raft_result = 
server->putRequestBatch(close_requests); - auto sessions_closing_done_promise = std::make_shared>(); - auto sessions_closing_done = sessions_closing_done_promise->get_future(); - raft_result->when_ready([my_sessions_closing_done_promise = std::move(sessions_closing_done_promise)]( - nuraft::cmd_result> & /*result*/, - nuraft::ptr & /*exception*/) { my_sessions_closing_done_promise->set_value(); }); + // if there is no leader, there is no reason to do CLOSE because it's a write request + if (hasLeader()) + { + LOG_INFO(log, "Trying to close {} session(s)", close_requests.size()); + const auto raft_result = server->putRequestBatch(close_requests); + auto sessions_closing_done_promise = std::make_shared>(); + auto sessions_closing_done = sessions_closing_done_promise->get_future(); + raft_result->when_ready([my_sessions_closing_done_promise = std::move(sessions_closing_done_promise)]( + nuraft::cmd_result> & /*result*/, + nuraft::ptr & /*exception*/) { my_sessions_closing_done_promise->set_value(); }); - auto session_shutdown_timeout = configuration_and_settings->coordination_settings->session_shutdown_timeout.totalMilliseconds(); - if (sessions_closing_done.wait_for(std::chrono::milliseconds(session_shutdown_timeout)) != std::future_status::ready) - LOG_WARNING( - log, - "Failed to close sessions in {}ms. If they are not closed, they will be closed after session timeout.", - session_shutdown_timeout); + auto session_shutdown_timeout = configuration_and_settings->coordination_settings->session_shutdown_timeout.totalMilliseconds(); + if (sessions_closing_done.wait_for(std::chrono::milliseconds(session_shutdown_timeout)) != std::future_status::ready) + LOG_WARNING( + log, + "Failed to close sessions in {}ms. If they are not closed, they will be closed after session timeout.", + session_shutdown_timeout); + } + else + { + LOG_INFO(log, "Sessions cannot be closed during shutdown because there is no active leader"); + } } if (server) diff --git a/tests/integration/test_keeper_session/test.py b/tests/integration/test_keeper_session/test.py index e57057a8258..68147865cd2 100644 --- a/tests/integration/test_keeper_session/test.py +++ b/tests/integration/test_keeper_session/test.py @@ -6,6 +6,7 @@ import socket import struct from kazoo.client import KazooClient +from kazoo.exceptions import NoNodeError # from kazoo.protocol.serialization import Connect, read_buffer, write_buffer @@ -162,17 +163,38 @@ def test_session_timeout(started_cluster): def test_session_close_shutdown(started_cluster): wait_nodes() - node1_zk = get_fake_zk(node1.name) - node2_zk = get_fake_zk(node2.name) + node1_zk = None + node2_zk = None + for i in range(20): + node1_zk = get_fake_zk(node1.name) + node2_zk = get_fake_zk(node2.name) - eph_node = "/test_node" - node2_zk.create(eph_node, ephemeral=True) - node1_zk.sync(eph_node) - assert node1_zk.exists(eph_node) != None + eph_node = "/test_node" + node2_zk.create(eph_node, ephemeral=True) + node1_zk.sync(eph_node) - # shutdown while session is active - node2.stop_clickhouse() + node1_zk.exists(eph_node) != None - assert node1_zk.exists(eph_node) == None + # restart while session is active so it's closed during shutdown + node2.restart_clickhouse() - node2.start_clickhouse() + if node1_zk.exists(eph_node) == None: + break + + assert node2.contains_in_log("Sessions cannot be closed during shutdown because there is no active leader") + + try: + node1_zk.delete(eph_node) + except NoNodeError: + pass + + assert node1_zk.exists(eph_node) == None + + destroy_zk_client(node1_zk) + 
node1_zk = None + destroy_zk_client(node2_zk) + node2_zk = None + + time.sleep(1) + else: + assert False, "Session wasn't properly cleaned up on shutdown" \ No newline at end of file From 5decb1f5c555d2465724f9bc3c555c157f9deb81 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 20 Jul 2023 14:11:11 +0000 Subject: [PATCH 1806/1997] Automatic style fix --- tests/integration/test_keeper_session/test.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_keeper_session/test.py b/tests/integration/test_keeper_session/test.py index 68147865cd2..cd012ad6e9e 100644 --- a/tests/integration/test_keeper_session/test.py +++ b/tests/integration/test_keeper_session/test.py @@ -181,7 +181,9 @@ def test_session_close_shutdown(started_cluster): if node1_zk.exists(eph_node) == None: break - assert node2.contains_in_log("Sessions cannot be closed during shutdown because there is no active leader") + assert node2.contains_in_log( + "Sessions cannot be closed during shutdown because there is no active leader" + ) try: node1_zk.delete(eph_node) @@ -197,4 +199,4 @@ def test_session_close_shutdown(started_cluster): time.sleep(1) else: - assert False, "Session wasn't properly cleaned up on shutdown" \ No newline at end of file + assert False, "Session wasn't properly cleaned up on shutdown" From 046bf55dc084d4df91ecfddb8e22aa6f9300fa43 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 20 Jul 2023 14:17:33 +0000 Subject: [PATCH 1807/1997] Incorporate feedback --- .../functions/arithmetic-functions.md | 23 +++++++++++++------ 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/docs/en/sql-reference/functions/arithmetic-functions.md b/docs/en/sql-reference/functions/arithmetic-functions.md index 054c59d5778..69f1816b7df 100644 --- a/docs/en/sql-reference/functions/arithmetic-functions.md +++ b/docs/en/sql-reference/functions/arithmetic-functions.md @@ -6,9 +6,20 @@ sidebar_label: Arithmetic # Arithmetic Functions -The result type of all arithmetic functions is the smallest type which can represent all possible results. Size promotion happens for integers up to 32 bit, e.g. `UInt8 + UInt16 = UInt32`. If one of the inters has 64 or more bits, the result is of the same type as the bigger of the input integers, e.g. `UInt16 + UInt128 = UInt128`. While this introduces a risk of overflows around the value range boundary, it ensures that calculations are performed quickly using the maximum native integer width of 64 bit. Also, this behavior guarantees compatibility with many other databases which provide 64 bit integers (BIGINT) as the biggest integer type. +Arithmetic functions work for any two operands of type `UInt8`, `UInt16`, `UInt32`, `UInt64`, `Int8`, `Int16`, `Int32`, `Int64`, `Float32`, or `Float64`. -The result of addition or multiplication of two integers is unsigned unless one of the integers is signed. +Before performing the operation, both operands are casted to the result type. The result type is determined as follows (unless specified +differently in the function documentation below): +- If both operands are up to 32 bits wide, the size of the result type will be the size of the next bigger type following the bigger of the + two operands (integer size promotion). For example, `UInt8 + UInt16 = UInt32` or `Float32 * Float32 = Float64`. +- If one of the operands has 64 or more bits, the size of the result type will be the same size as the bigger of the two operands. For + example, `UInt32 + UInt128 = UInt128` or `Float32 * Float64 = Float64`. 
+- If one of the operands is signed, the result type will also be signed, otherwise it will be unsigned. For example, `UInt32 * Int32 = Int64`. + +These rules make sure that the result type will be the smallest type which can represent all possible results. While this introduces a risk +of overflows around the value range boundary, it ensures that calculations are performed quickly using the maximum native integer width of +64 bit. This behavior also guarantees compatibility with many other databases which provide 64 bit integers (BIGINT) as the biggest integer +type. Example: @@ -22,8 +33,6 @@ SELECT toTypeName(0), toTypeName(0 + 0), toTypeName(0 + 0 + 0), toTypeName(0 + 0 └───────────────┴────────────────────────┴─────────────────────────────────┴──────────────────────────────────────────┘ ``` -Arithmetic functions work for any pair of `UInt8`, `UInt16`, `UInt32`, `UInt64`, `Int8`, `Int16`, `Int32`, `Int64`, `Float32`, or `Float64` values. - Overflows are produced the same way as in C++. ## plus @@ -68,7 +77,7 @@ Alias: `a \* b` (operator) ## divide -Calculates the quotient of two values `a` and `b`. The result is always a floating-point value. If you need integer division, you can use the `intDiv` function. +Calculates the quotient of two values `a` and `b`. The result type is always [Float64](../../sql-reference/data-types/float.md). Integer division is provided by the `intDiv` function. Division by 0 returns `inf`, `-inf`, or `nan`. @@ -84,7 +93,7 @@ Alias: `a / b` (operator) Performs an integer division of two values `a` by `b`, i.e. computes the quotient rounded down to the next smallest integer. -The result has the same type as the dividend (the first parameter). +The result has the same width as the dividend (the first parameter). An exception is thrown when dividing by zero, when the quotient does not fit in the range of the dividend, or when dividing a minimal negative number by minus one. @@ -135,7 +144,7 @@ intDivOrZero(a, b) Calculates the remainder of the division of two values `a` by `b`. -The result type is an integer if both inputs are integers. If one of the inputs is a floating-point number, the result is a floating-point number. +The result type is an integer if both inputs are integers. If one of the inputs is a floating-point number, the result type is [Float64](../../sql-reference/data-types/float.md). The remainder is computed like in C++. Truncated division is used for negative numbers. From 8adf57a6981610936acc84f3c69342682952ff0a Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Thu, 20 Jul 2023 14:18:32 +0000 Subject: [PATCH 1808/1997] Fix text in comments and improve exception handling --- src/Common/examples/encrypt_decrypt.cpp | 2 +- tests/integration/test_config_decryption/test_wrong_settings.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Common/examples/encrypt_decrypt.cpp b/src/Common/examples/encrypt_decrypt.cpp index 542e173deb9..2d8c5a5f61f 100644 --- a/src/Common/examples/encrypt_decrypt.cpp +++ b/src/Common/examples/encrypt_decrypt.cpp @@ -3,7 +3,7 @@ #include #include -/** This test program encrypts or decrypts text values using AES_128_GCM_SIV or AES_256_GCM_SIV codecs. +/** This test program encrypts or decrypts text values using a symmetric encryption codec like AES_128_GCM_SIV or AES_256_GCM_SIV.
* * How to use: diff --git a/tests/integration/test_config_decryption/test_wrong_settings.py b/tests/integration/test_config_decryption/test_wrong_settings.py index e86f7fa9b39..e0fbd4b2948 100644 --- a/tests/integration/test_config_decryption/test_wrong_settings.py +++ b/tests/integration/test_config_decryption/test_wrong_settings.py @@ -10,7 +10,7 @@ def start_clickhouse(config, err_msg): cluster.start() except Exception as e: caught_exception = str(e) - assert caught_exception.find(err_msg) != -1 + assert err_msg in caught_exception def test_wrong_method(): From e467264588a6435199879fd89d1dc995c9e37c63 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 20 Jul 2023 17:56:30 +0300 Subject: [PATCH 1809/1997] Update src/IO/HTTPCommon.cpp --- src/IO/HTTPCommon.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IO/HTTPCommon.cpp b/src/IO/HTTPCommon.cpp index a5816911c09..ddd7ccbe483 100644 --- a/src/IO/HTTPCommon.cpp +++ b/src/IO/HTTPCommon.cpp @@ -81,7 +81,7 @@ namespace Session::close(); LOG_TRACE( log, - "Last ip ({}) is unreachable for {}:{}. Will try another resolved address.", + "Last ip ({}) is unreachable for {}:{}. Will try another resolved address.", Session::getResolvedHost(), Session::getHost(), Session::getPort()); From c0aa3e456705e3ef75ed09683f4e9ed6d9151917 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 20 Jul 2023 17:59:43 +0300 Subject: [PATCH 1810/1997] Update ci-slack-bot.py --- utils/ci-slack-bot/ci-slack-bot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/ci-slack-bot/ci-slack-bot.py b/utils/ci-slack-bot/ci-slack-bot.py index 0fb12e89ce9..ea883e3cda3 100755 --- a/utils/ci-slack-bot/ci-slack-bot.py +++ b/utils/ci-slack-bot/ci-slack-bot.py @@ -245,7 +245,7 @@ def get_too_many_failures_message(failures_count): def get_failed_checks_percentage_message(percentage): - p = percentage[0][0] * 100 + p = float(percentage[0][0]) * 100 # Always report more than 1% of failed checks # For <= 1%: higher percentage of failures == higher probability From ea252e2f612afd9e83c1aa000af945eebbe18a16 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 20 Jul 2023 15:05:07 +0000 Subject: [PATCH 1811/1997] Disable analyzer setting in backward_compatibility integration tests. 
--- tests/integration/helpers/cluster.py | 6 +++++- tests/integration/test_backward_compatibility/test.py | 2 ++ .../test_aggregate_fixed_key.py | 5 +++-- .../test_aggregate_function_state.py | 6 ++++-- .../test_backward_compatibility/test_convert_ordinary.py | 1 + .../test_backward_compatibility/test_cte_distributed.py | 7 ++++--- .../test_data_skipping_indices.py | 1 + .../test_backward_compatibility/test_functions.py | 3 ++- .../test_in_memory_parts_still_read.py | 1 + .../test_insert_profile_events.py | 3 ++- .../test_ip_types_binary_compatibility.py | 1 + .../test_memory_bound_aggregation.py | 4 +++- .../test_normalized_count_comparison.py | 3 ++- .../test_select_aggregate_alias_column.py | 3 ++- .../test_short_strings_aggregation.py | 4 +++- .../test_vertical_merges_from_compact_parts.py | 2 ++ 16 files changed, 38 insertions(+), 14 deletions(-) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index c52442ecb9c..c85fbb8ad9e 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -1533,6 +1533,7 @@ class ClickHouseCluster: with_jdbc_bridge=False, with_hive=False, with_coredns=False, + allow_analyzer=True, hostname=None, env_variables=None, image="clickhouse/integration-test", @@ -1630,6 +1631,7 @@ class ClickHouseCluster: with_hive=with_hive, with_coredns=with_coredns, with_cassandra=with_cassandra, + allow_analyzer=allow_analyzer, server_bin_path=self.server_bin_path, odbc_bridge_bin_path=self.odbc_bridge_bin_path, library_bridge_bin_path=self.library_bridge_bin_path, @@ -3169,6 +3171,7 @@ class ClickHouseInstance: with_hive, with_coredns, with_cassandra, + allow_analyzer, server_bin_path, odbc_bridge_bin_path, library_bridge_bin_path, @@ -3256,6 +3259,7 @@ class ClickHouseInstance: self.with_hive = with_hive self.with_coredns = with_coredns self.coredns_config_dir = p.abspath(p.join(base_path, "coredns_config")) + self.allow_analyzer = allow_analyzer self.main_config_name = main_config_name self.users_config_name = users_config_name @@ -4227,7 +4231,7 @@ class ClickHouseInstance: ) write_embedded_config("0_common_instance_users.xml", users_d_dir) - if os.environ.get("CLICKHOUSE_USE_NEW_ANALYZER") is not None: + if os.environ.get("CLICKHOUSE_USE_NEW_ANALYZER") is not None and self.allow_analyzer: write_embedded_config("0_common_enable_analyzer.xml", users_d_dir) if len(self.custom_dictionaries_paths): diff --git a/tests/integration/test_backward_compatibility/test.py b/tests/integration/test_backward_compatibility/test.py index ea1d3ab9c07..c3d3b8aad34 100644 --- a/tests/integration/test_backward_compatibility/test.py +++ b/tests/integration/test_backward_compatibility/test.py @@ -10,11 +10,13 @@ node1 = cluster.add_instance( tag="19.17.8.54", stay_alive=True, with_installed_binary=True, + allow_analyzer=False ) node2 = cluster.add_instance( "node2", main_configs=["configs/wide_parts_only.xml", "configs/no_compress_marks.xml"], with_zookeeper=True, + allow_analyzer=False, ) diff --git a/tests/integration/test_backward_compatibility/test_aggregate_fixed_key.py b/tests/integration/test_backward_compatibility/test_aggregate_fixed_key.py index 01c9736c354..cf258987cbf 100644 --- a/tests/integration/test_backward_compatibility/test_aggregate_fixed_key.py +++ b/tests/integration/test_backward_compatibility/test_aggregate_fixed_key.py @@ -9,9 +9,10 @@ node1 = cluster.add_instance( image="yandex/clickhouse-server", tag="21.3", with_installed_binary=True, + allow_analyzer=False, ) -node2 = 
cluster.add_instance("node2", with_zookeeper=True) -node3 = cluster.add_instance("node3", with_zookeeper=True) +node2 = cluster.add_instance("node2", with_zookeeper=True, allow_analyzer=False) +node3 = cluster.add_instance("node3", with_zookeeper=True, allow_analyzer=False) @pytest.fixture(scope="module") diff --git a/tests/integration/test_backward_compatibility/test_aggregate_function_state.py b/tests/integration/test_backward_compatibility/test_aggregate_function_state.py index 1f6d405603a..3a936239cc8 100644 --- a/tests/integration/test_backward_compatibility/test_aggregate_function_state.py +++ b/tests/integration/test_backward_compatibility/test_aggregate_function_state.py @@ -10,6 +10,7 @@ node1 = cluster.add_instance( tag="19.16.9.37", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) node2 = cluster.add_instance( "node2", @@ -18,9 +19,10 @@ node2 = cluster.add_instance( tag="19.16.9.37", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) -node3 = cluster.add_instance("node3", with_zookeeper=False) -node4 = cluster.add_instance("node4", with_zookeeper=False) +node3 = cluster.add_instance("node3", with_zookeeper=False, allow_analyzer=False) +node4 = cluster.add_instance("node4", with_zookeeper=False, allow_analyzer=False) @pytest.fixture(scope="module") diff --git a/tests/integration/test_backward_compatibility/test_convert_ordinary.py b/tests/integration/test_backward_compatibility/test_convert_ordinary.py index 8b1afd358eb..36facdd59b1 100644 --- a/tests/integration/test_backward_compatibility/test_convert_ordinary.py +++ b/tests/integration/test_backward_compatibility/test_convert_ordinary.py @@ -9,6 +9,7 @@ node = cluster.add_instance( stay_alive=True, with_zookeeper=True, with_installed_binary=True, + allow_analyzer=False, ) diff --git a/tests/integration/test_backward_compatibility/test_cte_distributed.py b/tests/integration/test_backward_compatibility/test_cte_distributed.py index 7ea0d2d9f21..c68468aad75 100644 --- a/tests/integration/test_backward_compatibility/test_cte_distributed.py +++ b/tests/integration/test_backward_compatibility/test_cte_distributed.py @@ -3,7 +3,7 @@ import pytest from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -node1 = cluster.add_instance("node1", with_zookeeper=False) +node1 = cluster.add_instance("node1", with_zookeeper=False, allow_analyzer=False) node2 = cluster.add_instance( "node2", with_zookeeper=False, @@ -11,6 +11,7 @@ node2 = cluster.add_instance( tag="21.7.3.14", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) @@ -31,7 +32,7 @@ WITH quantile(0.05)(cnt) as p05, quantile(0.95)(cnt) as p95, p95 - p05 as inter_percentile_range -SELECT +SELECT sum(cnt) as total_requests, count() as data_points, inter_percentile_range @@ -49,7 +50,7 @@ WITH quantile(0.05)(cnt) as p05, quantile(0.95)(cnt) as p95, p95 - p05 as inter_percentile_range -SELECT +SELECT sum(cnt) as total_requests, count() as data_points, inter_percentile_range diff --git a/tests/integration/test_backward_compatibility/test_data_skipping_indices.py b/tests/integration/test_backward_compatibility/test_data_skipping_indices.py index c65dc6d3841..46ab27d2ab0 100644 --- a/tests/integration/test_backward_compatibility/test_data_skipping_indices.py +++ b/tests/integration/test_backward_compatibility/test_data_skipping_indices.py @@ -12,6 +12,7 @@ node = cluster.add_instance( tag="21.6", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) diff --git 
a/tests/integration/test_backward_compatibility/test_functions.py b/tests/integration/test_backward_compatibility/test_functions.py index afb19901e74..fa24b146fec 100644 --- a/tests/integration/test_backward_compatibility/test_functions.py +++ b/tests/integration/test_backward_compatibility/test_functions.py @@ -9,7 +9,7 @@ from helpers.cluster import ClickHouseCluster from helpers.client import QueryRuntimeException cluster = ClickHouseCluster(__file__) -upstream = cluster.add_instance("upstream") +upstream = cluster.add_instance("upstream", allow_analyzer=False) backward = cluster.add_instance( "backward", image="clickhouse/clickhouse-server", @@ -19,6 +19,7 @@ backward = cluster.add_instance( # Affected at least: singleValueOrNull, last_value, min, max, any, anyLast, anyHeavy, first_value, argMin, argMax tag="22.6", with_installed_binary=True, + allow_analyzer=False, ) diff --git a/tests/integration/test_backward_compatibility/test_in_memory_parts_still_read.py b/tests/integration/test_backward_compatibility/test_in_memory_parts_still_read.py index d55f155918e..cd67f1f6344 100644 --- a/tests/integration/test_backward_compatibility/test_in_memory_parts_still_read.py +++ b/tests/integration/test_backward_compatibility/test_in_memory_parts_still_read.py @@ -12,6 +12,7 @@ node = cluster.add_instance( tag="23.4", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) diff --git a/tests/integration/test_backward_compatibility/test_insert_profile_events.py b/tests/integration/test_backward_compatibility/test_insert_profile_events.py index 0fd453e57d4..8564c6b5952 100644 --- a/tests/integration/test_backward_compatibility/test_insert_profile_events.py +++ b/tests/integration/test_backward_compatibility/test_insert_profile_events.py @@ -7,12 +7,13 @@ import pytest from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -upstream_node = cluster.add_instance("upstream_node") +upstream_node = cluster.add_instance("upstream_node", allow_analyzer=False) old_node = cluster.add_instance( "old_node", image="clickhouse/clickhouse-server", tag="22.5.1.2079", with_installed_binary=True, + allow_analyzer=False, ) diff --git a/tests/integration/test_backward_compatibility/test_ip_types_binary_compatibility.py b/tests/integration/test_backward_compatibility/test_ip_types_binary_compatibility.py index bb40dff27ac..04016755a24 100644 --- a/tests/integration/test_backward_compatibility/test_ip_types_binary_compatibility.py +++ b/tests/integration/test_backward_compatibility/test_ip_types_binary_compatibility.py @@ -10,6 +10,7 @@ node_22_6 = cluster.add_instance( tag="22.6", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) diff --git a/tests/integration/test_backward_compatibility/test_memory_bound_aggregation.py b/tests/integration/test_backward_compatibility/test_memory_bound_aggregation.py index d76c4eba409..96b41c81384 100644 --- a/tests/integration/test_backward_compatibility/test_memory_bound_aggregation.py +++ b/tests/integration/test_backward_compatibility/test_memory_bound_aggregation.py @@ -10,6 +10,7 @@ node1 = cluster.add_instance( tag="21.1", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) node2 = cluster.add_instance( "node2", @@ -18,8 +19,9 @@ node2 = cluster.add_instance( tag="21.1", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) -node3 = cluster.add_instance("node3", with_zookeeper=False) +node3 = cluster.add_instance("node3", with_zookeeper=False, allow_analyzer=False) 
@pytest.fixture(scope="module") diff --git a/tests/integration/test_backward_compatibility/test_normalized_count_comparison.py b/tests/integration/test_backward_compatibility/test_normalized_count_comparison.py index fcdedd29dad..3cd708d5029 100644 --- a/tests/integration/test_backward_compatibility/test_normalized_count_comparison.py +++ b/tests/integration/test_backward_compatibility/test_normalized_count_comparison.py @@ -3,7 +3,7 @@ import pytest from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -node1 = cluster.add_instance("node1", with_zookeeper=False) +node1 = cluster.add_instance("node1", with_zookeeper=False, allow_analyzer=False) node2 = cluster.add_instance( "node2", with_zookeeper=False, @@ -11,6 +11,7 @@ node2 = cluster.add_instance( tag="21.7.2.7", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) diff --git a/tests/integration/test_backward_compatibility/test_select_aggregate_alias_column.py b/tests/integration/test_backward_compatibility/test_select_aggregate_alias_column.py index 8bdae54a889..7e10b6ab430 100644 --- a/tests/integration/test_backward_compatibility/test_select_aggregate_alias_column.py +++ b/tests/integration/test_backward_compatibility/test_select_aggregate_alias_column.py @@ -3,7 +3,7 @@ import pytest from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -node1 = cluster.add_instance("node1", with_zookeeper=False) +node1 = cluster.add_instance("node1", with_zookeeper=False, allow_analyzer=False) node2 = cluster.add_instance( "node2", with_zookeeper=False, @@ -11,6 +11,7 @@ node2 = cluster.add_instance( tag="21.7.2.7", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) diff --git a/tests/integration/test_backward_compatibility/test_short_strings_aggregation.py b/tests/integration/test_backward_compatibility/test_short_strings_aggregation.py index 17a7282b7b5..e4fda618031 100644 --- a/tests/integration/test_backward_compatibility/test_short_strings_aggregation.py +++ b/tests/integration/test_backward_compatibility/test_short_strings_aggregation.py @@ -10,6 +10,7 @@ node1 = cluster.add_instance( tag="19.16.9.37", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) node2 = cluster.add_instance( "node2", @@ -18,8 +19,9 @@ node2 = cluster.add_instance( tag="19.16.9.37", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) -node3 = cluster.add_instance("node3", with_zookeeper=False) +node3 = cluster.add_instance("node3", with_zookeeper=False, allow_analyzer=False) @pytest.fixture(scope="module") diff --git a/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py b/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py index 3d006caad0d..82ffcc20b60 100644 --- a/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py +++ b/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py @@ -11,12 +11,14 @@ node_old = cluster.add_instance( stay_alive=True, with_installed_binary=True, with_zookeeper=True, + allow_analyzer=False, ) node_new = cluster.add_instance( "node2", main_configs=["configs/no_compress_marks.xml"], with_zookeeper=True, stay_alive=True, + allow_analyzer=False, ) From a26de1b370e8c09c548528ffbe3337cbf2340012 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 20 Jul 2023 15:12:55 +0000 Subject: [PATCH 1812/1997] Automatic style fix --- tests/integration/helpers/cluster.py | 5 
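Mechanically, the whole patch boils down to one guard in `cluster.py` plus the per-instance opt-outs scattered through the hunks above: the analyzer override is written only when the CI environment requests it *and* the instance allows it, and every instance that runs an old server binary passes `allow_analyzer=False`, since an old binary would reject the unknown setting. The guard, sketched from the `cluster.py` hunk earlier in this patch (`write_embedded_config` is the helper the diff already uses):

```python
import os

def maybe_enable_analyzer(instance, users_d_dir, write_embedded_config):
    # Old-version containers are created with allow_analyzer=False, so they
    # never receive a users.d override their binary cannot parse.
    if (
        os.environ.get("CLICKHOUSE_USE_NEW_ANALYZER") is not None
        and instance.allow_analyzer
    ):
        write_embedded_config("0_common_enable_analyzer.xml", users_d_dir)
```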
++++- tests/integration/test_backward_compatibility/test.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index c85fbb8ad9e..0ac2f330b1e 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -4231,7 +4231,10 @@ class ClickHouseInstance: ) write_embedded_config("0_common_instance_users.xml", users_d_dir) - if os.environ.get("CLICKHOUSE_USE_NEW_ANALYZER") is not None and self.allow_analyzer: + if ( + os.environ.get("CLICKHOUSE_USE_NEW_ANALYZER") is not None + and self.allow_analyzer + ): write_embedded_config("0_common_enable_analyzer.xml", users_d_dir) if len(self.custom_dictionaries_paths): diff --git a/tests/integration/test_backward_compatibility/test.py b/tests/integration/test_backward_compatibility/test.py index c3d3b8aad34..6f21b184a95 100644 --- a/tests/integration/test_backward_compatibility/test.py +++ b/tests/integration/test_backward_compatibility/test.py @@ -10,7 +10,7 @@ node1 = cluster.add_instance( tag="19.17.8.54", stay_alive=True, with_installed_binary=True, - allow_analyzer=False + allow_analyzer=False, ) node2 = cluster.add_instance( "node2", From e6624a07e4fe938b55dd6bc5d8cbabd0ed93d2d7 Mon Sep 17 00:00:00 2001 From: AlexBykovski Date: Thu, 20 Jul 2023 18:54:48 +0300 Subject: [PATCH 1813/1997] Update build-osx.md syntax error in command for compiler for OSx compilation --- docs/ru/development/build-osx.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/development/build-osx.md b/docs/ru/development/build-osx.md index 9a1f9c9347d..6b4e612b13f 100644 --- a/docs/ru/development/build-osx.md +++ b/docs/ru/development/build-osx.md @@ -68,7 +68,7 @@ $ /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/ $ rm -rf build $ mkdir build $ cd build - $ cmake -DCMAKE_C_COMPILER=$(brew --prefix llvm)/bin/clang -DCMAKE_CXX_COMPILER==$(brew --prefix llvm)/bin/clang++ -DCMAKE_BUILD_TYPE=RelWithDebInfo -DENABLE_JEMALLOC=OFF .. + $ cmake -DCMAKE_C_COMPILER=$(brew --prefix llvm)/bin/clang -DCMAKE_CXX_COMPILER=$(brew --prefix llvm)/bin/clang++ -DCMAKE_BUILD_TYPE=RelWithDebInfo -DENABLE_JEMALLOC=OFF .. $ cmake -DCMAKE_C_COMPILER=$(brew --prefix llvm)/bin/clang -DCMAKE_CXX_COMPILER=$(brew --prefix llvm)/bin/clang++ -DCMAKE_BUILD_TYPE=RelWithDebInfo -DENABLE_JEMALLOC=OFF .. $ cmake --build . --config RelWithDebInfo $ cd .. 
From 97e54d6ebaa174f8d2ae291ddec20fd879b29bfa Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Wed, 19 Jul 2023 07:13:25 +0000 Subject: [PATCH 1814/1997] Fix test_backup_restore_on_cluster flakiness caused by missing replica syncs --- tests/integration/test_backup_restore_on_cluster/test.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/integration/test_backup_restore_on_cluster/test.py b/tests/integration/test_backup_restore_on_cluster/test.py index 6af3a7dbab8..39496b8a5c8 100644 --- a/tests/integration/test_backup_restore_on_cluster/test.py +++ b/tests/integration/test_backup_restore_on_cluster/test.py @@ -580,6 +580,7 @@ def test_required_privileges(): node1.query( f"RESTORE TABLE tbl AS tbl2 ON CLUSTER 'cluster' FROM {backup_name}", user="u1" ) + node2.query("SYSTEM SYNC REPLICA ON CLUSTER 'cluster' tbl2") assert node2.query("SELECT * FROM tbl2") == "100\n" @@ -593,6 +594,7 @@ def test_required_privileges(): node1.query("GRANT INSERT, CREATE TABLE ON tbl TO u1") node1.query(f"RESTORE ALL ON CLUSTER 'cluster' FROM {backup_name}", user="u1") + node2.query("SYSTEM SYNC REPLICA ON CLUSTER 'cluster' tbl") assert node2.query("SELECT * FROM tbl") == "100\n" From 13f8d72f54433a790f3efcb054db389e4fdd53f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 20 Jul 2023 17:46:22 +0200 Subject: [PATCH 1815/1997] Wait for zero copy replication lock even if some disks don't support it --- .../MergeTree/MergeFromLogEntryTask.cpp | 8 +++++-- .../MergeTree/MutateFromLogEntryTask.cpp | 6 ++++- .../ReplicatedMergeMutateTaskBase.cpp | 2 +- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 22 +++++++++++++++---- 4 files changed, 30 insertions(+), 8 deletions(-) diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp index 9f54c554c85..883cfee89c8 100644 --- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp @@ -230,7 +230,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare() /// the fast replica is not overloaded because amount of executing merges doesn't affect the ability to acquire locks for new merges. /// /// So here we trying to solve it with the simplest solution -- sleep random time up to 500ms for 1GB part and up to 7 seconds for 300GB part. - /// It can sound too much, but we are trying to aquite these locks in background tasks which can be scheduled each 5 seconds or so. + /// It can sound too much, but we are trying to acquire these locks in background tasks which can be scheduled each 5 seconds or so. double start_to_sleep_seconds = std::logf(storage_settings_ptr->zero_copy_merge_mutation_min_parts_size_sleep_before_lock.value); uint64_t right_border_to_sleep_ms = static_cast((std::log(estimated_space_for_merge) - start_to_sleep_seconds + 0.5) * 1000); uint64_t time_to_sleep_milliseconds = std::min(10000UL, std::uniform_int_distribution(1, 1 + right_border_to_sleep_ms)(rng)); @@ -245,7 +245,11 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare() if (!zero_copy_lock || !zero_copy_lock->isLocked()) { - LOG_DEBUG(log, "Merge of part {} started by some other replica, will wait it and fetch merged part", entry.new_part_name); + LOG_DEBUG( + log, + "Merge of part {} started by some other replica, will wait for it and fetch merged part. 
Number of tries {}", entry.new_part_name, entry.num_tries); storage.watchZeroCopyLock(entry.new_part_name, disk); /// Don't check for missing part -- it's missing because other replica still not /// finished merge. diff --git a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp index 6cb9d50436e..164b541d2b8 100644 --- a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp @@ -154,8 +154,12 @@ ReplicatedMergeMutateTaskBase::PrepareResult MutateFromLogEntryTask::prepare() if (!zero_copy_lock || !zero_copy_lock->isLocked()) { + LOG_DEBUG( + log, + "Mutation of part {} started by some other replica, will wait for it and fetch the mutated part. Number of tries {}", + entry.new_part_name, + entry.num_tries); storage.watchZeroCopyLock(entry.new_part_name, disk); - LOG_DEBUG(log, "Mutation of part {} started by some other replica, will wait it and mutated merged part", entry.new_part_name); return PrepareResult{ .prepared_successfully = false, diff --git a/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp b/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp index b4748ee77ea..6ad77119016 100644 --- a/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp @@ -174,7 +174,7 @@ bool ReplicatedMergeMutateTaskBase::executeImpl() part_log_writer = prepare_result.part_log_writer; - /// Avoid resheduling, execute fetch here, in the same thread. + /// Avoid rescheduling, execute fetch here, in the same thread. if (!prepare_result.prepared_successfully) return execute_fetch(prepare_result.need_to_check_missing_part_in_fetch); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 07f46c07466..3264de850a0 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -1370,13 +1370,27 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry( if (data_settings->allow_remote_fs_zero_copy_replication) { auto disks = storage.getDisks(); - bool only_s3_storage = true; + DiskPtr disk_with_zero_copy = nullptr; for (const auto & disk : disks) - if (!disk->supportZeroCopyReplication()) - only_s3_storage = false; + { + if (disk->supportZeroCopyReplication()) + { + disk_with_zero_copy = disk; + break; + } + } + /// Technically speaking, if there is more than one disk that could store the part (a local hot + cloud cold) + /// it would be possible for the merge to happen concurrently with another replica if the other replica is doing + /// a merge using zero-copy and the cloud storage, and the local replica uses the local storage instead + /// The question is, is it worth it to keep retrying the merge over and over for the opportunity to do + /// double the work? Probably not + /// So what we do is, even if a hot merge could happen, check the zero copy lock anyway. + /// Keep in mind that for the zero copy lock check to happen (via existing_zero_copy_locks) we need to + /// have failed first because of it and added it via watchZeroCopyLock.
Considering we've already tried to + /// use cloud storage and zero-copy replication, the most likely scenario is that we'll try again String replica_to_execute_merge; - if (!disks.empty() && only_s3_storage && storage.checkZeroCopyLockExists(entry.new_part_name, disks[0], replica_to_execute_merge)) + if (disk_with_zero_copy && storage.checkZeroCopyLockExists(entry.new_part_name, disk_with_zero_copy, replica_to_execute_merge)) { constexpr auto fmt_string = "Not executing merge/mutation for the part {}, waiting for {} to execute it and will fetch after."; out_postpone_reason = fmt::format(fmt_string, entry.new_part_name, replica_to_execute_merge); From ed59870f92fa2893c9c105eaaeff82b1efaede22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20G=C3=B3ralski?= Date: Thu, 20 Jul 2023 18:04:58 +0200 Subject: [PATCH 1816/1997] Update LRUFileCachePriority.cpp --- src/Interpreters/Cache/LRUFileCachePriority.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Interpreters/Cache/LRUFileCachePriority.cpp b/src/Interpreters/Cache/LRUFileCachePriority.cpp index 18862e154da..33e567b7a76 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.cpp +++ b/src/Interpreters/Cache/LRUFileCachePriority.cpp @@ -7,6 +7,7 @@ namespace CurrentMetrics { extern const Metric FilesystemCacheSize; + extern const Metric FilesystemCacheSizeLimit; extern const Metric FilesystemCacheElements; } @@ -101,6 +102,7 @@ void LRUFileCachePriority::updateSize(int64_t size) { current_size += size; CurrentMetrics::add(CurrentMetrics::FilesystemCacheSize, size); + CurrentMetrics::set(CurrentMetrics::FilesystemCacheSizeLimit, getSizeLimit()); } void LRUFileCachePriority::updateElementsCount(int64_t num) From b3c42a1171e3f631e8985b80fc3c822c7ac87dd1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20G=C3=B3ralski?= Date: Thu, 20 Jul 2023 18:06:54 +0200 Subject: [PATCH 1817/1997] Update CurrentMetrics.cpp with FilesystemCacheSizeLimit metric --- src/Common/CurrentMetrics.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 626b43aea2c..583b13cf79d 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -187,6 +187,7 @@ M(CacheFileSegments, "Number of existing cache file segments") \ M(CacheDetachedFileSegments, "Number of existing detached cache file segments") \ M(FilesystemCacheSize, "Filesystem cache size in bytes") \ + M(FilesystemCacheSizeLimit, "Filesystem cache size limit in bytes") \ M(FilesystemCacheElements, "Filesystem cache elements (file segments)") \ M(FilesystemCacheDownloadQueueElements, "Filesystem cache elements in download queue") \ M(AsyncInsertCacheSize, "Number of async insert hash id in cache") \ From 920887f315e108da3b385986dee329a28aed65fb Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Thu, 20 Jul 2023 16:43:59 +0000 Subject: [PATCH 1818/1997] Done --- .../test_replicated_merge_tree_encrypted_disk/test.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/integration/test_replicated_merge_tree_encrypted_disk/test.py b/tests/integration/test_replicated_merge_tree_encrypted_disk/test.py index 05d7bbb7282..25d30eb9c82 100644 --- a/tests/integration/test_replicated_merge_tree_encrypted_disk/test.py +++ b/tests/integration/test_replicated_merge_tree_encrypted_disk/test.py @@ -67,6 +67,8 @@ def optimize_table(): def check_table(): expected = [[1, "str1"], [2, "str2"]] + node1.query("SYSTEM SYNC REPLICA tbl LIGHTWEIGHT") + node2.query("SYSTEM SYNC REPLICA tbl LIGHTWEIGHT") assert node1.query("SELECT 
* FROM tbl ORDER BY id") == TSV(expected) assert node2.query("SELECT * FROM tbl ORDER BY id") == TSV(expected) assert node1.query("CHECK TABLE tbl") == "1\n" From 6b3a508a23e62d5459ad2a19a3bfc91ca96ccb8f Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Thu, 20 Jul 2023 16:52:45 +0000 Subject: [PATCH 1819/1997] Done --- tests/queries/0_stateless/02122_parallel_formatting.lib | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/02122_parallel_formatting.lib b/tests/queries/0_stateless/02122_parallel_formatting.lib index 56119012788..5175e004cc5 100755 --- a/tests/queries/0_stateless/02122_parallel_formatting.lib +++ b/tests/queries/0_stateless/02122_parallel_formatting.lib @@ -11,14 +11,14 @@ non_parallel_file=$CLICKHOUSE_TMP/$CLICKHOUSE_TEST_UNIQUE_NAME"_non_parallel" format=$1 echo $format-1 -$CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) format $format" --output_format_parallel_formatting=0 --output_format_pretty_max_rows=1000000 | grep -a -v "elapsed" > $non_parallel_file -$CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) format $format" --output_format_parallel_formatting=1 --output_format_pretty_max_rows=1000000 | grep -a -v "elapsed" > $parallel_file +$CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) format $format" --output_format_write_statistics=0 --output_format_parallel_formatting=0 --output_format_pretty_max_rows=1000000 | grep -a -v "elapsed" > $non_parallel_file +$CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) format $format" --output_format_write_statistics=0 --output_format_parallel_formatting=1 --output_format_pretty_max_rows=1000000 | grep -a -v "elapsed" > $parallel_file diff $non_parallel_file $parallel_file echo $format-2 -$CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) group by number with totals order by number limit 190000 format $format" --extremes=1 --output_format_parallel_formatting=0 --output_format_pretty_max_rows=1000000 | grep -a -v "elapsed" > $non_parallel_file -$CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) group by number with totals order by number limit 190000 format $format" --extremes=1 --output_format_parallel_formatting=1 --output_format_pretty_max_rows=1000000 | grep -a -v "elapsed" > $parallel_file +$CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) group by number with totals order by number limit 190000 format $format" --extremes=1 --output_format_write_statistics=0 --output_format_parallel_formatting=0 --output_format_pretty_max_rows=1000000 | grep -a -v "elapsed" > $non_parallel_file +$CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) group by number with totals order by number limit 190000 format $format" --extremes=1 --output_format_write_statistics=0 --output_format_parallel_formatting=1 --output_format_pretty_max_rows=1000000 | grep -a -v "elapsed" > $parallel_file diff $non_parallel_file $parallel_file From 500f1e6757b721ecc8733f5e8bf41c765a631918 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Thu, 20 Jul 2023 18:55:41 +0200 Subject: [PATCH 1820/1997] Follow up to #49698 
--- .../PostgreSQL/MaterializedPostgreSQLConsumer.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp index adbc95a2cf2..f2923b60bfd 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp @@ -22,7 +22,6 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int POSTGRESQL_REPLICATION_INTERNAL_ERROR; extern const int BAD_ARGUMENTS; - extern const int TOO_MANY_PARTS; } MaterializedPostgreSQLConsumer::MaterializedPostgreSQLConsumer( @@ -591,11 +590,8 @@ void MaterializedPostgreSQLConsumer::syncTables() } catch (DB::Exception & e) { - if (e.code() == ErrorCodes::TOO_MANY_PARTS) - { - /// Retry this buffer later. - storage_data.buffer.columns = result_rows.mutateColumns(); - } + /// Retry this buffer later. + storage_data.buffer.columns = result_rows.mutateColumns(); throw; } From 045ecdf71f544cafb4f5c3eda20ee6f9d593f614 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Thu, 20 Jul 2023 19:03:06 +0200 Subject: [PATCH 1821/1997] Update src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp Co-authored-by: Alexander Tokmakov --- src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp index f2923b60bfd..d01746ddf1b 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp @@ -588,7 +588,7 @@ void MaterializedPostgreSQLConsumer::syncTables() executor.execute(); } } - catch (DB::Exception & e) + catch (...) { /// Retry this buffer later. storage_data.buffer.columns = result_rows.mutateColumns(); From 09e6bbc0e2ac634cde658b9c53e599d124d0a3d8 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 20 Jul 2023 21:10:59 +0300 Subject: [PATCH 1822/1997] Update DataPartsExchange.cpp --- src/Storages/MergeTree/DataPartsExchange.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 23bbc1c7f9d..6a3bf2940e9 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -353,8 +353,14 @@ MergeTreeData::DataPartPtr Service::findPart(const String & name) { /// It is important to include Outdated parts here because remote replicas cannot reliably /// determine the local state of the part, so queries for the parts in these states are completely normal. 
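    /// PreActive parts are the delicate case: with zero-copy replication the
    /// ephemeral zero-copy lock may be lost while a part is still PreActive,
    /// so (per the change below) they are served only when zero-copy is off.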
- auto part = data.getPartIfExists( - name, {MergeTreeDataPartState::Active, MergeTreeDataPartState::Outdated}); + MergeTreeData::DataPartPtr part; + + /// Ephemeral zero-copy lock may be lost for PreActive parts + bool zero_copy_enabled = data.getSettings()->allow_remote_fs_zero_copy_replication; + if (zero_copy_enabled) + part = data.getPartIfExists(name, {MergeTreeDataPartState::Active, MergeTreeDataPartState::Outdated}); + else + part = data.getPartIfExists(name, {MergeTreeDataPartState::PreActive, MergeTreeDataPartState::Active, MergeTreeDataPartState::Outdated}); if (part) return part; From 24371c33bfd5037455cb025b057fb413ee1be396 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Fri, 21 Jul 2023 09:24:16 +0800 Subject: [PATCH 1823/1997] remove DelayedBlocksTask::finish --- src/Processors/Transforms/JoiningTransform.cpp | 3 ++- src/Processors/Transforms/JoiningTransform.h | 3 +-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Processors/Transforms/JoiningTransform.cpp b/src/Processors/Transforms/JoiningTransform.cpp index 5480fea27a4..4e7868ea1c2 100644 --- a/src/Processors/Transforms/JoiningTransform.cpp +++ b/src/Processors/Transforms/JoiningTransform.cpp @@ -375,7 +375,8 @@ IProcessor::Status DelayedJoinedBlocksWorkerTransform::prepare() input.setNotNeeded(); } - if (task->finished) + // When delayed_blocks is nullptr, it means that all buckets have been joined. + if (!task->delayed_blocks) { input.close(); output.finish(); diff --git a/src/Processors/Transforms/JoiningTransform.h b/src/Processors/Transforms/JoiningTransform.h index 5e7403dbbdb..a308af03662 100644 --- a/src/Processors/Transforms/JoiningTransform.h +++ b/src/Processors/Transforms/JoiningTransform.h @@ -115,7 +115,7 @@ class DelayedBlocksTask : public ChunkInfo { public: - explicit DelayedBlocksTask() : finished(true) {} + DelayedBlocksTask() = default; explicit DelayedBlocksTask(IBlocksStreamPtr delayed_blocks_, JoiningTransform::FinishCounterPtr left_delayed_stream_finish_counter_) : delayed_blocks(std::move(delayed_blocks_)) , left_delayed_stream_finish_counter(left_delayed_stream_finish_counter_) @@ -125,7 +125,6 @@ public: IBlocksStreamPtr delayed_blocks = nullptr; JoiningTransform::FinishCounterPtr left_delayed_stream_finish_counter = nullptr; - bool finished = false; }; using DelayedBlocksTaskPtr = std::shared_ptr; From f0e277f94a642647cfd3eb5ebc722b486d9203b0 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 21 Jul 2023 06:45:35 +0200 Subject: [PATCH 1824/1997] Rename TaskStatsInfoGetter into NetlinkMetricsProvider There is ProcfsMetricsProvider, so by analogy to it. 
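Both providers now share the same *MetricsProvider naming and, conceptually,
the same narrow surface: a static permission probe plus a per-thread counter
snapshot. For a feel of what the procfs side of such a provider does, here is
a toy sketch that parses utime/stime (fields 14 and 15 of /proc/self/stat,
in clock ticks); it is illustrative only, the real classes read taskstats
via Netlink or dedicated /proc files:

    #include <fstream>
    #include <iostream>
    #include <sstream>
    #include <string>

    // Parse utime/stime for the current process from /proc/self/stat.
    bool getCpuTicks(unsigned long & utime, unsigned long & stime)
    {
        std::ifstream in("/proc/self/stat");
        if (!in)
            return false;
        std::string line;
        std::getline(in, line);
        // The command name (field 2) is parenthesized and may contain spaces,
        // so skip past the closing ')' before splitting the remaining fields.
        std::istringstream rest(line.substr(line.rfind(')') + 2));
        std::string skip;
        for (int field = 3; field <= 13; ++field)  // state, ppid, ..., cmajflt
            rest >> skip;
        rest >> utime >> stime;                    // fields 14 and 15
        return static_cast<bool>(rest);
    }

    int main()
    {
        unsigned long u = 0, s = 0;
        if (getCpuTicks(u, s))
            std::cout << "utime=" << u << " stime=" << s << " (clock ticks)\n";
    }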
Signed-off-by: Azat Khuzhin --- src/Common/CurrentThread.cpp | 1 - ...oGetter.cpp => NetlinkMetricsProvider.cpp} | 22 +++++++++---------- ...sInfoGetter.h => NetlinkMetricsProvider.h} | 6 ++--- src/Common/ThreadProfileEvents.cpp | 6 ++--- src/Disks/IO/ThreadPoolReader.cpp | 2 +- src/IO/ReadBufferFromFileDescriptor.cpp | 2 +- src/IO/SynchronousReader.cpp | 2 +- 7 files changed, 20 insertions(+), 21 deletions(-) rename src/Common/{TaskStatsInfoGetter.cpp => NetlinkMetricsProvider.cpp} (93%) rename src/Common/{TaskStatsInfoGetter.h => NetlinkMetricsProvider.h} (85%) diff --git a/src/Common/CurrentThread.cpp b/src/Common/CurrentThread.cpp index 057b1eeda12..ac5b712279e 100644 --- a/src/Common/CurrentThread.cpp +++ b/src/Common/CurrentThread.cpp @@ -3,7 +3,6 @@ #include "CurrentThread.h" #include #include -#include #include #include #include diff --git a/src/Common/TaskStatsInfoGetter.cpp b/src/Common/NetlinkMetricsProvider.cpp similarity index 93% rename from src/Common/TaskStatsInfoGetter.cpp rename to src/Common/NetlinkMetricsProvider.cpp index 867a50c8cce..4c228bcc6fc 100644 --- a/src/Common/TaskStatsInfoGetter.cpp +++ b/src/Common/NetlinkMetricsProvider.cpp @@ -1,4 +1,4 @@ -#include "TaskStatsInfoGetter.h" +#include "NetlinkMetricsProvider.h" #include #include #include @@ -200,7 +200,7 @@ bool checkPermissionsImpl() if (!res) return false; - /// Check that we can successfully initialize TaskStatsInfoGetter. + /// Check that we can successfully initialize NetlinkMetricsProvider. /// It will ask about family id through Netlink. /// On some LXC containers we have capability but we still cannot use Netlink. /// There is an evidence that Linux fedora-riscv 6.1.22 gives something strange instead of the expected result. @@ -208,7 +208,7 @@ bool checkPermissionsImpl() try { ::taskstats stats{}; - TaskStatsInfoGetter().getStat(stats, static_cast(getThreadId())); + NetlinkMetricsProvider().getStat(stats, static_cast(getThreadId())); } catch (const Exception & e) { @@ -244,14 +244,14 @@ UInt16 getFamilyId(int fd) } -bool TaskStatsInfoGetter::checkPermissions() +bool NetlinkMetricsProvider::checkPermissions() { static bool res = checkPermissionsImpl(); return res; } -TaskStatsInfoGetter::TaskStatsInfoGetter() +NetlinkMetricsProvider::NetlinkMetricsProvider() { netlink_socket_fd = ::socket(PF_NETLINK, SOCK_RAW, NETLINK_GENERIC); if (netlink_socket_fd < 0) @@ -293,7 +293,7 @@ TaskStatsInfoGetter::TaskStatsInfoGetter() } -void TaskStatsInfoGetter::getStat(::taskstats & out_stats, pid_t tid) const +void NetlinkMetricsProvider::getStat(::taskstats & out_stats, pid_t tid) const { NetlinkMessage answer = query(netlink_socket_fd, taskstats_family_id, tid, TASKSTATS_CMD_GET, TASKSTATS_CMD_ATTR_PID, &tid, sizeof(tid)); @@ -318,7 +318,7 @@ void TaskStatsInfoGetter::getStat(::taskstats & out_stats, pid_t tid) const } -TaskStatsInfoGetter::~TaskStatsInfoGetter() +NetlinkMetricsProvider::~NetlinkMetricsProvider() { if (netlink_socket_fd >= 0) { @@ -335,15 +335,15 @@ TaskStatsInfoGetter::~TaskStatsInfoGetter() namespace DB { -bool TaskStatsInfoGetter::checkPermissions() +bool NetlinkMetricsProvider::checkPermissions() { return false; } -TaskStatsInfoGetter::TaskStatsInfoGetter() = default; -TaskStatsInfoGetter::~TaskStatsInfoGetter() = default; +NetlinkMetricsProvider::NetlinkMetricsProvider() = default; +NetlinkMetricsProvider::~NetlinkMetricsProvider() = default; -void TaskStatsInfoGetter::getStat(::taskstats &, pid_t) const +void NetlinkMetricsProvider::getStat(::taskstats &, pid_t) const { } diff --git 
a/src/Common/TaskStatsInfoGetter.h b/src/Common/NetlinkMetricsProvider.h similarity index 85% rename from src/Common/TaskStatsInfoGetter.h rename to src/Common/NetlinkMetricsProvider.h index 66655d7ad0d..8a54f33be80 100644 --- a/src/Common/TaskStatsInfoGetter.h +++ b/src/Common/NetlinkMetricsProvider.h @@ -15,11 +15,11 @@ namespace DB /// /// [1]: https://elixir.bootlin.com/linux/v5.18-rc4/source/kernel/tsacct.c#L101 /// -class TaskStatsInfoGetter : private boost::noncopyable +class NetlinkMetricsProvider : private boost::noncopyable { public: - TaskStatsInfoGetter(); - ~TaskStatsInfoGetter(); + NetlinkMetricsProvider(); + ~NetlinkMetricsProvider(); void getStat(::taskstats & out_stats, pid_t tid) const; diff --git a/src/Common/ThreadProfileEvents.cpp b/src/Common/ThreadProfileEvents.cpp index a94fd81559a..256f53df011 100644 --- a/src/Common/ThreadProfileEvents.cpp +++ b/src/Common/ThreadProfileEvents.cpp @@ -2,7 +2,7 @@ #if defined(OS_LINUX) -#include "TaskStatsInfoGetter.h" +#include "NetlinkMetricsProvider.h" #include "ProcfsMetricsProvider.h" #include "hasLinuxCapability.h" @@ -99,7 +99,7 @@ TasksStatsCounters::MetricsProvider TasksStatsCounters::findBestAvailableProvide static std::optional provider = []() -> MetricsProvider { - if (TaskStatsInfoGetter::checkPermissions()) + if (NetlinkMetricsProvider::checkPermissions()) { return MetricsProvider::Netlink; } @@ -119,7 +119,7 @@ TasksStatsCounters::TasksStatsCounters(const UInt64 tid, const MetricsProvider p switch (provider) { case MetricsProvider::Netlink: - stats_getter = [metrics_provider = std::make_shared(), tid]() + stats_getter = [metrics_provider = std::make_shared(), tid]() { ::taskstats result{}; metrics_provider->getStat(result, static_cast(tid)); diff --git a/src/Disks/IO/ThreadPoolReader.cpp b/src/Disks/IO/ThreadPoolReader.cpp index effa19bc1af..cd3f2d8dea0 100644 --- a/src/Disks/IO/ThreadPoolReader.cpp +++ b/src/Disks/IO/ThreadPoolReader.cpp @@ -114,7 +114,7 @@ std::future ThreadPoolReader::submit(Request reques /// It reports real time spent including the time spent while thread was preempted doing nothing. /// And it is Ok for the purpose of this watch (it is used to lower the number of threads to read from tables). /// Sometimes it is better to use taskstats::blkio_delay_total, but it is quite expensive to get it - /// (TaskStatsInfoGetter has about 500K RPS). + /// (NetlinkMetricsProvider has about 500K RPS). Stopwatch watch(CLOCK_MONOTONIC); SCOPE_EXIT({ diff --git a/src/IO/ReadBufferFromFileDescriptor.cpp b/src/IO/ReadBufferFromFileDescriptor.cpp index 67bc01279c3..6c0c1681a4c 100644 --- a/src/IO/ReadBufferFromFileDescriptor.cpp +++ b/src/IO/ReadBufferFromFileDescriptor.cpp @@ -95,7 +95,7 @@ size_t ReadBufferFromFileDescriptor::readImpl(char * to, size_t min_bytes, size_ /// It reports real time spent including the time spent while thread was preempted doing nothing. /// And it is Ok for the purpose of this watch (it is used to lower the number of threads to read from tables). /// Sometimes it is better to use taskstats::blkio_delay_total, but it is quite expensive to get it - /// (TaskStatsInfoGetter has about 500K RPS). + /// (NetlinkMetricsProvider has about 500K RPS). 
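+ /// (At roughly 500K requests per second, one such round trip costs on the
+ /// order of microseconds, too expensive to pay on every single read call.)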
watch.stop(); ProfileEvents::increment(ProfileEvents::DiskReadElapsedMicroseconds, watch.elapsedMicroseconds()); diff --git a/src/IO/SynchronousReader.cpp b/src/IO/SynchronousReader.cpp index 7cef3bd8963..e1c654e48a3 100644 --- a/src/IO/SynchronousReader.cpp +++ b/src/IO/SynchronousReader.cpp @@ -78,7 +78,7 @@ std::future SynchronousReader::submit(Request reque /// It reports real time spent including the time spent while thread was preempted doing nothing. /// And it is Ok for the purpose of this watch (it is used to lower the number of threads to read from tables). /// Sometimes it is better to use taskstats::blkio_delay_total, but it is quite expensive to get it - /// (TaskStatsInfoGetter has about 500K RPS). + /// (NetlinkMetricsProvider has about 500K RPS). watch.stop(); ProfileEvents::increment(ProfileEvents::DiskReadElapsedMicroseconds, watch.elapsedMicroseconds()); From 51e81b37a4158e31b61c5ae8d993a2dee0ae16e3 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Fri, 7 Jul 2023 18:45:13 +0200 Subject: [PATCH 1825/1997] Move condtions with columns from PK to the end of PREWHERE chain --- .../MergeTree/MergeTreeWhereOptimizer.cpp | 31 +++++++++++++++++++ .../MergeTree/MergeTreeWhereOptimizer.h | 8 ++++- 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index 1620ba98d58..4ff58c1fc86 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -22,6 +22,33 @@ namespace DB /// This is used to assume that condition is likely to have good selectivity. static constexpr auto threshold = 2; +static NameToIndexMap fillNamesPositions(const Names & names) +{ + NameToIndexMap names_positions; + + for (size_t position = 0; position < names.size(); ++position) + { + const auto & name = names[position]; + names_positions[name] = position; + } + + return names_positions; +} + +/// Find minimal position of any of the column in primary key. +static Int64 findMinPosition(const NameSet & condition_table_columns, const NameToIndexMap & primary_key_positions) +{ + Int64 min_position = std::numeric_limits::max() - 1; + + for (const auto & column : condition_table_columns) + { + auto it = primary_key_positions.find(column); + if (it != primary_key_positions.end()) + min_position = std::min(min_position, static_cast(it->second)); + } + + return min_position; +} MergeTreeWhereOptimizer::MergeTreeWhereOptimizer( std::unordered_map column_sizes_, @@ -35,6 +62,7 @@ MergeTreeWhereOptimizer::MergeTreeWhereOptimizer( , supported_columns{supported_columns_} , sorting_key_names{NameSet( metadata_snapshot->getSortingKey().column_names.begin(), metadata_snapshot->getSortingKey().column_names.end())} + , primary_key_names_positions(fillNamesPositions(metadata_snapshot->getPrimaryKey().column_names)) , log{log_} , column_sizes{std::move(column_sizes_)} { @@ -234,6 +262,9 @@ void MergeTreeWhereOptimizer::analyzeImpl(Conditions & res, const RPNBuilderTree if (cond.viable) cond.good = isConditionGood(node, table_columns); + /// Find min position in PK of any column that is used in this condition. 
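+ /// (The comparison tuple below becomes (!viable, !good, -min_position_in_primary_key, ...),
+ /// so a condition on an early primary key column sorts towards the end of the
+ /// PREWHERE chain, where primary key analysis has most likely satisfied it already.)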
+ cond.min_position_in_primary_key = findMinPosition(cond.table_columns, primary_key_names_positions); + res.emplace_back(std::move(cond)); } } diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h index 18555a72db1..8ab21471aeb 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h @@ -72,9 +72,14 @@ private: /// Does the condition presumably have good selectivity? bool good = false; + /// Does the condition contain primary key column? + /// If so, it is better to move it further to the end of PREWHERE chain depending on minimal position in PK of any + /// column in this condition because this condition have bigger chances to be already satisfied by PK analysis. + Int64 min_position_in_primary_key = std::numeric_limits::max() - 1; + auto tuple() const { - return std::make_tuple(!viable, !good, columns_size, table_columns.size()); + return std::make_tuple(!viable, !good, -min_position_in_primary_key, columns_size, table_columns.size()); } /// Is condition a better candidate for moving to PREWHERE? @@ -141,6 +146,7 @@ private: const Names queried_columns; const std::optional supported_columns; const NameSet sorting_key_names; + const NameToIndexMap primary_key_names_positions; Poco::Logger * log; std::unordered_map column_sizes; UInt64 total_size_of_queried_columns = 0; From fe7cff5c1cfee89d411ec79e7a3e7603ff831782 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Wed, 19 Jul 2023 22:45:04 +0200 Subject: [PATCH 1826/1997] Added move_primary_key_columns_to_end_of_prewhere setting --- src/Core/Settings.h | 1 + src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp | 11 +++++++++-- src/Storages/MergeTree/MergeTreeWhereOptimizer.h | 1 + 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 6fb26994d2f..5a1f4b46223 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -129,6 +129,7 @@ class IColumn; M(Bool, optimize_move_to_prewhere_if_final, false, "If query has `FINAL`, the optimization `move_to_prewhere` is not always correct and it is enabled only if both settings `optimize_move_to_prewhere` and `optimize_move_to_prewhere_if_final` are turned on", 0) \ M(Bool, move_all_conditions_to_prewhere, true, "Move all viable conditions from WHERE to PREWHERE", 0) \ M(Bool, enable_multiple_prewhere_read_steps, true, "Move more conditions from WHERE to PREWHERE and do reads from disk and filtering in multiple steps if there are multiple conditions combined with AND", 0) \ + M(Bool, move_primary_key_columns_to_end_of_prewhere, true, "Move PREWHERE conditions containing primary key columns to the end of AND chain. It is likely that these conditions are taken into account during primary key analysis and thus will not contribute a lot to PREWHERE filtering.", 0) \ \ M(UInt64, alter_sync, 1, "Wait for actions to manipulate the partitions. 0 - do not wait, 1 - wait for execution only of itself, 2 - wait for everyone.", 0) ALIAS(replication_alter_partitions_sync) \ M(Int64, replication_wait_for_inactive_replica_timeout, 120, "Wait for inactive replica to execute ALTER/OPTIMIZE. 
Time in seconds, 0 - do not wait, negative - wait for unlimited time.", 0) \ diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index 4ff58c1fc86..5efb7286685 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -88,6 +88,7 @@ void MergeTreeWhereOptimizer::optimize(SelectQueryInfo & select_query_info, cons where_optimizer_context.context = context; where_optimizer_context.array_joined_names = determineArrayJoinedNames(select); where_optimizer_context.move_all_conditions_to_prewhere = context->getSettingsRef().move_all_conditions_to_prewhere; + where_optimizer_context.move_primary_key_columns_to_end_of_prewhere = context->getSettingsRef().move_primary_key_columns_to_end_of_prewhere; where_optimizer_context.is_final = select.final(); RPNBuilderTreeContext tree_context(context, std::move(block_with_constants), {} /*prepared_sets*/); @@ -117,6 +118,7 @@ std::optional MergeTreeWhe where_optimizer_context.context = context; where_optimizer_context.array_joined_names = {}; where_optimizer_context.move_all_conditions_to_prewhere = context->getSettingsRef().move_all_conditions_to_prewhere; + where_optimizer_context.move_primary_key_columns_to_end_of_prewhere = context->getSettingsRef().move_primary_key_columns_to_end_of_prewhere; where_optimizer_context.is_final = is_final; RPNBuilderTreeContext tree_context(context); @@ -262,8 +264,13 @@ void MergeTreeWhereOptimizer::analyzeImpl(Conditions & res, const RPNBuilderTree if (cond.viable) cond.good = isConditionGood(node, table_columns); - /// Find min position in PK of any column that is used in this condition. - cond.min_position_in_primary_key = findMinPosition(cond.table_columns, primary_key_names_positions); + if (where_optimizer_context.move_primary_key_columns_to_end_of_prewhere) + { + /// Consider all conditions good with this setting enabled. + cond.good = cond.viable; + /// Find min position in PK of any column that is used in this condition. + cond.min_position_in_primary_key = findMinPosition(cond.table_columns, primary_key_names_positions); + } res.emplace_back(std::move(cond)); } diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h index 8ab21471aeb..fb5e84b67c6 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h @@ -96,6 +96,7 @@ private: ContextPtr context; NameSet array_joined_names; bool move_all_conditions_to_prewhere = false; + bool move_primary_key_columns_to_end_of_prewhere = false; bool is_final = false; }; From 9b0eb9cdd709418c3782ae2468693b294e81a0cd Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Sun, 16 Jul 2023 23:34:44 +0800 Subject: [PATCH 1827/1997] ignore ast opt when doing projection calc --- src/Storages/ProjectionsDescription.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Storages/ProjectionsDescription.cpp b/src/Storages/ProjectionsDescription.cpp index 73fb279d51c..086355b6a79 100644 --- a/src/Storages/ProjectionsDescription.cpp +++ b/src/Storages/ProjectionsDescription.cpp @@ -298,6 +298,7 @@ Block ProjectionDescription::calculate(const Block & block, ContextPtr context) SelectQueryOptions{ type == ProjectionDescription::Type::Normal ? 
QueryProcessingStage::FetchColumns : QueryProcessingStage::WithMergeableState} + .ignoreASTOptimizations() .ignoreSettingConstraints()) .buildQueryPipeline(); builder.resize(1); From 696818b340d88667a214674f1df483b8c9e827d9 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Sun, 16 Jul 2023 23:35:18 +0800 Subject: [PATCH 1828/1997] Don't check monotonicity when analyze projections --- src/Processors/QueryPlan/Optimizations/actionsDAGUtils.cpp | 4 ++-- src/Processors/QueryPlan/Optimizations/actionsDAGUtils.h | 2 +- .../Optimizations/optimizeUseAggregateProjection.cpp | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Processors/QueryPlan/Optimizations/actionsDAGUtils.cpp b/src/Processors/QueryPlan/Optimizations/actionsDAGUtils.cpp index c9cf46aaeca..1c18465e1e1 100644 --- a/src/Processors/QueryPlan/Optimizations/actionsDAGUtils.cpp +++ b/src/Processors/QueryPlan/Optimizations/actionsDAGUtils.cpp @@ -8,7 +8,7 @@ namespace DB { -MatchedTrees::Matches matchTrees(const ActionsDAG & inner_dag, const ActionsDAG & outer_dag) +MatchedTrees::Matches matchTrees(const ActionsDAG & inner_dag, const ActionsDAG & outer_dag, bool check_monotonicity) { using Parents = std::set; std::unordered_map inner_parents; @@ -182,7 +182,7 @@ MatchedTrees::Matches matchTrees(const ActionsDAG & inner_dag, const ActionsDAG } } - if (!match.node && frame.node->function_base->hasInformationAboutMonotonicity()) + if (!match.node && check_monotonicity && frame.node->function_base->hasInformationAboutMonotonicity()) { size_t num_const_args = 0; const ActionsDAG::Node * monotonic_child = nullptr; diff --git a/src/Processors/QueryPlan/Optimizations/actionsDAGUtils.h b/src/Processors/QueryPlan/Optimizations/actionsDAGUtils.h index dd689cba46b..223fc40e33f 100644 --- a/src/Processors/QueryPlan/Optimizations/actionsDAGUtils.h +++ b/src/Processors/QueryPlan/Optimizations/actionsDAGUtils.h @@ -39,5 +39,5 @@ struct MatchedTrees using Matches = std::unordered_map; }; -MatchedTrees::Matches matchTrees(const ActionsDAG & inner_dag, const ActionsDAG & outer_dag); +MatchedTrees::Matches matchTrees(const ActionsDAG & inner_dag, const ActionsDAG & outer_dag, bool check_monotonicity = true); } diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp index f183bdca7a9..cf88de19f03 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp @@ -287,7 +287,7 @@ ActionsDAGPtr analyzeAggregateProjection( { auto proj_index = buildDAGIndex(*info.before_aggregation); - MatchedTrees::Matches matches = matchTrees(*info.before_aggregation, *query.dag); + MatchedTrees::Matches matches = matchTrees(*info.before_aggregation, *query.dag, false /* check_monotonicity */); // for (const auto & [node, match] : matches) // { From 60488e23912ba29ca0e75e2a39b1902517244e6b Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Sun, 16 Jul 2023 23:38:42 +0800 Subject: [PATCH 1829/1997] Add tests --- ...ggregate_projection_with_monotonic_key_expr.reference | 1 + ...1710_aggregate_projection_with_monotonic_key_expr.sql | 9 +++++++++ 2 files changed, 10 insertions(+) create mode 100644 tests/queries/0_stateless/01710_aggregate_projection_with_monotonic_key_expr.reference create mode 100644 tests/queries/0_stateless/01710_aggregate_projection_with_monotonic_key_expr.sql diff --git 
a/tests/queries/0_stateless/01710_aggregate_projection_with_monotonic_key_expr.reference b/tests/queries/0_stateless/01710_aggregate_projection_with_monotonic_key_expr.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/01710_aggregate_projection_with_monotonic_key_expr.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/01710_aggregate_projection_with_monotonic_key_expr.sql b/tests/queries/0_stateless/01710_aggregate_projection_with_monotonic_key_expr.sql new file mode 100644 index 00000000000..c3109553f63 --- /dev/null +++ b/tests/queries/0_stateless/01710_aggregate_projection_with_monotonic_key_expr.sql @@ -0,0 +1,9 @@ +DROP TABLE IF EXISTS t0; + +CREATE TABLE t0 (c0 Int16, projection h (SELECT min(c0), max(c0), count() GROUP BY -c0)) ENGINE = MergeTree ORDER BY (); + +INSERT INTO t0(c0) VALUES (1); + +SELECT count() FROM t0 GROUP BY gcd(-sign(c0), -c0); + +DROP TABLE t0; From d7bb006c231c3960e9c9f7a6f07cd8ba299d6422 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 17 Jul 2023 18:02:02 +0000 Subject: [PATCH 1830/1997] Fix monotonic chain for read-in-order as well. --- src/Processors/QueryPlan/Optimizations/actionsDAGUtils.cpp | 7 ++++++- ..._aggregate_projection_with_monotonic_key_expr.reference | 4 ++++ .../01710_aggregate_projection_with_monotonic_key_expr.sql | 7 +++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/Processors/QueryPlan/Optimizations/actionsDAGUtils.cpp b/src/Processors/QueryPlan/Optimizations/actionsDAGUtils.cpp index 1c18465e1e1..787a106200a 100644 --- a/src/Processors/QueryPlan/Optimizations/actionsDAGUtils.cpp +++ b/src/Processors/QueryPlan/Optimizations/actionsDAGUtils.cpp @@ -75,7 +75,12 @@ MatchedTrees::Matches matchTrees(const ActionsDAG & inner_dag, const ActionsDAG } /// A node from found match may be nullptr. /// It means that node is visited, but no match was found. - frame.mapped_children.push_back(it->second.node); + if (it->second.monotonicity) + /// Ignore a match with monotonicity. 
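+ /// (A monotonic match only says the outer expression is some monotonic
+ /// function of the matched node, e.g. -c0 with respect to c0; it is not an
+ /// exact match, so for matching children it must count as unmatched.)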
+ frame.mapped_children.push_back(nullptr); + else + frame.mapped_children.push_back(it->second.node); + } if (frame.mapped_children.size() < frame.node->children.size()) diff --git a/tests/queries/0_stateless/01710_aggregate_projection_with_monotonic_key_expr.reference b/tests/queries/0_stateless/01710_aggregate_projection_with_monotonic_key_expr.reference index d00491fd7e5..06e9efbe839 100644 --- a/tests/queries/0_stateless/01710_aggregate_projection_with_monotonic_key_expr.reference +++ b/tests/queries/0_stateless/01710_aggregate_projection_with_monotonic_key_expr.reference @@ -1 +1,5 @@ 1 +1 +-1 +1 +-1 diff --git a/tests/queries/0_stateless/01710_aggregate_projection_with_monotonic_key_expr.sql b/tests/queries/0_stateless/01710_aggregate_projection_with_monotonic_key_expr.sql index c3109553f63..bed43ef6630 100644 --- a/tests/queries/0_stateless/01710_aggregate_projection_with_monotonic_key_expr.sql +++ b/tests/queries/0_stateless/01710_aggregate_projection_with_monotonic_key_expr.sql @@ -1,4 +1,5 @@ DROP TABLE IF EXISTS t0; +DROP TABLE IF EXISTS t1; CREATE TABLE t0 (c0 Int16, projection h (SELECT min(c0), max(c0), count() GROUP BY -c0)) ENGINE = MergeTree ORDER BY (); @@ -6,4 +7,10 @@ INSERT INTO t0(c0) VALUES (1); SELECT count() FROM t0 GROUP BY gcd(-sign(c0), -c0); +create table t1 (c0 Int32) engine = MergeTree order by sin(c0); +insert into t1 values (-1), (1); +select c0 from t1 order by sin(-c0) settings optimize_read_in_order=0; +select c0 from t1 order by sin(-c0) settings optimize_read_in_order=1; + DROP TABLE t0; +DROP TABLE t1; From ec223372848014b79990bc05318862b8f8e76212 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Wed, 19 Jul 2023 10:54:26 +0800 Subject: [PATCH 1831/1997] Fix index analysis with indexHint as well --- src/Interpreters/ActionsVisitor.cpp | 8 ++++++++ ...regate_projection_with_monotonic_key_expr.reference | 1 + ...10_aggregate_projection_with_monotonic_key_expr.sql | 10 +++++++++- 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index efab11003f5..b769011e3d4 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -976,7 +976,15 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & if (node.name == "indexHint") { if (data.only_consts) + { + /// We need to collect constants inside `indexHint` for index analysis. + if (node.arguments) + { + for (const auto & arg : node.arguments->children) + visit(arg, data); + } return; + } /// Here we create a separate DAG for indexHint condition. /// It will be used only for index analysis. 
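Index analysis runs the visitor in only_consts mode to fold literals before
building the key condition; returning early for indexHint hid constants such
as the 1. in indexHint(p = 1.) from that pass. A stripped-down sketch of the
traversal change, over a hypothetical mini-AST rather than the real
ASTFunction and visitor Data types:

    #include <iostream>
    #include <memory>
    #include <string>
    #include <vector>

    // Hypothetical mini-AST, just to illustrate the traversal change.
    struct Node
    {
        std::string name;          // function name, or literal text
        bool is_literal = false;
        std::vector<std::shared_ptr<Node>> args;
    };

    void collectConstants(const Node & node, std::vector<std::string> & consts)
    {
        if (node.is_literal)
        {
            consts.push_back(node.name);
            return;
        }
        if (node.name == "indexHint")
        {
            // Before the fix this was a bare `return`, which hid the constants
            // under indexHint from index analysis; now we still build no
            // actions for it, but we do descend to collect constants.
            for (const auto & arg : node.args)
                collectConstants(*arg, consts);
            return;
        }
        for (const auto & arg : node.args)
            collectConstants(*arg, consts);
    }

    int main()
    {
        auto lit = std::make_shared<Node>(Node{"1.", true, {}});
        auto col = std::make_shared<Node>(Node{"p", false, {}});
        Node hint{"indexHint", false, {std::make_shared<Node>(Node{"equals", false, {col, lit}})}};

        std::vector<std::string> consts;
        collectConstants(hint, consts);
        std::cout << "constants collected: " << consts.size() << "\n";  // 1
    }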
diff --git a/tests/queries/0_stateless/01710_aggregate_projection_with_monotonic_key_expr.reference b/tests/queries/0_stateless/01710_aggregate_projection_with_monotonic_key_expr.reference index 06e9efbe839..1c8b399e790 100644 --- a/tests/queries/0_stateless/01710_aggregate_projection_with_monotonic_key_expr.reference +++ b/tests/queries/0_stateless/01710_aggregate_projection_with_monotonic_key_expr.reference @@ -3,3 +3,4 @@ -1 1 -1 +0 diff --git a/tests/queries/0_stateless/01710_aggregate_projection_with_monotonic_key_expr.sql b/tests/queries/0_stateless/01710_aggregate_projection_with_monotonic_key_expr.sql index bed43ef6630..5cd8ec87b4b 100644 --- a/tests/queries/0_stateless/01710_aggregate_projection_with_monotonic_key_expr.sql +++ b/tests/queries/0_stateless/01710_aggregate_projection_with_monotonic_key_expr.sql @@ -1,16 +1,24 @@ DROP TABLE IF EXISTS t0; DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; CREATE TABLE t0 (c0 Int16, projection h (SELECT min(c0), max(c0), count() GROUP BY -c0)) ENGINE = MergeTree ORDER BY (); INSERT INTO t0(c0) VALUES (1); -SELECT count() FROM t0 GROUP BY gcd(-sign(c0), -c0); +SELECT count() FROM t0 GROUP BY gcd(-sign(c0), -c0) SETTINGS optimize_use_implicit_projections = 1; create table t1 (c0 Int32) engine = MergeTree order by sin(c0); insert into t1 values (-1), (1); select c0 from t1 order by sin(-c0) settings optimize_read_in_order=0; select c0 from t1 order by sin(-c0) settings optimize_read_in_order=1; +CREATE TABLE t2 (p Nullable(Int64), k Decimal(76, 39)) ENGINE = MergeTree PARTITION BY toDate(p) ORDER BY k SETTINGS index_granularity = 1, allow_nullable_key = 1; + +INSERT INTO t2 FORMAT Values ('2020-09-01 00:01:02', 1), ('2020-09-01 20:01:03', 2), ('2020-09-02 00:01:03', 3); + +SELECT count() FROM t2 WHERE indexHint(p = 1.) 
SETTINGS optimize_use_implicit_projections = 1; + DROP TABLE t0; DROP TABLE t1; +DROP TABLE t2; From 5560603321319243180a7d38b17be29e2f69cf30 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Wed, 19 Jul 2023 10:56:37 +0800 Subject: [PATCH 1832/1997] optimize_use_implicit_projections=1 by default --- src/Core/Settings.h | 2 +- src/Core/SettingsChangesHistory.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 6fb26994d2f..b8207b142d3 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -577,7 +577,7 @@ class IColumn; M(Bool, optimize_skip_merged_partitions, false, "Skip partitions with one part with level > 0 in optimize final", 0) \ M(Bool, optimize_on_insert, true, "Do the same transformation for inserted block of data as if merge was done on this block.", 0) \ M(Bool, optimize_use_projections, true, "Automatically choose projections to perform SELECT query", 0) ALIAS(allow_experimental_projection_optimization) \ - M(Bool, optimize_use_implicit_projections, false, "Automatically choose implicit projections to perform SELECT query", 0) \ + M(Bool, optimize_use_implicit_projections, true, "Automatically choose implicit projections to perform SELECT query", 0) \ M(Bool, force_optimize_projection, false, "If projection optimization is enabled, SELECT queries need to use projection", 0) \ M(Bool, async_socket_for_remote, true, "Asynchronously read from socket executing remote query", 0) \ M(Bool, async_query_sending_for_remote, true, "Asynchronously create connections and send query to shards in remote query", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 3e58750e1d2..2886cdd288d 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -80,7 +80,6 @@ namespace SettingsChangesHistory /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) static std::map settings_changes_history = { - {"23.7", {{"optimize_use_implicit_projections", true, false, "Disable implicit projections due to unexpected results."}}}, {"23.6", {{"http_send_timeout", 180, 30, "3 minutes seems crazy long. 
Note that this is timeout for a single network write call, not for the whole upload operation."}, {"http_receive_timeout", 180, 30, "See http_send_timeout."}}}, {"23.5", {{"input_format_parquet_preserve_order", true, false, "Allow Parquet reader to reorder rows for better parallelism."}, From 68e7583dbfaca500757ba0b8e3d3d859b89accfa Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Fri, 21 Jul 2023 14:53:06 +0800 Subject: [PATCH 1833/1997] reorganize tests and add some comments --- .../Optimizations/optimizeUseAggregateProjection.cpp | 3 +++ src/Storages/MergeTree/MergeTreeData.cpp | 4 +++- ...aggregate_projection_with_monotonic_key_expr.reference | 1 - ...01710_aggregate_projection_with_monotonic_key_expr.sql | 7 ------- tests/queries/0_stateless/01739_index_hint.reference | 5 +++++ tests/queries/0_stateless/01739_index_hint.sql | 8 ++++++++ 6 files changed, 19 insertions(+), 9 deletions(-) diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp index cf88de19f03..e611bb5b2ef 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp @@ -497,6 +497,9 @@ AggregateProjectionCandidates getAggregateProjectionCandidates( // LOG_TRACE(&Poco::Logger::get("optimizeUseProjections"), "Projection sample block 2 {}", block.dumpStructure()); + // minmax_count_projection cannot be used used when there is no data to process, because + // it will produce incorrect result during constant aggregation. + // See https://github.com/ClickHouse/ClickHouse/issues/36728 if (block) { MinMaxProjectionCandidate minmax; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 9c5e45aa488..6c1375ecc1d 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -7027,7 +7027,9 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg max_added_blocks.get(), query_context); - // minmax_count_projection should not be used when there is no data to process. + // minmax_count_projection cannot be used used when there is no data to process, because + // it will produce incorrect result during constant aggregation. 
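+ // (Aggregating the projection's precomputed constants with no backing rows
+ // can yield a spurious non-empty result, e.g. for a plain count() query.)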
+ // See https://github.com/ClickHouse/ClickHouse/issues/36728 if (!query_info.minmax_count_projection_block) return; diff --git a/tests/queries/0_stateless/01710_aggregate_projection_with_monotonic_key_expr.reference b/tests/queries/0_stateless/01710_aggregate_projection_with_monotonic_key_expr.reference index 1c8b399e790..06e9efbe839 100644 --- a/tests/queries/0_stateless/01710_aggregate_projection_with_monotonic_key_expr.reference +++ b/tests/queries/0_stateless/01710_aggregate_projection_with_monotonic_key_expr.reference @@ -3,4 +3,3 @@ -1 1 -1 -0 diff --git a/tests/queries/0_stateless/01710_aggregate_projection_with_monotonic_key_expr.sql b/tests/queries/0_stateless/01710_aggregate_projection_with_monotonic_key_expr.sql index 5cd8ec87b4b..51dafb07b91 100644 --- a/tests/queries/0_stateless/01710_aggregate_projection_with_monotonic_key_expr.sql +++ b/tests/queries/0_stateless/01710_aggregate_projection_with_monotonic_key_expr.sql @@ -13,12 +13,5 @@ insert into t1 values (-1), (1); select c0 from t1 order by sin(-c0) settings optimize_read_in_order=0; select c0 from t1 order by sin(-c0) settings optimize_read_in_order=1; -CREATE TABLE t2 (p Nullable(Int64), k Decimal(76, 39)) ENGINE = MergeTree PARTITION BY toDate(p) ORDER BY k SETTINGS index_granularity = 1, allow_nullable_key = 1; - -INSERT INTO t2 FORMAT Values ('2020-09-01 00:01:02', 1), ('2020-09-01 20:01:03', 2), ('2020-09-02 00:01:03', 3); - -SELECT count() FROM t2 WHERE indexHint(p = 1.) SETTINGS optimize_use_implicit_projections = 1; - DROP TABLE t0; DROP TABLE t1; -DROP TABLE t2; diff --git a/tests/queries/0_stateless/01739_index_hint.reference b/tests/queries/0_stateless/01739_index_hint.reference index 3a4b380de65..766dff8c7b0 100644 --- a/tests/queries/0_stateless/01739_index_hint.reference +++ b/tests/queries/0_stateless/01739_index_hint.reference @@ -33,3 +33,8 @@ insert into XXXX select number*60, 0 from numbers(100000); SELECT count() FROM XXXX WHERE indexHint(t = toDateTime(0)) SETTINGS optimize_use_implicit_projections = 1; 100000 drop table XXXX; +CREATE TABLE XXXX (p Nullable(Int64), k Decimal(76, 39)) ENGINE = MergeTree PARTITION BY toDate(p) ORDER BY k SETTINGS index_granularity = 1, allow_nullable_key = 1; +INSERT INTO XXXX FORMAT Values ('2020-09-01 00:01:02', 1), ('2020-09-01 20:01:03', 2), ('2020-09-02 00:01:03', 3); +SELECT count() FROM XXXX WHERE indexHint(p = 1.) SETTINGS optimize_use_implicit_projections = 1; +0 +drop table XXXX; diff --git a/tests/queries/0_stateless/01739_index_hint.sql b/tests/queries/0_stateless/01739_index_hint.sql index e1e66c630e1..77c2760535d 100644 --- a/tests/queries/0_stateless/01739_index_hint.sql +++ b/tests/queries/0_stateless/01739_index_hint.sql @@ -33,3 +33,11 @@ insert into XXXX select number*60, 0 from numbers(100000); SELECT count() FROM XXXX WHERE indexHint(t = toDateTime(0)) SETTINGS optimize_use_implicit_projections = 1; drop table XXXX; + +CREATE TABLE XXXX (p Nullable(Int64), k Decimal(76, 39)) ENGINE = MergeTree PARTITION BY toDate(p) ORDER BY k SETTINGS index_granularity = 1, allow_nullable_key = 1; + +INSERT INTO XXXX FORMAT Values ('2020-09-01 00:01:02', 1), ('2020-09-01 20:01:03', 2), ('2020-09-02 00:01:03', 3); + +SELECT count() FROM XXXX WHERE indexHint(p = 1.) 
SETTINGS optimize_use_implicit_projections = 1; + +drop table XXXX; From 0c2ea94efeeb9a68448d44c1eba08ad3898ef99f Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Fri, 21 Jul 2023 09:57:32 +0200 Subject: [PATCH 1834/1997] Update test --- .../0_stateless/02156_storage_merge_prewhere.reference | 2 +- tests/queries/0_stateless/02156_storage_merge_prewhere.sql | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/02156_storage_merge_prewhere.reference b/tests/queries/0_stateless/02156_storage_merge_prewhere.reference index 30f9b1ab175..74ba452d783 100644 --- a/tests/queries/0_stateless/02156_storage_merge_prewhere.reference +++ b/tests/queries/0_stateless/02156_storage_merge_prewhere.reference @@ -1,6 +1,6 @@ SELECT count() FROM t_02156_merge1 -PREWHERE (k = 3) AND notEmpty(v) +PREWHERE notEmpty(v) AND (k = 3) 2 SELECT count() FROM t_02156_merge2 diff --git a/tests/queries/0_stateless/02156_storage_merge_prewhere.sql b/tests/queries/0_stateless/02156_storage_merge_prewhere.sql index b75d3fa22e5..83d88a68d9b 100644 --- a/tests/queries/0_stateless/02156_storage_merge_prewhere.sql +++ b/tests/queries/0_stateless/02156_storage_merge_prewhere.sql @@ -1,4 +1,5 @@ SET optimize_move_to_prewhere = 1; +SET enable_multiple_prewhere_read_steps = 1; DROP TABLE IF EXISTS t_02156_mt1; DROP TABLE IF EXISTS t_02156_mt2; @@ -8,8 +9,8 @@ DROP TABLE IF EXISTS t_02156_merge1; DROP TABLE IF EXISTS t_02156_merge2; DROP TABLE IF EXISTS t_02156_merge3; -CREATE TABLE t_02156_mt1 (k UInt32, v String) ENGINE = MergeTree ORDER BY k; -CREATE TABLE t_02156_mt2 (k UInt32, v String) ENGINE = MergeTree ORDER BY k; +CREATE TABLE t_02156_mt1 (k UInt32, v String) ENGINE = MergeTree ORDER BY k SETTINGS min_bytes_for_wide_part=0; +CREATE TABLE t_02156_mt2 (k UInt32, v String) ENGINE = MergeTree ORDER BY k SETTINGS min_bytes_for_wide_part=0; CREATE TABLE t_02156_log (k UInt32, v String) ENGINE = Log; CREATE TABLE t_02156_dist (k UInt32, v String) ENGINE = Distributed(test_shard_localhost, currentDatabase(), t_02156_mt1); From d2dba496bf0c703178758b1c534c0914044d2094 Mon Sep 17 00:00:00 2001 From: StianBerger <111980234+StianBerger@users.noreply.github.com> Date: Fri, 21 Jul 2023 10:26:01 +0200 Subject: [PATCH 1835/1997] Update date-time-functions.md formatDateTime %r for 12-hour time, mentioned %H in equivalent, which is 24H. Replaced with %h. --- docs/en/sql-reference/functions/date-time-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index c6b978506a1..87d84425029 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -1449,7 +1449,7 @@ Using replacement fields, you can define a pattern for the resulting string. “ | %n | new-line character (‘’) | | | %p | AM or PM designation | PM | | %Q | Quarter (1-4) | 1 | -| %r | 12-hour HH:MM AM/PM time, equivalent to %H:%i %p | 10:30 PM | +| %r | 12-hour HH:MM AM/PM time, equivalent to %h:%i %p | 10:30 PM | | %R | 24-hour HH:MM time, equivalent to %H:%i | 22:33 | | %s | second (00-59) | 44 | | %S | second (00-59) | 44 | From 53d77e6b1397e3621a81fc88da76aa9bac72ad75 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Wed, 19 Jul 2023 21:28:17 +0800 Subject: [PATCH 1836/1997] Add back missing projection QueryAccessInfo. 
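This restores entries like t.t_agg and t._minmax_count_projection in the
projections column of system.query_log when a query is served by a
projection (the new test at the end exercises all three paths). The access
info itself is a mutex-guarded set on the query context, since several
pipeline threads may report concurrently; a simplified stand-in for that
pattern, not the actual Context class:

    #include <iostream>
    #include <mutex>
    #include <set>
    #include <string>

    // Simplified query access info: concurrent pipeline threads may all
    // report the projection they read from, so the set has its own mutex.
    class QueryAccessInfo
    {
    public:
        void addProjection(const std::string & qualified_name)
        {
            std::lock_guard<std::mutex> lock(mutex);
            projections.insert(qualified_name);   // de-duplicated automatically
        }

        std::set<std::string> getProjections() const
        {
            std::lock_guard<std::mutex> lock(mutex);
            return projections;
        }

    private:
        mutable std::mutex mutex;
        std::set<std::string> projections;
    };

    int main()
    {
        QueryAccessInfo info;
        info.addProjection("t.t_agg");
        info.addProjection("t.t_agg");            // reported twice, stored once
        std::cout << info.getProjections().size() << "\n";  // 1
    }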
--- src/Interpreters/Context.cpp | 13 ++-- src/Interpreters/Context.h | 1 + .../optimizeUseAggregateProjection.cpp | 18 +++++- .../optimizeUseNormalProjection.cpp | 9 ++- .../QueryPlan/ReadFromMergeTree.cpp | 4 ++ .../QueryPlan/ReadFromPreparedSource.cpp | 8 ++- .../QueryPlan/ReadFromPreparedSource.h | 3 +- ...0_query_log_with_projection_info.reference | 3 + .../01710_query_log_with_projection_info.sql | 64 +++++++++++++++++++ 9 files changed, 114 insertions(+), 9 deletions(-) create mode 100644 tests/queries/0_stateless/01710_query_log_with_projection_info.reference create mode 100644 tests/queries/0_stateless/01710_query_log_with_projection_info.sql diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 9e4d1e8d1e2..434fc1adb40 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1461,15 +1461,20 @@ void Context::addQueryAccessInfo( void Context::addQueryAccessInfo(const Names & partition_names) { if (isGlobalContext()) - { throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have query access info"); - } std::lock_guard lock(query_access_info.mutex); for (const auto & partition_name : partition_names) - { query_access_info.partitions.emplace(partition_name); - } +} + +void Context::addQueryAccessInfo(const String & qualified_projection_name) +{ + if (isGlobalContext()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have query access info"); + + std::lock_guard lock(query_access_info.mutex); + query_access_info.projections.emplace(qualified_projection_name); } void Context::addQueryFactoriesInfo(QueryLogFactories factory_type, const String & created_object) const diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 3a8d41bf130..3ce899bfb77 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -657,6 +657,7 @@ public: const String & projection_name = {}, const String & view_name = {}); void addQueryAccessInfo(const Names & partition_names); + void addQueryAccessInfo(const String & qualified_projection_name); /// Supported factories for records in query_log diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp index f183bdca7a9..8c85435138c 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp @@ -625,7 +625,14 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & // candidates.minmax_projection->block.dumpStructure()); Pipe pipe(std::make_shared(std::move(candidates.minmax_projection->block))); - projection_reading = std::make_unique(std::move(pipe)); + projection_reading = std::make_unique( + std::move(pipe), + context, + query_info.is_internal ? 
"" + : fmt::format( + "{}.{}", + reading->getMergeTreeData().getStorageID().getFullTableName(), + backQuoteIfNeed(candidates.minmax_projection->candidate.projection->name))); has_ordinary_parts = !candidates.minmax_projection->normal_parts.empty(); if (has_ordinary_parts) @@ -658,7 +665,14 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & { auto header = proj_snapshot->getSampleBlockForColumns(best_candidate->dag->getRequiredColumnsNames()); Pipe pipe(std::make_shared(std::move(header))); - projection_reading = std::make_unique(std::move(pipe)); + projection_reading = std::make_unique( + std::move(pipe), + context, + query_info.is_internal ? "" + : fmt::format( + "{}.{}", + reading->getMergeTreeData().getStorageID().getFullTableName(), + backQuoteIfNeed(best_candidate->projection->name))); } has_ordinary_parts = best_candidate->merge_tree_ordinary_select_result_ptr != nullptr; diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp index dd7a5d449bc..71db561e1c9 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp @@ -183,7 +183,14 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes) if (!projection_reading) { Pipe pipe(std::make_shared(proj_snapshot->getSampleBlockForColumns(required_columns))); - projection_reading = std::make_unique(std::move(pipe)); + projection_reading = std::make_unique( + std::move(pipe), + context, + query_info.is_internal ? "" + : fmt::format( + "{}.{}", + reading->getMergeTreeData().getStorageID().getFullTableName(), + backQuoteIfNeed(best_candidate->projection->name))); } bool has_ordinary_parts = best_candidate->merge_tree_ordinary_select_result_ptr != nullptr; diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 13de5d1d140..82f47cc61d5 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1761,6 +1761,10 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons fmt::format("{}.{}", data.getStorageID().getFullNameNotQuoted(), part.data_part->info.partition_id)); } context->getQueryContext()->addQueryAccessInfo(partition_names); + + if (storage_snapshot->projection) + context->getQueryContext()->addQueryAccessInfo( + fmt::format("{}.{}", data.getStorageID().getFullTableName(), backQuoteIfNeed(storage_snapshot->projection->name))); } ProfileEvents::increment(ProfileEvents::SelectedParts, result.selected_parts); diff --git a/src/Processors/QueryPlan/ReadFromPreparedSource.cpp b/src/Processors/QueryPlan/ReadFromPreparedSource.cpp index 7446203ec35..d50eec47ca8 100644 --- a/src/Processors/QueryPlan/ReadFromPreparedSource.cpp +++ b/src/Processors/QueryPlan/ReadFromPreparedSource.cpp @@ -1,17 +1,23 @@ +#include #include #include namespace DB { -ReadFromPreparedSource::ReadFromPreparedSource(Pipe pipe_) +ReadFromPreparedSource::ReadFromPreparedSource(Pipe pipe_, ContextPtr context_, const String & qualified_projection_name_) : ISourceStep(DataStream{.header = pipe_.getHeader()}) , pipe(std::move(pipe_)) + , context(context_) + , qualified_projection_name(qualified_projection_name_) { } void ReadFromPreparedSource::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { + if (context && context->hasQueryContext() && 
!qualified_projection_name.empty()) + context->getQueryContext()->addQueryAccessInfo(qualified_projection_name); + for (const auto & processor : pipe.getProcessors()) processors.emplace_back(processor); diff --git a/src/Processors/QueryPlan/ReadFromPreparedSource.h b/src/Processors/QueryPlan/ReadFromPreparedSource.h index 05e3ebd5102..5e64dcb7a4f 100644 --- a/src/Processors/QueryPlan/ReadFromPreparedSource.h +++ b/src/Processors/QueryPlan/ReadFromPreparedSource.h @@ -9,7 +9,7 @@ namespace DB class ReadFromPreparedSource : public ISourceStep { public: - explicit ReadFromPreparedSource(Pipe pipe_); + explicit ReadFromPreparedSource(Pipe pipe_, ContextPtr context_ = nullptr, const String & qualified_projection_name_ = ""); String getName() const override { return "ReadFromPreparedSource"; } @@ -18,6 +18,7 @@ public: protected: Pipe pipe; ContextPtr context; + String qualified_projection_name; }; class ReadFromStorageStep : public ReadFromPreparedSource diff --git a/tests/queries/0_stateless/01710_query_log_with_projection_info.reference b/tests/queries/0_stateless/01710_query_log_with_projection_info.reference new file mode 100644 index 00000000000..9c2e9df6662 --- /dev/null +++ b/tests/queries/0_stateless/01710_query_log_with_projection_info.reference @@ -0,0 +1,3 @@ +t.t_normal +t.t_agg +t._minmax_count_projection diff --git a/tests/queries/0_stateless/01710_query_log_with_projection_info.sql b/tests/queries/0_stateless/01710_query_log_with_projection_info.sql new file mode 100644 index 00000000000..25e7e8fed60 --- /dev/null +++ b/tests/queries/0_stateless/01710_query_log_with_projection_info.sql @@ -0,0 +1,64 @@ +set log_queries=1; +set log_queries_min_type='QUERY_FINISH'; +set optimize_use_implicit_projections=1; + +DROP TABLE IF EXISTS t; + +CREATE TABLE t +( + `id` UInt64, + `id2` UInt64, + `id3` UInt64, + PROJECTION t_normal + ( + SELECT + id, + id2, + id3 + ORDER BY + id2, + id, + id3 + ), + PROJECTION t_agg + ( + SELECT + sum(id3) + GROUP BY id2 + ) +) +ENGINE = MergeTree +ORDER BY id +SETTINGS index_granularity = 8; + +insert into t SELECT number, -number, number FROM numbers(10000); + +SELECT * FROM t WHERE id2 = 3 FORMAT Null; +SELECT sum(id3) FROM t GROUP BY id2 FORMAT Null; +SELECT min(id) FROM t FORMAT Null; + +SYSTEM FLUSH LOGS; + +SELECT + --Remove the prefix string which is a mutable database name. + arrayStringConcat(arrayPopFront(splitByString('.', projections[1])), '.') +FROM + system.query_log +WHERE + current_database=currentDatabase() and query = 'SELECT * FROM t WHERE id2 = 3 FORMAT Null;'; + +SELECT + --Remove the prefix string which is a mutable database name. + arrayStringConcat(arrayPopFront(splitByString('.', projections[1])), '.') +FROM + system.query_log +WHERE + current_database=currentDatabase() and query = 'SELECT sum(id3) FROM t GROUP BY id2 FORMAT Null;'; + +SELECT + --Remove the prefix string which is a mutable database name. 
+ arrayStringConcat(arrayPopFront(splitByString('.', projections[1])), '.') +FROM + system.query_log +WHERE + current_database=currentDatabase() and query = 'SELECT min(id) FROM t FORMAT Null;'; From 8187118232371630fb10ee4062b8a52285003fa0 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Thu, 20 Jul 2023 11:12:22 +0800 Subject: [PATCH 1837/1997] Better code --- src/Interpreters/Context.cpp | 8 ++++++-- src/Interpreters/Context.h | 9 ++++++++- .../optimizeUseAggregateProjection.cpp | 18 ++++++++---------- .../optimizeUseNormalProjection.cpp | 9 ++++----- src/Processors/QueryPlan/ReadFromMergeTree.cpp | 2 +- .../QueryPlan/ReadFromPreparedSource.cpp | 9 ++++----- .../QueryPlan/ReadFromPreparedSource.h | 7 +++++-- 7 files changed, 36 insertions(+), 26 deletions(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 434fc1adb40..cc1277e08b9 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1468,13 +1468,17 @@ void Context::addQueryAccessInfo(const Names & partition_names) query_access_info.partitions.emplace(partition_name); } -void Context::addQueryAccessInfo(const String & qualified_projection_name) +void Context::addQueryAccessInfo(const QualifiedProjectionName & qualified_projection_name) { + if (!qualified_projection_name) + return; + if (isGlobalContext()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have query access info"); std::lock_guard lock(query_access_info.mutex); - query_access_info.projections.emplace(qualified_projection_name); + query_access_info.projections.emplace(fmt::format( + "{}.{}", qualified_projection_name.storage_id.getFullTableName(), backQuoteIfNeed(qualified_projection_name.projection_name))); } void Context::addQueryFactoriesInfo(QueryLogFactories factory_type, const String & created_object) const diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 3ce899bfb77..fa210f04451 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -657,7 +657,14 @@ public: const String & projection_name = {}, const String & view_name = {}); void addQueryAccessInfo(const Names & partition_names); - void addQueryAccessInfo(const String & qualified_projection_name); + + struct QualifiedProjectionName + { + StorageID storage_id = StorageID::createEmpty(); + String projection_name; + explicit operator bool() const { return !projection_name.empty(); } + }; + void addQueryAccessInfo(const QualifiedProjectionName & qualified_projection_name); /// Supported factories for records in query_log diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp index 8c85435138c..fa6a7f5b8ea 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp @@ -628,11 +628,10 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & projection_reading = std::make_unique( std::move(pipe), context, - query_info.is_internal ? "" - : fmt::format( - "{}.{}", - reading->getMergeTreeData().getStorageID().getFullTableName(), - backQuoteIfNeed(candidates.minmax_projection->candidate.projection->name))); + query_info.is_internal ? 
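/// Editor's note (standalone sketch, not part of the patch): the ternary that
/// continues below may pass an empty Context::QualifiedProjectionName{}
/// because the struct carries its own validity check -- its explicit
/// operator bool() is false for an empty projection_name, and
/// addQueryAccessInfo() simply returns in that case. The idiom in isolation,
/// with hypothetical names:
///
///     struct Tag
///     {
///         std::string name;
///         explicit operator bool() const { return !name.empty(); }
///     };
///
///     void record(const Tag & tag)
///     {
///         if (!tag)
///             return; /// internal queries pass Tag{} and are skipped
///         /// ... append tag.name to the query access info ...
///     }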
Context::QualifiedProjectionName{} + : Context::QualifiedProjectionName{ + .storage_id = reading->getMergeTreeData().getStorageID(), + .projection_name = candidates.minmax_projection->candidate.projection->name}); has_ordinary_parts = !candidates.minmax_projection->normal_parts.empty(); if (has_ordinary_parts) @@ -668,11 +667,10 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & projection_reading = std::make_unique( std::move(pipe), context, - query_info.is_internal ? "" - : fmt::format( - "{}.{}", - reading->getMergeTreeData().getStorageID().getFullTableName(), - backQuoteIfNeed(best_candidate->projection->name))); + query_info.is_internal + ? Context::QualifiedProjectionName{} + : Context::QualifiedProjectionName{ + .storage_id = reading->getMergeTreeData().getStorageID(), .projection_name = best_candidate->projection->name}); } has_ordinary_parts = best_candidate->merge_tree_ordinary_select_result_ptr != nullptr; diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp index 71db561e1c9..93d1be20e81 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp @@ -186,11 +186,10 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes) projection_reading = std::make_unique( std::move(pipe), context, - query_info.is_internal ? "" - : fmt::format( - "{}.{}", - reading->getMergeTreeData().getStorageID().getFullTableName(), - backQuoteIfNeed(best_candidate->projection->name))); + query_info.is_internal + ? Context::QualifiedProjectionName{} + : Context::QualifiedProjectionName{ + .storage_id = reading->getMergeTreeData().getStorageID(), .projection_name = best_candidate->projection->name}); } bool has_ordinary_parts = best_candidate->merge_tree_ordinary_select_result_ptr != nullptr; diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 82f47cc61d5..2d2412f7e36 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1764,7 +1764,7 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons if (storage_snapshot->projection) context->getQueryContext()->addQueryAccessInfo( - fmt::format("{}.{}", data.getStorageID().getFullTableName(), backQuoteIfNeed(storage_snapshot->projection->name))); + Context::QualifiedProjectionName{.storage_id = data.getStorageID(), .projection_name = storage_snapshot->projection->name}); } ProfileEvents::increment(ProfileEvents::SelectedParts, result.selected_parts); diff --git a/src/Processors/QueryPlan/ReadFromPreparedSource.cpp b/src/Processors/QueryPlan/ReadFromPreparedSource.cpp index d50eec47ca8..a24c4dbe4d0 100644 --- a/src/Processors/QueryPlan/ReadFromPreparedSource.cpp +++ b/src/Processors/QueryPlan/ReadFromPreparedSource.cpp @@ -1,21 +1,20 @@ -#include #include #include namespace DB { -ReadFromPreparedSource::ReadFromPreparedSource(Pipe pipe_, ContextPtr context_, const String & qualified_projection_name_) +ReadFromPreparedSource::ReadFromPreparedSource(Pipe pipe_, ContextPtr context_, Context::QualifiedProjectionName qualified_projection_name_) : ISourceStep(DataStream{.header = pipe_.getHeader()}) , pipe(std::move(pipe_)) - , context(context_) - , qualified_projection_name(qualified_projection_name_) + , context(std::move(context_)) + , 
qualified_projection_name(std::move(qualified_projection_name_)) { } void ReadFromPreparedSource::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { - if (context && context->hasQueryContext() && !qualified_projection_name.empty()) + if (context && context->hasQueryContext()) context->getQueryContext()->addQueryAccessInfo(qualified_projection_name); for (const auto & processor : pipe.getProcessors()) diff --git a/src/Processors/QueryPlan/ReadFromPreparedSource.h b/src/Processors/QueryPlan/ReadFromPreparedSource.h index 5e64dcb7a4f..2606f501009 100644 --- a/src/Processors/QueryPlan/ReadFromPreparedSource.h +++ b/src/Processors/QueryPlan/ReadFromPreparedSource.h @@ -1,4 +1,6 @@ #pragma once + +#include #include #include @@ -9,7 +11,8 @@ namespace DB class ReadFromPreparedSource : public ISourceStep { public: - explicit ReadFromPreparedSource(Pipe pipe_, ContextPtr context_ = nullptr, const String & qualified_projection_name_ = ""); + explicit ReadFromPreparedSource( + Pipe pipe_, ContextPtr context_ = nullptr, Context::QualifiedProjectionName qualified_projection_name_ = {}); String getName() const override { return "ReadFromPreparedSource"; } @@ -18,7 +21,7 @@ public: protected: Pipe pipe; ContextPtr context; - String qualified_projection_name; + Context::QualifiedProjectionName qualified_projection_name; }; class ReadFromStorageStep : public ReadFromPreparedSource From 2cad81731be0443b50e66e43fb68b2b064d67a77 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Fri, 21 Jul 2023 16:46:56 +0800 Subject: [PATCH 1838/1997] Try to fix style issues --- .../optimizeUseAggregateProjection.cpp | 19 ++++++++++++------- .../optimizeUseNormalProjection.cpp | 7 +++++-- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp index fa6a7f5b8ea..53f47bcdf95 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp @@ -628,11 +628,13 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & projection_reading = std::make_unique( std::move(pipe), context, - query_info.is_internal ? Context::QualifiedProjectionName{} - : Context::QualifiedProjectionName{ - .storage_id = reading->getMergeTreeData().getStorageID(), - .projection_name = candidates.minmax_projection->candidate.projection->name}); - + query_info.is_internal + ? Context::QualifiedProjectionName{} + : Context::QualifiedProjectionName + { + .storage_id = reading->getMergeTreeData().getStorageID(), + .projection_name = candidates.minmax_projection->candidate.projection->name, + }); has_ordinary_parts = !candidates.minmax_projection->normal_parts.empty(); if (has_ordinary_parts) reading->resetParts(std::move(candidates.minmax_projection->normal_parts)); @@ -669,8 +671,11 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & context, query_info.is_internal ? 
Context::QualifiedProjectionName{} - : Context::QualifiedProjectionName{ - .storage_id = reading->getMergeTreeData().getStorageID(), .projection_name = best_candidate->projection->name}); + : Context::QualifiedProjectionName + { + .storage_id = reading->getMergeTreeData().getStorageID(), + .projection_name = best_candidate->projection->name, + }); } has_ordinary_parts = best_candidate->merge_tree_ordinary_select_result_ptr != nullptr; diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp index 93d1be20e81..f6ace6f8025 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp @@ -188,8 +188,11 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes) context, query_info.is_internal ? Context::QualifiedProjectionName{} - : Context::QualifiedProjectionName{ - .storage_id = reading->getMergeTreeData().getStorageID(), .projection_name = best_candidate->projection->name}); + : Context::QualifiedProjectionName + { + .storage_id = reading->getMergeTreeData().getStorageID(), + .projection_name = best_candidate->projection->name, + }); } bool has_ordinary_parts = best_candidate->merge_tree_ordinary_select_result_ptr != nullptr; From 5fa45bdbeaef99ba6a7db894d89dc749b7ac3f97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20G=C3=B3ralski?= Date: Fri, 21 Jul 2023 12:12:34 +0200 Subject: [PATCH 1839/1997] Setting the metric FilesystemCacheSizeLimit in LRUFileCachePriority.h --- src/Interpreters/Cache/LRUFileCachePriority.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/Cache/LRUFileCachePriority.h b/src/Interpreters/Cache/LRUFileCachePriority.h index e0d7d45062a..662a76968bc 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.h +++ b/src/Interpreters/Cache/LRUFileCachePriority.h @@ -5,6 +5,12 @@ #include #include +namespace CurrentMetrics +{ + extern const Metric FilesystemCacheSizeLimit; +} + + namespace DB { @@ -18,7 +24,9 @@ private: using LRUQueueIterator = typename LRUQueue::iterator; public: - LRUFileCachePriority(size_t max_size_, size_t max_elements_) : IFileCachePriority(max_size_, max_elements_) {} + LRUFileCachePriority(size_t max_size_, size_t max_elements_) : IFileCachePriority(max_size_, max_elements_) { + CurrentMetrics::set(CurrentMetrics::FilesystemCacheSizeLimit, max_size_); + } size_t getSize(const CacheGuard::Lock &) const override { return current_size; } From 930d45303c5b96b7553d611e82e0c94215ef5705 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20G=C3=B3ralski?= Date: Fri, 21 Jul 2023 12:13:38 +0200 Subject: [PATCH 1840/1997] removing the metric set from LRUFileCachePriority.cpp --- src/Interpreters/Cache/LRUFileCachePriority.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Interpreters/Cache/LRUFileCachePriority.cpp b/src/Interpreters/Cache/LRUFileCachePriority.cpp index 33e567b7a76..18862e154da 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.cpp +++ b/src/Interpreters/Cache/LRUFileCachePriority.cpp @@ -7,7 +7,6 @@ namespace CurrentMetrics { extern const Metric FilesystemCacheSize; - extern const Metric FilesystemCacheSizeLimit; extern const Metric FilesystemCacheElements; } @@ -102,7 +101,6 @@ void LRUFileCachePriority::updateSize(int64_t size) { current_size += size; CurrentMetrics::add(CurrentMetrics::FilesystemCacheSize, size); - 
CurrentMetrics::set(CurrentMetrics::FilesystemCacheSizeLimit, getSizeLimit()); } void LRUFileCachePriority::updateElementsCount(int64_t num) From 3412dd225919f3850dfb4c0f8647e74e6630e31f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20G=C3=B3ralski?= Date: Fri, 21 Jul 2023 12:14:30 +0200 Subject: [PATCH 1841/1997] removed unnecessary whitespace --- src/Interpreters/Cache/LRUFileCachePriority.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Interpreters/Cache/LRUFileCachePriority.h b/src/Interpreters/Cache/LRUFileCachePriority.h index 662a76968bc..9396070b792 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.h +++ b/src/Interpreters/Cache/LRUFileCachePriority.h @@ -10,7 +10,6 @@ namespace CurrentMetrics extern const Metric FilesystemCacheSizeLimit; } - namespace DB { From 714a3a8d121326e2d908648bfc0e76e09f6e0815 Mon Sep 17 00:00:00 2001 From: Alexander Sapin Date: Fri, 21 Jul 2023 12:23:22 +0200 Subject: [PATCH 1842/1997] Don't do it on drop --- src/Storages/StorageReplicatedMergeTree.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 06f5330f6d9..841b646a126 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -3994,6 +3994,13 @@ void StorageReplicatedMergeTree::addLastSentPart(const MergeTreePartInfo & info) void StorageReplicatedMergeTree::waitForUniquePartsToBeFetchedByOtherReplicas(StorageReplicatedMergeTree::ShutdownDeadline shutdown_deadline_) { + /// Will be true in case of DROP/DETACH query + if (CurrentThread::isInitialized() && CurrentThread::get().getQueryContext() != nullptr) + { + LOG_TRACE(log, "Will not wait for unique parts to be fetched by other replicas because shutdown called from DROP/DETACH query"); + return; + } + if (!shutdown_called.load()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Called waitForUniquePartsToBeFetchedByOtherReplicas before shutdown, it's a bug"); @@ -4951,7 +4958,6 @@ void StorageReplicatedMergeTree::shutdown() flushAndPrepareForShutdown(); - auto settings_ptr = getSettings(); if (!shutdown_deadline.has_value()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Shutdown deadline is not set in shutdown"); @@ -6311,7 +6317,7 @@ bool StorageReplicatedMergeTree::tryWaitForReplicaToProcessLogEntry( const auto & stop_waiting = [&]() { - bool stop_waiting_itself = waiting_itself && partial_shutdown_called; + bool stop_waiting_itself = waiting_itself && (partial_shutdown_called || shutdown_prepared_called || shutdown_called); bool timeout_exceeded = check_timeout && wait_for_inactive_timeout < time_waiting.elapsedSeconds(); bool stop_waiting_inactive = (!wait_for_inactive || timeout_exceeded) && !getZooKeeper()->exists(fs::path(table_zookeeper_path) / "replicas" / replica / "is_active"); From e638a9ecd3cebe83c0c3997b19e0e73d1fb14639 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Fri, 21 Jul 2023 12:24:36 +0200 Subject: [PATCH 1843/1997] Fix style check --- src/Interpreters/Cache/LRUFileCachePriority.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/Cache/LRUFileCachePriority.h b/src/Interpreters/Cache/LRUFileCachePriority.h index 9396070b792..e041e59a91a 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.h +++ b/src/Interpreters/Cache/LRUFileCachePriority.h @@ -23,7 +23,8 @@ private: using LRUQueueIterator = typename LRUQueue::iterator; public: - LRUFileCachePriority(size_t
max_size_, size_t max_elements_) : IFileCachePriority(max_size_, max_elements_) { + LRUFileCachePriority(size_t max_size_, size_t max_elements_) : IFileCachePriority(max_size_, max_elements_) + { CurrentMetrics::set(CurrentMetrics::FilesystemCacheSizeLimit, max_size_); } From 3815a6aa62414fcd4e51090450a08bc368fd8c86 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 21 Jul 2023 11:14:39 +0000 Subject: [PATCH 1844/1997] Make test_keeper_force_recovery more stable --- .../configs/enable_keeper1.xml | 2 ++ .../configs/enable_keeper2.xml | 2 ++ .../configs/enable_keeper3.xml | 2 ++ .../configs/enable_keeper4.xml | 2 ++ .../configs/enable_keeper5.xml | 2 ++ .../configs/enable_keeper6.xml | 2 ++ .../configs/enable_keeper7.xml | 2 ++ .../configs/enable_keeper8.xml | 2 ++ .../configs/recovered_keeper1.xml | 2 ++ .../configs/use_keeper.xml | 36 ------------------- .../test_keeper_force_recovery/test.py | 5 +-- .../configs/enable_keeper1.xml | 2 ++ .../configs/enable_keeper1_solo.xml | 2 ++ .../configs/enable_keeper2.xml | 2 ++ .../configs/enable_keeper3.xml | 2 ++ .../configs/use_keeper.xml | 16 --------- .../test.py | 5 +-- 17 files changed, 28 insertions(+), 60 deletions(-) delete mode 100644 tests/integration/test_keeper_force_recovery/configs/use_keeper.xml delete mode 100644 tests/integration/test_keeper_force_recovery_single_node/configs/use_keeper.xml diff --git a/tests/integration/test_keeper_force_recovery/configs/enable_keeper1.xml b/tests/integration/test_keeper_force_recovery/configs/enable_keeper1.xml index b7f9d1b058e..62109ee9092 100644 --- a/tests/integration/test_keeper_force_recovery/configs/enable_keeper1.xml +++ b/tests/integration/test_keeper_force_recovery/configs/enable_keeper1.xml @@ -1,5 +1,6 @@ + false 9181 1 /var/lib/clickhouse/coordination/log @@ -10,6 +11,7 @@ 10000 75 trace + 200 diff --git a/tests/integration/test_keeper_force_recovery/configs/enable_keeper2.xml b/tests/integration/test_keeper_force_recovery/configs/enable_keeper2.xml index b773d59f259..2696c573180 100644 --- a/tests/integration/test_keeper_force_recovery/configs/enable_keeper2.xml +++ b/tests/integration/test_keeper_force_recovery/configs/enable_keeper2.xml @@ -1,5 +1,6 @@ + false 9181 2 /var/lib/clickhouse/coordination/log @@ -10,6 +11,7 @@ 10000 75 trace + 200 diff --git a/tests/integration/test_keeper_force_recovery/configs/enable_keeper3.xml b/tests/integration/test_keeper_force_recovery/configs/enable_keeper3.xml index d4c2befc10f..fc0c0fd0300 100644 --- a/tests/integration/test_keeper_force_recovery/configs/enable_keeper3.xml +++ b/tests/integration/test_keeper_force_recovery/configs/enable_keeper3.xml @@ -1,5 +1,6 @@ + false 9181 3 /var/lib/clickhouse/coordination/log @@ -10,6 +11,7 @@ 10000 75 trace + 200 diff --git a/tests/integration/test_keeper_force_recovery/configs/enable_keeper4.xml b/tests/integration/test_keeper_force_recovery/configs/enable_keeper4.xml index c039e709c9e..06f1c1d7195 100644 --- a/tests/integration/test_keeper_force_recovery/configs/enable_keeper4.xml +++ b/tests/integration/test_keeper_force_recovery/configs/enable_keeper4.xml @@ -1,5 +1,6 @@ + false 9181 4 /var/lib/clickhouse/coordination/log @@ -10,6 +11,7 @@ 10000 75 trace + 200 diff --git a/tests/integration/test_keeper_force_recovery/configs/enable_keeper5.xml b/tests/integration/test_keeper_force_recovery/configs/enable_keeper5.xml index fb43b6524c8..5d3767ae969 100644 --- a/tests/integration/test_keeper_force_recovery/configs/enable_keeper5.xml +++ 
b/tests/integration/test_keeper_force_recovery/configs/enable_keeper5.xml @@ -1,5 +1,6 @@ + false 9181 5 /var/lib/clickhouse/coordination/log @@ -10,6 +11,7 @@ 10000 75 trace + 200 diff --git a/tests/integration/test_keeper_force_recovery/configs/enable_keeper6.xml b/tests/integration/test_keeper_force_recovery/configs/enable_keeper6.xml index 430e662bf36..4d30822741a 100644 --- a/tests/integration/test_keeper_force_recovery/configs/enable_keeper6.xml +++ b/tests/integration/test_keeper_force_recovery/configs/enable_keeper6.xml @@ -1,5 +1,6 @@ + false 9181 6 /var/lib/clickhouse/coordination/log @@ -10,6 +11,7 @@ 10000 75 trace + 200 diff --git a/tests/integration/test_keeper_force_recovery/configs/enable_keeper7.xml b/tests/integration/test_keeper_force_recovery/configs/enable_keeper7.xml index aa10774ad7d..b59141042ea 100644 --- a/tests/integration/test_keeper_force_recovery/configs/enable_keeper7.xml +++ b/tests/integration/test_keeper_force_recovery/configs/enable_keeper7.xml @@ -1,5 +1,6 @@ + false 9181 7 /var/lib/clickhouse/coordination/log @@ -10,6 +11,7 @@ 10000 75 trace + 200 diff --git a/tests/integration/test_keeper_force_recovery/configs/enable_keeper8.xml b/tests/integration/test_keeper_force_recovery/configs/enable_keeper8.xml index 4f1c21a1084..711d70cb1ac 100644 --- a/tests/integration/test_keeper_force_recovery/configs/enable_keeper8.xml +++ b/tests/integration/test_keeper_force_recovery/configs/enable_keeper8.xml @@ -1,5 +1,6 @@ + false 9181 8 /var/lib/clickhouse/coordination/log @@ -10,6 +11,7 @@ 10000 75 trace + 200 diff --git a/tests/integration/test_keeper_force_recovery/configs/recovered_keeper1.xml b/tests/integration/test_keeper_force_recovery/configs/recovered_keeper1.xml index eaf0f01afc9..abd4ef85bf7 100644 --- a/tests/integration/test_keeper_force_recovery/configs/recovered_keeper1.xml +++ b/tests/integration/test_keeper_force_recovery/configs/recovered_keeper1.xml @@ -1,5 +1,6 @@ + false 9181 1 /var/lib/clickhouse/coordination/log @@ -10,6 +11,7 @@ 10000 75 trace + 200 diff --git a/tests/integration/test_keeper_force_recovery/configs/use_keeper.xml b/tests/integration/test_keeper_force_recovery/configs/use_keeper.xml deleted file mode 100644 index f41e8c6e49c..00000000000 --- a/tests/integration/test_keeper_force_recovery/configs/use_keeper.xml +++ /dev/null @@ -1,36 +0,0 @@ - - - - node1 - 9181 - - - node2 - 9181 - - - node3 - 9181 - - - node4 - 9181 - - - node5 - 9181 - - - node6 - 9181 - - - node7 - 9181 - - - node8 - 9181 - - - diff --git a/tests/integration/test_keeper_force_recovery/test.py b/tests/integration/test_keeper_force_recovery/test.py index f7c3787b4d8..f630e5a422b 100644 --- a/tests/integration/test_keeper_force_recovery/test.py +++ b/tests/integration/test_keeper_force_recovery/test.py @@ -22,10 +22,7 @@ def get_nodes(): nodes.append( cluster.add_instance( f"node{i+1}", - main_configs=[ - f"configs/enable_keeper{i+1}.xml", - f"configs/use_keeper.xml", - ], + main_configs=[f"configs/enable_keeper{i+1}.xml"], stay_alive=True, ) ) diff --git a/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper1.xml b/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper1.xml index 441c1bc185d..94e59128bd3 100644 --- a/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper1.xml +++ b/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper1.xml @@ -1,5 +1,6 @@ + false 9181 1 /var/lib/clickhouse/coordination/log @@ -10,6 +11,7 @@ 10000 75 trace + 200 diff --git 
a/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper1_solo.xml b/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper1_solo.xml index f0cb887b062..6367b4b4c29 100644 --- a/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper1_solo.xml +++ b/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper1_solo.xml @@ -1,5 +1,6 @@ + false 1 9181 1 @@ -11,6 +12,7 @@ 10000 75 trace + 200 diff --git a/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper2.xml b/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper2.xml index e2e2c1fd7db..548d12c2e0a 100644 --- a/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper2.xml +++ b/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper2.xml @@ -1,5 +1,6 @@ + false 9181 2 /var/lib/clickhouse/coordination/log @@ -10,6 +11,7 @@ 10000 75 trace + 200 diff --git a/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper3.xml b/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper3.xml index e2ac0400d88..65f9675cbd6 100644 --- a/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper3.xml +++ b/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper3.xml @@ -1,5 +1,6 @@ + false 9181 3 /var/lib/clickhouse/coordination/log @@ -10,6 +11,7 @@ 10000 75 trace + 200 diff --git a/tests/integration/test_keeper_force_recovery_single_node/configs/use_keeper.xml b/tests/integration/test_keeper_force_recovery_single_node/configs/use_keeper.xml deleted file mode 100644 index 384e984f210..00000000000 --- a/tests/integration/test_keeper_force_recovery_single_node/configs/use_keeper.xml +++ /dev/null @@ -1,16 +0,0 @@ - - - - node1 - 9181 - - - node2 - 9181 - - - node3 - 9181 - - - diff --git a/tests/integration/test_keeper_force_recovery_single_node/test.py b/tests/integration/test_keeper_force_recovery_single_node/test.py index 1c0d5e9a306..132c5488df6 100644 --- a/tests/integration/test_keeper_force_recovery_single_node/test.py +++ b/tests/integration/test_keeper_force_recovery_single_node/test.py @@ -20,10 +20,7 @@ def get_nodes(): nodes.append( cluster.add_instance( f"node{i+1}", - main_configs=[ - f"configs/enable_keeper{i+1}.xml", - f"configs/use_keeper.xml", - ], + main_configs=[f"configs/enable_keeper{i+1}.xml"], stay_alive=True, ) ) From 8970a531453cbfd9916681bf658ba605db0129de Mon Sep 17 00:00:00 2001 From: serxa Date: Fri, 21 Jul 2023 11:38:46 +0000 Subject: [PATCH 1845/1997] Fix `countSubstrings()` hang with empty needle and a column haystack --- src/Functions/CountSubstringsImpl.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Functions/CountSubstringsImpl.h b/src/Functions/CountSubstringsImpl.h index de00e9397d6..1501e50afcf 100644 --- a/src/Functions/CountSubstringsImpl.h +++ b/src/Functions/CountSubstringsImpl.h @@ -49,6 +49,9 @@ struct CountSubstringsImpl /// FIXME: suboptimal memset(&res[0], 0, res.size() * sizeof(res[0])); + if (needle.empty()) + return; // Return all zeros + /// Current index in the array of strings. 
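/// Editor's note (standalone sketch, not part of the patch): the empty-needle
/// guard added above matters because an empty needle "matches" at the current
/// position without ever advancing it. Assuming a simplified find-based
/// counter, the failure mode looks like this:
///
///     #include <string>
///
///     size_t countOccurrences(const std::string & haystack, const std::string & needle)
///     {
///         size_t count = 0;
///         for (size_t pos = haystack.find(needle); pos != std::string::npos;
///              pos = haystack.find(needle, pos + needle.size())) /// advances by 0 when needle is empty
///             ++count;
///         return count; /// never reached for needle == "": pos stays at 0 forever
///     }
///
/// Returning all zeros up front, as the new code does, avoids that loop.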
size_t i = 0; From 66c8066cd59a685ded94af69d2cc37469cb29d77 Mon Sep 17 00:00:00 2001 From: serxa Date: Fri, 21 Jul 2023 11:45:44 +0000 Subject: [PATCH 1846/1997] add test --- tests/queries/0_stateless/01590_countSubstrings.reference | 2 ++ tests/queries/0_stateless/01590_countSubstrings.sql | 2 ++ 2 files changed, 4 insertions(+) diff --git a/tests/queries/0_stateless/01590_countSubstrings.reference b/tests/queries/0_stateless/01590_countSubstrings.reference index 056cb4c53b6..95031cd3856 100644 --- a/tests/queries/0_stateless/01590_countSubstrings.reference +++ b/tests/queries/0_stateless/01590_countSubstrings.reference @@ -7,6 +7,8 @@ empty 0 0 0 +0 +0 char 1 2 diff --git a/tests/queries/0_stateless/01590_countSubstrings.sql b/tests/queries/0_stateless/01590_countSubstrings.sql index dd2122ed6ff..6d2d87b1260 100644 --- a/tests/queries/0_stateless/01590_countSubstrings.sql +++ b/tests/queries/0_stateless/01590_countSubstrings.sql @@ -12,6 +12,8 @@ select 'empty'; select countSubstrings('', '.'); select countSubstrings('', ''); select countSubstrings('.', ''); +select countSubstrings(toString(number), '') from numbers(1); +select countSubstrings('', toString(number)) from numbers(1); select 'char'; select countSubstrings('foobar.com', '.'); From 0f969923229375d72faac15257fc70bd7ece9095 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 16 Jul 2023 08:07:50 +0200 Subject: [PATCH 1847/1997] Fix possible EADDRINUSE ("Address already in use") in integration tests Here is one example [1]: minio1_1 | WARNING: Console endpoint is listening on a dynamic port (32911), please use --console-address ":PORT" to choose a static port. minio1_1 | ERROR Unable to initialize console server: Specified port is already in use minio1_1 | > Please ensure no other program uses the same address/port [1]: https://s3.amazonaws.com/clickhouse-test-reports/52103/7d510eac7c5f0dfb3361e269be30972e6022fada/integration_tests__tsan__[1_6].html And here is second [2]: java.net.BindException: Problem binding to [0.0.0.0:50020] java.net.BindException: Address already in use; For more details see: http://wiki.apache.org/hadoop/BindException [2]: https://s3.amazonaws.com/clickhouse-test-reports/51493/63e88b725d3d255a6534adce4d434ce5f95d2874/integration_tests__asan__[1_6].html v2: increase the limit from 5K to 10K Signed-off-by: Azat Khuzhin --- tests/integration/conftest.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 5933883f7b0..968571bfdde 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -12,6 +12,22 @@ from helpers.network import _NetworkManager logging.raiseExceptions = False +@pytest.fixture(autouse=True, scope="session") +def tune_local_port_range(): + # Lots of services uses non privileged ports: + # - hdfs -- 50020/50070/... + # - minio + # - mysql + # - psql + # + # So instead of tuning all these thirdparty services, let's simply + # prohibit using such ports for outgoing connections, this should fix + # possible "Address already in use" errors. + # + # NOTE: 5K is not enough, and sometimes leads to EADDRNOTAVAIL error. 
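# Editor's note (illustration, not part of the patch): the sysctl below is
# equivalent to writing the range into procfs directly; a hedged sketch of
# what it changes, assuming root inside the test runner:
#
#   with open("/proc/sys/net/ipv4/ip_local_port_range", "w") as f:
#       f.write("55000 65535")
#
# Ephemeral (outgoing) ports are then confined to 55000-65535, so they can no
# longer collide with the fixed listening ports of hdfs/minio/mysql/psql.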
+ run_and_check(["sysctl net.ipv4.ip_local_port_range='55000 65535'"], shell=True) + + @pytest.fixture(autouse=True, scope="session") def cleanup_environment(): try: From 1fe8076b9422e09b78887b44a724cf1cae5d7fd8 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 20 Jul 2023 12:38:57 +0200 Subject: [PATCH 1848/1997] Fix capabilities installed via systemd service (fixes netlink/IO priorities) CapabilityBoundingSet that contained in systemd unit before is about allowing to set some capabilities, not about granting them. To grant them you need to use AmbientCapabilities. And if you do not use 'clickhouse install' then: - IO priorities was unavailable (since they requires CAP_SYS_NICE) - For taskstats the procfs was used instead of netlink Not a big deal, but still. Here how it had been tested: $ systemd-run -p CapabilityBoundingSet=CAP_NET_ADMIN --shell root:/etc (master)# capsh --print Current: cap_net_admin=ep Bounding set =cap_net_admin Ambient set = $ systemd-run -p User=azat -p CapabilityBoundingSet=CAP_NET_ADMIN --shell azat:/etc$ capsh --print Current: = Bounding set =cap_net_admin Ambient set = $ systemd-run -p User=azat -p AmbientCapabilities=CAP_NET_ADMIN -p CapabilityBoundingSet=CAP_NET_ADMIN --shell azat:/etc$ capsh --print Current: cap_net_admin=eip Bounding set =cap_net_admin Ambient set =cap_net_admin Note, if you are running it under root (without changing user) you don't need to specify AmbientCapabilities additionally, because root has all capabilities by default and they had been inherited. Signed-off-by: Azat Khuzhin --- packages/clickhouse-server.service | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/clickhouse-server.service b/packages/clickhouse-server.service index 7742d8b278a..42dc5bd380d 100644 --- a/packages/clickhouse-server.service +++ b/packages/clickhouse-server.service @@ -29,6 +29,7 @@ EnvironmentFile=-/etc/default/clickhouse LimitCORE=infinity LimitNOFILE=500000 CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE CAP_NET_BIND_SERVICE +AmbientCapabilities=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE CAP_NET_BIND_SERVICE [Install] # ClickHouse should not start from the rescue shell (rescue.target). From b45c2c939b974ea3306f9c2192f362d71a69c0e2 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 21 Jul 2023 15:17:07 +0300 Subject: [PATCH 1849/1997] disable expression templates for time intervals (#52335) --- .../Impl/ConstantExpressionTemplate.cpp | 33 +++++++++++++++++++ .../Formats/Impl/ConstantExpressionTemplate.h | 2 ++ .../Formats/Impl/ValuesBlockInputFormat.cpp | 5 +++ ...2830_insert_values_time_interval.reference | 4 +++ .../02830_insert_values_time_interval.sql | 25 ++++++++++++++ 5 files changed, 69 insertions(+) create mode 100644 tests/queries/0_stateless/02830_insert_values_time_interval.reference create mode 100644 tests/queries/0_stateless/02830_insert_values_time_interval.sql diff --git a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp index 5d438d47de6..06efe0a20aa 100644 --- a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp +++ b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp @@ -177,6 +177,14 @@ private: if (function.name == "lambda") return; + /// Parsing of INTERVALs is quite hacky. 
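/// Editor's note (illustration, not part of the patch; the original comment
/// resumes right after this block): the rewriting described here can be
/// observed directly, e.g.:
///
///     EXPLAIN SYNTAX SELECT now() + INTERVAL 1 DAY;
///     -- SELECT now() + toIntervalDay(1)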
Expressions are rewritten during parsing like this: + /// "now() + interval 1 day" -> "now() + toIntervalDay(1)" + /// "select now() + INTERVAL '1 day 1 hour 1 minute'" -> "now() + (toIntervalDay(1), toIntervalHour(1), toIntervalMinute(1))" + /// so the AST is completely different from the original expression . + /// Avoid extracting these literals and simply compare tokens. It makes the template less flexible but much simpler. + if (function.name.starts_with("toInterval")) + return; + FunctionOverloadResolverPtr builder = FunctionFactory::instance().get(function.name, context); /// Do not replace literals which must be constant ColumnNumbers dont_visit_children = builder->getArgumentsThatAreAlwaysConstant(); @@ -350,6 +358,31 @@ ConstantExpressionTemplate::TemplateStructure::TemplateStructure(LiteralsInfo & } +String ConstantExpressionTemplate::TemplateStructure::dumpTemplate() const +{ + WriteBufferFromOwnString res; + + size_t cur_column = 0; + size_t cur_token = 0; + size_t num_columns = literals.columns(); + while (cur_column < num_columns) + { + size_t skip_tokens_until = token_after_literal_idx[cur_column]; + while (cur_token < skip_tokens_until) + res << quote << tokens[cur_token++] << ", "; + + const DataTypePtr & type = literals.getByPosition(cur_column).type; + res << type->getName() << ", "; + ++cur_column; + } + + while (cur_token < tokens.size()) + res << quote << tokens[cur_token++] << ", "; + + res << "eof"; + return res.str(); +} + size_t ConstantExpressionTemplate::TemplateStructure::getTemplateHash(const ASTPtr & expression, const LiteralsInfo & replaced_literals, const DataTypePtr & result_column_type, diff --git a/src/Processors/Formats/Impl/ConstantExpressionTemplate.h b/src/Processors/Formats/Impl/ConstantExpressionTemplate.h index fbb3cbcd22a..71d0d0f7134 100644 --- a/src/Processors/Formats/Impl/ConstantExpressionTemplate.h +++ b/src/Processors/Formats/Impl/ConstantExpressionTemplate.h @@ -31,6 +31,8 @@ class ConstantExpressionTemplate : boost::noncopyable static size_t getTemplateHash(const ASTPtr & expression, const LiteralsInfo & replaced_literals, const DataTypePtr & result_column_type, bool null_as_default, const String & salt); + String dumpTemplate() const; + String result_column_name; std::vector tokens; diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp index d61e723fd75..3a65a6fe4ea 100644 --- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -474,6 +475,10 @@ bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx context, &found_in_cache, delimiter); + + LOG_TEST(&Poco::Logger::get("ValuesBlockInputFormat"), "Will use an expression template to parse column {}: {}", + column_idx, structure->dumpTemplate()); + templates[column_idx].emplace(structure); if (found_in_cache) ++attempts_to_deduce_template_cached[column_idx]; diff --git a/tests/queries/0_stateless/02830_insert_values_time_interval.reference b/tests/queries/0_stateless/02830_insert_values_time_interval.reference new file mode 100644 index 00000000000..b5b57fbfbfe --- /dev/null +++ b/tests/queries/0_stateless/02830_insert_values_time_interval.reference @@ -0,0 +1,4 @@ +1 2023-07-21 22:54:02 +2 2023-07-21 21:53:01 +3 2023-07-21 21:53:01 +4 2023-07-20 21:54:02 diff --git a/tests/queries/0_stateless/02830_insert_values_time_interval.sql 
b/tests/queries/0_stateless/02830_insert_values_time_interval.sql new file mode 100644 index 00000000000..f5d5d8a4c04 --- /dev/null +++ b/tests/queries/0_stateless/02830_insert_values_time_interval.sql @@ -0,0 +1,25 @@ + +DROP TABLE IF EXISTS t1; + +CREATE TABLE t1 +( + c1 DateTime DEFAULT now() NOT NULL, + c2 DateTime DEFAULT now() NOT NULL, + c3 DateTime DEFAULT now() NOT NULL, + PRIMARY KEY(c1, c2, c3) +) ENGINE = MergeTree() +ORDER BY (c1, c2, c3); + +INSERT INTO t1 (c1,c2,c3) VALUES(now() + INTERVAL '1 day 1 hour 1 minute 1 second', now(), now()); + +DROP TABLE t1; + +CREATE TABLE t1 (n int, dt DateTime) ENGINE=Memory; + +SET input_format_values_interpret_expressions=0; +INSERT INTO t1 VALUES (1, toDateTime('2023-07-20 21:53:01') + INTERVAL '1 day 1 hour 1 minute 1 second'), (2, toDateTime('2023-07-20 21:53:01') + INTERVAL '1 day'); +INSERT INTO t1 VALUES (3, toDateTime('2023-07-20 21:53:01') + INTERVAL 1 DAY), (4, toDateTime('2023-07-20 21:53:01') + (toIntervalMinute(1), toIntervalSecond(1))); + +SELECT * FROM t1 ORDER BY n; + +DROP TABLE t1; From 3798bd6f509b7fc0591201c66c2e0d1b254835dd Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Fri, 21 Jul 2023 12:52:07 +0000 Subject: [PATCH 1850/1997] Replace test by text_to_encrypt --- src/Common/examples/encrypt_decrypt.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Common/examples/encrypt_decrypt.cpp b/src/Common/examples/encrypt_decrypt.cpp index 2d8c5a5f61f..503802016cb 100644 --- a/src/Common/examples/encrypt_decrypt.cpp +++ b/src/Common/examples/encrypt_decrypt.cpp @@ -7,7 +7,7 @@ * Keys for codecs are loaded from section of configuration file. * * How to use: - * ./encrypt_decrypt /etc/clickhouse-server/config.xml -e AES_128_GCM_SIV test + * ./encrypt_decrypt /etc/clickhouse-server/config.xml -e AES_128_GCM_SIV text_to_encrypt */ int main(int argc, char ** argv) @@ -22,7 +22,7 @@ int main(int argc, char ** argv) << "action: -e for encryption and -d for decryption." << std::endl << "codec: AES_128_GCM_SIV or AES_256_GCM_SIV." 
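// Editor's note (hedged illustration, not part of the patch): the tool reads
// its keys from the given server config; a minimal shape of the section it
// expects, with a made-up 16-byte key (32 hex digits) for the 128-bit codec:
//
//     <encryption_codecs>
//         <aes_128_gcm_siv>
//             <key_hex>00112233445566778899aabbccddeeff</key_hex>
//         </aes_128_gcm_siv>
//     </encryption_codecs>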
<< std::endl << std::endl << "Example:" << std::endl - << " ./encrypt_decrypt /etc/clickhouse-server/config.xml -e AES_128_GCM_SIV test"; + << " ./encrypt_decrypt /etc/clickhouse-server/config.xml -e AES_128_GCM_SIV text_to_encrypt"; return 3; } From 8ec8388a9ef063beb02b430ae4b89dfe5bab9ddd Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Fri, 21 Jul 2023 14:53:02 +0200 Subject: [PATCH 1851/1997] Update gtest_lru_file_cache.cpp --- src/Interpreters/tests/gtest_lru_file_cache.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/tests/gtest_lru_file_cache.cpp b/src/Interpreters/tests/gtest_lru_file_cache.cpp index b9d12c8ed42..12e7d9372f7 100644 --- a/src/Interpreters/tests/gtest_lru_file_cache.cpp +++ b/src/Interpreters/tests/gtest_lru_file_cache.cpp @@ -470,6 +470,7 @@ TEST_F(FileCacheTest, get) auto & file_segment2 = get(holder2, 2); ASSERT_TRUE(file_segment2.getOrSetDownloader() != FileSegment::getCallerId()); + ASSERT_EQ(file_segment2.state(), State::DOWNLOADING); { std::lock_guard lock(mutex); @@ -478,8 +479,7 @@ TEST_F(FileCacheTest, get) cv.notify_one(); file_segment2.wait(file_segment2.range().right); - file_segment2.complete(); - ASSERT_TRUE(file_segment2.state() == State::DOWNLOADED); + ASSERT_EQ(file_segment2.getDownloadedSize(false), file_segment2.range().size()); }); { @@ -488,7 +488,8 @@ TEST_F(FileCacheTest, get) } download(file_segment); - ASSERT_TRUE(file_segment.state() == State::DOWNLOADED); + ASSERT_EQ(file_segment.state(), State::DOWNLOADED); + file_segment.completePartAndResetDownloader(); other_1.join(); From 4695ec6802c80d25e93a7b523821840c10a3b200 Mon Sep 17 00:00:00 2001 From: Alexander Sapin Date: Fri, 21 Jul 2023 14:56:29 +0200 Subject: [PATCH 1852/1997] Add an ability to specify allocations size for sampling memory profiler --- programs/server/Server.cpp | 21 ++++++++++----- src/Common/MemoryTracker.cpp | 11 ++++++-- src/Common/MemoryTracker.h | 18 +++++++++++++ src/Core/ServerSettings.h | 8 ++++-- src/Core/Settings.h | 4 ++- src/Interpreters/ProcessList.cpp | 3 +++ src/Interpreters/ThreadStatusExt.cpp | 2 ++ .../__init__.py | 1 + .../configs/max_untracked_memory.xml | 7 +++++ .../configs/memory_profiler.xml | 5 ++++ .../test.py | 27 +++++++++++++++++++ ...r_sample_min_max_allocation_size.reference | 1 + ...profiler_sample_min_max_allocation_size.sh | 18 +++++++++++++ 13 files changed, 115 insertions(+), 11 deletions(-) create mode 100644 tests/integration/test_memory_profiler_min_max_borders/__init__.py create mode 100644 tests/integration/test_memory_profiler_min_max_borders/configs/max_untracked_memory.xml create mode 100644 tests/integration/test_memory_profiler_min_max_borders/configs/memory_profiler.xml create mode 100644 tests/integration/test_memory_profiler_min_max_borders/test.py create mode 100644 tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.reference create mode 100755 tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.sh diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 948824242fb..71bf8cc9e89 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1643,17 +1643,26 @@ try global_context->initializeTraceCollector(); /// Set up server-wide memory profiler (for total memory tracker). 
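/// Editor's note (simplified sketch, not part of the patch): taken together,
/// the sampling settings configured below gate each allocation roughly like
/// this, assuming shortened names:
///
///     bool shouldSample(UInt64 size, double probability, UInt64 min_bytes, UInt64 max_bytes)
///     {
///         if (probability <= 0.0)
///             return false;
///         if (size < min_bytes)                   /// min_bytes == 0 never filters
///             return false;
///         if (max_bytes != 0 && size > max_bytes) /// max_bytes == 0 disables the cap
///             return false;
///         std::bernoulli_distribution sample(probability);
///         return sample(thread_local_rng);
///     }
///
/// which matches MemoryTracker::isSizeOkForSampling() further down in this
/// patch; the probability draw itself is unchanged, the size window only
/// filters which allocations are eligible.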
- UInt64 total_memory_profiler_step = config().getUInt64("total_memory_profiler_step", 0); - if (total_memory_profiler_step) + if (server_settings.total_memory_profiler_step) { - total_memory_tracker.setProfilerStep(total_memory_profiler_step); + total_memory_tracker.setProfilerStep(server_settings.total_memory_profiler_step); } - double total_memory_tracker_sample_probability = config().getDouble("total_memory_tracker_sample_probability", 0); - if (total_memory_tracker_sample_probability > 0.0) + if (server_settings.total_memory_tracker_sample_probability > 0.0) { - total_memory_tracker.setSampleProbability(total_memory_tracker_sample_probability); + total_memory_tracker.setSampleProbability(server_settings.total_memory_tracker_sample_probability); } + + if (server_settings.total_memory_profiler_sample_min_allocation_size) + { + total_memory_tracker.setSampleMinAllocationSize(server_settings.total_memory_profiler_sample_min_allocation_size); + } + + if (server_settings.total_memory_profiler_sample_max_allocation_size) + { + total_memory_tracker.setSampleMaxAllocationSize(server_settings.total_memory_profiler_sample_max_allocation_size); + } + } #endif diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp index 81cac2617c5..52cd9cc8073 100644 --- a/src/Common/MemoryTracker.cpp +++ b/src/Common/MemoryTracker.cpp @@ -229,7 +229,7 @@ void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryT } std::bernoulli_distribution sample(sample_probability); - if (unlikely(sample_probability > 0.0 && sample(thread_local_rng))) + if (unlikely(sample_probability > 0.0 && isSizeOkForSampling(size) && sample(thread_local_rng))) { MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global); DB::TraceSender::send(DB::TraceType::MemorySample, StackTrace(), {.size = size}); @@ -413,7 +413,7 @@ void MemoryTracker::free(Int64 size) } std::bernoulli_distribution sample(sample_probability); - if (unlikely(sample_probability > 0.0 && sample(thread_local_rng))) + if (unlikely(sample_probability > 0.0 && isSizeOkForSampling(size) && sample(thread_local_rng))) { MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global); DB::TraceSender::send(DB::TraceType::MemorySample, StackTrace(), {.size = -size}); @@ -534,6 +534,13 @@ void MemoryTracker::setOrRaiseProfilerLimit(Int64 value) ; } +bool MemoryTracker::isSizeOkForSampling(UInt64 size) const +{ + //LOG_DEBUG(&Poco::Logger::get("MemoryTracker"), "CHECKING SIZE {} IN BORDERS [{}; {}]", size, min_allocation_size_bytes, max_allocation_size_bytes); + /// We can avoid comparison min_allocation_size_bytes with zero, because we cannot have 0 bytes allocation/deallocation + return ((max_allocation_size_bytes == 0 || size <= max_allocation_size_bytes) && size >= min_allocation_size_bytes); +} + bool canEnqueueBackgroundTask() { auto limit = background_memory_tracker.getSoftLimit(); diff --git a/src/Common/MemoryTracker.h b/src/Common/MemoryTracker.h index 4e29d40c953..768dc8a7404 100644 --- a/src/Common/MemoryTracker.h +++ b/src/Common/MemoryTracker.h @@ -67,6 +67,12 @@ private: /// To randomly sample allocations and deallocations in trace_log. double sample_probability = 0; + /// Randomly sample allocations only larger or equal to this size + UInt64 min_allocation_size_bytes = 0; + + /// Randomly sample allocations only smaller or equal to this size + UInt64 max_allocation_size_bytes = 0; + /// Singly-linked list. 
All information will be passed to subsequent memory trackers also (it allows to implement trackers hierarchy). /// In terms of tree nodes it is the list of parents. Lifetime of these trackers should "include" lifetime of current tracker. std::atomic parent {}; @@ -88,6 +94,8 @@ private: void setOrRaiseProfilerLimit(Int64 value); + bool isSizeOkForSampling(UInt64 size) const; + /// allocImpl(...) and free(...) should not be used directly friend struct CurrentMemoryTracker; void allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryTracker * query_tracker = nullptr); @@ -165,6 +173,16 @@ public: sample_probability = value; } + void setSampleMinAllocationSize(UInt64 value) + { + min_allocation_size_bytes = value; + } + + void setSampleMaxAllocationSize(UInt64 value) + { + max_allocation_size_bytes = value; + } + void setProfilerStep(Int64 value) { profiler_step = value; diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index 1a9f226041b..f7a6c9e950e 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -81,8 +81,12 @@ namespace DB M(UInt64, background_schedule_pool_size, 128, "The maximum number of threads that will be used for constantly executing some lightweight periodic operations.", 0) \ M(UInt64, background_message_broker_schedule_pool_size, 16, "The maximum number of threads that will be used for executing background operations for message streaming.", 0) \ M(UInt64, background_distributed_schedule_pool_size, 16, "The maximum number of threads that will be used for executing distributed sends.", 0) \ - M(Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0) - + M(Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0) \ + \ + M(UInt64, total_memory_profiler_step, 0, "Whenever server memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. Values lower than a few megabytes will slow down server.", 0) \ + M(Double, total_memory_tracker_sample_probability, 0, "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation (can be changed with `memory_profiler_sample_min_allocation_size` and `memory_profiler_sample_max_allocation_size`). Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \ + M(UInt64, total_memory_profiler_sample_min_allocation_size, 0, "Collect random allocations of size greater or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \ + M(UInt64, total_memory_profiler_sample_max_allocation_size, 0, "Collect random allocations of size less or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. 
You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) DECLARE_SETTINGS_TRAITS(ServerSettingsTraits, SERVER_SETTINGS) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 6fb26994d2f..bcfc179be5e 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -426,7 +426,9 @@ class IColumn; M(UInt64, memory_overcommit_ratio_denominator_for_user, 1_GiB, "It represents soft memory limit on the global level. This value is used to compute query overcommit ratio.", 0) \ M(UInt64, max_untracked_memory, (4 * 1024 * 1024), "Small allocations and deallocations are grouped in thread local variable and tracked or profiled only when amount (in absolute value) becomes larger than specified value. If the value is higher than 'memory_profiler_step' it will be effectively lowered to 'memory_profiler_step'.", 0) \ M(UInt64, memory_profiler_step, (4 * 1024 * 1024), "Whenever query memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. Values lower than a few megabytes will slow down query processing.", 0) \ - M(Float, memory_profiler_sample_probability, 0., "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation. Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \ + M(Float, memory_profiler_sample_probability, 0., "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation (can be changed with `memory_profiler_sample_min_allocation_size` and `memory_profiler_sample_max_allocation_size`). Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \ + M(UInt64, memory_profiler_sample_min_allocation_size, 0, "Collect random allocations of size greater or equal than specified value with probability equal to `memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \ + M(UInt64, memory_profiler_sample_max_allocation_size, 0, "Collect random allocations of size less or equal than specified value with probability equal to `memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \ M(Bool, trace_profile_events, false, "Send to system.trace_log profile event and value of increment on each increment with 'ProfileEvent' trace_type", 0) \ \ M(UInt64, memory_usage_overcommit_max_wait_microseconds, 5'000'000, "Maximum time thread will wait for memory to be freed in the case of memory overcommit. 
If timeout is reached and memory is not freed, exception is thrown.", 0) \ diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index 1503e396298..c299572ef41 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -223,7 +223,10 @@ ProcessList::insert(const String & query_, const IAST * ast, ContextMutablePtr q { /// Set up memory profiling thread_group->memory_tracker.setProfilerStep(settings.memory_profiler_step); + thread_group->memory_tracker.setSampleProbability(settings.memory_profiler_sample_probability); + thread_group->memory_tracker.setSampleMinAllocationSize(settings.memory_profiler_sample_min_allocation_size); + thread_group->memory_tracker.setSampleMaxAllocationSize(settings.memory_profiler_sample_max_allocation_size); thread_group->performance_counters.setTraceProfileEvents(settings.trace_profile_events); } diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index 5acfe500b1d..49d9d3ccdf6 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -83,6 +83,8 @@ ThreadGroupPtr ThreadGroup::createForBackgroundProcess(ContextPtr storage_contex const Settings & settings = storage_context->getSettingsRef(); group->memory_tracker.setProfilerStep(settings.memory_profiler_step); group->memory_tracker.setSampleProbability(settings.memory_profiler_sample_probability); + group->memory_tracker.setSampleMinAllocationSize(settings.memory_profiler_sample_min_allocation_size); + group->memory_tracker.setSampleMaxAllocationSize(settings.memory_profiler_sample_max_allocation_size); group->memory_tracker.setSoftLimit(settings.memory_overcommit_ratio_denominator); group->memory_tracker.setParent(&background_memory_tracker); if (settings.memory_tracker_fault_probability > 0.0) diff --git a/tests/integration/test_memory_profiler_min_max_borders/__init__.py b/tests/integration/test_memory_profiler_min_max_borders/__init__.py new file mode 100644 index 00000000000..e5a0d9b4834 --- /dev/null +++ b/tests/integration/test_memory_profiler_min_max_borders/__init__.py @@ -0,0 +1 @@ +#!/usr/bin/env python3 diff --git a/tests/integration/test_memory_profiler_min_max_borders/configs/max_untracked_memory.xml b/tests/integration/test_memory_profiler_min_max_borders/configs/max_untracked_memory.xml new file mode 100644 index 00000000000..56fc5ed34ca --- /dev/null +++ b/tests/integration/test_memory_profiler_min_max_borders/configs/max_untracked_memory.xml @@ -0,0 +1,7 @@ + + + + 1 + + + diff --git a/tests/integration/test_memory_profiler_min_max_borders/configs/memory_profiler.xml b/tests/integration/test_memory_profiler_min_max_borders/configs/memory_profiler.xml new file mode 100644 index 00000000000..5b3e17d145f --- /dev/null +++ b/tests/integration/test_memory_profiler_min_max_borders/configs/memory_profiler.xml @@ -0,0 +1,5 @@ + + 1 + 4096 + 8192 + diff --git a/tests/integration/test_memory_profiler_min_max_borders/test.py b/tests/integration/test_memory_profiler_min_max_borders/test.py new file mode 100644 index 00000000000..b768a442591 --- /dev/null +++ b/tests/integration/test_memory_profiler_min_max_borders/test.py @@ -0,0 +1,27 @@ +from helpers.cluster import ClickHouseCluster +import pytest + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance( + "node", + main_configs=["configs/memory_profiler.xml"], + user_configs=["configs/max_untracked_memory.xml"], +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield 
cluster + + finally: + cluster.shutdown() + + +def test_trace_boundaries_work(started_cluster): + node.query("select randomPrintableASCII(number) from numbers(1000) FORMAT Null") + node.query("SYSTEM FLUSH LOGS") + + assert node.query("SELECT countDistinct(abs(size)) > 0 FROM system.trace_log where trace_type = 'MemorySample'") == "1\n" + assert node.query("SELECT count() FROM system.trace_log where trace_type = 'MemorySample' and (abs(size) > 8192 or abs(size) < 4096)") == "0\n" diff --git a/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.reference b/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.sh b/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.sh new file mode 100755 index 00000000000..b1fbea26da7 --- /dev/null +++ b/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash +# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-cpu-aarch64, no-random-settings +# requires TraceCollector, does not available under sanitizers and aarch64 + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +query_id="${CLICKHOUSE_DATABASE}_min_max_allocation_size_$RANDOM$RANDOM" +${CLICKHOUSE_CLIENT} --query_id="$query_id" --memory_profiler_sample_min_allocation_size=4096 --memory_profiler_sample_max_allocation_size=8192 --log_queries=1 --max_threads=1 --max_untracked_memory=0 --memory_profiler_sample_probability=1 --query "select randomPrintableASCII(number) from numbers(1000) FORMAT Null" + +${CLICKHOUSE_CLIENT} --query "SYSTEM FLUSH LOGS" + +# at least something allocated +${CLICKHOUSE_CLIENT} --query "SELECT countDistinct(abs(size)) > 0 FROM system.trace_log where query_id='$query_id' and trace_type = 'MemorySample'" + +# show wrong allocations +${CLICKHOUSE_CLIENT} --query "SELECT abs(size) FROM system.trace_log where query_id='$query_id' and trace_type = 'MemorySample' and (abs(size) > 8192 or abs(size) < 4096)" From 0aed62ec73b8de4614506f5b72a086d8d10db4aa Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Fri, 21 Jul 2023 13:03:25 +0000 Subject: [PATCH 1853/1997] Add codec name into exception message --- src/Compression/CompressionCodecEncrypted.cpp | 2 +- ..._no_encryption_codecs.xml => config_no_encryption_key.xml} | 1 - .../integration/test_config_decryption/test_wrong_settings.py | 4 ++-- 3 files changed, 3 insertions(+), 4 deletions(-) rename tests/integration/test_config_decryption/configs/{config_no_encryption_codecs.xml => config_no_encryption_key.xml} (52%) diff --git a/src/Compression/CompressionCodecEncrypted.cpp b/src/Compression/CompressionCodecEncrypted.cpp index fb870ababa3..3f4e35a78a4 100644 --- a/src/Compression/CompressionCodecEncrypted.cpp +++ b/src/Compression/CompressionCodecEncrypted.cpp @@ -588,7 +588,7 @@ String CompressionCodecEncrypted::Configuration::getKey(EncryptionMethod method, if (current_params->keys_storage[method].contains(key_id)) key = current_params->keys_storage[method].at(key_id); else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no key {} in config", key_id); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no key {} in config for {} encryption 
codec", key_id, getMethodName(method)); return key; } diff --git a/tests/integration/test_config_decryption/configs/config_no_encryption_codecs.xml b/tests/integration/test_config_decryption/configs/config_no_encryption_key.xml similarity index 52% rename from tests/integration/test_config_decryption/configs/config_no_encryption_codecs.xml rename to tests/integration/test_config_decryption/configs/config_no_encryption_key.xml index 07bf69d17c8..5f7769f7403 100644 --- a/tests/integration/test_config_decryption/configs/config_no_encryption_codecs.xml +++ b/tests/integration/test_config_decryption/configs/config_no_encryption_key.xml @@ -1,4 +1,3 @@ 96260000000B0000000000E8FE3C087CED2205A5071078B29FD5C3B97F824911DED3217E980C - 97260000000B0000000000BFFF70C4DA718754C1DA0E2F25FF9246D4783F7FFEC4089EC1CC14 diff --git a/tests/integration/test_config_decryption/test_wrong_settings.py b/tests/integration/test_config_decryption/test_wrong_settings.py index e0fbd4b2948..62610964502 100644 --- a/tests/integration/test_config_decryption/test_wrong_settings.py +++ b/tests/integration/test_config_decryption/test_wrong_settings.py @@ -26,9 +26,9 @@ def test_invalid_chars(): ) -def test_no_encryption_codecs(): +def test_no_encryption_key(): start_clickhouse( - "configs/config_no_encryption_codecs.xml", "There is no key 0 in config" + "configs/config_no_encryption_key.xml", "There is no key 0 in config for AES_128_GCM_SIV encryption codec" ) From 8d4c840e2da0401787bafa2239907ff59160a003 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Fri, 21 Jul 2023 15:15:35 +0200 Subject: [PATCH 1854/1997] Some more cases --- tests/queries/0_stateless/01590_countSubstrings.reference | 3 +++ tests/queries/0_stateless/01590_countSubstrings.sql | 3 +++ 2 files changed, 6 insertions(+) diff --git a/tests/queries/0_stateless/01590_countSubstrings.reference b/tests/queries/0_stateless/01590_countSubstrings.reference index 95031cd3856..367b910e569 100644 --- a/tests/queries/0_stateless/01590_countSubstrings.reference +++ b/tests/queries/0_stateless/01590_countSubstrings.reference @@ -9,6 +9,9 @@ empty 0 0 0 +0 +0 +0 char 1 2 diff --git a/tests/queries/0_stateless/01590_countSubstrings.sql b/tests/queries/0_stateless/01590_countSubstrings.sql index 6d2d87b1260..b38cbb7d188 100644 --- a/tests/queries/0_stateless/01590_countSubstrings.sql +++ b/tests/queries/0_stateless/01590_countSubstrings.sql @@ -14,6 +14,9 @@ select countSubstrings('', ''); select countSubstrings('.', ''); select countSubstrings(toString(number), '') from numbers(1); select countSubstrings('', toString(number)) from numbers(1); +select countSubstrings('aaa', materialize('')); +select countSubstrings(materialize('aaa'), ''); +select countSubstrings(materialize('aaa'), materialize('')); select 'char'; select countSubstrings('foobar.com', '.'); From b5cf64466887e115656aab065848fb52784964ae Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 21 Jul 2023 15:23:04 +0200 Subject: [PATCH 1855/1997] Check projection metadata the same way we check ordinary metadata. (#52361) * Check projection metadata the same way we check ordinary metadata. 
* Allow aggregate projection to have empty PK --------- Co-authored-by: Alexander Tokmakov --- src/Storages/MergeTree/MergeTreeData.cpp | 10 +++++++--- src/Storages/MergeTree/MergeTreeData.h | 2 +- .../02540_duplicate_primary_key.sql | 18 +----------------- .../02540_duplicate_primary_key2.reference | 1 - .../02540_duplicate_primary_key2.sql | 10 +--------- .../02816_check_projection_metadata.reference | 0 .../02816_check_projection_metadata.sql | 3 +++ 7 files changed, 13 insertions(+), 31 deletions(-) create mode 100644 tests/queries/0_stateless/02816_check_projection_metadata.reference create mode 100644 tests/queries/0_stateless/02816_check_projection_metadata.sql diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index a820bacf9a3..34be8156e71 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -465,9 +465,10 @@ void MergeTreeData::checkProperties( const StorageInMemoryMetadata & new_metadata, const StorageInMemoryMetadata & old_metadata, bool attach, + bool allow_empty_sorting_key, ContextPtr local_context) const { - if (!new_metadata.sorting_key.definition_ast) + if (!new_metadata.sorting_key.definition_ast && !allow_empty_sorting_key) throw Exception(ErrorCodes::BAD_ARGUMENTS, "ORDER BY cannot be empty"); KeyDescription new_sorting_key = new_metadata.sorting_key; @@ -580,6 +581,9 @@ void MergeTreeData::checkProperties( if (projections_names.find(projection.name) != projections_names.end()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Projection with name {} already exists", backQuote(projection.name)); + /// We cannot alter a projection so far. So here we do not try to find a projection in old metadata. + bool is_aggregate = projection.type == ProjectionDescription::Type::Aggregate; + checkProperties(*projection.metadata, *projection.metadata, attach, is_aggregate, local_context); projections_names.insert(projection.name); } } @@ -593,7 +597,7 @@ void MergeTreeData::setProperties( bool attach, ContextPtr local_context) { - checkProperties(new_metadata, old_metadata, attach, local_context); + checkProperties(new_metadata, old_metadata, attach, false, local_context); setInMemoryMetadata(new_metadata); } @@ -3286,7 +3290,7 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context } } - checkProperties(new_metadata, old_metadata, false, local_context); + checkProperties(new_metadata, old_metadata, false, false, local_context); checkTTLExpressions(new_metadata, old_metadata); if (!columns_to_check_conversion.empty()) diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 8b5b50b1841..28611d09386 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -1229,7 +1229,7 @@ protected: /// The same for clearOldTemporaryDirectories. 
std::mutex clear_old_temporary_directories_mutex; - void checkProperties(const StorageInMemoryMetadata & new_metadata, const StorageInMemoryMetadata & old_metadata, bool attach = false, ContextPtr local_context = nullptr) const; + void checkProperties(const StorageInMemoryMetadata & new_metadata, const StorageInMemoryMetadata & old_metadata, bool attach, bool allow_empty_sorting_key, ContextPtr local_context) const; void setProperties(const StorageInMemoryMetadata & new_metadata, const StorageInMemoryMetadata & old_metadata, bool attach = false, ContextPtr local_context = nullptr); diff --git a/tests/queries/0_stateless/02540_duplicate_primary_key.sql b/tests/queries/0_stateless/02540_duplicate_primary_key.sql index a084d76964b..6905c9d5133 100644 --- a/tests/queries/0_stateless/02540_duplicate_primary_key.sql +++ b/tests/queries/0_stateless/02540_duplicate_primary_key.sql @@ -86,20 +86,4 @@ CREATE TABLE test ) ENGINE = MergeTree PARTITION BY toYYYYMM(coverage) -ORDER BY (coverage, situation_name, NAME_toe, NAME_cockroach); - -insert into test select * from generateRandom() limit 10; - -with dissonance as ( - Select cast(toStartOfInterval(coverage, INTERVAL 1 day) as Date) as flour, count() as regulation - from test - group by flour having flour >= toDate(now())-100 - ), -cheetah as ( - Select flour, regulation from dissonance - union distinct - Select toDate(now())-1, ifnull((select regulation from dissonance where flour = toDate(now())-1),0) as regulation -) -Select flour, regulation from cheetah order by flour with fill step 1 limit 100 format Null; - -drop table test; +ORDER BY (coverage, situation_name, NAME_toe, NAME_cockroach); -- { serverError BAD_ARGUMENTS } diff --git a/tests/queries/0_stateless/02540_duplicate_primary_key2.reference b/tests/queries/0_stateless/02540_duplicate_primary_key2.reference index 08839f6bb29..e69de29bb2d 100644 --- a/tests/queries/0_stateless/02540_duplicate_primary_key2.reference +++ b/tests/queries/0_stateless/02540_duplicate_primary_key2.reference @@ -1 +0,0 @@ -200 diff --git a/tests/queries/0_stateless/02540_duplicate_primary_key2.sql b/tests/queries/0_stateless/02540_duplicate_primary_key2.sql index d0f02a894f2..53800c95550 100644 --- a/tests/queries/0_stateless/02540_duplicate_primary_key2.sql +++ b/tests/queries/0_stateless/02540_duplicate_primary_key2.sql @@ -88,12 +88,4 @@ CREATE TABLE test ) ENGINE = MergeTree PARTITION BY toYYYYMM(timestamp) -ORDER BY (xxxx17, xxxx14, xxxx16, toStartOfDay(timestamp), left(xxxx19, 10), timestamp); - -INSERT INTO test SELECT * replace 1 as xxxx16 replace 1 as xxxx1 replace '2022-02-02 01:00:00' as timestamp replace 'Airtel' as xxxx14 FROM generateRandom() LIMIT 100; -INSERT INTO test SELECT * replace 1 as xxxx16 replace 1 as xxxx1 replace '2022-02-02 01:00:00' as timestamp replace 'BSNL' as xxxx14 FROM generateRandom() LIMIT 100; -INSERT INTO test SELECT * replace 1 as xxxx16 replace 1 as xxxx1 replace '2022-02-02 01:00:00' as timestamp replace 'xxx' as xxxx14 FROM generateRandom() LIMIT 100; - -select sum(1) from test where toStartOfInterval(timestamp, INTERVAL 1 day) >= TIMESTAMP '2022-02-01 01:00:00' and xxxx14 in ('Airtel', 'BSNL') and xxxx1 = 1 GROUP BY xxxx16; - -drop table test; +ORDER BY (xxxx17, xxxx14, xxxx16, toStartOfDay(timestamp), left(xxxx19, 10), timestamp); -- { serverError BAD_ARGUMENTS} diff --git a/tests/queries/0_stateless/02816_check_projection_metadata.reference b/tests/queries/0_stateless/02816_check_projection_metadata.reference new file mode 100644 index 00000000000..e69de29bb2d diff 
--git a/tests/queries/0_stateless/02816_check_projection_metadata.sql b/tests/queries/0_stateless/02816_check_projection_metadata.sql new file mode 100644 index 00000000000..e7da043ad41 --- /dev/null +++ b/tests/queries/0_stateless/02816_check_projection_metadata.sql @@ -0,0 +1,3 @@ +create table kek (uuid FixedString(16), id int, ns String, dt DateTime64(6), projection null_pk (select * order by ns, 1, 4)) engine=MergeTree order by (id, dt, uuid); -- {serverError ILLEGAL_COLUMN } +-- this query could segfault or throw LOGICAL_ERROR previously, when we did not check projection PK +-- insert into kek select * from generateRandom(10000); From cb53d762eae54eef9411e2cf7548927f83fe187b Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Fri, 21 Jul 2023 15:23:24 +0200 Subject: [PATCH 1856/1997] Fix one more case --- src/Functions/CountSubstringsImpl.h | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/Functions/CountSubstringsImpl.h b/src/Functions/CountSubstringsImpl.h index 1501e50afcf..8ba9ee99de8 100644 --- a/src/Functions/CountSubstringsImpl.h +++ b/src/Functions/CountSubstringsImpl.h @@ -226,16 +226,19 @@ struct CountSubstringsImpl const char * needle_beg = reinterpret_cast(&needle_data[prev_needle_offset]); size_t needle_size = needle_offsets[i] - prev_needle_offset - 1; - typename Impl::SearcherInSmallHaystack searcher = Impl::createSearcherInSmallHaystack(needle_beg, needle_size); - - const UInt8 * end = reinterpret_cast(haystack.data() + haystack.size()); - const UInt8 * beg = reinterpret_cast(Impl::advancePos(haystack.data(), reinterpret_cast(end), start - 1)); - - const UInt8 * pos; - while ((pos = searcher.search(beg, end)) < end) + if (needle_size > 0) { - ++res[i]; - beg = pos + needle_size; + typename Impl::SearcherInSmallHaystack searcher = Impl::createSearcherInSmallHaystack(needle_beg, needle_size); + + const UInt8 * end = reinterpret_cast(haystack.data() + haystack.size()); + const UInt8 * beg = reinterpret_cast(Impl::advancePos(haystack.data(), reinterpret_cast(end), start - 1)); + + const UInt8 * pos; + while ((pos = searcher.search(beg, end)) < end) + { + ++res[i]; + beg = pos + needle_size; + } } } From 10ec06917506c1a61caadf4c680bd0148520426f Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Fri, 21 Jul 2023 13:29:40 +0000 Subject: [PATCH 1857/1997] Improve exception message text --- src/Common/Config/ConfigProcessor.cpp | 2 +- tests/integration/test_config_decryption/test_wrong_settings.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index 6529e94a41d..73fc5c58b2f 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -232,7 +232,7 @@ void ConfigProcessor::decryptRecursive(Poco::XML::Node * config_root) { const NodeListPtr children = element.childNodes(); if (children->length() != 1) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Encrypted node {} should have only one text node", node->nodeName()); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Encrypted node {} cannot contain nested elements", node->nodeName()); Node * text_node = node->firstChild(); if (text_node->nodeType() != Node::TEXT_NODE) diff --git a/tests/integration/test_config_decryption/test_wrong_settings.py b/tests/integration/test_config_decryption/test_wrong_settings.py index 62610964502..da32a8f0ac8 100644 ---
a/tests/integration/test_config_decryption/test_wrong_settings.py +++ b/tests/integration/test_config_decryption/test_wrong_settings.py @@ -33,4 +33,4 @@ def test_no_encryption_key(): def test_subnodes(): - start_clickhouse("configs/config_subnodes.xml", "should have only one text node") + start_clickhouse("configs/config_subnodes.xml", "cannot contain nested elements") From 1daa26c74130003a4039dcc809b9d3d0a5bcba95 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Fri, 21 Jul 2023 13:31:42 +0000 Subject: [PATCH 1858/1997] Fix black formatting --- .../integration/test_config_decryption/test_wrong_settings.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_config_decryption/test_wrong_settings.py b/tests/integration/test_config_decryption/test_wrong_settings.py index da32a8f0ac8..b148f9a051a 100644 --- a/tests/integration/test_config_decryption/test_wrong_settings.py +++ b/tests/integration/test_config_decryption/test_wrong_settings.py @@ -28,7 +28,8 @@ def test_invalid_chars(): def test_no_encryption_key(): start_clickhouse( - "configs/config_no_encryption_key.xml", "There is no key 0 in config for AES_128_GCM_SIV encryption codec" + "configs/config_no_encryption_key.xml", + "There is no key 0 in config for AES_128_GCM_SIV encryption codec", ) From abd8bfed2b6e6c20b46ffbeb82699c8530523ffe Mon Sep 17 00:00:00 2001 From: Alexander Sapin Date: Fri, 21 Jul 2023 15:44:49 +0200 Subject: [PATCH 1859/1997] Remove comment --- src/Common/MemoryTracker.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp index 52cd9cc8073..52cae0768dc 100644 --- a/src/Common/MemoryTracker.cpp +++ b/src/Common/MemoryTracker.cpp @@ -536,7 +536,6 @@ void MemoryTracker::setOrRaiseProfilerLimit(Int64 value) bool MemoryTracker::isSizeOkForSampling(UInt64 size) const { - //LOG_DEBUG(&Poco::Logger::get("MemoryTracker"), "CHECKING SIZE {} IN BORDERS [{}; {}]", size, min_allocation_size_bytes, max_allocation_size_bytes); /// We can avoid comparing min_allocation_size_bytes with zero, because we cannot have a 0-byte allocation/deallocation return ((max_allocation_size_bytes == 0 || size <= max_allocation_size_bytes) && size >= min_allocation_size_bytes); } From c080e9b450faeaced13c149212456ab006648c3a Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Fri, 21 Jul 2023 21:48:49 +0800 Subject: [PATCH 1860/1997] Fix normal projection with merge table --- .../Optimizations/optimizeUseNormalProjection.cpp | 8 ++++++-- ..._projection_query_plan_optimization_misc.reference | 1 + .../01710_projection_query_plan_optimization_misc.sql | 11 +++++++++++ 3 files changed, 18 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/01710_projection_query_plan_optimization_misc.reference create mode 100644 tests/queries/0_stateless/01710_projection_query_plan_optimization_misc.sql diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp index dd7a5d449bc..2a03a082d89 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp @@ -92,6 +92,10 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes) break; } + /// Dangling query plan node. This might be generated by StorageMerge.
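+ /// If the traversal above reaches the reading step itself, this plan fragment is not one the
+ /// optimization fully owns, and rewriting it to read from a projection would be unsafe;
+ /// keep the ordinary read path instead (the early return below).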
+ if (iter->node->step.get() == reading) + return false; + const auto metadata = reading->getStorageMetadata(); const auto & projections = metadata->projections; @@ -105,8 +109,8 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes) QueryDAG query; { - auto & clild = iter->node->children[iter->next_child - 1]; - if (!query.build(*clild)) + auto & child = iter->node->children[iter->next_child - 1]; + if (!query.build(*child)) return false; if (query.dag) diff --git a/tests/queries/0_stateless/01710_projection_query_plan_optimization_misc.reference b/tests/queries/0_stateless/01710_projection_query_plan_optimization_misc.reference new file mode 100644 index 00000000000..9874d6464ab --- /dev/null +++ b/tests/queries/0_stateless/01710_projection_query_plan_optimization_misc.reference @@ -0,0 +1 @@ +1 2 diff --git a/tests/queries/0_stateless/01710_projection_query_plan_optimization_misc.sql b/tests/queries/0_stateless/01710_projection_query_plan_optimization_misc.sql new file mode 100644 index 00000000000..cb565313380 --- /dev/null +++ b/tests/queries/0_stateless/01710_projection_query_plan_optimization_misc.sql @@ -0,0 +1,11 @@ +drop table if exists t; + +create table t (x Int32, codectest Int32) engine = MergeTree order by x; + +alter table t add projection x (select * order by codectest); + +insert into t values (1, 2); + +select * from merge('', 't'); + +drop table t; From 2a6b96f9e339e602c59968741741e57b1675bf52 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 21 Jul 2023 13:51:40 +0000 Subject: [PATCH 1861/1997] Automatic style fix --- .../test_memory_profiler_min_max_borders/test.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_memory_profiler_min_max_borders/test.py b/tests/integration/test_memory_profiler_min_max_borders/test.py index b768a442591..6ab971fa9c4 100644 --- a/tests/integration/test_memory_profiler_min_max_borders/test.py +++ b/tests/integration/test_memory_profiler_min_max_borders/test.py @@ -23,5 +23,15 @@ def test_trace_boundaries_work(started_cluster): node.query("select randomPrintableASCII(number) from numbers(1000) FORMAT Null") node.query("SYSTEM FLUSH LOGS") - assert node.query("SELECT countDistinct(abs(size)) > 0 FROM system.trace_log where trace_type = 'MemorySample'") == "1\n" - assert node.query("SELECT count() FROM system.trace_log where trace_type = 'MemorySample' and (abs(size) > 8192 or abs(size) < 4096)") == "0\n" + assert ( + node.query( + "SELECT countDistinct(abs(size)) > 0 FROM system.trace_log where trace_type = 'MemorySample'" + ) + == "1\n" + ) + assert ( + node.query( + "SELECT count() FROM system.trace_log where trace_type = 'MemorySample' and (abs(size) > 8192 or abs(size) < 4096)" + ) + == "0\n" + ) From 3acb6005f041051b7c00c48df5035843744a7e24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 21 Jul 2023 17:08:01 +0200 Subject: [PATCH 1862/1997] Reduce the number of syscalls in FileCache::loadMetadata --- src/Interpreters/Cache/FileCache.cpp | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index 91d1c63e832..42cc7b80a66 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -870,13 +870,12 @@ void FileCache::loadMetadata() } size_t total_size = 0; - for (auto key_prefix_it = fs::directory_iterator{metadata.getBaseDirectory()}; - key_prefix_it != fs::directory_iterator();) + for 
(auto key_prefix_it = fs::directory_iterator{metadata.getBaseDirectory()}; key_prefix_it != fs::directory_iterator(); + key_prefix_it++) { const fs::path key_prefix_directory = key_prefix_it->path(); - key_prefix_it++; - if (!fs::is_directory(key_prefix_directory)) + if (!key_prefix_it->is_directory()) { if (key_prefix_directory.filename() != "status") { @@ -887,19 +886,19 @@ void FileCache::loadMetadata() continue; } - if (fs::is_empty(key_prefix_directory)) + fs::directory_iterator key_it{key_prefix_directory}; + if (key_it == fs::directory_iterator{}) { LOG_DEBUG(log, "Removing empty key prefix directory: {}", key_prefix_directory.string()); fs::remove(key_prefix_directory); continue; } - for (fs::directory_iterator key_it{key_prefix_directory}; key_it != fs::directory_iterator();) + for (/* key_it already initialized to verify emptiness */; key_it != fs::directory_iterator(); key_it++) { const fs::path key_directory = key_it->path(); - ++key_it; - if (!fs::is_directory(key_directory)) + if (!key_it->is_directory()) { LOG_DEBUG( log, @@ -908,7 +907,7 @@ void FileCache::loadMetadata() continue; } - if (fs::is_empty(key_directory)) + if (fs::directory_iterator{key_directory} == fs::directory_iterator{}) { LOG_DEBUG(log, "Removing empty key directory: {}", key_directory.string()); fs::remove(key_directory); From 5fb5ba71edbaf664045871b0fc8d6d5d6f5f45e6 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Fri, 21 Jul 2023 15:40:53 +0000 Subject: [PATCH 1863/1997] Throw exception when several text nodes found in YAML for element node --- src/Common/Config/YAMLParser.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Common/Config/YAMLParser.cpp b/src/Common/Config/YAMLParser.cpp index a1de14afc13..72706cb98ba 100644 --- a/src/Common/Config/YAMLParser.cpp +++ b/src/Common/Config/YAMLParser.cpp @@ -112,6 +112,11 @@ namespace { if (key == "#text" && value_node.IsScalar()) { + for (Node * child_node = parent_xml_node.firstChild(); child_node; child_node = child_node->nextSibling()) + if (child_node->nodeType() == Node::TEXT_NODE) + throw Exception(ErrorCodes::CANNOT_PARSE_YAML, + "YAMLParser has encountered node with several text nodes " + "and cannot continue parsing of the file"); std::string value = value_node.as(); Poco::AutoPtr xml_value = xml_document->createTextNode(value); parent_xml_node.appendChild(xml_value); From a2b170a18e7db041eb41e631f693b3ddec8e79a7 Mon Sep 17 00:00:00 2001 From: Alexander Sapin Date: Fri, 21 Jul 2023 17:42:55 +0200 Subject: [PATCH 1864/1997] Avoid exception which I didn't understand --- src/Storages/StorageReplicatedMergeTree.cpp | 33 +++++++++++++-------- 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 841b646a126..bc8dbfa0e1f 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -4905,20 +4905,29 @@ void StorageReplicatedMergeTree::flushAndPrepareForShutdown() if (shutdown_prepared_called.exchange(true)) return; - auto settings_ptr = getSettings(); - /// Cancel fetches, merges and mutations to force the queue_task to finish ASAP. - fetcher.blocker.cancelForever(); - merger_mutator.merges_blocker.cancelForever(); - parts_mover.moves_blocker.cancelForever(); - stopBeingLeader(); + try + { + auto settings_ptr = getSettings(); + /// Cancel fetches, merges and mutations to force the queue_task to finish ASAP. 
+ fetcher.blocker.cancelForever(); + merger_mutator.merges_blocker.cancelForever(); + parts_mover.moves_blocker.cancelForever(); + stopBeingLeader(); - if (attach_thread) - attach_thread->shutdown(); + if (attach_thread) + attach_thread->shutdown(); - restarting_thread.shutdown(/* part_of_full_shutdown */true); - /// Explicetly set the event, because the restarting thread will not set it again - startup_event.set(); - shutdown_deadline.emplace(std::chrono::system_clock::now() + std::chrono::milliseconds(settings_ptr->wait_for_unique_parts_send_before_shutdown_ms.totalMilliseconds())); + restarting_thread.shutdown(/* part_of_full_shutdown */true); + /// Explicetly set the event, because the restarting thread will not set it again + startup_event.set(); + shutdown_deadline.emplace(std::chrono::system_clock::now() + std::chrono::milliseconds(settings_ptr->wait_for_unique_parts_send_before_shutdown_ms.totalMilliseconds())); + } + catch (...) + { + /// Don't wait anything in case of inproper prepare for shutdown + shutdown_deadline.emplace(std::chrono::system_clock::now()); + throw; + } } void StorageReplicatedMergeTree::partialShutdown() From ed97284bfae5b18f8dbc8841e8b296edd45cd286 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 21 Jul 2023 19:28:28 +0200 Subject: [PATCH 1865/1997] Update src/Storages/StorageReplicatedMergeTree.cpp --- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index bc8dbfa0e1f..f191440442d 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -4924,7 +4924,7 @@ void StorageReplicatedMergeTree::flushAndPrepareForShutdown() } catch (...) 
{ - /// Don't wait anything in case of inproper prepare for shutdown + /// Don't wait anything in case of improper prepare for shutdown shutdown_deadline.emplace(std::chrono::system_clock::now()); throw; } From 6c8d5ca0a554ecc4fee32269858797d139f3c02a Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Fri, 21 Jul 2023 21:33:51 +0000 Subject: [PATCH 1866/1997] Fix: remove redundant distinct with views --- src/Interpreters/ActionsDAG.cpp | 18 +++++++++++---- ...x_remove_dedundant_distinct_view.reference | 13 +++++++++++ ...810_fix_remove_dedundant_distinct_view.sql | 22 +++++++++++++++++++ 3 files changed, 49 insertions(+), 4 deletions(-) create mode 100644 tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.reference create mode 100644 tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.sql diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 906875dd314..ce273e78ff3 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -2511,11 +2511,21 @@ FindOriginalNodeForOutputName::FindOriginalNodeForOutputName(const ActionsDAGPtr /// find input node which refers to the output node /// consider only aliases on the path const auto * node = output_node; - while (node && node->type == ActionsDAG::ActionType::ALIAS) + while (node) { - /// alias has only one child - chassert(node->children.size() == 1); - node = node->children.front(); + if (node->type == ActionsDAG::ActionType::ALIAS) + { + node = node->children.front(); + } + /// materiailze can occure when dealing with views, special case + /// TODO: not sure if it should be done here, looks too generic place + else if (node->type == ActionsDAG::ActionType::FUNCTION && node->function_base->getName() == "materialize") + { + chassert(node->children.size() == 1); + node = node->children.front(); + } + else + break; } if (node && node->type == ActionsDAG::ActionType::INPUT) index.emplace(output_node->result_name, node); diff --git a/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.reference b/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.reference new file mode 100644 index 00000000000..01f14f82e94 --- /dev/null +++ b/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.reference @@ -0,0 +1,13 @@ +-- { echoOn } +set query_plan_remove_redundant_distinct=1; +-- DISTINCT has to be removed since the view already has DISTINCT on the same column +SELECT count() +FROM +( + EXPLAIN SELECT DISTINCT x FROM tab_v +) +WHERE explain ILIKE '%distinct%'; +2 +SELECT DISTINCT x FROM tab_v; +2 +1 diff --git a/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.sql b/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.sql new file mode 100644 index 00000000000..99fc24dae8b --- /dev/null +++ b/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.sql @@ -0,0 +1,22 @@ +set allow_experimental_analyzer=1; + +drop table if exists tab_v; +drop table if exists tab; +create table tab (x UInt64, y UInt64) engine MergeTree() order by (x, y); +insert into tab values(1, 1); +insert into tab values(1, 2); +insert into tab values(2, 1); + +create view tab_v as select distinct(x) from tab; + +-- { echoOn } +set query_plan_remove_redundant_distinct=1; +-- DISTINCT has to be removed since the view already has DISTINCT on the same column +SELECT count() +FROM +( + EXPLAIN SELECT DISTINCT x FROM tab_v +) +WHERE explain ILIKE '%distinct%'; + +SELECT DISTINCT x FROM tab_v; From 
c6ffc9f266f1bb8a667a3d5beff9bd47a288ef74 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 22 Jul 2023 01:05:39 +0300 Subject: [PATCH 1867/1997] Update 02815_fix_not_found_constants_col_in_block.sql --- .../0_stateless/02815_fix_not_found_constants_col_in_block.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02815_fix_not_found_constants_col_in_block.sql b/tests/queries/0_stateless/02815_fix_not_found_constants_col_in_block.sql index c56d59c72d6..fa784cf12e3 100644 --- a/tests/queries/0_stateless/02815_fix_not_found_constants_col_in_block.sql +++ b/tests/queries/0_stateless/02815_fix_not_found_constants_col_in_block.sql @@ -3,3 +3,4 @@ CREATE TABLE t0 (vkey UInt32, c0 Float32, primary key(c0)) engine = AggregatingM insert into t0 values (19000, 1); select null as c_2_0, ref_2.c0 as c_2_1, ref_2.vkey as c_2_2 from t0 as ref_2 order by c_2_0 asc, c_2_1 asc, c_2_2 asc; select null as c_2_0, ref_2.c0 as c_2_1, ref_2.vkey as c_2_2 from t0 as ref_2 order by c_2_0 asc, c_2_1 asc; +DROP TABLE t0; From 687cbc57bba42a67b62b1b717e51a5be7e14b733 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Fri, 21 Jul 2023 22:15:02 +0000 Subject: [PATCH 1868/1997] Fix typo --- src/Interpreters/ActionsDAG.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index ce273e78ff3..284c42b658a 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -2517,7 +2517,7 @@ FindOriginalNodeForOutputName::FindOriginalNodeForOutputName(const ActionsDAGPtr { node = node->children.front(); } - /// materiailze can occure when dealing with views, special case + /// materiailze() function can occur when dealing with views /// TODO: not sure if it should be done here, looks too generic place else if (node->type == ActionsDAG::ActionType::FUNCTION && node->function_base->getName() == "materialize") { From 5ec63c782c6bdd62705f26cc9b09e8a640ca9da8 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Sat, 22 Jul 2023 00:15:05 +0200 Subject: [PATCH 1869/1997] Fixed inserting into Buffer engine by not throwing exception from DatabaseCatalog::tryGetTable() when database name is empty --- src/Interpreters/DatabaseCatalog.cpp | 3 ++- ...rentDatabase_for_table_functions.reference | 17 +++++++++++++++++ ...14_currentDatabase_for_table_functions.sql | 19 +++++++++++++++++++ 3 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02814_currentDatabase_for_table_functions.reference create mode 100644 tests/queries/0_stateless/02814_currentDatabase_for_table_functions.sql diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index e0b6348ed3c..f9ed2c0d5ca 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -344,7 +344,8 @@ DatabaseAndTable DatabaseCatalog::getTableImpl( DatabasePtr database; { std::lock_guard lock{databases_mutex}; - auto it = databases.find(table_id.getDatabaseName()); + // hasDatabase() to avod getDatabaseName() throwing exception if database is empty. + auto it = table_id.hasDatabase() ? 
databases.find(table_id.getDatabaseName()) : databases.end(); if (databases.end() == it) { if (exception) diff --git a/tests/queries/0_stateless/02814_currentDatabase_for_table_functions.reference b/tests/queries/0_stateless/02814_currentDatabase_for_table_functions.reference new file mode 100644 index 00000000000..7ff95106d3d --- /dev/null +++ b/tests/queries/0_stateless/02814_currentDatabase_for_table_functions.reference @@ -0,0 +1,17 @@ +-- Based on https://github.com/ClickHouse/ClickHouse/issues/52436 +-- Test that inserts performed via Buffer table engine land into destination table. +-- { echoOn } + +DROP TABLE IF EXISTS null_table; +DROP TABLE IF EXISTS null_table_buffer; +DROP TABLE IF EXISTS null_mv; +DROP VIEW IF EXISTS number_view; +CREATE TABLE null_table (number UInt64) ENGINE = Null; +CREATE VIEW number_view as SELECT * FROM numbers(10) as tb; +CREATE MATERIALIZED VIEW null_mv Engine = Log AS SELECT * FROM null_table LEFT JOIN number_view as tb USING number; +CREATE TABLE null_table_buffer (number UInt64) ENGINE = Buffer(currentDatabase(), null_table, 1, 1, 1, 100, 200, 10000, 20000); +INSERT INTO null_table_buffer VALUES (1); +SELECT sleep(3) FORMAT Null; +-- Insert about should've landed into `null_mv` +SELECT count() FROM null_mv; +1 diff --git a/tests/queries/0_stateless/02814_currentDatabase_for_table_functions.sql b/tests/queries/0_stateless/02814_currentDatabase_for_table_functions.sql new file mode 100644 index 00000000000..74b5cf5f432 --- /dev/null +++ b/tests/queries/0_stateless/02814_currentDatabase_for_table_functions.sql @@ -0,0 +1,19 @@ +-- Based on https://github.com/ClickHouse/ClickHouse/issues/52436 +-- Test that inserts performed via Buffer table engine land into destination table. +-- { echoOn } + +DROP TABLE IF EXISTS null_table; +DROP TABLE IF EXISTS null_table_buffer; +DROP TABLE IF EXISTS null_mv; +DROP VIEW IF EXISTS number_view; + +CREATE TABLE null_table (number UInt64) ENGINE = Null; +CREATE VIEW number_view as SELECT * FROM numbers(10) as tb; +CREATE MATERIALIZED VIEW null_mv Engine = Log AS SELECT * FROM null_table LEFT JOIN number_view as tb USING number; + +CREATE TABLE null_table_buffer (number UInt64) ENGINE = Buffer(currentDatabase(), null_table, 1, 1, 1, 100, 200, 10000, 20000); +INSERT INTO null_table_buffer VALUES (1); +SELECT sleep(3) FORMAT Null; + +-- Insert about should've landed into `null_mv` +SELECT count() FROM null_mv; From ae8f511ac5ffc6191394dd9fbfed9a0b082102e2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 22 Jul 2023 02:27:07 +0200 Subject: [PATCH 1870/1997] Fix a test --- tests/integration/test_zero_copy_fetch/configs/users.xml | 7 +++++++ tests/integration/test_zero_copy_fetch/test.py | 2 ++ 2 files changed, 9 insertions(+) create mode 100644 tests/integration/test_zero_copy_fetch/configs/users.xml diff --git a/tests/integration/test_zero_copy_fetch/configs/users.xml b/tests/integration/test_zero_copy_fetch/configs/users.xml new file mode 100644 index 00000000000..b0990ca3a60 --- /dev/null +++ b/tests/integration/test_zero_copy_fetch/configs/users.xml @@ -0,0 +1,7 @@ + + + + 0 + + + diff --git a/tests/integration/test_zero_copy_fetch/test.py b/tests/integration/test_zero_copy_fetch/test.py index 4f3d42096c3..dc79e5d8723 100644 --- a/tests/integration/test_zero_copy_fetch/test.py +++ b/tests/integration/test_zero_copy_fetch/test.py @@ -19,12 +19,14 @@ def started_cluster(): cluster.add_instance( "node1", main_configs=["configs/storage_conf.xml"], + user_configs=["configs/users.xml"], with_minio=True, 
with_zookeeper=True, ) cluster.add_instance( "node2", main_configs=["configs/storage_conf.xml"], + user_configs=["configs/users.xml"], with_minio=True, with_zookeeper=True, ) From 9a5aed35e24a9aa4d7de71971665449cf344f917 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 22 Jul 2023 02:33:44 +0200 Subject: [PATCH 1871/1997] Add a note about potential caveats for the "session_timezone" setting --- src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index cfcb56729d2..f267fa15276 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -775,7 +775,7 @@ class IColumn; M(Bool, allow_experimental_undrop_table_query, false, "Allow to use undrop query to restore dropped table in a limited time", 0) \ M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \ M(UInt64, extract_kvp_max_pairs_per_row, 1000, "Max number pairs that can be produced by extractKeyValuePairs function. Used to safeguard against consuming too much memory.", 0) \ - M(Timezone, session_timezone, "", "The default timezone for current session or query. The server default timezone if empty.", 0) \ + M(Timezone, session_timezone, "", "This setting can be removed in the future due to potential caveats. It is experimental and is not suitable for production usage. The default timezone for current session or query. The server default timezone if empty.", 0) \ M(Bool, allow_create_index_without_type, false, "Allow CREATE INDEX query without TYPE. Query will be ignored. Made for SQL compatibility tests.", 0)\ // End of COMMON_SETTINGS // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS. From e68234a231bf234d60ccfa262ca5a2374fb4f98a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 22 Jul 2023 04:45:50 +0300 Subject: [PATCH 1872/1997] Revert "Re-add SipHash keyed functions" --- .../sql-reference/functions/hash-functions.md | 8 +- src/Functions/FunctionsHashing.h | 329 +++++------------- src/Functions/FunctionsHashingMisc.cpp | 5 - .../0_stateless/02534_keyed_siphash.reference | 37 -- .../0_stateless/02534_keyed_siphash.sql | 61 +--- .../02552_siphash128_reference.reference | 151 -------- .../02552_siphash128_reference.sql | 253 -------------- 7 files changed, 99 insertions(+), 745 deletions(-) diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index 556fe622c27..06097d92480 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -51,7 +51,7 @@ Calculates the MD5 from a string and returns the resulting set of bytes as Fixed If you do not need MD5 in particular, but you need a decent cryptographic 128-bit hash, use the ‘sipHash128’ function instead. If you want to get the same result as output by the md5sum utility, use lower(hex(MD5(s))). -## sipHash64 {#hash_functions-siphash64} +## sipHash64 (#hash_functions-siphash64) Produces a 64-bit [SipHash](https://en.wikipedia.org/wiki/SipHash) hash value. This is a cryptographic hash function. It works at least three times faster than the MD5 hash function. The function [interprets](/docs/en/sql-reference/functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. It then combines the hashes by the following algorithm: -1. The first and the second hash value are concatenated to an array which is hashed. -2. The previously calculated hash value and the hash of the third input parameter are hashed in a similar way. -3. This calculation is repeated for all remaining hash values of the original input. +1. The first and the second hash value are concatenated to an array which is hashed. +2. The previously calculated hash value and the hash of the third input parameter are hashed in a similar way. +3. This calculation is repeated for all remaining hash values of the original input. **Arguments** diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 82944630b10..279294b367c 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -79,51 +79,28 @@ namespace impl UInt64 key1 = 0; }; - struct SipHashKeyColumns + static SipHashKey parseSipHashKey(const ColumnWithTypeAndName & key) { - ColumnPtr key0; - ColumnPtr key1; - bool is_const; + SipHashKey ret{}; - size_t size() const - { - assert(key0 && key1); - assert(key0->size() == key1->size()); - return key0->size(); - } - SipHashKey getKey(size_t i) const - { - if (is_const) - i = 0; - const auto & key0data = assert_cast(*key0).getData(); - const auto & key1data = assert_cast(*key1).getData(); - return {key0data[i], key1data[i]}; - } - }; - - static SipHashKeyColumns parseSipHashKeyColumns(const ColumnWithTypeAndName & key) - { - const ColumnTuple * tuple = nullptr; - const auto * column = key.column.get(); - bool is_const = false; - if (isColumnConst(*column)) - { - is_const = true; - tuple = checkAndGetColumnConstData(column); - } - else - tuple = checkAndGetColumn(column); + const auto * tuple = checkAndGetColumn(key.column.get()); if (!tuple) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "key must be a tuple"); + if (tuple->tupleSize() != 2) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "wrong tuple size: key must be a tuple of 2 UInt64"); - SipHashKeyColumns ret{tuple->getColumnPtr(0), tuple->getColumnPtr(1), is_const}; - assert(ret.key0); - if (!checkColumn(*ret.key0)) + if (tuple->empty()) + return ret; + + if (const auto * key0col = checkAndGetColumn(&(tuple->getColumn(0)))) + ret.key0 = key0col->get64(0); + else throw Exception(ErrorCodes::NOT_IMPLEMENTED, "first element of the key tuple is not UInt64"); - assert(ret.key1); - if (!checkColumn(*ret.key1)) + + if (const auto * key1col = checkAndGetColumn(&(tuple->getColumn(1)))) + ret.key1 = key1col->get64(0); + else throw Exception(ErrorCodes::NOT_IMPLEMENTED, "second element of the key tuple is not UInt64"); return ret; @@ -352,10 +329,8 @@ struct SipHash64KeyedImpl static constexpr auto name = "sipHash64Keyed"; using ReturnType = UInt64; using Key = impl::SipHashKey; - using KeyColumns = impl::SipHashKeyColumns; - static KeyColumns parseKeyColumns(const ColumnWithTypeAndName & key) { return impl::parseSipHashKeyColumns(key); } - static Key getKey(const KeyColumns & key, size_t i) { return key.getKey(i); } + static Key parseKey(const ColumnWithTypeAndName & key) { return impl::parseSipHashKey(key); } static UInt64 applyKeyed(const Key & key, const char * begin, size_t size) { return sipHash64Keyed(key.key0, key.key1, begin, size); } @@ -396,10 +371,8 @@ struct SipHash128KeyedImpl static constexpr auto name = "sipHash128Keyed"; using ReturnType = UInt128; using Key = impl::SipHashKey; - using KeyColumns = impl::SipHashKeyColumns; - static KeyColumns parseKeyColumns(const ColumnWithTypeAndName
& key) { return impl::parseSipHashKeyColumns(key); } - static Key getKey(const KeyColumns & key, size_t i) { return key.getKey(i); } + static Key parseKey(const ColumnWithTypeAndName & key) { return impl::parseSipHashKey(key); } static UInt128 applyKeyed(const Key & key, const char * begin, size_t size) { return sipHash128Keyed(key.key0, key.key1, begin, size); } @@ -425,43 +398,13 @@ struct SipHash128ReferenceImpl using ReturnType = UInt128; - static UInt128 combineHashes(UInt128 h1, UInt128 h2) { return combineHashesFunc(h1, h2); } + static UInt128 combineHashes(UInt128 h1, UInt128 h2) { return combineHashesFunc(h1, h2); } static UInt128 apply(const char * data, const size_t size) { return sipHash128Reference(data, size); } static constexpr bool use_int_hash_for_pods = false; }; -struct SipHash128ReferenceKeyedImpl -{ - static constexpr auto name = "sipHash128ReferenceKeyed"; - using ReturnType = UInt128; - using Key = impl::SipHashKey; - using KeyColumns = impl::SipHashKeyColumns; - - static KeyColumns parseKeyColumns(const ColumnWithTypeAndName & key) { return impl::parseSipHashKeyColumns(key); } - static Key getKey(const KeyColumns & key, size_t i) { return key.getKey(i); } - - static UInt128 applyKeyed(const Key & key, const char * begin, size_t size) - { - return sipHash128ReferenceKeyed(key.key0, key.key1, begin, size); - } - - static UInt128 combineHashesKeyed(const Key & key, UInt128 h1, UInt128 h2) - { -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - UInt128 tmp; - reverseMemcpy(&tmp, &h1, sizeof(UInt128)); - h1 = tmp; - reverseMemcpy(&tmp, &h2, sizeof(UInt128)); - h2 = tmp; -#endif - UInt128 hashes[] = {h1, h2}; - return applyKeyed(key, reinterpret_cast(hashes), 2 * sizeof(UInt128)); - } - - static constexpr bool use_int_hash_for_pods = false; -}; /** Why we need MurmurHash2? * MurmurHash2 is an outdated hash function, superseded by MurmurHash3 and subsequently by CityHash, xxHash, HighwayHash. 
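The combine step visible in this hunk always follows one pattern: put the two previously computed hash values into a small array and run the hash primitive over its raw bytes (see combineHashesFunc()/combineHashesKeyed() above). A minimal self-contained C++ sketch of that pattern follows; byteHash() is a stand-in FNV-1a primitive and combineTwo() an illustrative helper, not the real API, which uses SipHash internally.

#include <cstdint>
#include <cstdio>
#include <cstring>

/// Stand-in byte hash so the sketch is self-contained; the real code uses SipHash here.
static uint64_t byteHash(const char * data, size_t size)
{
    uint64_t h = 1469598103934665603ULL; /// FNV-1a offset basis
    for (size_t i = 0; i < size; ++i)
        h = (h ^ static_cast<unsigned char>(data[i])) * 1099511628211ULL;
    return h;
}

/// Fold two already-computed hashes into one by hashing their concatenation,
/// mirroring the combineHashesFunc()/combineHashesKeyed() pattern above.
static uint64_t combineTwo(uint64_t h1, uint64_t h2)
{
    uint64_t hashes[2] = {h1, h2};
    return byteHash(reinterpret_cast<const char *>(hashes), sizeof(hashes));
}

int main()
{
    /// Multi-argument hashing: hash the first argument, then fold in each further one.
    uint64_t acc = byteHash("foo", strlen("foo"));
    acc = combineTwo(acc, byteHash("bar", strlen("bar")));
    printf("%llu\n", static_cast<unsigned long long>(acc));
    return 0;
}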
@@ -1080,7 +1023,7 @@ private: DECLARE_MULTITARGET_CODE( -template +template class FunctionAnyHash : public IFunction { public: @@ -1090,12 +1033,9 @@ private: using ToType = typename Impl::ReturnType; template - void executeIntType(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector::Container & vec_to) const + void executeIntType(const KeyType & key, const IColumn * column, typename ColumnVector::Container & vec_to) const { using ColVecType = ColumnVectorOrDecimal; - KeyType key{}; - if constexpr (Keyed) - key = Impl::getKey(key_cols, 0); if (const ColVecType * col_from = checkAndGetColumn(column)) { @@ -1104,9 +1044,6 @@ private: for (size_t i = 0; i < size; ++i) { ToType hash; - if constexpr (Keyed) - if (!key_cols.is_const && i != 0) - key = Impl::getKey(key_cols, i); if constexpr (Impl::use_int_hash_for_pods) { @@ -1140,14 +1077,6 @@ private: } else if (auto col_from_const = checkAndGetColumnConst(column)) { - if constexpr (Keyed) - { - if (!key_cols.is_const) - { - ColumnPtr full_column = col_from_const->convertToFullColumn(); - return executeIntType(key_cols, full_column.get(), vec_to); - } - } auto value = col_from_const->template getValue(); ToType hash; @@ -1178,15 +1107,8 @@ private: if constexpr (first) vec_to.assign(size, hash); else - { for (size_t i = 0; i < size; ++i) - { - if constexpr (Keyed) - if (!key_cols.is_const && i != 0) - key = Impl::getKey(key_cols, i); vec_to[i] = combineHashes(key, vec_to[i], hash); - } - } } else throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", @@ -1194,12 +1116,9 @@ private: } template - void executeBigIntType(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector::Container & vec_to) const + void executeBigIntType(const KeyType & key, const IColumn * column, typename ColumnVector::Container & vec_to) const { using ColVecType = ColumnVectorOrDecimal; - KeyType key{}; - if constexpr (Keyed) - key = Impl::getKey(key_cols, 0); if (const ColVecType * col_from = checkAndGetColumn(column)) { @@ -1208,9 +1127,6 @@ private: for (size_t i = 0; i < size; ++i) { ToType hash; - if constexpr (Keyed) - if (!key_cols.is_const && i != 0) - key = Impl::getKey(key_cols, i); if constexpr (std::endian::native == std::endian::little) hash = apply(key, reinterpret_cast(&vec_from[i]), sizeof(vec_from[i])); else @@ -1227,14 +1143,6 @@ private: } else if (auto col_from_const = checkAndGetColumnConst(column)) { - if constexpr (Keyed) - { - if (!key_cols.is_const) - { - ColumnPtr full_column = col_from_const->convertToFullColumn(); - return executeBigIntType(key_cols, full_column.get(), vec_to); - } - } auto value = col_from_const->template getValue(); ToType hash; @@ -1250,15 +1158,8 @@ private: if constexpr (first) vec_to.assign(size, hash); else - { for (size_t i = 0; i < size; ++i) - { - if constexpr (Keyed) - if (!key_cols.is_const && i != 0) - key = Impl::getKey(key_cols, i); vec_to[i] = combineHashes(key, vec_to[i], hash); - } - } } else throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", @@ -1266,16 +1167,10 @@ private: } template - void executeGeneric(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector::Container & vec_to) const + void executeGeneric(const KeyType & key, const IColumn * column, typename ColumnVector::Container & vec_to) const { - KeyType key{}; - if constexpr (Keyed) - key = Impl::getKey(key_cols, 0); for (size_t i = 0, size = column->size(); i < size; ++i) { - if constexpr (Keyed) - 
if (!key_cols.is_const && i != 0) - key = Impl::getKey(key_cols, i); StringRef bytes = column->getDataAt(i); const ToType hash = apply(key, bytes.data, bytes.size); if constexpr (first) @@ -1286,11 +1181,8 @@ private: } template - void executeString(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector::Container & vec_to) const + void executeString(const KeyType & key, const IColumn * column, typename ColumnVector::Container & vec_to) const { - KeyType key{}; - if constexpr (Keyed) - key = Impl::getKey(key_cols, 0); if (const ColumnString * col_from = checkAndGetColumn(column)) { const typename ColumnString::Chars & data = col_from->getChars(); @@ -1300,9 +1192,6 @@ private: ColumnString::Offset current_offset = 0; for (size_t i = 0; i < size; ++i) { - if constexpr (Keyed) - if (!key_cols.is_const && i != 0) - key = Impl::getKey(key_cols, i); const ToType hash = apply(key, reinterpret_cast(&data[current_offset]), offsets[i] - current_offset - 1); @@ -1323,9 +1212,6 @@ private: for (size_t i = 0; i < size; ++i) { - if constexpr (Keyed) - if (!key_cols.is_const && i != 0) - key = Impl::getKey(key_cols, i); const ToType hash = apply(key, reinterpret_cast(&data[i * n]), n); if constexpr (first) vec_to[i] = hash; @@ -1335,14 +1221,6 @@ private: } else if (const ColumnConst * col_from_const = checkAndGetColumnConstStringOrFixedString(column)) { - if constexpr (Keyed) - { - if (!key_cols.is_const) - { - ColumnPtr full_column = col_from_const->convertToFullColumn(); - return executeString(key_cols, full_column.get(), vec_to); - } - } String value = col_from_const->getValue(); const ToType hash = apply(key, value.data(), value.size()); const size_t size = vec_to.size(); @@ -1350,15 +1228,8 @@ private: if constexpr (first) vec_to.assign(size, hash); else - { for (size_t i = 0; i < size; ++i) - { - if constexpr (Keyed) - if (!key_cols.is_const && i != 0) - key = Impl::getKey(key_cols, i); vec_to[i] = combineHashes(key, vec_to[i], hash); - } - } } else throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", @@ -1366,7 +1237,7 @@ private: } template - void executeArray(const KeyColumnsType & key_cols, const IDataType * type, const IColumn * column, typename ColumnVector::Container & vec_to) const + void executeArray(const KeyType & key, const IDataType * type, const IColumn * column, typename ColumnVector::Container & vec_to) const { const IDataType * nested_type = typeid_cast(*type).getNestedType().get(); @@ -1378,19 +1249,13 @@ private: typename ColumnVector::Container vec_temp(nested_size); bool nested_is_first = true; - executeForArgument(key_cols, nested_type, nested_column, vec_temp, nested_is_first); + executeForArgument(key, nested_type, nested_column, vec_temp, nested_is_first); const size_t size = offsets.size(); ColumnArray::Offset current_offset = 0; - KeyType key{}; - if constexpr (Keyed) - key = Impl::getKey(key_cols, 0); for (size_t i = 0; i < size; ++i) { - if constexpr (Keyed) - if (!key_cols.is_const && i != 0) - key = Impl::getKey(key_cols, i); ColumnArray::Offset next_offset = offsets[i]; ToType hash; @@ -1414,7 +1279,7 @@ private: { /// NOTE: here, of course, you can do without the materialization of the column. 
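/// convertToFullColumn() expands the single constant value into a full column with one copy per
/// row; that keeps the code path uniform, at the price of the extra materialization the NOTE
/// above alludes to.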
ColumnPtr full_column = col_from_const->convertToFullColumn(); - executeArray(key_cols, type, full_column.get(), vec_to); + executeArray(key, type, full_column.get(), vec_to); } else throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", @@ -1422,7 +1287,7 @@ private: } template - void executeAny(const KeyColumnsType & key_cols, const IDataType * from_type, const IColumn * icolumn, typename ColumnVector::Container & vec_to) const + void executeAny(const KeyType & key, const IDataType * from_type, const IColumn * icolumn, typename ColumnVector::Container & vec_to) const { WhichDataType which(from_type); @@ -1430,45 +1295,40 @@ private: throw Exception(ErrorCodes::LOGICAL_ERROR, "Argument column '{}' size {} doesn't match result column size {} of function {}", icolumn->getName(), icolumn->size(), vec_to.size(), getName()); - if constexpr (Keyed) - if ((!key_cols.is_const && key_cols.size() != vec_to.size()) - || (key_cols.is_const && key_cols.size() != 1)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Key column size {} doesn't match result column size {} of function {}", key_cols.size(), vec_to.size(), getName()); - - if (which.isUInt8()) executeIntType(key_cols, icolumn, vec_to); - else if (which.isUInt16()) executeIntType(key_cols, icolumn, vec_to); - else if (which.isUInt32()) executeIntType(key_cols, icolumn, vec_to); - else if (which.isUInt64()) executeIntType(key_cols, icolumn, vec_to); - else if (which.isUInt128()) executeBigIntType(key_cols, icolumn, vec_to); - else if (which.isUInt256()) executeBigIntType(key_cols, icolumn, vec_to); - else if (which.isInt8()) executeIntType(key_cols, icolumn, vec_to); - else if (which.isInt16()) executeIntType(key_cols, icolumn, vec_to); - else if (which.isInt32()) executeIntType(key_cols, icolumn, vec_to); - else if (which.isInt64()) executeIntType(key_cols, icolumn, vec_to); - else if (which.isInt128()) executeBigIntType(key_cols, icolumn, vec_to); - else if (which.isInt256()) executeBigIntType(key_cols, icolumn, vec_to); - else if (which.isUUID()) executeBigIntType(key_cols, icolumn, vec_to); - else if (which.isIPv4()) executeIntType(key_cols, icolumn, vec_to); - else if (which.isIPv6()) executeBigIntType(key_cols, icolumn, vec_to); - else if (which.isEnum8()) executeIntType(key_cols, icolumn, vec_to); - else if (which.isEnum16()) executeIntType(key_cols, icolumn, vec_to); - else if (which.isDate()) executeIntType(key_cols, icolumn, vec_to); - else if (which.isDate32()) executeIntType(key_cols, icolumn, vec_to); - else if (which.isDateTime()) executeIntType(key_cols, icolumn, vec_to); + if (which.isUInt8()) executeIntType(key, icolumn, vec_to); + else if (which.isUInt16()) executeIntType(key, icolumn, vec_to); + else if (which.isUInt32()) executeIntType(key, icolumn, vec_to); + else if (which.isUInt64()) executeIntType(key, icolumn, vec_to); + else if (which.isUInt128()) executeBigIntType(key, icolumn, vec_to); + else if (which.isUInt256()) executeBigIntType(key, icolumn, vec_to); + else if (which.isInt8()) executeIntType(key, icolumn, vec_to); + else if (which.isInt16()) executeIntType(key, icolumn, vec_to); + else if (which.isInt32()) executeIntType(key, icolumn, vec_to); + else if (which.isInt64()) executeIntType(key, icolumn, vec_to); + else if (which.isInt128()) executeBigIntType(key, icolumn, vec_to); + else if (which.isInt256()) executeBigIntType(key, icolumn, vec_to); + else if (which.isUUID()) executeBigIntType(key, icolumn, vec_to); + else if (which.isIPv4()) executeIntType(key, icolumn, 
vec_to); + else if (which.isIPv6()) executeBigIntType(key, icolumn, vec_to); + else if (which.isEnum8()) executeIntType(key, icolumn, vec_to); + else if (which.isEnum16()) executeIntType(key, icolumn, vec_to); + else if (which.isDate()) executeIntType(key, icolumn, vec_to); + else if (which.isDate32()) executeIntType(key, icolumn, vec_to); + else if (which.isDateTime()) executeIntType(key, icolumn, vec_to); /// TODO: executeIntType() for Decimal32/64 leads to incompatible result - else if (which.isDecimal32()) executeBigIntType(key_cols, icolumn, vec_to); - else if (which.isDecimal64()) executeBigIntType(key_cols, icolumn, vec_to); - else if (which.isDecimal128()) executeBigIntType(key_cols, icolumn, vec_to); - else if (which.isDecimal256()) executeBigIntType(key_cols, icolumn, vec_to); - else if (which.isFloat32()) executeIntType(key_cols, icolumn, vec_to); - else if (which.isFloat64()) executeIntType(key_cols, icolumn, vec_to); - else if (which.isString()) executeString(key_cols, icolumn, vec_to); - else if (which.isFixedString()) executeString(key_cols, icolumn, vec_to); - else if (which.isArray()) executeArray(key_cols, from_type, icolumn, vec_to); - else executeGeneric(key_cols, icolumn, vec_to); + else if (which.isDecimal32()) executeBigIntType(key, icolumn, vec_to); + else if (which.isDecimal64()) executeBigIntType(key, icolumn, vec_to); + else if (which.isDecimal128()) executeBigIntType(key, icolumn, vec_to); + else if (which.isDecimal256()) executeBigIntType(key, icolumn, vec_to); + else if (which.isFloat32()) executeIntType(key, icolumn, vec_to); + else if (which.isFloat64()) executeIntType(key, icolumn, vec_to); + else if (which.isString()) executeString(key, icolumn, vec_to); + else if (which.isFixedString()) executeString(key, icolumn, vec_to); + else if (which.isArray()) executeArray(key, from_type, icolumn, vec_to); + else executeGeneric(key, icolumn, vec_to); } - void executeForArgument(const KeyColumnsType & key_cols, const IDataType * type, const IColumn * column, typename ColumnVector::Container & vec_to, bool & is_first) const + void executeForArgument(const KeyType & key, const IDataType * type, const IColumn * column, typename ColumnVector::Container & vec_to, bool & is_first) const { /// Flattening of tuples. 
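/// A Tuple argument is not hashed as one opaque value: each element is fed back through
/// executeForArgument() in order, so hashing a tuple equals hashing its flattened elements.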
if (const ColumnTuple * tuple = typeid_cast(column)) @@ -1477,7 +1337,7 @@ private: const DataTypes & tuple_types = typeid_cast(*type).getElements(); size_t tuple_size = tuple_columns.size(); for (size_t i = 0; i < tuple_size; ++i) - executeForArgument(key_cols, tuple_types[i].get(), tuple_columns[i].get(), vec_to, is_first); + executeForArgument(key, tuple_types[i].get(), tuple_columns[i].get(), vec_to, is_first); } else if (const ColumnTuple * tuple_const = checkAndGetColumnConstData(column)) { @@ -1487,24 +1347,24 @@ private: for (size_t i = 0; i < tuple_size; ++i) { auto tmp = ColumnConst::create(tuple_columns[i], column->size()); - executeForArgument(key_cols, tuple_types[i].get(), tmp.get(), vec_to, is_first); + executeForArgument(key, tuple_types[i].get(), tmp.get(), vec_to, is_first); } } else if (const auto * map = checkAndGetColumn(column)) { const auto & type_map = assert_cast(*type); - executeForArgument(key_cols, type_map.getNestedType().get(), map->getNestedColumnPtr().get(), vec_to, is_first); + executeForArgument(key, type_map.getNestedType().get(), map->getNestedColumnPtr().get(), vec_to, is_first); } else if (const auto * const_map = checkAndGetColumnConst(column)) { - executeForArgument(key_cols, type, const_map->convertToFullColumnIfConst().get(), vec_to, is_first); + executeForArgument(key, type, const_map->convertToFullColumnIfConst().get(), vec_to, is_first); } else { if (is_first) - executeAny(key_cols, type, column, vec_to); + executeAny(key, type, column, vec_to); else - executeAny(key_cols, type, column, vec_to); + executeAny(key, type, column, vec_to); } is_first = false; @@ -1535,33 +1395,30 @@ public: { auto col_to = ColumnVector::create(input_rows_count); - if (input_rows_count != 0) + typename ColumnVector::Container & vec_to = col_to->getData(); + + /// If using a "keyed" algorithm, the first argument is the key and + /// the data starts from the second argument. + /// Otherwise there is no key and all arguments are interpreted as data. + constexpr size_t first_data_argument = Keyed; + + if (arguments.size() <= first_data_argument) { - typename ColumnVector::Container & vec_to = col_to->getData(); + /// Return a fixed random-looking magic number when input is empty + vec_to.assign(input_rows_count, static_cast(0xe28dbde7fe22e41c)); + } - /// If using a "keyed" algorithm, the first argument is the key and - /// the data starts from the second argument. - /// Otherwise there is no key and all arguments are interpreted as data. - constexpr size_t first_data_argument = Keyed; + KeyType key{}; + if constexpr (Keyed) + if (!arguments.empty()) + key = Impl::parseKey(arguments[0]); - if (arguments.size() <= first_data_argument) - { - /// Return a fixed random-looking magic number when input is empty - vec_to.assign(input_rows_count, static_cast(0xe28dbde7fe22e41c)); - } - - KeyColumnsType key_cols{}; - if constexpr (Keyed) - if (!arguments.empty()) - key_cols = Impl::parseKeyColumns(arguments[0]); - - /// The function supports arbitrary number of arguments of arbitrary types. - bool is_first_argument = true; - for (size_t i = first_data_argument; i < arguments.size(); ++i) - { - const auto & col = arguments[i]; - executeForArgument(key_cols, col.type.get(), col.column.get(), vec_to, is_first_argument); - } + /// The function supports arbitrary number of arguments of arbitrary types. 
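(Editorial note, not part of the patch: in the rewritten executeImpl of this hunk, a Keyed instantiation consumes arguments[0] as the key exactly once, data starts at the next argument, and an empty input yields the fixed constant 0xe28dbde7fe22e41c. The following is a rough standalone model of that control flow; `Key`, `keyedHashRow`, and the mixer are toy stand-ins for the real Impl, whose key is a Tuple(UInt64, UInt64).)

#include <cstdint>
#include <iostream>
#include <vector>

// Hypothetical stand-in for the implementation-defined KeyType.
struct Key { uint64_t k0 = 0; uint64_t k1 = 0; };

// Toy keyed hash over one row of integer arguments (NOT real SipHash).
uint64_t keyedHashRow(bool keyed, const std::vector<uint64_t> & args)
{
    // When keyed, argument 0 is the key and data starts at argument 1.
    const size_t first_data_argument = keyed ? 1 : 0;

    // No data arguments: return a fixed random-looking constant,
    // as the rewritten executeImpl does.
    if (args.size() <= first_data_argument)
        return 0xe28dbde7fe22e41cULL;

    Key key;
    if (keyed)
        key = Key{args[0], 0}; // toy parse; the key is parsed once, not per row

    uint64_t h = key.k0 ^ key.k1;
    for (size_t i = first_data_argument; i < args.size(); ++i)
        h ^= args[i] + 0x9e3779b97f4a7c15ULL + (h << 6) + (h >> 2);
    return h;
}

int main()
{
    std::cout << keyedHashRow(true, {7}) << '\n';        // key only -> magic constant
    std::cout << keyedHashRow(true, {7, 1, 2}) << '\n';  // key 7, data 1 and 2
    std::cout << keyedHashRow(false, {1, 2}) << '\n';    // unkeyed, data 1 and 2
}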
+ bool is_first_argument = true; + for (size_t i = first_data_argument; i < arguments.size(); ++i) + { + const auto & col = arguments[i]; + executeForArgument(key, col.type.get(), col.column.get(), vec_to, is_first_argument); } if constexpr (std::is_same_v) /// backward-compatible @@ -1593,19 +1450,17 @@ public: ) // DECLARE_MULTITARGET_CODE -template -class FunctionAnyHash : public TargetSpecific::Default::FunctionAnyHash +template +class FunctionAnyHash : public TargetSpecific::Default::FunctionAnyHash { public: explicit FunctionAnyHash(ContextPtr context) : selector(context) { - selector - .registerImplementation>(); + selector.registerImplementation>(); #if USE_MULTITARGET_CODE - selector.registerImplementation>(); - selector - .registerImplementation>(); + selector.registerImplementation>(); + selector.registerImplementation>(); #endif } @@ -1841,7 +1696,7 @@ struct NameIntHash32 { static constexpr auto name = "intHash32"; }; struct NameIntHash64 { static constexpr auto name = "intHash64"; }; using FunctionSipHash64 = FunctionAnyHash; -using FunctionSipHash64Keyed = FunctionAnyHash; +using FunctionSipHash64Keyed = FunctionAnyHash; using FunctionIntHash32 = FunctionIntHash; using FunctionIntHash64 = FunctionIntHash; #if USE_SSL @@ -1855,10 +1710,8 @@ using FunctionSHA384 = FunctionStringHashFixedString; using FunctionSHA512 = FunctionStringHashFixedString; #endif using FunctionSipHash128 = FunctionAnyHash; -using FunctionSipHash128Keyed = FunctionAnyHash; +using FunctionSipHash128Keyed = FunctionAnyHash; using FunctionSipHash128Reference = FunctionAnyHash; -using FunctionSipHash128ReferenceKeyed - = FunctionAnyHash; using FunctionCityHash64 = FunctionAnyHash; using FunctionFarmFingerprint64 = FunctionAnyHash; using FunctionFarmHash64 = FunctionAnyHash; diff --git a/src/Functions/FunctionsHashingMisc.cpp b/src/Functions/FunctionsHashingMisc.cpp index f56568b2508..56c3c1ed00c 100644 --- a/src/Functions/FunctionsHashingMisc.cpp +++ b/src/Functions/FunctionsHashingMisc.cpp @@ -20,11 +20,6 @@ REGISTER_FUNCTION(Hashing) .examples{{"hash", "SELECT hex(sipHash128Reference('foo', '\\x01', 3))", ""}}, .categories{"Hash"} }); - factory.registerFunction(FunctionDocumentation{ - .description = "Same as [sipHash128Reference](#hash_functions-siphash128reference) but additionally takes an explicit key argument " - "instead of using a fixed key.", - .examples{{"hash", "SELECT hex(sipHash128ReferenceKeyed((506097522914230528, 1084818905618843912),'foo', '\\x01', 3));", ""}}, - .categories{"Hash"}}); factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); diff --git a/tests/queries/0_stateless/02534_keyed_siphash.reference b/tests/queries/0_stateless/02534_keyed_siphash.reference index a9f724365a8..ccc514e7ea2 100644 --- a/tests/queries/0_stateless/02534_keyed_siphash.reference +++ b/tests/queries/0_stateless/02534_keyed_siphash.reference @@ -197,40 +197,3 @@ E28DBDE7FE22E41C Check bug with hashing of const integer values 11862823756610506724 11862823756610506724 -86AE90BB6A238D3F6221457630142C9B -86AE90BB6A238D3F6221457630142C9B -Check memsan bug -18096612095653370192 -20AF99D3A87829E0 -12489502208762728797 -Check const columns -15080046610211022027 -15080046610211022027 -15080046610211022027 -15080046610211022027 -2E779C73D13981AA1AE19AFF9617EA49 -2E779C73D13981AA1AE19AFF9617EA49 -2E779C73D13981AA1AE19AFF9617EA49 -2E779C73D13981AA1AE19AFF9617EA49 -Check multiple keys as tuple from a table -11862823756610506724 -9357996107237883963 -86AE90BB6A238D3F6221457630142C9B 
-F6D93D8FEA6D7DECCDD95A7A0A2AA36D -Check multiple keys as separate ints from a table -11862823756610506724 -9357996107237883963 -86AE90BB6A238D3F6221457630142C9B -F6D93D8FEA6D7DECCDD95A7A0A2AA36D -Check constant key and data from a table -11862823756610506724 -11862823756610506724 -86AE90BB6A238D3F6221457630142C9B -86AE90BB6A238D3F6221457630142C9B -Check multiple keys as separate ints from a table with constant data -11862823756610506724 -9357996107237883963 -86AE90BB6A238D3F6221457630142C9B -F6D93D8FEA6D7DECCDD95A7A0A2AA36D -Check asan bug -0 diff --git a/tests/queries/0_stateless/02534_keyed_siphash.sql b/tests/queries/0_stateless/02534_keyed_siphash.sql index 4f3ae7d62bd..900b99f548a 100644 --- a/tests/queries/0_stateless/02534_keyed_siphash.sql +++ b/tests/queries/0_stateless/02534_keyed_siphash.sql @@ -263,10 +263,10 @@ select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62)); select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63)); -select sipHash64Keyed((0, 0), '1'); -- { serverError NOT_IMPLEMENTED } -select sipHash128Keyed((0, 0), '1'); -- { serverError NOT_IMPLEMENTED } -select sipHash64Keyed(toUInt64(0), '1'); -- { serverError NOT_IMPLEMENTED } -select sipHash128Keyed(toUInt64(0), '1'); -- { serverError NOT_IMPLEMENTED } +select sipHash64Keyed((0, 0), '1'); -- { serverError 48 } +select sipHash128Keyed((0, 0), '1'); -- { serverError 48 } +select sipHash64Keyed(toUInt64(0), '1'); -- { serverError 48 } +select sipHash128Keyed(toUInt64(0), '1'); -- { serverError 48 } select hex(sipHash64()); SELECT hex(sipHash128()) = hex(reverse(unhex('1CE422FEE7BD8DE20000000000000000'))) or hex(sipHash128()) = '1CE422FEE7BD8DE20000000000000000'; @@ -280,57 +280,4 @@ INSERT INTO tab VALUES ((2, 2), 4); -- these two statements must produce the same result SELECT sipHash64Keyed(key, val) FROM tab; SELECT sipHash64Keyed(key, 4::UInt64) FROM tab; -SELECT hex(sipHash128Keyed(key, val)) FROM tab; -SELECT hex(sipHash128Keyed(key, 4::UInt64)) FROM tab; DROP TABLE tab; - -SELECT 'Check memsan bug'; -SELECT sipHash64Keyed((2::UInt64, toUInt64(2)), 4) GROUP BY toUInt64(2); -SELECT hex(sipHash64Keyed((toUInt64(9223372036854775806), toUInt64(-9223372036854775808)), char(2147483646, -2147483648, 1, 3, 4, 7, 2147483647))) GROUP BY toUInt64(257), (toUInt64(9223372036854775806), toUInt64(2147483646)); -SELECT sipHash64Keyed((toUInt64(9223372036854775806), 9223372036854775808::UInt64), char(2)) GROUP BY toUInt64(9223372036854775806); - 
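(Editorial note, not part of the patch: the removed checks around this point exercised keyed hashing with keys read from table columns, while the context lines kept at the top of this hunk still assert that a (0, 0) key reproduces the unkeyed sipHash128. Below is a toy model of that zero-key equivalence; the FNV-style mixer is an illustrative stand-in, not the SipHash rounds.)

#include <cstdint>
#include <iostream>
#include <string>

// Toy keyed hash (an FNV-style mixer, NOT the actual SipHash rounds).
uint64_t keyedHash(uint64_t k0, uint64_t k1, const std::string & data)
{
    uint64_t h = 0xcbf29ce484222325ULL ^ k0 ^ (k1 * 0x9e3779b97f4a7c15ULL);
    for (unsigned char c : data)
        h = (h ^ c) * 0x100000001b3ULL;
    return h;
}

// Models the tested invariant: the unkeyed function behaves like the
// keyed one called with a (0, 0) key.
uint64_t unkeyedHash(const std::string & data)
{
    return keyedHash(0, 0, data);
}

int main()
{
    const std::string data = "abc";
    std::cout << (keyedHash(0, 0, data) == unkeyedHash(data)) << '\n'; // 1
    std::cout << (keyedHash(1, 2, data) != unkeyedHash(data)) << '\n'; // 1 for this mixer
}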
-SELECT 'Check const columns'; -DROP TABLE IF EXISTS sipHashKeyed_test; -CREATE TABLE sipHashKeyed_test ENGINE = Memory() AS SELECT 1 a, 'test' b; -SELECT sipHash64Keyed((toUInt64(0), toUInt64(0)), 1, 'test'); -SELECT sipHash64(tuple(*)) FROM sipHashKeyed_test; -SELECT sipHash64Keyed((toUInt64(0), toUInt64(0)), tuple(*)) FROM sipHashKeyed_test; -SELECT sipHash64Keyed((toUInt64(0), toUInt64(0)), a, b) FROM sipHashKeyed_test; -SELECT hex(sipHash128Keyed((toUInt64(0), toUInt64(0)), 1, 'test')); -SELECT hex(sipHash128(tuple(*))) FROM sipHashKeyed_test; -SELECT hex(sipHash128Keyed((toUInt64(0), toUInt64(0)), tuple(*))) FROM sipHashKeyed_test; -SELECT hex(sipHash128Keyed((toUInt64(0), toUInt64(0)), a, b)) FROM sipHashKeyed_test; -DROP TABLE sipHashKeyed_test; - -SELECT 'Check multiple keys as tuple from a table'; -DROP TABLE IF EXISTS sipHashKeyed_keys; -CREATE TABLE sipHashKeyed_keys (key Tuple(UInt64, UInt64), val UInt64) ENGINE=Memory; -INSERT INTO sipHashKeyed_keys VALUES ((2, 2), 4); -INSERT INTO sipHashKeyed_keys VALUES ((4, 4), 4); -SELECT sipHash64Keyed(key, val) FROM sipHashKeyed_keys ORDER by key; -SELECT hex(sipHash128Keyed(key, val)) FROM sipHashKeyed_keys ORDER by key; -DROP TABLE sipHashKeyed_keys; - -SELECT 'Check multiple keys as separate ints from a table'; -DROP TABLE IF EXISTS sipHashKeyed_keys; -CREATE TABLE sipHashKeyed_keys (key0 UInt64, key1 UInt64, val UInt64) ENGINE=Memory; -INSERT INTO sipHashKeyed_keys VALUES (2, 2, 4); -INSERT INTO sipHashKeyed_keys VALUES (4, 4, 4); -SELECT sipHash64Keyed((key0, key1), val) FROM sipHashKeyed_keys ORDER by key0; -SELECT hex(sipHash128Keyed((key0, key1), val)) FROM sipHashKeyed_keys ORDER by key0; -SELECT 'Check constant key and data from a table'; -SELECT sipHash64Keyed((2::UInt64, 2::UInt64), val) FROM sipHashKeyed_keys ORDER by val; -SELECT hex(sipHash128Keyed((2::UInt64, 2::UInt64), val)) FROM sipHashKeyed_keys ORDER by val; -DROP TABLE sipHashKeyed_keys; - -SELECT 'Check multiple keys as separate ints from a table with constant data'; -DROP TABLE IF EXISTS sipHashKeyed_keys; -CREATE TABLE sipHashKeyed_keys (key0 UInt64, key1 UInt64) ENGINE=Memory; -INSERT INTO sipHashKeyed_keys VALUES (2, 2); -INSERT INTO sipHashKeyed_keys VALUES (4, 4); -SELECT sipHash64Keyed((key0, key1), 4::UInt64) FROM sipHashKeyed_keys ORDER by key0; -SELECT hex(sipHash128Keyed((key0, key1), 4::UInt64)) FROM sipHashKeyed_keys ORDER by key0; -DROP TABLE sipHashKeyed_keys; - -SELECT 'Check asan bug'; -SELECT sipHash128((toUInt64(9223372036854775806), 1)) = sipHash128(1) GROUP BY sipHash128(1::UInt8), toUInt64(9223372036854775806); diff --git a/tests/queries/0_stateless/02552_siphash128_reference.reference b/tests/queries/0_stateless/02552_siphash128_reference.reference index ece9f6a4615..d00491fd7e5 100644 --- a/tests/queries/0_stateless/02552_siphash128_reference.reference +++ b/tests/queries/0_stateless/02552_siphash128_reference.reference @@ -1,152 +1 @@ -A3817F04BA25A8E66DF67214C7550293 -DA87C1D86B99AF44347659119B22FC45 -8177228DA4A45DC7FCA38BDEF60AFFE4 -9C70B60C5267A94E5F33B6B02985ED51 -F88164C12D9C8FAF7D0F6E7C7BCD5579 -1368875980776F8854527A07690E9627 -14EECA338B208613485EA0308FD7A15E -A1F1EBBED8DBC153C0B84AA61FF08239 -3B62A9BA6258F5610F83E264F31497B4 -264499060AD9BAABC47F8B02BB6D71ED -00110DC378146956C95447D3F3D0FBBA -0151C568386B6677A2B4DC6F81E5DC18 -D626B266905EF35882634DF68532C125 -9869E247E9C08B10D029934FC4B952F7 -31FCEFAC66D7DE9C7EC7485FE4494902 -5493E99933B0A8117E08EC0F97CFC3D9 -6EE2A4CA67B054BBFD3315BF85230577 -473D06E8738DB89854C066C47AE47740 
-A426E5E423BF4885294DA481FEAEF723 -78017731CF65FAB074D5208952512EB1 -9E25FC833F2290733E9344A5E83839EB -568E495ABE525A218A2214CD3E071D12 -4A29B54552D16B9A469C10528EFF0AAE -C9D184DDD5A9F5E0CF8CE29A9ABF691C -2DB479AE78BD50D8882A8A178A6132AD -8ECE5F042D5E447B5051B9EACB8D8F6F -9C0B53B4B3C307E87EAEE08678141F66 -ABF248AF69A6EAE4BFD3EB2F129EEB94 -0664DA1668574B88B935F3027358AEF4 -AA4B9DC4BF337DE90CD4FD3C467C6AB7 -EA5C7F471FAF6BDE2B1AD7D4686D2287 -2939B0183223FAFC1723DE4F52C43D35 -7C3956CA5EEAFC3E363E9D556546EB68 -77C6077146F01C32B6B69D5F4EA9FFCF -37A6986CB8847EDF0925F0F1309B54DE -A705F0E69DA9A8F907241A2E923C8CC8 -3DC47D1F29C448461E9E76ED904F6711 -0D62BF01E6FC0E1A0D3C4751C5D3692B -8C03468BCA7C669EE4FD5E084BBEE7B5 -528A5BB93BAF2C9C4473CCE5D0D22BD9 -DF6A301E95C95DAD97AE0CC8C6913BD8 -801189902C857F39E73591285E70B6DB -E617346AC9C231BB3650AE34CCCA0C5B -27D93437EFB721AA401821DCEC5ADF89 -89237D9DED9C5E78D8B1C9B166CC7342 -4A6D8091BF5E7D651189FA94A250B14C -0E33F96055E7AE893FFC0E3DCF492902 -E61C432B720B19D18EC8D84BDC63151B -F7E5AEF549F782CF379055A608269B16 -438D030FD0B7A54FA837F2AD201A6403 -A590D3EE4FBF04E3247E0D27F286423F -5FE2C1A172FE93C4B15CD37CAEF9F538 -2C97325CBD06B36EB2133DD08B3A017C -92C814227A6BCA949FF0659F002AD39E -DCE850110BD8328CFBD50841D6911D87 -67F14984C7DA791248E32BB5922583DA -1938F2CF72D54EE97E94166FA91D2A36 -74481E9646ED49FE0F6224301604698E -57FCA5DE98A9D6D8006438D0583D8A1D -9FECDE1CEFDC1CBED4763674D9575359 -E3040C00EB28F15366CA73CBD872E740 -7697009A6A831DFECCA91C5993670F7A -5853542321F567A005D547A4F04759BD -5150D1772F50834A503E069A973FBD7C 1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -Check bug with hashing of const integer values -E940B12600C844966162FF8FE7A16AAE -E940B12600C844966162FF8FE7A16AAE -Check memsan bug -1CE422FEE7BD8DE20000000000000000 -Check const columns -B66B53476BDBEB8549A257E3B1766C30 -B66B53476BDBEB8549A257E3B1766C30 -B66B53476BDBEB8549A257E3B1766C30 -B66B53476BDBEB8549A257E3B1766C30 -Check multiple keys as tuple from a table -E940B12600C844966162FF8FE7A16AAE -EC58946A98A0D37F4E3FAC02FBBA9480 -Check multiple keys as separate ints from a table -E940B12600C844966162FF8FE7A16AAE -EC58946A98A0D37F4E3FAC02FBBA9480 -Check constant key and data from a table -E940B12600C844966162FF8FE7A16AAE -E940B12600C844966162FF8FE7A16AAE -Check multiple keys as separate ints from a table with constant data -E940B12600C844966162FF8FE7A16AAE -EC58946A98A0D37F4E3FAC02FBBA9480 diff --git a/tests/queries/0_stateless/02552_siphash128_reference.sql b/tests/queries/0_stateless/02552_siphash128_reference.sql index f7324ed0ee4..200954c3b57 100644 --- a/tests/queries/0_stateless/02552_siphash128_reference.sql +++ b/tests/queries/0_stateless/02552_siphash128_reference.sql @@ -1,254 +1 @@ --- Test Vectors from the SipHash reference C implementation: --- Written by --- Jean-Philippe Aumasson --- Daniel J. 
Bernstein --- Released under CC0 --- https://github.com/veorq/SipHash/blob/eee7d0d84dc7731df2359b243aa5e75d85f6eaef/vectors.h#L645 - -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - '')); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21))); -select 
hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37))); -select 
hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 
29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62))); - --- CH tests -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0)) == sipHash128Reference(char(0)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1)) == sipHash128Reference(char(0, 1)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2)) == sipHash128Reference(char(0, 1, 2)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3)) == sipHash128Reference(char(0, 1, 2, 3)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4)) == sipHash128Reference(char(0, 1, 2, 3, 4)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18)); -select 
sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31)) == sipHash128Reference(char(0, 
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 
11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 
21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 
30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63)); - -select sipHash128ReferenceKeyed((0, 0), '1'); -- { serverError NOT_IMPLEMENTED } -select sipHash128ReferenceKeyed(toUInt64(0), '1'); -- { serverError NOT_IMPLEMENTED } - SELECT hex(sipHash128Reference()) = hex(reverse(unhex('1CE422FEE7BD8DE20000000000000000'))) or hex(sipHash128()) = '1CE422FEE7BD8DE20000000000000000'; -SELECT hex(sipHash128ReferenceKeyed()) = hex(reverse(unhex('1CE422FEE7BD8DE20000000000000000'))) or hex(sipHash128Keyed()) = '1CE422FEE7BD8DE20000000000000000'; - -SELECT 'Check bug with hashing of const integer values'; -DROP TABLE IF EXISTS tab; -CREATE TABLE tab (key Tuple(UInt64, UInt64), val UInt64) ENGINE=Memory; -INSERT INTO tab VALUES ((2, 2), 4); --- these two statements must produce the same result -SELECT hex(sipHash128ReferenceKeyed(key, val)) FROM tab; -SELECT hex(sipHash128ReferenceKeyed(key, 4::UInt64)) FROM tab; -DROP TABLE tab; - -SELECT 'Check memsan bug'; -SELECT hex(sipHash128ReferenceKeyed((toUInt64(2), toUInt64(-9223372036854775807)))) GROUP BY (toUInt64(506097522914230528), toUInt64(now64(2, NULL + NULL), 1084818905618843912)), toUInt64(2), NULL + NULL, char(-2147483649, 1); - -SELECT 'Check const columns'; -DROP TABLE IF EXISTS sipHashKeyed_test; -CREATE TABLE sipHashKeyed_test ENGINE = Memory() AS SELECT 1 a, 'test' b; -SELECT hex(sipHash128ReferenceKeyed((toUInt64(0), toUInt64(0)), 1, 'test')); -SELECT hex(sipHash128Reference(tuple(*))) FROM sipHashKeyed_test; -SELECT hex(sipHash128ReferenceKeyed((toUInt64(0), toUInt64(0)), tuple(*))) FROM sipHashKeyed_test; -SELECT hex(sipHash128ReferenceKeyed((toUInt64(0), toUInt64(0)), a, b)) FROM sipHashKeyed_test; -DROP TABLE sipHashKeyed_test; - -SELECT 'Check multiple keys as tuple from a table'; -DROP TABLE IF EXISTS sipHashKeyed_keys; -CREATE TABLE sipHashKeyed_keys (key Tuple(UInt64, UInt64), val UInt64) ENGINE=Memory; -INSERT INTO sipHashKeyed_keys VALUES ((2, 2), 4); -INSERT INTO sipHashKeyed_keys VALUES ((4, 4), 4); -SELECT hex(sipHash128ReferenceKeyed(key, val)) FROM sipHashKeyed_keys ORDER by key; -DROP TABLE sipHashKeyed_keys; - -SELECT 'Check multiple keys as separate ints from a table'; -DROP TABLE IF EXISTS sipHashKeyed_keys; -CREATE TABLE sipHashKeyed_keys (key0 UInt64, key1 UInt64, val UInt64) ENGINE=Memory; -INSERT INTO sipHashKeyed_keys VALUES (2, 2, 4); -INSERT INTO sipHashKeyed_keys VALUES (4, 4, 4); -SELECT hex(sipHash128ReferenceKeyed((key0, key1), val)) FROM sipHashKeyed_keys ORDER by key0; -SELECT 'Check constant key and data from a table'; -SELECT hex(sipHash128ReferenceKeyed((2::UInt64, 2::UInt64), val)) FROM sipHashKeyed_keys ORDER by val; -DROP TABLE sipHashKeyed_keys; - -SELECT 'Check multiple keys as separate ints from a table with constant data'; -DROP TABLE IF EXISTS sipHashKeyed_keys; -CREATE TABLE sipHashKeyed_keys (key0 UInt64, key1 UInt64) ENGINE=Memory; -INSERT 
INTO sipHashKeyed_keys VALUES (2, 2); -INSERT INTO sipHashKeyed_keys VALUES (4, 4); -SELECT hex(sipHash128ReferenceKeyed((key0, key1), 4::UInt64)) FROM sipHashKeyed_keys ORDER by key0; -DROP TABLE sipHashKeyed_keys; From 7cc3372355d06dfc1184b3ebcd6d2164d179b7be Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 22 Jul 2023 04:30:09 +0200 Subject: [PATCH 1873/1997] Fix terrible trash --- src/Functions/FunctionsHashing.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 82944630b10..090d38fa73d 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -1567,7 +1567,10 @@ public: if constexpr (std::is_same_v) /// backward-compatible { auto col_to_fixed_string = ColumnFixedString::create(sizeof(UInt128)); - col_to_fixed_string->getChars() = std::move(*reinterpret_cast(&col_to->getData())); + const auto & data = col_to->getData(); + auto & chars = col_to_fixed_string->getChars(); + chars.resize(data.size() * sizeof(UInt128)); + memcpy(chars.data(), data.data(), data.size() * sizeof(UInt128)); return col_to_fixed_string; } @@ -1601,12 +1604,12 @@ public: { selector .registerImplementation>(); - +/* #if USE_MULTITARGET_CODE selector.registerImplementation>(); selector .registerImplementation>(); -#endif +#endif*/ } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override From 4c2dabddb6d697ba3744e48e07e09aeaf8fc59d6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 22 Jul 2023 04:31:38 +0200 Subject: [PATCH 1874/1997] Add a test --- tests/queries/0_stateless/02831_trash.reference | 2 ++ tests/queries/0_stateless/02831_trash.sql | 2 ++ 2 files changed, 4 insertions(+) create mode 100644 tests/queries/0_stateless/02831_trash.reference create mode 100644 tests/queries/0_stateless/02831_trash.sql diff --git a/tests/queries/0_stateless/02831_trash.reference b/tests/queries/0_stateless/02831_trash.reference new file mode 100644 index 00000000000..e25f2e9e23f --- /dev/null +++ b/tests/queries/0_stateless/02831_trash.reference @@ -0,0 +1,2 @@ +2761631236 +1210084689 diff --git a/tests/queries/0_stateless/02831_trash.sql b/tests/queries/0_stateless/02831_trash.sql new file mode 100644 index 00000000000..600e2ad0695 --- /dev/null +++ b/tests/queries/0_stateless/02831_trash.sql @@ -0,0 +1,2 @@ +SELECT CRC32IEEE(sipHash128()); +SELECT CRC32(murmurHash3_128()); From d2b178536e1e5b6d85c917d3d26bbe2cff7594ea Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 22 Jul 2023 04:38:16 +0200 Subject: [PATCH 1875/1997] Fix terrible trash --- src/Functions/FunctionsHashing.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 090d38fa73d..8f8715ec3f1 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -1604,12 +1604,12 @@ public: { selector .registerImplementation>(); -/* + #if USE_MULTITARGET_CODE selector.registerImplementation>(); selector .registerImplementation>(); -#endif*/ +#endif } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override From 4d0b75ebdd1bb69e155b237768c7db7a22cb09cc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 22 Jul 2023 04:42:16 +0200 Subject: [PATCH 1876/1997] Remove hashid --- .gitmodules | 3 - contrib/CMakeLists.txt | 1 - contrib/hashidsxx | 1 
From 4d0b75ebdd1bb69e155b237768c7db7a22cb09cc Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sat, 22 Jul 2023 04:42:16 +0200
Subject: [PATCH 1876/1997] Remove hashid

---
 .gitmodules                                   |   3 -
 contrib/CMakeLists.txt                        |   1 -
 contrib/hashidsxx                             |   1 -
 contrib/hashidsxx-cmake/CMakeLists.txt        |  14 --
 src/Core/Settings.h                           |   2 +-
 src/Functions/CMakeLists.txt                  |   1 -
 src/Functions/FunctionHashID.cpp              |  12 --
 src/Functions/FunctionHashID.h                | 170 ------------------
 .../0_stateless/02293_hashid.reference        |  15 --
 tests/queries/0_stateless/02293_hashid.sql    |  16 --
 ...new_functions_must_be_documented.reference |   1 -
 11 files changed, 1 insertion(+), 235 deletions(-)
 delete mode 160000 contrib/hashidsxx
 delete mode 100644 contrib/hashidsxx-cmake/CMakeLists.txt
 delete mode 100644 src/Functions/FunctionHashID.cpp
 delete mode 100644 src/Functions/FunctionHashID.h
 delete mode 100644 tests/queries/0_stateless/02293_hashid.reference
 delete mode 100644 tests/queries/0_stateless/02293_hashid.sql

diff --git a/.gitmodules b/.gitmodules
index 151dc28c55b..ba71a8ae3a7 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -258,9 +258,6 @@
 [submodule "contrib/wyhash"]
 	path = contrib/wyhash
 	url = https://github.com/wangyi-fudan/wyhash
-[submodule "contrib/hashidsxx"]
-	path = contrib/hashidsxx
-	url = https://github.com/schoentoon/hashidsxx
 [submodule "contrib/nats-io"]
 	path = contrib/nats-io
 	url = https://github.com/ClickHouse/nats.c
diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt
index 2af468970f1..0f68c0cbc7c 100644
--- a/contrib/CMakeLists.txt
+++ b/contrib/CMakeLists.txt
@@ -164,7 +164,6 @@ add_contrib (libpq-cmake libpq)
 add_contrib (nuraft-cmake NuRaft)
 add_contrib (fast_float-cmake fast_float)
 add_contrib (datasketches-cpp-cmake datasketches-cpp)
-add_contrib (hashidsxx-cmake hashidsxx)

 option(ENABLE_NLP "Enable NLP functions support" ${ENABLE_LIBRARIES})
 if (ENABLE_NLP)
diff --git a/contrib/hashidsxx b/contrib/hashidsxx
deleted file mode 160000
index 783f6911ccf..00000000000
--- a/contrib/hashidsxx
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 783f6911ccfdaca83e3cfac084c4aad888a80cee
diff --git a/contrib/hashidsxx-cmake/CMakeLists.txt b/contrib/hashidsxx-cmake/CMakeLists.txt
deleted file mode 100644
index 17f3888bd94..00000000000
--- a/contrib/hashidsxx-cmake/CMakeLists.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/hashidsxx")
-
-set (SRCS
-    "${LIBRARY_DIR}/hashids.cpp"
-)
-
-set (HDRS
-    "${LIBRARY_DIR}/hashids.h"
-)
-
-add_library(_hashidsxx ${SRCS} ${HDRS})
-target_include_directories(_hashidsxx SYSTEM PUBLIC "${LIBRARY_DIR}")
-
-add_library(ch_contrib::hashidsxx ALIAS _hashidsxx)
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index cfcb56729d2..bde51ae9971 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -761,7 +761,7 @@ class IColumn;
     /** Experimental functions */ \
     M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \
     M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \
-    M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions (hashid, etc)", 0) \
+    M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions", 0) \
     M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \
     M(Bool, allow_experimental_annoy_index, false, "Allows to use Annoy index. Disabled by default because this feature is experimental", 0) \
     M(UInt64, max_limit_for_ann_queries, 1'000'000, "SELECT queries with LIMIT bigger than this setting cannot use ANN indexes. Helps to prevent memory overflows in ANN search indexes.", 0) \
diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt
index 2f5c8a212f2..06436488050 100644
--- a/src/Functions/CMakeLists.txt
+++ b/src/Functions/CMakeLists.txt
@@ -21,7 +21,6 @@ list (APPEND PUBLIC_LIBS
     dbms
     ch_contrib::metrohash
     ch_contrib::murmurhash
-    ch_contrib::hashidsxx
     ch_contrib::morton_nd
 )

diff --git a/src/Functions/FunctionHashID.cpp b/src/Functions/FunctionHashID.cpp
deleted file mode 100644
index 829b3d9d2f6..00000000000
--- a/src/Functions/FunctionHashID.cpp
+++ /dev/null
@@ -1,12 +0,0 @@
-#include "FunctionHashID.h"
-#include
-
-namespace DB
-{
-
-REGISTER_FUNCTION(HashID)
-{
-    factory.registerFunction();
-}
-
-}
diff --git a/src/Functions/FunctionHashID.h b/src/Functions/FunctionHashID.h
deleted file mode 100644
index 680c3f6430b..00000000000
--- a/src/Functions/FunctionHashID.h
+++ /dev/null
@@ -1,170 +0,0 @@
-#pragma once
-
-#include "config.h"
-
-#include
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include
-#include
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
-    extern const int BAD_ARGUMENTS;
-    extern const int ILLEGAL_COLUMN;
-    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
-    extern const int SUPPORT_IS_DISABLED;
-    extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION;
-    extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION;
-}
-
-// hashid(string, salt)
-class FunctionHashID : public IFunction
-{
-public:
-    static constexpr auto name = "hashid";
-
-    static FunctionPtr create(ContextPtr context)
-    {
-        if (!context->getSettingsRef().allow_experimental_hash_functions)
-            throw Exception(ErrorCodes::SUPPORT_IS_DISABLED,
-                "Hashing function '{}' is experimental. Set `allow_experimental_hash_functions` setting to enable it", name);
-
-        return std::make_shared();
-    }
-
-    String getName() const override { return name; }
-
-    size_t getNumberOfArguments() const override { return 0; }
-
-    bool isVariadic() const override { return true; }
-
-    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
-
-    bool useDefaultImplementationForConstants() const override { return true; }
-    ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2, 3}; }
-
-    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
-    {
-        if (arguments.empty())
-            throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Function {} expects at least one argument", getName());
-
-        const auto & id_col = arguments[0];
-        if (!isUnsignedInteger(id_col.type))
-            throw Exception(
-                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
-                "First argument of function {} must be unsigned integer, got {}",
-                getName(),
-                arguments[0].type->getName());
-
-        if (arguments.size() > 1)
-        {
-            const auto & hash_col = arguments[1];
-            if (!isString(hash_col.type))
-                throw Exception(
-                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
-                    "Second argument of function {} must be String, got {}",
-                    getName(),
-                    arguments[1].type->getName());
-        }
-
-        if (arguments.size() > 2)
-        {
-            const auto & min_length_col = arguments[2];
-            if (!isUInt8(min_length_col.type))
-                throw Exception(
-                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
-                    "Third argument of function {} must be UInt8, got {}",
-                    getName(),
-                    arguments[2].type->getName());
-        }
-
-        if (arguments.size() > 3)
-        {
-            const auto & alphabet_col = arguments[3];
-            if (!isString(alphabet_col.type))
-                throw Exception(
-                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
-                    "Fourth argument of function {} must be String, got {}",
-                    getName(),
-                    arguments[3].type->getName());
-        }
-
-        if (arguments.size() > 4)
-        {
-            throw Exception(
-                ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION,
-                "Function {} expect no more than four arguments (integer, salt, min_length, optional_alphabet), got {}",
-                getName(),
-                arguments.size());
-        }
-
-        return std::make_shared();
-    }
-
-    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
-    {
-        const auto & numcolumn = arguments[0].column;
-
-        if (checkAndGetColumn(numcolumn.get()) || checkAndGetColumn(numcolumn.get())
-            || checkAndGetColumn(numcolumn.get()) || checkAndGetColumn(numcolumn.get()))
-        {
-            std::string salt;
-            UInt8 min_length = 0;
-            std::string alphabet;
-
-            if (arguments.size() >= 4)
-            {
-                const auto & alphabetcolumn = arguments[3].column;
-                if (const auto * alpha_col = checkAndGetColumnConst(alphabetcolumn.get()))
-                {
-                    alphabet = alpha_col->getValue();
-                    if (alphabet.find('\0') != std::string::npos)
-                        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Custom alphabet must not contain null character");
-                }
-            }
-            else
-                alphabet.assign(DEFAULT_ALPHABET);
-
-            if (arguments.size() >= 3)
-            {
-                const auto & minlengthcolumn = arguments[2].column;
-                if (const auto * min_length_col = checkAndGetColumnConst(minlengthcolumn.get()))
-                    min_length = min_length_col->getValue();
-            }
-
-            if (arguments.size() >= 2)
-            {
-                const auto & saltcolumn = arguments[1].column;
-                if (const auto * salt_col = checkAndGetColumnConst(saltcolumn.get()))
-                    salt = salt_col->getValue();
-            }
-
-            hashidsxx::Hashids hash(salt, min_length, alphabet);
-
-            auto col_res = ColumnString::create();
-
-            for (size_t i = 0; i < input_rows_count; ++i)
-            {
-                col_res->insert(hash.encode({numcolumn->getUInt(i)}));
-            }
-
-            return col_res;
-        }
-        else
-            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function hashid",
-                arguments[0].column->getName());
-    }
-};
-
-}
diff --git a/tests/queries/0_stateless/02293_hashid.reference b/tests/queries/0_stateless/02293_hashid.reference
deleted file mode 100644
index dfc78349c05..00000000000
--- a/tests/queries/0_stateless/02293_hashid.reference
+++ /dev/null
@@ -1,15 +0,0 @@
-0 gY
-1 jR
-2 k5
-3 l5
-4 mO
-0 pbgkmdljlpjoapne
-1 akemglnjepjpodba
-2 obmgndljgajpkeao
-3 dldokmpjpgjgeanb
-4 nkdlpgajngjnobme
-YQrvD5XGvbx
-Bm3zaOq7zbp
-oV
-oV
-6b
diff --git a/tests/queries/0_stateless/02293_hashid.sql b/tests/queries/0_stateless/02293_hashid.sql
deleted file mode 100644
index 06af0b5e1d8..00000000000
--- a/tests/queries/0_stateless/02293_hashid.sql
+++ /dev/null
@@ -1,16 +0,0 @@
--- Tags: no-upgrade-check
-SET allow_experimental_hash_functions = 1;
-
-select number, hashid(number) from system.numbers limit 5;
-select number, hashid(number, 's3cr3t', 16, 'abcdefghijklmnop') from system.numbers limit 5;
-select hashid(1234567890123456, 's3cr3t');
-select hashid(1234567890123456, 's3cr3t2');
-
-SELECT hashid(1, hashid(2));
-SELECT hashid(1, 'k5');
-SELECT hashid(1, 'k5_othersalt');
-
--- https://github.com/ClickHouse/ClickHouse/issues/39672
-SELECT
-    JSONExtractRaw(257, NULL),
-    hashid(1024, if(rand() % 10, 'truetruetruetrue', NULL), 's3\0r3t'); -- {serverError 43}
diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference
index fc00bfdadca..595ebb483d5 100644
--- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference
+++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference
@@ -346,7 +346,6 @@ hasAny
 hasColumnInTable
 hasSubstr
 hasThreadFuzzer
-hashid
 hex
 hiveHash
 hop
From 5f4756fb33f754913f4ab8ddfa84c39739920f19 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sat, 22 Jul 2023 04:54:58 +0200
Subject: [PATCH 1877/1997] Remove toDecimalString

---
 .../functions/type-conversion-functions.md    |  38 ---
 .../functions/type-conversion-functions.md    |  38 ---
 src/Functions/FunctionToDecimalString.cpp     |  22 --
 src/Functions/FunctionToDecimalString.h       | 312 ------------------
 src/IO/WriteHelpers.h                         |  44 +--
 .../02676_to_decimal_string.reference         |  21 --
 .../0_stateless/02676_to_decimal_string.sql   |  35 --
 7 files changed, 13 insertions(+), 497 deletions(-)
 delete mode 100644 src/Functions/FunctionToDecimalString.cpp
 delete mode 100644 src/Functions/FunctionToDecimalString.h
 delete mode 100644 tests/queries/0_stateless/02676_to_decimal_string.reference
 delete mode 100644 tests/queries/0_stateless/02676_to_decimal_string.sql

diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md
index 36f40b37238..c2bd525c483 100644
--- a/docs/en/sql-reference/functions/type-conversion-functions.md
+++ b/docs/en/sql-reference/functions/type-conversion-functions.md
@@ -945,44 +945,6 @@ Result:
 └────────────┴───────┘
 ```

-## toDecimalString
-
-Converts a numeric value to String with the number of fractional digits in the output specified by the user.
-
-**Syntax**
-
-``` sql
-toDecimalString(number, scale)
-```
-
-**Parameters**
-
-- `number` — Value to be represented as String, [Int, UInt](/docs/en/sql-reference/data-types/int-uint.md), [Float](/docs/en/sql-reference/data-types/float.md), [Decimal](/docs/en/sql-reference/data-types/decimal.md),
-- `scale` — Number of fractional digits, [UInt8](/docs/en/sql-reference/data-types/int-uint.md).
-    * Maximum scale for [Decimal](/docs/en/sql-reference/data-types/decimal.md) and [Int, UInt](/docs/en/sql-reference/data-types/int-uint.md) types is 77 (it is the maximum possible number of significant digits for Decimal),
-    * Maximum scale for [Float](/docs/en/sql-reference/data-types/float.md) is 60.
-
-**Returned value**
-
-- Input value represented as [String](/docs/en/sql-reference/data-types/string.md) with given number of fractional digits (scale).
-    The number is rounded up or down according to common arithmetic in case requested scale is smaller than original number's scale.
-
-**Example**
-
-Query:
-
-``` sql
-SELECT toDecimalString(CAST('64.32', 'Float64'), 5);
-```
-
-Result:
-
-```response
-┌toDecimalString(CAST('64.32', 'Float64'), 5)─┐
-│ 64.32000 │
-└─────────────────────────────────────────────┘
-```
-
 ## reinterpretAsUInt(8\|16\|32\|64)

 ## reinterpretAsInt(8\|16\|32\|64)
diff --git a/docs/ru/sql-reference/functions/type-conversion-functions.md b/docs/ru/sql-reference/functions/type-conversion-functions.md
index e53104d8d71..088b1a9a1f1 100644
--- a/docs/ru/sql-reference/functions/type-conversion-functions.md
+++ b/docs/ru/sql-reference/functions/type-conversion-functions.md
@@ -762,44 +762,6 @@ SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut;
 └────────────┴───────┘
 ```

-## toDecimalString
-
-Принимает любой численный тип первым аргументом, возвращает строковое десятичное представление числа с точностью, заданной вторым аргументом.
-
-**Синтаксис**
-
-``` sql
-toDecimalString(number, scale)
-```
-
-**Параметры**
-
-- `number` — Значение любого числового типа: [Int, UInt](/docs/ru/sql-reference/data-types/int-uint.md), [Float](/docs/ru/sql-reference/data-types/float.md), [Decimal](/docs/ru/sql-reference/data-types/decimal.md),
-- `scale` — Требуемое количество десятичных знаков после запятой, [UInt8](/docs/ru/sql-reference/data-types/int-uint.md).
-    * Значение `scale` для типов [Decimal](/docs/ru/sql-reference/data-types/decimal.md) и [Int, UInt](/docs/ru/sql-reference/data-types/int-uint.md) должно не превышать 77 (так как это наибольшее количество значимых символов для этих типов),
-    * Значение `scale` для типа [Float](/docs/ru/sql-reference/data-types/float.md) не должно превышать 60.
-
-**Возвращаемое значение**
-
-- Строка ([String](/docs/en/sql-reference/data-types/string.md)), представляющая собой десятичное представление входного числа с заданной длиной дробной части.
-    При необходимости число округляется по стандартным правилам арифметики.
-
-**Пример использования**
-
-Запрос:
-
-``` sql
-SELECT toDecimalString(CAST('64.32', 'Float64'), 5);
-```
-
-Результат:
-
-```response
-┌─toDecimalString(CAST('64.32', 'Float64'), 5)┐
-│ 64.32000 │
-└─────────────────────────────────────────────┘
-```
-
 ## reinterpretAsUInt(8\|16\|32\|64) {#reinterpretasuint8163264}

 ## reinterpretAsInt(8\|16\|32\|64) {#reinterpretasint8163264}
diff --git a/src/Functions/FunctionToDecimalString.cpp b/src/Functions/FunctionToDecimalString.cpp
deleted file mode 100644
index fe417b19137..00000000000
--- a/src/Functions/FunctionToDecimalString.cpp
+++ /dev/null
@@ -1,22 +0,0 @@
-#include
-#include
-#include
-
-namespace DB
-{
-
-REGISTER_FUNCTION(ToDecimalString)
-{
-    factory.registerFunction(
-        FunctionDocumentation{
-            .description=R"(
-Returns string representation of a number. First argument is the number of any numeric type,
-second argument is the desired number of digits in fractional part. Returns String.
-
-        )",
-            .examples{{"toDecimalString", "SELECT toDecimalString(2.1456,2)", ""}},
-            .categories{"String"}
-        }, FunctionFactory::CaseInsensitive);
-}
-
-}
diff --git a/src/Functions/FunctionToDecimalString.h b/src/Functions/FunctionToDecimalString.h
deleted file mode 100644
index 6ae007e6b66..00000000000
--- a/src/Functions/FunctionToDecimalString.h
+++ /dev/null
@@ -1,312 +0,0 @@
-#pragma once
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
-    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
-    extern const int ILLEGAL_COLUMN;
-    extern const int CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER;
-}
-
-class FunctionToDecimalString : public IFunction
-{
-public:
-    static constexpr auto name = "toDecimalString";
-    static FunctionPtr create(ContextPtr) { return std::make_shared(); }
-
-    String getName() const override { return name; }
-
-    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
-
-    size_t getNumberOfArguments() const override { return 2; }
-
-    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
-    {
-        if (!isNumber(*arguments[0]))
-            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
-                "Illegal first argument for formatDecimal function: got {}, expected numeric type",
-                arguments[0]->getName());
-
-        if (!isUInt8(*arguments[1]))
-            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
-                "Illegal second argument for formatDecimal function: got {}, expected UInt8",
-                arguments[1]->getName());
-
-        return std::make_shared();
-    }
-
-    bool useDefaultImplementationForConstants() const override { return true; }
-
-private:
-    /// For operations with Integer/Float
-    template
-    void vectorConstant(const FromVectorType & vec_from, UInt8 precision,
-        ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets) const
-    {
-        size_t input_rows_count = vec_from.size();
-        result_offsets.resize(input_rows_count);
-
-        /// Buffer is used here and in functions below because resulting size cannot be precisely anticipated,
-        /// and buffer resizes on-the-go. Also, .count() provided by buffer is convenient in this case.
-        WriteBufferFromVector buf_to(vec_to);
-
-        for (size_t i = 0; i < input_rows_count; ++i)
-        {
-            format(vec_from[i], buf_to, precision);
-            result_offsets[i] = buf_to.count();
-        }
-
-        buf_to.finalize();
-    }
-
-    template
-    void vectorVector(const FirstArgVectorType & vec_from, const ColumnVector::Container & vec_precision,
-        ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets) const
-    {
-        size_t input_rows_count = vec_from.size();
-        result_offsets.resize(input_rows_count);
-
-        WriteBufferFromVector buf_to(vec_to);
-
-        constexpr size_t max_digits = std::numeric_limits::digits10;
-
-        for (size_t i = 0; i < input_rows_count; ++i)
-        {
-            if (vec_precision[i] > max_digits)
-                throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER,
-                    "Too many fractional digits requested, shall not be more than {}", max_digits);
-            format(vec_from[i], buf_to, vec_precision[i]);
-            result_offsets[i] = buf_to.count();
-        }
-
-        buf_to.finalize();
-    }
-
-    template
-    void constantVector(const FirstArgType & value_from, const ColumnVector::Container & vec_precision,
-        ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets) const
-    {
-        size_t input_rows_count = vec_precision.size();
-        result_offsets.resize(input_rows_count);
-
-        WriteBufferFromVector buf_to(vec_to);
-
-        constexpr size_t max_digits = std::numeric_limits::digits10;
-
-        for (size_t i = 0; i < input_rows_count; ++i)
-        {
-            if (vec_precision[i] > max_digits)
-                throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER,
-                    "Too many fractional digits requested, shall not be more than {}", max_digits);
-            format(value_from, buf_to, vec_precision[i]);
-            result_offsets[i] = buf_to.count();
-        }
-
-        buf_to.finalize();
-    }
-
-    /// For operations with Decimal
-    template
-    void vectorConstant(const FirstArgVectorType & vec_from, UInt8 precision,
-        ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets, UInt8 from_scale) const
-    {
-        /// There are no more than 77 meaning digits (as it is the max length of UInt256). So we can limit it with 77.
-        constexpr size_t max_digits = std::numeric_limits::digits10;
-        if (precision > max_digits)
-            throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER,
-                "Too many fractional digits requested for Decimal, must not be more than {}", max_digits);
-
-        WriteBufferFromVector buf_to(vec_to);
-        size_t input_rows_count = vec_from.size();
-        result_offsets.resize(input_rows_count);
-
-        for (size_t i = 0; i < input_rows_count; ++i)
-        {
-            writeText(vec_from[i], from_scale, buf_to, true, true, precision);
-            writeChar(0, buf_to);
-            result_offsets[i] = buf_to.count();
-        }
-        buf_to.finalize();
-    }
-
-    template
-    void vectorVector(const FirstArgVectorType & vec_from, const ColumnVector::Container & vec_precision,
-        ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets, UInt8 from_scale) const
-    {
-        size_t input_rows_count = vec_from.size();
-        result_offsets.resize(input_rows_count);
-
-        WriteBufferFromVector buf_to(vec_to);
-
-        constexpr size_t max_digits = std::numeric_limits::digits10;
-
-        for (size_t i = 0; i < input_rows_count; ++i)
-        {
-            if (vec_precision[i] > max_digits)
-                throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER,
-                    "Too many fractional digits requested for Decimal, must not be more than {}", max_digits);
-            writeText(vec_from[i], from_scale, buf_to, true, true, vec_precision[i]);
-            writeChar(0, buf_to);
-            result_offsets[i] = buf_to.count();
-        }
-        buf_to.finalize();
-    }
-
-    template
-    void constantVector(const FirstArgType & value_from, const ColumnVector::Container & vec_precision,
-        ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets, UInt8 from_scale) const
-    {
-        size_t input_rows_count = vec_precision.size();
-        result_offsets.resize(input_rows_count);
-
-        WriteBufferFromVector buf_to(vec_to);
-
-        constexpr size_t max_digits = std::numeric_limits::digits10;
-
-        for (size_t i = 0; i < input_rows_count; ++i)
-        {
-            if (vec_precision[i] > max_digits)
-                throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER,
-                    "Too many fractional digits requested for Decimal, must not be more than {}", max_digits);
-            writeText(value_from, from_scale, buf_to, true, true, vec_precision[i]);
-            writeChar(0, buf_to);
-            result_offsets[i] = buf_to.count();
-        }
-        buf_to.finalize();
-    }
-
-    template
-    static void format(T value, DB::WriteBuffer & out, UInt8 precision)
-    {
-        /// Maximum of 60 is hard-coded in 'double-conversion/double-conversion.h' for floating point values,
-        /// Catch this here to give user a more reasonable error.
-        if (precision > 60)
-            throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER,
-                "Too high precision requested for Float, must not be more than 60, got {}", Int8(precision));
-
-        DB::DoubleConverter::BufferType buffer;
-        double_conversion::StringBuilder builder{buffer, sizeof(buffer)};
-
-        const auto result = DB::DoubleConverter::instance().ToFixed(value, precision, &builder);
-
-        if (!result)
-            throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER, "Error processing number: {}", value);
-
-        out.write(buffer, builder.position());
-        writeChar(0, out);
-    }
-
-    template
-    static void format(T value, DB::WriteBuffer & out, UInt8 precision)
-    {
-        /// Fractional part for Integer is just trailing zeros. Let's limit it with 77 (like with Decimals).
-        constexpr size_t max_digits = std::numeric_limits::digits10;
-        if (precision > max_digits)
-            throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER,
-                "Too many fractional digits requested, shall not be more than {}", max_digits);
-        writeText(value, out);
-        if (precision > 0) [[likely]]
-        {
-            writeChar('.', out);
-            for (int i = 0; i < precision; ++i)
-                writeChar('0', out);
-            writeChar(0, out);
-        }
-    }
-
-public:
-    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
-    {
-        switch (arguments[0].type->getTypeId())
-        {
-            case TypeIndex::UInt8: return executeType(arguments);
-            case TypeIndex::UInt16: return executeType(arguments);
-            case TypeIndex::UInt32: return executeType(arguments);
-            case TypeIndex::UInt64: return executeType(arguments);
-            case TypeIndex::UInt128: return executeType(arguments);
-            case TypeIndex::UInt256: return executeType(arguments);
-            case TypeIndex::Int8: return executeType(arguments);
-            case TypeIndex::Int16: return executeType(arguments);
-            case TypeIndex::Int32: return executeType(arguments);
-            case TypeIndex::Int64: return executeType(arguments);
-            case TypeIndex::Int128: return executeType(arguments);
-            case TypeIndex::Int256: return executeType(arguments);
-            case TypeIndex::Float32: return executeType(arguments);
-            case TypeIndex::Float64: return executeType(arguments);
-            case TypeIndex::Decimal32: return executeType(arguments);
-            case TypeIndex::Decimal64: return executeType(arguments);
-            case TypeIndex::Decimal128: return executeType(arguments);
-            case TypeIndex::Decimal256: return executeType(arguments);
-            default:
-                throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
-                    arguments[0].column->getName(), getName());
-        }
-    }
-
-private:
-    template
-    ColumnPtr executeType(const ColumnsWithTypeAndName & arguments) const
-    {
-        const auto * from_col_const = typeid_cast(arguments[0].column.get());
-        const auto * precision_col = checkAndGetColumn>(arguments[1].column.get());
-        const auto * precision_col_const = typeid_cast(arguments[1].column.get());
-
-        auto result_col = ColumnString::create();
-        auto * result_col_string = assert_cast(result_col.get());
-        ColumnString::Chars & result_chars = result_col_string->getChars();
-        ColumnString::Offsets & result_offsets = result_col_string->getOffsets();
-
-        if constexpr (is_decimal)
-        {
-            const auto * from_col = checkAndGetColumn>(arguments[0].column.get());
-            UInt8 from_scale = from_col->getScale();
-
-            if (from_col)
-            {
-                if (precision_col_const)
-                    vectorConstant(from_col->getData(), precision_col_const->template getValue(), result_chars, result_offsets, from_scale);
-                else
-                    vectorVector(from_col->getData(), precision_col->getData(), result_chars, result_offsets, from_scale);
-            }
-            else if (from_col_const)
-                constantVector(from_col_const->template getValue(), precision_col->getData(), result_chars, result_offsets, from_scale);
-            else
-                throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function formatDecimal", arguments[0].column->getName());
-        }
-        else
-        {
-            const auto * from_col = checkAndGetColumn>(arguments[0].column.get());
-            if (from_col)
-            {
-                if (precision_col_const)
-                    vectorConstant(from_col->getData(), precision_col_const->template getValue(), result_chars, result_offsets);
-                else
-                    vectorVector(from_col->getData(), precision_col->getData(), result_chars, result_offsets);
-            }
-            else if (from_col_const)
-                constantVector(from_col_const->template getValue(), precision_col->getData(), result_chars, result_offsets);
-            else
-                throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function formatDecimal", arguments[0].column->getName());
-        }
-
-        return result_col;
-    }
-};
-
-}
diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h
index aa4c9b17e48..0494cdf22e7 100644
--- a/src/IO/WriteHelpers.h
+++ b/src/IO/WriteHelpers.h
@@ -905,26 +905,26 @@ inline void writeText(const IPv4 & x, WriteBuffer & buf) { writeIPv4Text(x, buf)
 inline void writeText(const IPv6 & x, WriteBuffer & buf) { writeIPv6Text(x, buf); }

 template
-void writeDecimalFractional(const T & x, UInt32 scale, WriteBuffer & ostr, bool trailing_zeros,
-    bool fixed_fractional_length, UInt32 fractional_length)
+void writeDecimalFractional(const T & x, UInt32 scale, WriteBuffer & ostr, bool trailing_zeros)
 {
     /// If it's big integer, but the number of digits is small,
     /// use the implementation for smaller integers for more efficient arithmetic.
+
     if constexpr (std::is_same_v)
     {
         if (x <= std::numeric_limits::max())
         {
-            writeDecimalFractional(static_cast(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length);
+            writeDecimalFractional(static_cast(x), scale, ostr, trailing_zeros);
             return;
         }
         else if (x <= std::numeric_limits::max())
         {
-            writeDecimalFractional(static_cast(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length);
+            writeDecimalFractional(static_cast(x), scale, ostr, trailing_zeros);
             return;
         }
         else if (x <= std::numeric_limits::max())
         {
-            writeDecimalFractional(static_cast(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length);
+            writeDecimalFractional(static_cast(x), scale, ostr, trailing_zeros);
             return;
         }
     }
@@ -932,53 +932,35 @@ void writeDecimalFractional(const T & x, UInt32 scale, WriteBuffer & ostr, bool
     {
         if (x <= std::numeric_limits::max())
         {
-            writeDecimalFractional(static_cast(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length);
+            writeDecimalFractional(static_cast(x), scale, ostr, trailing_zeros);
             return;
         }
         else if (x <= std::numeric_limits::max())
         {
-            writeDecimalFractional(static_cast(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length);
+            writeDecimalFractional(static_cast(x), scale, ostr, trailing_zeros);
             return;
         }
     }

     constexpr size_t max_digits = std::numeric_limits::digits10;
     assert(scale <= max_digits);
-    assert(fractional_length <= max_digits);
-
     char buf[max_digits];
-    memset(buf, '0', std::max(scale, fractional_length));
+    memset(buf, '0', scale);

     T value = x;
     Int32 last_nonzero_pos = 0;
-
-    if (fixed_fractional_length && fractional_length < scale)
-    {
-        T new_value = value / DecimalUtils::scaleMultiplier(scale - fractional_length - 1);
-        auto round_carry = new_value % 10;
-        value = new_value / 10;
-        if (round_carry >= 5)
-            value += 1;
-    }
-
-    for (Int32 pos = fixed_fractional_length ? std::min(scale - 1, fractional_length - 1) : scale - 1; pos >= 0; --pos)
+    for (Int32 pos = scale - 1; pos >= 0; --pos)
     {
         auto remainder = value % 10;
         value /= 10;

         if (remainder != 0 && last_nonzero_pos == 0)
             last_nonzero_pos = pos;

         buf[pos] += static_cast(remainder);
     }

     writeChar('.', ostr);
-    ostr.write(buf, fixed_fractional_length ? fractional_length : (trailing_zeros ? scale : last_nonzero_pos + 1));
+    ostr.write(buf, trailing_zeros ? scale : last_nonzero_pos + 1);
 }

 template
-void writeText(Decimal x, UInt32 scale, WriteBuffer & ostr, bool trailing_zeros,
-    bool fixed_fractional_length = false, UInt32 fractional_length = 0)
+void writeText(Decimal x, UInt32 scale, WriteBuffer & ostr, bool trailing_zeros)
 {
     T part = DecimalUtils::getWholePart(x, scale);
@@ -989,7 +971,7 @@ void writeText(Decimal x, UInt32 scale, WriteBuffer & ostr, bool trailing_zer
     writeIntText(part, ostr);

-    if (scale || (fixed_fractional_length && fractional_length > 0))
+    if (scale)
     {
         part = DecimalUtils::getFractionalPart(x, scale);
         if (part || trailing_zeros)
@@ -997,7 +979,7 @@ void writeText(Decimal x, UInt32 scale, WriteBuffer & ostr, bool trailing_zer
             if (part < 0)
                 part *= T(-1);

-            writeDecimalFractional(part, scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length);
+            writeDecimalFractional(part, scale, ostr, trailing_zeros);
         }
     }
 }
diff --git a/tests/queries/0_stateless/02676_to_decimal_string.reference b/tests/queries/0_stateless/02676_to_decimal_string.reference
deleted file mode 100644
index 4c27ee5b528..00000000000
--- a/tests/queries/0_stateless/02676_to_decimal_string.reference
+++ /dev/null
@@ -1,21 +0,0 @@
-2.00000000000000000000000000000000000000000000000000000000000000000000000000000
-2.12
--2.00000000000000000000000000000000000000000000000000000000000000000000000000000
--2.12
-2.987600000000000033395508580724708735942840576171875000000000
-2.15
--2.987600000000000033395508580724708735942840576171875000000000
--2.15
-64.1230010986
-64.2340000000
--64.1230010986
--64.2340000000
--32.345
-32.34500000000000000000000000000000000000000000000000000000000000000000000000000
-32.46
--64.5671232345
-128.78932312332132985464
--128.78932312332132985464
-128.78932312332132985464000000000000000000000000000000000000000000000000000000000
-128.7893231233
--128.78932312332132985464123123789323123321329854600000000000000000000000000000000
diff --git a/tests/queries/0_stateless/02676_to_decimal_string.sql b/tests/queries/0_stateless/02676_to_decimal_string.sql
deleted file mode 100644
index 563d60c62c7..00000000000
--- a/tests/queries/0_stateless/02676_to_decimal_string.sql
+++ /dev/null
@@ -1,35 +0,0 @@
--- Regular types
-SELECT toDecimalString(2, 77); -- more digits required than exist
-SELECT toDecimalString(2.123456, 2); -- rounding
-SELECT toDecimalString(-2, 77); -- more digits required than exist
-SELECT toDecimalString(-2.123456, 2); -- rounding
-
-SELECT toDecimalString(2.9876, 60); -- more digits required than exist (took 60 as it is float by default)
-SELECT toDecimalString(2.1456, 2); -- rounding
-SELECT toDecimalString(-2.9876, 60); -- more digits required than exist
-SELECT toDecimalString(-2.1456, 2); -- rounding
-
--- Float32 and Float64 tests. No sense to test big float precision -- the result will be a mess anyway.
-SELECT toDecimalString(64.123::Float32, 10);
-SELECT toDecimalString(64.234::Float64, 10);
-SELECT toDecimalString(-64.123::Float32, 10);
-SELECT toDecimalString(-64.234::Float64, 10);
-
--- Decimals
-SELECT toDecimalString(-32.345::Decimal32(3), 3);
-SELECT toDecimalString(32.345::Decimal32(3), 77); -- more digits required than exist
-SELECT toDecimalString(32.456::Decimal32(3), 2); -- rounding
-SELECT toDecimalString('-64.5671232345'::Decimal64(10), 10);
-SELECT toDecimalString('128.78932312332132985464'::Decimal128(20), 20);
-SELECT toDecimalString('-128.78932312332132985464123123'::Decimal128(26), 20); -- rounding
-SELECT toDecimalString('128.78932312332132985464'::Decimal128(20), 77); -- more digits required than exist
-SELECT toDecimalString('128.789323123321329854641231237893231233213298546'::Decimal256(45), 10); -- rounding
-SELECT toDecimalString('-128.789323123321329854641231237893231233213298546'::Decimal256(45), 77); -- more digits required than exist
-
--- Max number of decimal fractional digits is defined as 77 for Int/UInt/Decimal and 60 for Float.
--- These values shall work OK.
-SELECT toDecimalString('32.32'::Float32, 61); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER}
-SELECT toDecimalString('64.64'::Float64, 61); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER}
-SELECT toDecimalString('88'::UInt8, 78); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER}
-SELECT toDecimalString('646464'::Int256, 78); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER}
-SELECT toDecimalString('-128.789323123321329854641231237893231233213298546'::Decimal256(45), 78); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER}
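The WriteHelpers.h hunk above restores writeDecimalFractional to its pre-toDecimalString form: the fractional digits are rendered into a fixed buffer by repeated division by 10, then trimmed to the last non-zero digit unless trailing zeros are requested. A minimal standalone sketch of that digit loop, using plain int64_t and std::string rather than ClickHouse's template and WriteBuffer machinery:

```cpp
#include <cstdint>
#include <cstdio>
#include <string>

// Render the fractional part `frac` (where 0 <= frac < 10^scale) with `scale`
// digits, optionally trimming trailing zeros -- the same loop shape as the
// restored writeDecimalFractional.
std::string formatFraction(int64_t frac, int scale, bool trailing_zeros)
{
    std::string buf(scale, '0');
    int last_nonzero_pos = 0;
    for (int pos = scale - 1; pos >= 0; --pos)
    {
        auto remainder = frac % 10;
        frac /= 10;

        if (remainder != 0 && last_nonzero_pos == 0)
            last_nonzero_pos = pos;

        buf[pos] += static_cast<char>(remainder);
    }
    return "." + buf.substr(0, trailing_zeros ? scale : last_nonzero_pos + 1);
}

int main()
{
    std::printf("%s\n", formatFraction(4500, 6, false).c_str()); // prints .0045
    std::printf("%s\n", formatFraction(4500, 6, true).c_str());  // prints .004500
    return 0;
}
```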
From cda42e6dd4eaa56822ad64aad7aa09f632547d93 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sat, 22 Jul 2023 05:05:51 +0200
Subject: [PATCH 1878/1997] Add a test

---
 .../queries/0_stateless/02831_regexp_analyze_recursion.reference | 0
 tests/queries/0_stateless/02831_regexp_analyze_recursion.sql     | 1 +
 2 files changed, 1 insertion(+)
 create mode 100644 tests/queries/0_stateless/02831_regexp_analyze_recursion.reference
 create mode 100644 tests/queries/0_stateless/02831_regexp_analyze_recursion.sql

diff --git a/tests/queries/0_stateless/02831_regexp_analyze_recursion.reference b/tests/queries/0_stateless/02831_regexp_analyze_recursion.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/02831_regexp_analyze_recursion.sql b/tests/queries/0_stateless/02831_regexp_analyze_recursion.sql
new file mode 100644
index 00000000000..018d1f031e6
--- /dev/null
+++ b/tests/queries/0_stateless/02831_regexp_analyze_recursion.sql
@@ -0,0 +1 @@
+SELECT match('', repeat('(', 100000)); -- { serverError 306 }

From 21ffce0ff20fc7f136d8d5b05369a1abcdc01be3 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sat, 22 Jul 2023 05:06:02 +0200
Subject: [PATCH 1879/1997] Check regular expression depth

---
 src/Common/OptimizedRegularExpression.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/Common/OptimizedRegularExpression.cpp b/src/Common/OptimizedRegularExpression.cpp
index c542945c78d..0b80e2f3f97 100644
--- a/src/Common/OptimizedRegularExpression.cpp
+++ b/src/Common/OptimizedRegularExpression.cpp
@@ -1,6 +1,7 @@
 #include
 #include
 #include
+#include
 #include

 #define MIN_LENGTH_FOR_STRSTR 3
@@ -50,6 +51,8 @@ const char * analyzeImpl(
     bool & is_trivial,
     Literals & global_alternatives)
 {
+    checkStackSize();
+
     /** The expression is trivial if all the metacharacters in it are escaped.
       * The non-alternative string is
       *  a string outside parentheses,
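[PATCH 1879/1997] guards the recursive analyzeImpl with ClickHouse's checkStackSize() helper, so pathological inputs like repeat('(', 100000) fail with an error instead of overflowing the stack. Where no stack-probing helper exists, the same protection is often done with an explicit depth counter; a hedged standalone sketch of that alternative (the limit of 1000 is an arbitrary illustration, not ClickHouse's threshold):

```cpp
#include <algorithm>
#include <stdexcept>
#include <string_view>

// Recursively measure '(' ... ')' nesting depth, refusing inputs that would
// recurse too deep -- an explicit-counter analogue of checkStackSize().
static int analyzeGroup(std::string_view s, size_t & pos, int depth)
{
    if (depth > 1000)
        throw std::runtime_error("Regexp is too deeply nested");

    int max_depth = depth;
    while (pos < s.size())
    {
        char c = s[pos++];
        if (c == '(')
            max_depth = std::max(max_depth, analyzeGroup(s, pos, depth + 1));
        else if (c == ')')
            return max_depth;
    }
    return max_depth;
}

int main()
{
    std::string_view pattern = "((a|b)c)";
    size_t pos = 0;
    return analyzeGroup(pattern, pos, 0) == 2 ? 0 : 1; // exits 0 on success
}
```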
From de2016261ef32878456de9efae5cfab748611853 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sat, 22 Jul 2023 05:08:27 +0200
Subject: [PATCH 1880/1997] Get rid of it

---
 docker/test/fasttest/run.sh | 1 -
 1 file changed, 1 deletion(-)

diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh
index 828c73e6781..e25b5fdbfed 100755
--- a/docker/test/fasttest/run.sh
+++ b/docker/test/fasttest/run.sh
@@ -141,7 +141,6 @@ function clone_submodules
         contrib/jemalloc
         contrib/replxx
         contrib/wyhash
-        contrib/hashidsxx
         contrib/c-ares
         contrib/morton-nd
         contrib/xxHash
From 20625d75ab52319b8e67e50d2df803d0e2dc0934 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Thu, 15 Jun 2023 14:08:43 +0200
Subject: [PATCH 1881/1997] Fix optimize_skip_unused_shards with JOINs

In case of JOIN query may contains conditions for other tables, while
optimize_skip_unused_shards was pretty dumb and failed to skip such
columns.

Fix this by removing JOIN before applying this optimization.

v2: restriction for analyzer
v3: ignore 01940_custom_tld_sharding_key under analyzer
Signed-off-by: Azat Khuzhin
Co-Authored-By: Alexey Milovidov

---
 src/Storages/StorageDistributed.cpp           | 42 ++++++++------
 src/Storages/StorageDistributed.h             |  4 +-
 tests/analyzer_tech_debt.txt                  |  2 +
 ...optimize_skip_unused_shards_join.reference |  0
 ...02790_optimize_skip_unused_shards_join.sql | 55 +++++++++++++++++++
 5 files changed, 84 insertions(+), 19 deletions(-)
 create mode 100644 tests/queries/0_stateless/02790_optimize_skip_unused_shards_join.reference
 create mode 100644 tests/queries/0_stateless/02790_optimize_skip_unused_shards_join.sql

diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp
index 0727658160c..1a99d272cab 100644
--- a/src/Storages/StorageDistributed.cpp
+++ b/src/Storages/StorageDistributed.cpp
@@ -75,6 +75,7 @@
 #include
 #include
 #include
+#include
 #include
 #include

@@ -434,7 +435,7 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage(
 {
     /// Always calculate optimized cluster here, to avoid conditions during read()
     /// (Anyway it will be calculated in the read())
-    ClusterPtr optimized_cluster = getOptimizedCluster(local_context, storage_snapshot, query_info.query);
+    ClusterPtr optimized_cluster = getOptimizedCluster(local_context, storage_snapshot, query_info);
     if (optimized_cluster)
     {
         LOG_DEBUG(log, "Skipping irrelevant shards - the query will be sent to the following shards of the cluster (shard numbers): {}",
@@ -1297,7 +1298,7 @@ ClusterPtr StorageDistributed::getCluster() const
 }

 ClusterPtr StorageDistributed::getOptimizedCluster(
-    ContextPtr local_context, const StorageSnapshotPtr & storage_snapshot, const ASTPtr & query_ptr) const
+    ContextPtr local_context, const StorageSnapshotPtr & storage_snapshot, const SelectQueryInfo & query_info) const
 {
     ClusterPtr cluster = getCluster();
     const Settings & settings = local_context->getSettingsRef();
@@ -1306,7 +1307,7 @@ ClusterPtr StorageDistributed::getOptimizedCluster(
     if (has_sharding_key && sharding_key_is_usable)
     {
-        ClusterPtr optimized = skipUnusedShards(cluster, query_ptr, storage_snapshot, local_context);
+        ClusterPtr optimized = skipUnusedShards(cluster, query_info, storage_snapshot, local_context);
         if (optimized)
             return optimized;
     }
@@ -1355,25 +1356,34 @@ IColumn::Selector StorageDistributed::createSelector(const ClusterPtr cluster, c
 /// using constraints from "PREWHERE" and "WHERE" conditions, otherwise returns `nullptr`
 ClusterPtr StorageDistributed::skipUnusedShards(
     ClusterPtr cluster,
-    const ASTPtr & query_ptr,
+    const SelectQueryInfo & query_info,
     const StorageSnapshotPtr & storage_snapshot,
     ContextPtr local_context) const
 {
-    const auto & select = query_ptr->as();
-
+    const auto & select = query_info.query->as();
     if (!select.prewhere() && !select.where())
-    {
         return nullptr;
-    }
+
+    /// FIXME: support analyzer
+    if (!query_info.syntax_analyzer_result)
+        return nullptr;

     ASTPtr condition_ast;
-    if (select.prewhere() && select.where())
+    /// Remove JOIN from the query since it may contain a condition for other tables.
+    /// But only the conditions for the left table should be analyzed for shard skipping.
     {
-        condition_ast = makeASTFunction("and", select.prewhere()->clone(), select.where()->clone());
-    }
-    else
-    {
-        condition_ast = select.prewhere() ? select.prewhere()->clone() : select.where()->clone();
+        ASTPtr select_without_join_ptr = select.clone();
+        ASTSelectQuery select_without_join = select_without_join_ptr->as();
+        TreeRewriterResult analyzer_result_without_join = *query_info.syntax_analyzer_result;
+
+        removeJoin(select_without_join, analyzer_result_without_join, local_context);
+        if (!select_without_join.prewhere() && !select_without_join.where())
+            return nullptr;
+
+        if (select_without_join.prewhere() && select_without_join.where())
+            condition_ast = makeASTFunction("and", select_without_join.prewhere()->clone(), select_without_join.where()->clone());
+        else
+            condition_ast = select_without_join.prewhere() ? select_without_join.prewhere()->clone() : select_without_join.where()->clone();
     }

     replaceConstantExpressions(condition_ast, local_context, storage_snapshot->metadata->getColumns().getAll(), shared_from_this(), storage_snapshot);
@@ -1396,11 +1406,9 @@ ClusterPtr StorageDistributed::skipUnusedShards(
         return nullptr;
     }

-    // Can't get definite answer if we can skip any shards
+    // Can't get a definite answer if we can skip any shards
     if (!blocks)
-    {
         return nullptr;
-    }

     std::set shards;

diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h
index f45286341cf..615d6e337b6 100644
--- a/src/Storages/StorageDistributed.h
+++ b/src/Storages/StorageDistributed.h
@@ -182,10 +182,10 @@ private:
     /// Apply the following settings:
     /// - optimize_skip_unused_shards
     /// - force_optimize_skip_unused_shards
-    ClusterPtr getOptimizedCluster(ContextPtr, const StorageSnapshotPtr & storage_snapshot, const ASTPtr & query_ptr) const;
+    ClusterPtr getOptimizedCluster(ContextPtr, const StorageSnapshotPtr & storage_snapshot, const SelectQueryInfo & query_info) const;

     ClusterPtr skipUnusedShards(
-        ClusterPtr cluster, const ASTPtr & query_ptr, const StorageSnapshotPtr & storage_snapshot, ContextPtr context) const;
+        ClusterPtr cluster, const SelectQueryInfo & query_info, const StorageSnapshotPtr & storage_snapshot, ContextPtr context) const;

     /// This method returns optimal query processing stage.
     ///
diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt
index e0f259306aa..8ffb94e17b8 100644
--- a/tests/analyzer_tech_debt.txt
+++ b/tests/analyzer_tech_debt.txt
@@ -130,3 +130,5 @@
 02581_share_big_sets_between_mutation_tasks_long
 02581_share_big_sets_between_multiple_mutations_tasks_long
 00992_system_parts_race_condition_zookeeper_long
+02790_optimize_skip_unused_shards_join
+01940_custom_tld_sharding_key
diff --git a/tests/queries/0_stateless/02790_optimize_skip_unused_shards_join.reference b/tests/queries/0_stateless/02790_optimize_skip_unused_shards_join.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/02790_optimize_skip_unused_shards_join.sql b/tests/queries/0_stateless/02790_optimize_skip_unused_shards_join.sql
new file mode 100644
index 00000000000..0773e0a9a5e
--- /dev/null
+++ b/tests/queries/0_stateless/02790_optimize_skip_unused_shards_join.sql
@@ -0,0 +1,55 @@
+-- Issue: https://github.com/ClickHouse/ClickHouse/issues/15995
+
+DROP TABLE IF EXISTS outer;
+DROP TABLE IF EXISTS inner;
+
+DROP TABLE IF EXISTS outer_distributed;
+DROP TABLE IF EXISTS inner_distributed;
+
+CREATE TABLE IF NOT EXISTS outer
+(
+    `id` UInt64,
+    `organization_id` UInt64,
+    `version` UInt64
+)
+ENGINE = ReplacingMergeTree(version)
+PARTITION BY organization_id % 8
+ORDER BY (organization_id, id);
+
+CREATE TABLE inner
+(
+    `id` UInt64,
+    `outer_id` UInt64,
+    `organization_id` UInt64,
+    `version` UInt64,
+    `date` Date
+)
+ENGINE = ReplacingMergeTree(version)
+PARTITION BY toYYYYMM(date)
+ORDER BY (organization_id, outer_id);
+
+CREATE TABLE inner_distributed AS inner
+ENGINE = Distributed('test_cluster_two_shards', currentDatabase(), 'inner', intHash64(organization_id));
+
+CREATE TABLE outer_distributed AS outer
+ENGINE = Distributed('test_cluster_two_shards', currentDatabase(), 'outer', intHash64(organization_id));
+
+SELECT
+    sum(if(inner_distributed.id != 0, 1, 0)) AS total,
+    inner_distributed.date AS date
+FROM outer_distributed AS outer_distributed
+FINAL
+LEFT JOIN
+(
+    SELECT
+        inner_distributed.outer_id AS outer_id,
+        inner_distributed.id AS id,
+        inner_distributed.date AS date
+    FROM inner_distributed AS inner_distributed
+    FINAL
+    WHERE inner_distributed.organization_id = 15078
+) AS inner_distributed ON inner_distributed.outer_id = outer_distributed.id
+WHERE (outer_distributed.organization_id = 15078) AND (date != toDate('1970-01-01'))
+GROUP BY date
+ORDER BY date DESC
+SETTINGS distributed_product_mode = 'local', optimize_skip_unused_shards = 1;
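The patch above strips the JOIN from the query before analyzing WHERE and PREWHERE, so only conditions on the left (sharded) table feed shard pruning. The pruning step itself boils down to: evaluate the sharding expression for the key values a condition pins down, and keep only the matching shards. A toy C++ model of that idea (std::hash stands in for intHash64, and the value-list condition representation is a simplification, not ClickHouse's AST machinery):

```cpp
#include <cstdint>
#include <functional>
#include <set>
#include <vector>

// Toy shard pruning: given candidate sharding-key values extracted from a
// WHERE condition, compute which shards can possibly contain matching rows.
std::set<size_t> prunedShards(const std::vector<uint64_t> & key_values,
                              size_t shard_count,
                              const std::function<uint64_t(uint64_t)> & sharding_fn)
{
    std::set<size_t> shards;
    for (uint64_t v : key_values)
        shards.insert(sharding_fn(v) % shard_count);
    return shards;
}

int main()
{
    // WHERE organization_id = 15078 pins the key to a single value, so at
    // most one of the two shards has to be queried (as in the test above).
    auto shards = prunedShards({15078}, 2, [](uint64_t v) { return std::hash<uint64_t>{}(v); });
    return shards.size() == 1 ? 0 : 1;
}
```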
From c3c6560c9511163fc14eac2be8f89c30d3bcce1d Mon Sep 17 00:00:00 2001
From: vdimir
Date: Sat, 22 Jul 2023 08:44:19 +0000
Subject: [PATCH 1882/1997] Add 02815_range_dict_no_direct_join to
 analyzer_tech_debt.txt

---
 tests/analyzer_tech_debt.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt
index e0f259306aa..19b90a39800 100644
--- a/tests/analyzer_tech_debt.txt
+++ b/tests/analyzer_tech_debt.txt
@@ -130,3 +130,4 @@
 02581_share_big_sets_between_mutation_tasks_long
 02581_share_big_sets_between_multiple_mutations_tasks_long
 00992_system_parts_race_condition_zookeeper_long
+02815_range_dict_no_direct_join

From 5ca6c97832f786e6e3be085e3ec79829f9233cdd Mon Sep 17 00:00:00 2001
From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com>
Date: Sat, 22 Jul 2023 12:03:20 +0200
Subject: [PATCH 1883/1997] Update gtest_lru_file_cache.cpp

---
 src/Interpreters/tests/gtest_lru_file_cache.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Interpreters/tests/gtest_lru_file_cache.cpp b/src/Interpreters/tests/gtest_lru_file_cache.cpp
index 12e7d9372f7..dab14a66ed7 100644
--- a/src/Interpreters/tests/gtest_lru_file_cache.cpp
+++ b/src/Interpreters/tests/gtest_lru_file_cache.cpp
@@ -489,7 +489,6 @@ TEST_F(FileCacheTest, get)
             download(file_segment);
             ASSERT_EQ(file_segment.state(), State::DOWNLOADED);
-            file_segment.completePartAndResetDownloader();

         other_1.join();

From 363201270c00f0ebfa61e80471b372f434370380 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sat, 22 Jul 2023 17:49:53 +0300
Subject: [PATCH 1884/1997] Update 01710_query_log_with_projection_info.sql

---
 .../0_stateless/01710_query_log_with_projection_info.sql | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/queries/0_stateless/01710_query_log_with_projection_info.sql b/tests/queries/0_stateless/01710_query_log_with_projection_info.sql
index 25e7e8fed60..cd84b392fe5 100644
--- a/tests/queries/0_stateless/01710_query_log_with_projection_info.sql
+++ b/tests/queries/0_stateless/01710_query_log_with_projection_info.sql
@@ -62,3 +62,5 @@ FROM
     system.query_log
 WHERE
     current_database=currentDatabase()
     and query = 'SELECT min(id) FROM t FORMAT Null;';
+
+DROP TABLE t;

From dab954a92d7893a7ebbef9cda0a3aedf63a96a50 Mon Sep 17 00:00:00 2001
From: Han Fei
Date: Sat, 22 Jul 2023 18:10:54 +0200
Subject: [PATCH 1885/1997] do not throw exception in
 OptimizedRegularExpressionImpl::analyze

---
 src/Common/OptimizedRegularExpression.cpp              | 13 +++++++++++--
 .../0_stateless/02831_regexp_analyze_recursion.sql     |  2 +-
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/Common/OptimizedRegularExpression.cpp b/src/Common/OptimizedRegularExpression.cpp
index 0b80e2f3f97..918ebd75fc0 100644
--- a/src/Common/OptimizedRegularExpression.cpp
+++ b/src/Common/OptimizedRegularExpression.cpp
@@ -423,6 +423,7 @@ void OptimizedRegularExpressionImpl::analyze(
     bool & is_trivial,
     bool & required_substring_is_prefix,
     std::vector & alternatives)
+try
 {
     Literals alternative_literals;
     Literal required_literal;
@@ -432,12 +433,20 @@
     for (auto & lit : alternative_literals)
         alternatives.push_back(std::move(lit.literal));
 }
+catch(...)
+{
+    required_substring = "";
+    is_trivial = false;
+    required_substring_is_prefix = false;
+    alternatives.clear();
+    std::cerr << "Analyze RegularExpression failed, got error: {}" << DB::getCurrentExceptionMessage(false) << "\n";
+}

 template
 OptimizedRegularExpressionImpl::OptimizedRegularExpressionImpl(const std::string & regexp_, int options)
 {
-    std::vector alternativesDummy; /// this vector extracts patterns a,b,c from pattern (a|b|c). for now it's not used.
-    analyze(regexp_, required_substring, is_trivial, required_substring_is_prefix, alternativesDummy);
+    std::vector alternatives_dummy; /// this vector extracts patterns a,b,c from pattern (a|b|c). for now it's not used.
+    analyze(regexp_, required_substring, is_trivial, required_substring_is_prefix, alternatives_dummy);

     /// Just three following options are supported
diff --git a/tests/queries/0_stateless/02831_regexp_analyze_recursion.sql b/tests/queries/0_stateless/02831_regexp_analyze_recursion.sql
index 018d1f031e6..a2075ae903b 100644
--- a/tests/queries/0_stateless/02831_regexp_analyze_recursion.sql
+++ b/tests/queries/0_stateless/02831_regexp_analyze_recursion.sql
@@ -1 +1 @@
-SELECT match('', repeat('(', 100000)); -- { serverError 306 }
+SELECT match('', repeat('(', 100000)); -- { serverError 427 }
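With the function-level try/catch above, the substring analysis "fails open": if it throws (for example on the depth check from [PATCH 1879/1997]), the regexp is still evaluated, just without the fast-path hints, and the expected error in the test changes accordingly. A generic sketch of that fail-open pattern (the toy size check merely stands in for whatever the real analysis might throw on):

```cpp
#include <iostream>
#include <optional>
#include <stdexcept>
#include <string>

struct Hints
{
    std::string required_substring;
    bool is_trivial = false;
};

// Attempt an optional optimization; on any failure, log and fall back to the
// unoptimized path instead of failing the whole query.
std::optional<Hints> tryAnalyze(const std::string & pattern)
{
    try
    {
        // Real analysis would go here; assume it may throw on pathological
        // patterns (too deeply nested, too long, ...).
        if (pattern.size() > 100000)
            throw std::runtime_error("pattern too complex");
        return Hints{"", false};
    }
    catch (...)
    {
        std::cerr << "analyze failed, continuing without hints\n";
        return std::nullopt; // caller runs the full regexp engine unfiltered
    }
}

int main()
{
    return tryAnalyze(std::string(200000, '(')).has_value() ? 1 : 0;
}
```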
From c60090ccbd30143d44ab715b8b7b5e0060a2095f Mon Sep 17 00:00:00 2001
From: Igor Nikonov
Date: Sat, 22 Jul 2023 17:43:22 +0000
Subject: [PATCH 1886/1997] Add test with materialize() + fix

---
 ...2810_fix_remove_dedundant_distinct_view.reference | 12 ++++++++++--
 .../02810_fix_remove_dedundant_distinct_view.sql     | 10 +++++++++-
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.reference b/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.reference
index 01f14f82e94..ec714a5df07 100644
--- a/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.reference
+++ b/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.reference
@@ -8,6 +8,14 @@ FROM
 )
 WHERE explain ILIKE '%distinct%';
 2
-SELECT DISTINCT x FROM tab_v;
-2
+SELECT DISTINCT x FROM tab_v ORDER BY x;
 1
+2
+-- explicitly checking that materialize() doesn't affect the result, - redundant DISTINCT is still removed
+SELECT count()
+FROM
+(
+    EXPLAIN SELECT DISTINCT x FROM (SELECT materialize(x) as x FROM (select DISTINCT x from tab))
+)
+WHERE explain ILIKE '%distinct%';
+2
diff --git a/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.sql b/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.sql
index 99fc24dae8b..ca0a2edd99d 100644
--- a/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.sql
+++ b/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.sql
@@ -19,4 +19,12 @@ FROM
 )
 WHERE explain ILIKE '%distinct%';

-SELECT DISTINCT x FROM tab_v;
+SELECT DISTINCT x FROM tab_v ORDER BY x;
+
+-- explicitly checking that materialize() doesn't affect the result, - redundant DISTINCT is still removed
+SELECT count()
+FROM
+(
+    EXPLAIN SELECT DISTINCT x FROM (SELECT materialize(x) as x FROM (select DISTINCT x from tab))
+)
+WHERE explain ILIKE '%distinct%';

From afdda489bdfb27d1db2a7554223f5dfcb8cca7b1 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sat, 22 Jul 2023 19:53:16 +0200
Subject: [PATCH 1887/1997] Fix test

---
 .../02790_sql_standard_fetch.reference        | 72 +++++++++----------
 .../0_stateless/02790_sql_standard_fetch.sql  | 12 ++--
 2 files changed, 42 insertions(+), 42 deletions(-)

diff --git a/tests/queries/0_stateless/02790_sql_standard_fetch.reference b/tests/queries/0_stateless/02790_sql_standard_fetch.reference
index 429eecbc936..270af6e5c17 100644
--- a/tests/queries/0_stateless/02790_sql_standard_fetch.reference
+++ b/tests/queries/0_stateless/02790_sql_standard_fetch.reference
@@ -1,36 +1,36 @@
-┌─id─┬─name──┬─department─┬─salary─┐
-│ 25 │ Frank │ it │ 120 │
-│ 23 │ Henry │ it │ 104 │
-│ 24 │ Irene │ it │ 104 │
-│ 33 │ Alice │ sales │ 100 │
-│ 32 │ Dave │ sales │ 96 │
-└────┴───────┴────────────┴────────┘
-┌─id─┬─name──┬─department─┬─salary─┐
-│ 25 │ Frank │ it │ 120 │
-│ 23 │ Henry │ it │ 104 │
-│ 24 │ Irene │ it │ 104 │
-│ 33 │ Alice │ sales │ 100 │
-│ 32 │ Dave │ sales │ 96 │
-└────┴───────┴────────────┴────────┘
-┌─id─┬─name──┬─department─┬─salary─┐
-│ 25 │ Frank │ it │ 120 │
-│ 23 │ Henry │ it │ 104 │
-│ 24 │ Irene │ it │ 104 │
-│ 33 │ Alice │ sales │ 100 │
-│ 31 │ Cindy │ sales │ 96 │
-│ 32 │ Dave │ sales │ 96 │
-└────┴───────┴────────────┴────────┘
-┌─id─┬─name──┬─department─┬─salary─┐
-│ 33 │ Alice │ sales │ 100 │
-│ 31 │ Cindy │ sales │ 96 │
-│ 32 │ Dave │ sales │ 96 │
-│ 22 │ Grace │ it │ 90 │
-│ 21 │ Emma │ it │ 84 │
-└────┴───────┴────────────┴────────┘
-┌─id─┬─name──┬─department─┬─salary─┐
-│ 33 │ Alice │ sales │ 100 │
-│ 31 │ Cindy │ sales │ 96 │
-│ 32 │ Dave │ sales │ 96 │
-│ 22 │ Grace │ it │ 90 │
-│ 21 │ Emma │ it │ 84 │
-└────┴───────┴────────────┴────────┘
+┌─id─┬─name───────────┬─department─┬─salary─┐
+│ 25 │ Frank │ it │ 120 │
+│ 23 │ Henry or Irene │ it │ 104 │
+│ 24 │ Henry or Irene │ it │ 104 │
+│ 33 │ Alice │ sales │ 100 │
+│ 32 │ Dave or Cindy │ sales │ 96 │
+└────┴────────────────┴────────────┴────────┘
+┌─id─┬─name───────────┬─department─┬─salary─┐
+│ 25 │ Frank │ it │ 120 │
+│ 23 │ Henry or Irene │ it │ 104 │
+│ 24 │ Henry or Irene │ it │ 104 │
+│ 33 │ Alice │ sales │ 100 │
+│ 32 │ Dave or Cindy │ sales │ 96 │
+└────┴────────────────┴────────────┴────────┘
+┌─id─┬─name───────────┬─department─┬─salary─┐
+│ 25 │ Frank │ it │ 120 │
+│ 23 │ Henry or Irene │ it │ 104 │
+│ 24 │ Henry or Irene │ it │ 104 │
+│ 33 │ Alice │ sales │ 100 │
+│ 31 │ Dave or Cindy │ sales │ 96 │
+│ 32 │ Dave or Cindy │ sales │ 96 │
+└────┴────────────────┴────────────┴────────┘
+┌─id─┬─name──────────┬─department─┬─salary─┐
+│ 33 │ Alice │ sales │ 100 │
+│ 31 │ Dave or Cindy │ sales │ 96 │
+│ 32 │ Dave or Cindy │ sales │ 96 │
+│ 22 │ Grace │ it │ 90 │
+│ 21 │ Emma │ it │ 84 │
+└────┴───────────────┴────────────┴────────┘
+┌─id─┬─name──────────┬─department─┬─salary─┐
+│ 33 │ Alice │ sales │ 100 │
+│ 31 │ Dave or Cindy │ sales │ 96 │
+│ 32 │ Dave or Cindy │ sales │ 96 │
+│ 22 │ Grace │ it │ 90 │
+│ 21 │ Emma │ it │ 84 │
+└────┴───────────────┴────────────┴────────┘
diff --git a/tests/queries/0_stateless/02790_sql_standard_fetch.sql b/tests/queries/0_stateless/02790_sql_standard_fetch.sql
index 4204279a746..07a806eddf9 100644
--- a/tests/queries/0_stateless/02790_sql_standard_fetch.sql
+++ b/tests/queries/0_stateless/02790_sql_standard_fetch.sql
@@ -1,33 +1,33 @@
 -- https://antonz.org/sql-fetch/
 CREATE TEMPORARY TABLE employees (id UInt64, name String, department String, salary UInt64);
-INSERT INTO employees VALUES (23, 'Henry', 'it', 104), (24, 'Irene', 'it', 104), (25, 'Frank', 'it', 120), (31, 'Cindy', 'sales', 96), (33, 'Alice', 'sales', 100), (32, 'Dave', 'sales', 96), (22, 'Grace', 'it', 90), (21, 'Emma', 'it', '84');
+INSERT INTO employees VALUES (23, 'Henry', 'it', 104), (24, 'Irene', 'it', 104), (25, 'Frank', 'it', 120), (31, 'Cindy', 'sales', 96), (33, 'Alice', 'sales', 100), (32, 'Dave', 'sales', 96), (22, 'Grace', 'it', 90), (21, 'Emma', 'it', 84);

 -- Determinism
 SET max_threads = 1, parallelize_output_from_storages = 0;

-select * from (SELECT * FROM employees ORDER BY id, name, department, salary)
+select id, transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary)
 order by salary desc
 limit 5
 format PrettyCompactNoEscapes;

-select * from (SELECT * FROM employees ORDER BY id, name, department, salary)
+select id, transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary)
 order by salary desc
 fetch first 5 rows only
 format PrettyCompactNoEscapes;

-select * from (SELECT * FROM employees ORDER BY id, name, department, salary)
+select id, transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary)
 order by salary desc
 fetch first 5 rows with ties
 format PrettyCompactNoEscapes;

-select * from (SELECT * FROM employees ORDER BY id, name, department, salary)
+select id, transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary)
 order by salary desc
 offset 3 rows
 fetch next 5 rows only
 format PrettyCompactNoEscapes;

-select * from (SELECT * FROM employees ORDER BY id, name, department, salary)
+select id, transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary)
 order by salary desc
 offset 3 rows
 fetch first 5 rows only

From 7a24de801d93957cd87e8a1d2f726b934912b038 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sat, 22 Jul 2023 19:53:45 +0200
Subject: [PATCH 1888/1997] Fix test

---
 .../02790_sql_standard_fetch.reference        | 72 +++++++++----------
 .../0_stateless/02790_sql_standard_fetch.sql  | 10 +--
 2 files changed, 41 insertions(+), 41 deletions(-)

diff --git a/tests/queries/0_stateless/02790_sql_standard_fetch.reference b/tests/queries/0_stateless/02790_sql_standard_fetch.reference
index 270af6e5c17..ef578b526da 100644
--- a/tests/queries/0_stateless/02790_sql_standard_fetch.reference
+++ b/tests/queries/0_stateless/02790_sql_standard_fetch.reference
@@ -1,36 +1,36 @@
-┌─id─┬─name───────────┬─department─┬─salary─┐
-│ 25 │ Frank │ it │ 120 │
-│ 23 │ Henry or Irene │ it │ 104 │
-│ 24 │ Henry or Irene │ it │ 104 │
-│ 33 │ Alice │ sales │ 100 │
-│ 32 │ Dave or Cindy │ sales │ 96 │
-└────┴────────────────┴────────────┴────────┘
+┌─name───────────┬─department─┬─salary─┐
+│ Frank │ it │ 120 │
+│ Henry or Irene │ it │ 104 │
+│ Henry or Irene │ it │ 104 │
+│ Alice │ sales │ 100 │
+│ Dave or Cindy │ 
sales │ 96 │ +└────────────────┴────────────┴────────┘ +┌─name───────────┬─department─┬─salary─┐ +│ Frank │ it │ 120 │ +│ Henry or Irene │ it │ 104 │ +│ Henry or Irene │ it │ 104 │ +│ Alice │ sales │ 100 │ +│ Dave or Cindy │ sales │ 96 │ +└────────────────┴────────────┴────────┘ +┌─name───────────┬─department─┬─salary─┐ +│ Frank │ it │ 120 │ +│ Henry or Irene │ it │ 104 │ +│ Henry or Irene │ it │ 104 │ +│ Alice │ sales │ 100 │ +│ Dave or Cindy │ sales │ 96 │ +│ Dave or Cindy │ sales │ 96 │ +└────────────────┴────────────┴────────┘ +┌─name──────────┬─department─┬─salary─┐ +│ Alice │ sales │ 100 │ +│ Dave or Cindy │ sales │ 96 │ +│ Dave or Cindy │ sales │ 96 │ +│ Grace │ it │ 90 │ +│ Emma │ it │ 84 │ +└───────────────┴────────────┴────────┘ +┌─name──────────┬─department─┬─salary─┐ +│ Alice │ sales │ 100 │ +│ Dave or Cindy │ sales │ 96 │ +│ Dave or Cindy │ sales │ 96 │ +│ Grace │ it │ 90 │ +│ Emma │ it │ 84 │ +└───────────────┴────────────┴────────┘ diff --git a/tests/queries/0_stateless/02790_sql_standard_fetch.sql b/tests/queries/0_stateless/02790_sql_standard_fetch.sql index 07a806eddf9..638cc66682d 100644 --- a/tests/queries/0_stateless/02790_sql_standard_fetch.sql +++ b/tests/queries/0_stateless/02790_sql_standard_fetch.sql @@ -6,28 +6,28 @@ INSERT INTO employees VALUES (23, 'Henry', 'it', 104), (24, 'Irene', 'it', 104), -- Determinism SET max_threads = 1, parallelize_output_from_storages = 0; -select id, transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary) +select transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary) order by salary desc limit 5 format PrettyCompactNoEscapes; -select id, transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary) +select transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary) order by salary desc fetch first 5 rows only format PrettyCompactNoEscapes; -select id, transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary) +select transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary) order by salary desc fetch first 5 rows with ties format PrettyCompactNoEscapes; -select id, transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary) +select transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary) order by salary desc offset 3 rows fetch next 5 rows only 
format PrettyCompactNoEscapes; -select id, transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary) +select transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary) order by salary desc offset 3 rows fetch first 5 rows only From 2c6bc318476ce98b916cd2ffb6a9a44f5a5488f8 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Sat, 22 Jul 2023 18:07:22 +0000 Subject: [PATCH 1889/1997] Test is not dependent on new analyzer --- .../0_stateless/02810_fix_remove_dedundant_distinct_view.sql | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.sql b/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.sql index ca0a2edd99d..10a68721c51 100644 --- a/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.sql +++ b/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.sql @@ -1,5 +1,3 @@ -set allow_experimental_analyzer=1; - drop table if exists tab_v; drop table if exists tab; create table tab (x UInt64, y UInt64) engine MergeTree() order by (x, y); From d25cd0d0b635196b1a4cb2178d93b7060bf02819 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 22 Jul 2023 20:21:33 +0200 Subject: [PATCH 1890/1997] Partial revert --- tests/queries/0_stateless/01187_set_profile_as_setting.sh | 2 +- .../0_stateless/02360_rename_table_along_with_log_name.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01187_set_profile_as_setting.sh b/tests/queries/0_stateless/01187_set_profile_as_setting.sh index fccac57aea8..dacb609d790 100755 --- a/tests/queries/0_stateless/01187_set_profile_as_setting.sh +++ b/tests/queries/0_stateless/01187_set_profile_as_setting.sh @@ -4,13 +4,13 @@ unset CLICKHOUSE_LOG_COMMENT CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=fatal # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh $CLICKHOUSE_CLIENT -n -m -q "select value, changed from system.settings where name='readonly';" $CLICKHOUSE_CLIENT -n -m -q "set profile='default'; select value, changed from system.settings where name='readonly';" $CLICKHOUSE_CLIENT -n -m -q "set profile='readonly'; select value, changed from system.settings where name='readonly';" 2>&1| grep -Fa "Cannot modify 'send_logs_level' setting in readonly mode" > /dev/null && echo "OK" +CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=fatal/g') $CLICKHOUSE_CLIENT -n -m -q "set profile='readonly'; select value, changed from system.settings where name='readonly';" ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=select+value,changed+from+system.settings+where+name='readonly'" diff --git a/tests/queries/0_stateless/02360_rename_table_along_with_log_name.sh b/tests/queries/0_stateless/02360_rename_table_along_with_log_name.sh index c07dcdd549b..e8c7f844b5c 100755 --- a/tests/queries/0_stateless/02360_rename_table_along_with_log_name.sh +++ b/tests/queries/0_stateless/02360_rename_table_along_with_log_name.sh @@ -1,7 +1,6 @@ #!/usr/bin/env bash CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=trace # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh @@ -12,6 +11,7 @@ $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS y;" $CLICKHOUSE_CLIENT -q "CREATE TABLE x(i int) ENGINE MergeTree ORDER BY i;" $CLICKHOUSE_CLIENT -q "RENAME TABLE x TO y;" +CLICKHOUSE_CLIENT_WITH_LOG=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=trace/g') regexp="${CLICKHOUSE_DATABASE}\\.x" # Check if there are still log entries with old table name $CLICKHOUSE_CLIENT_WITH_LOG --send_logs_source_regexp "$regexp" -q "INSERT INTO y VALUES(1);" From 0b258dda4ee618a4d002e2b5246d68bbd2c77c7e Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 21 Jul 2023 08:31:45 +0200 Subject: [PATCH 1891/1997] Reproducible builds for Rust From now on cargo will not download anything from the internet during builds. This step had been moved for docker image builds (via cargo vendor). And now cargo inside docker.io/clickhouse/binary-builder will not use any crates from the internet, so we don't need to add --offline for cargo commands in cmake (corrosion_import_crate()). Also the docker build command had been adjusted to allow following symlinks inside build context, by using tar, this is required for Rust packages. Note, that to make proper Cargo.lock that could be vendored I did the following: - per-project locks had been removed (since there is no automatic way to sync the workspace Cargo.lock with per-project Cargo.lock, since cargo update/generate-lockfile will use only per-project Cargo.toml files apparently, -Z minimal-versions does not helps either) - and to generate Cargo.lock with less changes I've pinned version in the Cargo.toml strictly, i.e. not 'foo = "0.1"' but 'foo = "=0.1"' then the Cargo.lock for workspace had been generated and afterwards I've reverted this part. Plus I have to update the dependencies afterwards, since otherwise there are conflicts with dependencies for std library. Non trivial. 
Signed-off-by: Azat Khuzhin --- .gitignore | 2 + docker/packager/binary/Dockerfile | 27 ++ docker/packager/binary/rust | 1 + rust/.dockerignore | 4 + rust/.gitignore | 4 + rust/BLAKE3/Cargo.lock | 92 ----- rust/CMakeLists.txt | 2 + rust/{skim => }/Cargo.lock | 519 +++++++++++++++++++++++++-- rust/Cargo.toml | 12 + rust/prql/Cargo.lock | 569 ------------------------------ tests/ci/docker_images_check.py | 33 +- tests/ci/docker_test.py | 12 +- 12 files changed, 582 insertions(+), 695 deletions(-) create mode 120000 docker/packager/binary/rust create mode 100644 rust/.dockerignore create mode 100644 rust/.gitignore delete mode 100644 rust/BLAKE3/Cargo.lock rename rust/{skim => }/Cargo.lock (66%) create mode 100644 rust/Cargo.toml delete mode 100644 rust/prql/Cargo.lock diff --git a/.gitignore b/.gitignore index 39d6f3f9fc8..5341f23a94f 100644 --- a/.gitignore +++ b/.gitignore @@ -69,6 +69,7 @@ cmake-build-* *.pyc __pycache__ *.pytest_cache +.mypy_cache test.cpp CPackConfig.cmake @@ -167,3 +168,4 @@ tests/integration/**/_gen /rust/**/target # It is autogenerated from *.in /rust/**/.cargo/config.toml +/rust/**/vendor diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index 897bcd24d04..99e748c41d4 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -58,6 +58,33 @@ RUN curl https://sh.rustup.rs -sSf | bash -s -- -y && \ rustup target add aarch64-apple-darwin && \ rustup target add powerpc64le-unknown-linux-gnu +# Create vendor cache for cargo. +# +# Note, that the config.toml for the root is used, you will not be able to +# install any other crates, except those which had been vendored (since if +# there is "replace-with" for some source, then cargo will not look to other +# remotes except this). +# +# Notes for the command itself: +# - --chown is required to preserve the rights +# - unstable-options for -C +# - chmod is required to fix the permissions, since builds are running from a different user +# - copy of the Cargo.lock is required for proper dependencies versions +# - cargo vendor --sync is requried to overcome [1] bug. 
+# +# [1]: https://github.com/rust-lang/wg-cargo-std-aware/issues/23 +COPY --chown=root:root /rust /rust/packages +RUN cargo -Z unstable-options -C /rust/packages vendor > $CARGO_HOME/config.toml && \ + cp "$(rustc --print=sysroot)"/lib/rustlib/src/rust/Cargo.lock "$(rustc --print=sysroot)"/lib/rustlib/src/rust/library/test/ && \ + cargo -Z unstable-options -C /rust/packages vendor --sync "$(rustc --print=sysroot)"/lib/rustlib/src/rust/library/test/Cargo.toml && \ + rm "$(rustc --print=sysroot)"/lib/rustlib/src/rust/library/test/Cargo.lock && \ + sed -i "s#\"vendor\"#\"/rust/vendor\"#" $CARGO_HOME/config.toml && \ + cat $CARGO_HOME/config.toml && \ + mv /rust/packages/vendor /rust/vendor && \ + chmod -R o=r+X /rust/vendor && \ + ls -R -l /rust/packages && \ + rm -r /rust/packages + # NOTE: Seems like gcc-11 is too new for ubuntu20 repository # A cross-linker for RISC-V 64 (we need it, because LLVM's LLD does not work): RUN add-apt-repository ppa:ubuntu-toolchain-r/test --yes \ diff --git a/docker/packager/binary/rust b/docker/packager/binary/rust new file mode 120000 index 00000000000..742dc49e9ac --- /dev/null +++ b/docker/packager/binary/rust @@ -0,0 +1 @@ +../../../rust \ No newline at end of file diff --git a/rust/.dockerignore b/rust/.dockerignore new file mode 100644 index 00000000000..6b761aa401c --- /dev/null +++ b/rust/.dockerignore @@ -0,0 +1,4 @@ +# Just in case ignore any cargo stuff (and just in case someone will run this +# docker build locally with build context using folder root): +target +vendor diff --git a/rust/.gitignore b/rust/.gitignore new file mode 100644 index 00000000000..f850cd563c9 --- /dev/null +++ b/rust/.gitignore @@ -0,0 +1,4 @@ +# This is for tar --exclude-vcs-ignores (and just in case someone will run +# docker build locally with build context created via tar): +target +vendor diff --git a/rust/BLAKE3/Cargo.lock b/rust/BLAKE3/Cargo.lock deleted file mode 100644 index 9ac60773732..00000000000 --- a/rust/BLAKE3/Cargo.lock +++ /dev/null @@ -1,92 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. 
-version = 3 - -[[package]] -name = "_ch_rust_blake3" -version = "0.1.0" -dependencies = [ - "blake3", - "libc", -] - -[[package]] -name = "arrayref" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544" - -[[package]] -name = "arrayvec" -version = "0.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" - -[[package]] -name = "blake3" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "526c210b4520e416420759af363083471656e819a75e831b8d2c9d5a584f2413" -dependencies = [ - "arrayref", - "arrayvec", - "cc", - "cfg-if", - "constant_time_eq", - "digest", -] - -[[package]] -name = "cc" -version = "1.0.73" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] -name = "constant_time_eq" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc" - -[[package]] -name = "digest" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3dd60d1080a57a05ab032377049e0591415d2b31afd7028356dbf3cc6dcb066" -dependencies = [ - "generic-array", -] - -[[package]] -name = "generic-array" -version = "0.14.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bff49e947297f3312447abdca79f45f4738097cc82b06e72054d2223f601f1b9" -dependencies = [ - "typenum", - "version_check", -] - -[[package]] -name = "libc" -version = "0.2.132" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8371e4e5341c3a96db127eb2465ac681ced4c433e01dd0e938adbef26ba93ba5" - -[[package]] -name = "typenum" -version = "1.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987" - -[[package]] -name = "version_check" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" diff --git a/rust/CMakeLists.txt b/rust/CMakeLists.txt index 41451fe0a1e..ca0886cb300 100644 --- a/rust/CMakeLists.txt +++ b/rust/CMakeLists.txt @@ -55,6 +55,8 @@ function(clickhouse_import_crate) endif() endif() + # Note, here --offline is not used, since on CI vendor archive is used, and + # passing --offline here will be inconvenient for local development. corrosion_import_crate(NO_STD ${ARGN} PROFILE ${profile}) endfunction() diff --git a/rust/skim/Cargo.lock b/rust/Cargo.lock similarity index 66% rename from rust/skim/Cargo.lock rename to rust/Cargo.lock index f55ea8a84b0..07bbf8ba27e 100644 --- a/rust/skim/Cargo.lock +++ b/rust/Cargo.lock @@ -2,6 +2,22 @@ # It is not intended for manual editing. 
version = 3 +[[package]] +name = "_ch_rust_blake3" +version = "0.1.0" +dependencies = [ + "blake3", + "libc", +] + +[[package]] +name = "_ch_rust_prql" +version = "0.1.0" +dependencies = [ + "prql-compiler", + "serde_json", +] + [[package]] name = "_ch_rust_skim_rust" version = "0.1.0" @@ -12,6 +28,32 @@ dependencies = [ "term", ] +[[package]] +name = "addr2line" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4fa78e18c64fce05e902adecd7a5eed15a5e0a3439f7b0e169f0252214865e3" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "ahash" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" +dependencies = [ + "getrandom", + "once_cell", + "version_check", +] + [[package]] name = "aho-corasick" version = "1.0.2" @@ -36,6 +78,31 @@ dependencies = [ "libc", ] +[[package]] +name = "anyhow" +version = "1.0.72" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b13c32d80ecc7ab747b80c3784bce54ee8a7a0cc4fbda9bf4cda2cf6fe90854" +dependencies = [ + "backtrace", +] + +[[package]] +name = "ariadne" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "367fd0ad87307588d087544707bc5fbf4805ded96c7db922b70d368fa1cb5702" +dependencies = [ + "unicode-width", + "yansi", +] + +[[package]] +name = "arrayref" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545" + [[package]] name = "arrayvec" version = "0.7.4" @@ -48,6 +115,21 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +[[package]] +name = "backtrace" +version = "0.3.68" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4319208da049c43661739c5fade2ba182f09d1dc2299b32298d3a31692b17e12" +dependencies = [ + "addr2line", + "cc", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", +] + [[package]] name = "beef" version = "0.5.2" @@ -60,6 +142,29 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "blake3" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "199c42ab6972d92c9f8995f086273d25c42fc0f7b2a1fcefba465c1352d25ba5" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", + "digest", +] + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + [[package]] name = "bumpalo" version = "3.13.0" @@ -93,6 +198,16 @@ dependencies = [ "winapi", ] +[[package]] +name = "chumsky" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23170228b96236b5a7299057ac284a321457700bc8c41a4476052f0f4ba5349d" +dependencies = [ + "hashbrown 0.12.3", + "stacker", +] + [[package]] name = "codespan-reporting" version = "0.11.1" @@ -103,6 +218,12 @@ 
dependencies = [ "unicode-width", ] +[[package]] +name = "constant_time_eq" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2" + [[package]] name = "core-foundation-sys" version = "0.8.4" @@ -177,10 +298,41 @@ dependencies = [ ] [[package]] -name = "cxx" -version = "1.0.101" +name = "crypto-common" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5032837c1384de3708043de9d4e97bb91290faca6c16529a28aa340592a78166" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "csv" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "626ae34994d3d8d668f4269922248239db4ae42d538b14c398b74a52208e8086" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" +dependencies = [ + "memchr", +] + +[[package]] +name = "cxx" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f68e12e817cb19eaab81aaec582b4052d07debd3c3c6b083b9d361db47c7dc9d" dependencies = [ "cc", "cxxbridge-flags", @@ -190,9 +342,9 @@ dependencies = [ [[package]] name = "cxx-build" -version = "1.0.101" +version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51368b3d0dbf356e10fcbfd455a038503a105ee556f7ee79b6bb8c53a7247456" +checksum = "e789217e4ab7cf8cc9ce82253180a9fe331f35f5d339f0ccfe0270b39433f397" dependencies = [ "cc", "codespan-reporting", @@ -200,24 +352,24 @@ dependencies = [ "proc-macro2", "quote", "scratch", - "syn 2.0.26", + "syn 2.0.27", ] [[package]] name = "cxxbridge-flags" -version = "1.0.101" +version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d9062157072e4aafc8e56ceaf8325ce850c5ae37578c852a0d4de2cecdded13" +checksum = "78a19f4c80fd9ab6c882286fa865e92e07688f4387370a209508014ead8751d0" [[package]] name = "cxxbridge-macro" -version = "1.0.101" +version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf01e8a540f5a4e0f284595834f81cf88572f244b768f051724537afa99a2545" +checksum = "b8fcfa71f66c8563c4fa9dd2bb68368d50267856f831ac5d85367e0805f9606c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.26", + "syn 2.0.27", ] [[package]] @@ -296,6 +448,17 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", + "subtle", +] + [[package]] name = "dirs-next" version = "2.0.0" @@ -319,9 +482,27 @@ dependencies = [ [[package]] name = "either" -version = "1.8.1" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" +checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" + +[[package]] +name = "enum-as-inner" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9720bba047d567ffc8a3cba48bf19126600e249ab7f128e9233e6376976a116" +dependencies = [ + "heck", + 
"proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] name = "fnv" @@ -338,6 +519,16 @@ dependencies = [ "thread_local", ] +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "getrandom" version = "0.2.10" @@ -349,6 +540,33 @@ dependencies = [ "wasi 0.11.0+wasi-snapshot-preview1", ] +[[package]] +name = "gimli" +version = "0.27.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c80984affa11d98d1b88b66ac8853f143217b399d3c74116778ff8fdb4ed2e" + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +dependencies = [ + "ahash", +] + +[[package]] +name = "hashbrown" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" + +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + [[package]] name = "hermit-abi" version = "0.3.2" @@ -384,6 +602,31 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" +[[package]] +name = "indexmap" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d" +dependencies = [ + "equivalent", + "hashbrown 0.14.0", +] + +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" + [[package]] name = "js-sys" version = "0.3.64" @@ -444,6 +687,21 @@ dependencies = [ "autocfg", ] +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "miniz_oxide" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" +dependencies = [ + "adler", +] + [[package]] name = "nix" version = "0.24.3" @@ -470,10 +728,20 @@ dependencies = [ ] [[package]] -name = "num-traits" -version = "0.2.15" +name = "nom" +version = "7.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "num-traits" +version = "0.2.16" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2" dependencies = [ "autocfg", ] @@ -488,6 +756,15 @@ dependencies = [ "libc", ] +[[package]] +name = "object" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bda667d9f2b5051b8833f59f3bf748b28ef54f850f4fcb389a252aa383866d1" +dependencies = [ + "memchr", +] + [[package]] name = "once_cell" version = "1.18.0" @@ -509,6 +786,41 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "prql-compiler" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c99b52154002ac7f286dd2293c2f8d4e30526c1d396b14deef5ada1deef3c9ff" +dependencies = [ + "anyhow", + "ariadne", + "chumsky", + "csv", + "enum-as-inner", + "itertools", + "lazy_static", + "log", + "once_cell", + "regex", + "semver", + "serde", + "serde_json", + "serde_yaml", + "sqlformat", + "sqlparser", + "strum", + "strum_macros", +] + +[[package]] +name = "psm" +version = "0.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874" +dependencies = [ + "cc", +] + [[package]] name = "quote" version = "1.0.31" @@ -589,12 +901,24 @@ version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2" +[[package]] +name = "rustc-demangle" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" + [[package]] name = "rustversion" version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" +[[package]] +name = "ryu" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" + [[package]] name = "scopeguard" version = "1.2.0" @@ -608,10 +932,57 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a3cf7c11c38cb994f3d40e8a8cde3bbd1f72a435e4c49e85d6553d8312306152" [[package]] -name = "serde" -version = "1.0.171" +name = "semver" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30e27d1e4fd7659406c492fd6cfaf2066ba8773de45ca75e855590f856dc34a9" +checksum = "b0293b4b29daaf487284529cc2f5675b8e57c61f70167ba415a463651fd6a918" +dependencies = [ + "serde", +] + +[[package]] +name = "serde" +version = "1.0.174" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b88756493a5bd5e5395d53baa70b194b05764ab85b59e43e4b8f4e1192fa9b1" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.174" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e5c3a298c7f978e53536f95a63bdc4c4a64550582f31a0359a9afda6aede62e" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.27", +] + +[[package]] +name = "serde_json" +version = "1.0.103" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d03b412469450d4404fe8499a268edd7f8b79fecb074b0d812ad64ca21f4031b" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "serde_yaml" +version = "0.9.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1a49e178e4452f45cb61d0cd8cebc1b0fafd3e41929e996cef79aa3aca91f574" +dependencies = [ + "indexmap", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] [[package]] name = "skim" @@ -638,12 +1009,74 @@ dependencies = [ "vte", ] +[[package]] +name = "sqlformat" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c12bc9199d1db8234678b7051747c07f517cdcf019262d1847b94ec8b1aee3e" +dependencies = [ + "itertools", + "nom", + "unicode_categories", +] + +[[package]] +name = "sqlparser" +version = "0.33.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "355dc4d4b6207ca8a3434fc587db0a8016130a574dbcdbfb93d7f7b5bc5b211a" +dependencies = [ + "log", + "serde", +] + +[[package]] +name = "stacker" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "winapi", +] + [[package]] name = "strsim" version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" +[[package]] +name = "strum" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f" +dependencies = [ + "strum_macros", +] + +[[package]] +name = "strum_macros" +version = "0.24.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "rustversion", + "syn 1.0.109", +] + +[[package]] +name = "subtle" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" + [[package]] name = "syn" version = "1.0.109" @@ -657,9 +1090,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.26" +version = "2.0.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45c3457aacde3c65315de5031ec191ce46604304d2446e803d71ade03308d970" +checksum = "b60f673f44a8255b9c8c657daf66a596d435f2da81a555b06dc644d080ba45e0" dependencies = [ "proc-macro2", "quote", @@ -688,22 +1121,22 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.43" +version = "1.0.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a35fc5b8971143ca348fa6df4f024d4d55264f3468c71ad1c2f365b0a4d58c42" +checksum = "611040a08a0439f8248d1990b111c95baa9c704c805fa1f62104b39655fd7f90" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.43" +version = "1.0.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "463fe12d7993d3b327787537ce8dd4dfa058de32fc2b195ef3cde03dc4771e8f" +checksum = "090198534930841fab3a5d1bb637cde49e339654e606195f8d9c76eeb081dc96" dependencies = [ "proc-macro2", "quote", - "syn 2.0.26", + "syn 2.0.27", ] [[package]] @@ -766,6 +1199,12 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "typenum" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" + [[package]] name = "unicode-ident" version = "1.0.11" @@ -778,12 +1217,30 @@ version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" +[[package]] +name = "unicode_categories" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" + +[[package]] +name = "unsafe-libyaml" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28467d3e1d3c6586d8f25fa243f544f5800fec42d97032474e17222c2b75cfa" + [[package]] name = "utf8parse" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + [[package]] name = "vte" version = "0.11.1" @@ -838,7 +1295,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.26", + "syn 2.0.27", "wasm-bindgen-shared", ] @@ -860,7 +1317,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.26", + "syn 2.0.27", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -967,3 +1424,9 @@ name = "windows_x86_64_msvc" version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" + +[[package]] +name = "yansi" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" diff --git a/rust/Cargo.toml b/rust/Cargo.toml new file mode 100644 index 00000000000..2a2b582cea8 --- /dev/null +++ b/rust/Cargo.toml @@ -0,0 +1,12 @@ +# workspace is required to vendor crates for all packages. +[workspace] +members = [ + "BLAKE3", + "skim", + "prql", +] +resolver = "2" + +# FIXME: even though the profiles should be defined in the main cargo config we +# cannot do this yet, since we compile each package separatelly, so you should +# ignore warning from cargo about this. diff --git a/rust/prql/Cargo.lock b/rust/prql/Cargo.lock deleted file mode 100644 index da94e4ca852..00000000000 --- a/rust/prql/Cargo.lock +++ /dev/null @@ -1,569 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. 
-version = 3 - -[[package]] -name = "_ch_rust_prql" -version = "0.1.0" -dependencies = [ - "prql-compiler", - "serde_json", -] - -[[package]] -name = "addr2line" -version = "0.20.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4fa78e18c64fce05e902adecd7a5eed15a5e0a3439f7b0e169f0252214865e3" -dependencies = [ - "gimli", -] - -[[package]] -name = "adler" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" - -[[package]] -name = "ahash" -version = "0.7.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" -dependencies = [ - "getrandom", - "once_cell", - "version_check", -] - -[[package]] -name = "aho-corasick" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41" -dependencies = [ - "memchr", -] - -[[package]] -name = "anyhow" -version = "1.0.71" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c7d0618f0e0b7e8ff11427422b64564d5fb0be1940354bfe2e0529b18a9d9b8" -dependencies = [ - "backtrace", -] - -[[package]] -name = "ariadne" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "367fd0ad87307588d087544707bc5fbf4805ded96c7db922b70d368fa1cb5702" -dependencies = [ - "unicode-width", - "yansi", -] - -[[package]] -name = "backtrace" -version = "0.3.68" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4319208da049c43661739c5fade2ba182f09d1dc2299b32298d3a31692b17e12" -dependencies = [ - "addr2line", - "cc", - "cfg-if", - "libc", - "miniz_oxide", - "object", - "rustc-demangle", -] - -[[package]] -name = "cc" -version = "1.0.79" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] -name = "chumsky" -version = "0.9.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23170228b96236b5a7299057ac284a321457700bc8c41a4476052f0f4ba5349d" -dependencies = [ - "hashbrown 0.12.3", - "stacker", -] - -[[package]] -name = "csv" -version = "1.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "626ae34994d3d8d668f4269922248239db4ae42d538b14c398b74a52208e8086" -dependencies = [ - "csv-core", - "itoa", - "ryu", - "serde", -] - -[[package]] -name = "csv-core" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" -dependencies = [ - "memchr", -] - -[[package]] -name = "either" -version = "1.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" - -[[package]] -name = "enum-as-inner" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c9720bba047d567ffc8a3cba48bf19126600e249ab7f128e9233e6376976a116" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "equivalent" -version = "1.0.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "88bffebc5d80432c9b140ee17875ff173a8ab62faad5b257da912bd2f6c1c0a1" - -[[package]] -name = "getrandom" -version = "0.2.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" -dependencies = [ - "cfg-if", - "libc", - "wasi", -] - -[[package]] -name = "gimli" -version = "0.27.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6c80984affa11d98d1b88b66ac8853f143217b399d3c74116778ff8fdb4ed2e" - -[[package]] -name = "hashbrown" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" -dependencies = [ - "ahash", -] - -[[package]] -name = "hashbrown" -version = "0.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" - -[[package]] -name = "heck" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" - -[[package]] -name = "indexmap" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d" -dependencies = [ - "equivalent", - "hashbrown 0.14.0", -] - -[[package]] -name = "itertools" -version = "0.10.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" -dependencies = [ - "either", -] - -[[package]] -name = "itoa" -version = "1.0.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b02a5381cc465bd3041d84623d0fa3b66738b52b8e2fc3bab8ad63ab032f4a" - -[[package]] -name = "lazy_static" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" - -[[package]] -name = "libc" -version = "0.2.147" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" - -[[package]] -name = "log" -version = "0.4.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4" - -[[package]] -name = "memchr" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" - -[[package]] -name = "minimal-lexical" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" - -[[package]] -name = "miniz_oxide" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" -dependencies = [ - "adler", -] - -[[package]] -name = "nom" -version = "7.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" -dependencies = [ - "memchr", - "minimal-lexical", -] - -[[package]] -name = "object" -version = "0.31.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"8bda667d9f2b5051b8833f59f3bf748b28ef54f850f4fcb389a252aa383866d1" -dependencies = [ - "memchr", -] - -[[package]] -name = "once_cell" -version = "1.18.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" - -[[package]] -name = "proc-macro2" -version = "1.0.63" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b368fba921b0dce7e60f5e04ec15e565b3303972b42bcfde1d0713b881959eb" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "prql-compiler" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c99b52154002ac7f286dd2293c2f8d4e30526c1d396b14deef5ada1deef3c9ff" -dependencies = [ - "anyhow", - "ariadne", - "chumsky", - "csv", - "enum-as-inner", - "itertools", - "lazy_static", - "log", - "once_cell", - "regex", - "semver", - "serde", - "serde_json", - "serde_yaml", - "sqlformat", - "sqlparser", - "strum", - "strum_macros", -] - -[[package]] -name = "psm" -version = "0.1.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874" -dependencies = [ - "cc", -] - -[[package]] -name = "quote" -version = "1.0.29" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "573015e8ab27661678357f27dc26460738fd2b6c86e46f386fde94cb5d913105" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "regex" -version = "1.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89089e897c013b3deb627116ae56a6955a72b8bed395c9526af31c9fe528b484" -dependencies = [ - "aho-corasick", - "memchr", - "regex-automata", - "regex-syntax", -] - -[[package]] -name = "regex-automata" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa250384981ea14565685dea16a9ccc4d1c541a13f82b9c168572264d1df8c56" -dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax", -] - -[[package]] -name = "regex-syntax" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ab07dc67230e4a4718e70fd5c20055a4334b121f1f9db8fe63ef39ce9b8c846" - -[[package]] -name = "rustc-demangle" -version = "0.1.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" - -[[package]] -name = "rustversion" -version = "1.0.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc31bd9b61a32c31f9650d18add92aa83a49ba979c143eefd27fe7177b05bd5f" - -[[package]] -name = "ryu" -version = "1.0.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe232bdf6be8c8de797b22184ee71118d63780ea42ac85b61d1baa6d3b782ae9" - -[[package]] -name = "semver" -version = "1.0.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bebd363326d05ec3e2f532ab7660680f3b02130d780c299bca73469d521bc0ed" -dependencies = [ - "serde", -] - -[[package]] -name = "serde" -version = "1.0.166" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d01b7404f9d441d3ad40e6a636a7782c377d2abdbe4fa2440e2edcc2f4f10db8" -dependencies = [ - "serde_derive", -] - -[[package]] -name = "serde_derive" -version = "1.0.166" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dd83d6dde2b6b2d466e14d9d1acce8816dedee94f735eac6395808b3483c6d6" -dependencies = [ - "proc-macro2", - "quote", - "syn 
2.0.23", -] - -[[package]] -name = "serde_json" -version = "1.0.100" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f1e14e89be7aa4c4b78bdbdc9eb5bf8517829a600ae8eaa39a6e1d960b5185c" -dependencies = [ - "itoa", - "ryu", - "serde", -] - -[[package]] -name = "serde_yaml" -version = "0.9.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "452e67b9c20c37fa79df53201dc03839651086ed9bbe92b3ca585ca9fdaa7d85" -dependencies = [ - "indexmap", - "itoa", - "ryu", - "serde", - "unsafe-libyaml", -] - -[[package]] -name = "sqlformat" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c12bc9199d1db8234678b7051747c07f517cdcf019262d1847b94ec8b1aee3e" -dependencies = [ - "itertools", - "nom", - "unicode_categories", -] - -[[package]] -name = "sqlparser" -version = "0.33.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "355dc4d4b6207ca8a3434fc587db0a8016130a574dbcdbfb93d7f7b5bc5b211a" -dependencies = [ - "log", - "serde", -] - -[[package]] -name = "stacker" -version = "0.1.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce" -dependencies = [ - "cc", - "cfg-if", - "libc", - "psm", - "winapi", -] - -[[package]] -name = "strum" -version = "0.24.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f" -dependencies = [ - "strum_macros", -] - -[[package]] -name = "strum_macros" -version = "0.24.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "rustversion", - "syn 1.0.109", -] - -[[package]] -name = "syn" -version = "1.0.109" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "syn" -version = "2.0.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59fb7d6d8281a51045d62b8eb3a7d1ce347b76f312af50cd3dc0af39c87c1737" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "unicode-ident" -version = "1.0.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22049a19f4a68748a168c0fc439f9516686aa045927ff767eca0a85101fb6e73" - -[[package]] -name = "unicode-width" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" - -[[package]] -name = "unicode_categories" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" - -[[package]] -name = "unsafe-libyaml" -version = "0.2.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1865806a559042e51ab5414598446a5871b561d21b6764f2eabb0dd481d880a6" - -[[package]] -name = "version_check" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" - -[[package]] -name = "wasi" -version = "0.11.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" - -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "yansi" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 16a58a90dcf..fff2975cea4 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -8,6 +8,7 @@ import shutil import subprocess import time import sys +from glob import glob from pathlib import Path from typing import Any, Dict, List, Optional, Set, Tuple, Union @@ -31,6 +32,17 @@ TEMP_PATH = os.path.join(RUNNER_TEMP, "docker_images_check") ImagesDict = Dict[str, dict] +# workaround for mypy issue [1]: +# +# "Argument 1 to "map" has incompatible type overloaded function" [1] +# +# [1]: https://github.com/python/mypy/issues/9864 +# +# NOTE: simply lambda will do the trick as well, but pylint will not like it +def realpath(*args, **kwargs): + return os.path.realpath(*args, **kwargs) + + class DockerImage: def __init__( self, @@ -111,8 +123,23 @@ def get_changed_docker_images( changed_images = [] for dockerfile_dir, image_description in images_dict.items(): + source_dir = GITHUB_WORKSPACE.rstrip("/") + "/" + dockerfile_files = glob(f"{source_dir}/{dockerfile_dir}/**", recursive=True) + # resolve symlinks + dockerfile_files = list(map(realpath, dockerfile_files)) + # trim prefix to get relative path again, to match with files_changed + dockerfile_files = list(map(lambda x: x[len(source_dir) :], dockerfile_files)) + logging.info( + "Docker %s (source_dir=%s) build context for PR %s @ %s: %s", + dockerfile_dir, + source_dir, + pr_info.number, + pr_info.sha, + str(dockerfile_files), + ) + for f in files_changed: - if f.startswith(dockerfile_dir): + if f in dockerfile_files: name = image_description["name"] only_amd64 = image_description.get("only_amd64", False) logging.info( @@ -245,6 +272,8 @@ def build_and_push_one_image( cache_from = f"{cache_from} --cache-from type=registry,ref={image.repo}:{tag}" cmd = ( + # tar is requried to follow symlinks, since docker-build cannot do this + f"tar -v --exclude-vcs-ignores --show-transformed-names --transform 's#{image.full_path.lstrip('/')}#./#' --dereference --create {image.full_path} | " "docker buildx build --builder default " f"--label build-url={GITHUB_RUN_URL} " f"{from_tag_arg}" @@ -254,7 +283,7 @@ def build_and_push_one_image( f"{cache_from} " f"--cache-to type=inline,mode=max " f"{push_arg}" - f"--progress plain {image.full_path}" + f"--progress plain -" ) logging.info("Docker command to run: %s", cmd) with TeePopen(cmd, build_log) as proc: diff --git a/tests/ci/docker_test.py b/tests/ci/docker_test.py index d5d27f73694..c679ab984ee 100644 --- 
a/tests/ci/docker_test.py +++ b/tests/ci/docker_test.py @@ -126,12 +126,13 @@ class TestDockerImageCheck(unittest.TestCase): mock_popen.assert_called_once() mock_machine.assert_not_called() self.assertIn( + "tar -v --exclude-vcs-ignores --show-transformed-names --transform 's#path#./#' --dereference --create path | " f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} " "--build-arg FROM_TAG=version " f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} " "--tag name:version --cache-from type=registry,ref=name:version " "--cache-from type=registry,ref=name:latest " - "--cache-to type=inline,mode=max --push --progress plain path", + "--cache-to type=inline,mode=max --push --progress plain -", mock_popen.call_args.args, ) self.assertTrue(result) @@ -143,12 +144,13 @@ class TestDockerImageCheck(unittest.TestCase): mock_popen.assert_called_once() mock_machine.assert_not_called() self.assertIn( + "tar -v --exclude-vcs-ignores --show-transformed-names --transform 's#path#./#' --dereference --create path | " f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} " "--build-arg FROM_TAG=version2 " f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} " "--tag name:version2 --cache-from type=registry,ref=name:version2 " "--cache-from type=registry,ref=name:latest " - "--cache-to type=inline,mode=max --progress plain path", + "--cache-to type=inline,mode=max --progress plain -", mock_popen.call_args.args, ) self.assertTrue(result) @@ -160,11 +162,12 @@ class TestDockerImageCheck(unittest.TestCase): mock_popen.assert_called_once() mock_machine.assert_not_called() self.assertIn( + "tar -v --exclude-vcs-ignores --show-transformed-names --transform 's#path#./#' --dereference --create path | " f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} " f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} " "--tag name:version2 --cache-from type=registry,ref=name:version2 " "--cache-from type=registry,ref=name:latest " - "--cache-to type=inline,mode=max --progress plain path", + "--cache-to type=inline,mode=max --progress plain -", mock_popen.call_args.args, ) self.assertFalse(result) @@ -178,13 +181,14 @@ class TestDockerImageCheck(unittest.TestCase): mock_popen.assert_called_once() mock_machine.assert_not_called() self.assertIn( + "tar -v --exclude-vcs-ignores --show-transformed-names --transform 's#path#./#' --dereference --create path | " f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} " f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} " "--tag name:version2 --cache-from type=registry,ref=name:version2 " "--cache-from type=registry,ref=name:latest " "--cache-from type=registry,ref=name:cached-version " "--cache-from type=registry,ref=name:another-cached " - "--cache-to type=inline,mode=max --progress plain path", + "--cache-to type=inline,mode=max --progress plain -", mock_popen.call_args.args, ) self.assertFalse(result) From dc7c66396223329021641372c9156261edce5f99 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Sat, 22 Jul 2023 23:44:11 +0200 Subject: [PATCH 1892/1997] Update comment in DatabaseCatalog.cpp --- src/Interpreters/DatabaseCatalog.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index f9ed2c0d5ca..0d74e86a26d 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -344,7 +344,7 @@ DatabaseAndTable DatabaseCatalog::getTableImpl( DatabasePtr database; { std::lock_guard 
lock{databases_mutex}; - // hasDatabase() to avod getDatabaseName() throwing exception if database is empty. + // Callers assume that this method doesn't throw the exceptions, but getDatabaseName() can if there is no database part. auto it = table_id.hasDatabase() ? databases.find(table_id.getDatabaseName()) : databases.end(); if (databases.end() == it) { From 12065d94c5e35c51e3a94c1919f1a38f4723d272 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Sat, 22 Jul 2023 23:59:02 +0200 Subject: [PATCH 1893/1997] Update comment DatabaseCatalog.cpp --- src/Interpreters/DatabaseCatalog.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 0d74e86a26d..c8f332ae76d 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -344,7 +344,7 @@ DatabaseAndTable DatabaseCatalog::getTableImpl( DatabasePtr database; { std::lock_guard lock{databases_mutex}; - // Callers assume that this method doesn't throw the exceptions, but getDatabaseName() can if there is no database part. + // Callers assume that this method doesn't throw exceptions, but getDatabaseName() will throw if there is no database part. auto it = table_id.hasDatabase() ? databases.find(table_id.getDatabaseName()) : databases.end(); if (databases.end() == it) { From ef0dca626142322fa5420eea8fab491bb53c4ac2 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Sun, 23 Jul 2023 00:37:34 +0200 Subject: [PATCH 1894/1997] fix style --- src/Common/OptimizedRegularExpression.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/OptimizedRegularExpression.cpp b/src/Common/OptimizedRegularExpression.cpp index 918ebd75fc0..e636b0b987d 100644 --- a/src/Common/OptimizedRegularExpression.cpp +++ b/src/Common/OptimizedRegularExpression.cpp @@ -433,7 +433,7 @@ try for (auto & lit : alternative_literals) alternatives.push_back(std::move(lit.literal)); } -catch(...) +catch (...) { required_substring = ""; is_trivial = false; From 9bd8bdca98d21605f10d172b76c80951f990d965 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Sun, 23 Jul 2023 01:14:26 +0200 Subject: [PATCH 1895/1997] Better error message in case of empty database name --- src/Interpreters/DatabaseCatalog.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index c8f332ae76d..f54b0e0ab3a 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -343,9 +343,17 @@ DatabaseAndTable DatabaseCatalog::getTableImpl( DatabasePtr database; { - std::lock_guard lock{databases_mutex}; // Callers assume that this method doesn't throw exceptions, but getDatabaseName() will throw if there is no database part. - auto it = table_id.hasDatabase() ? databases.find(table_id.getDatabaseName()) : databases.end(); + // So, fail early and gracefully... 
+ if (!table_id.hasDatabase()) + { + if (exception) + exception->emplace(Exception(ErrorCodes::UNKNOWN_DATABASE, "Empty database name")); + return {}; + } + + std::lock_guard lock{databases_mutex}; + auto it = databases.find(table_id.getDatabaseName()); if (databases.end() == it) { if (exception) From 00d6f2ee08a3e442363a078b322adab7b6988f91 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 23 Jul 2023 04:56:47 +0200 Subject: [PATCH 1896/1997] Use incbin for resources, part 1 --- .gitmodules | 3 + contrib/CMakeLists.txt | 2 +- contrib/incbin | 1 + contrib/incbin-cmake/CMakeLists.txt | 4 + contrib/nlp-data-cmake/CMakeLists.txt | 15 -- programs/install/CMakeLists.txt | 3 + programs/install/Install.cpp | 13 +- programs/keeper/CMakeLists.txt | 15 -- programs/keeper/Keeper.cpp | 6 +- programs/server/CMakeLists.txt | 12 +- programs/server/Server.cpp | 11 +- programs/server/resources.cpp | 0 src/CMakeLists.txt | 6 +- src/Common/CMakeLists.txt | 2 +- src/Common/Config/ConfigProcessor.cpp | 33 ++-- src/Common/Config/ConfigProcessor.h | 3 + src/Common/FrequencyHolder.cpp | 181 ++++++++++++++++++ src/Common/FrequencyHolder.h | 170 +--------------- src/Daemon/BaseDaemon.cpp | 1 - src/Server/WebUIRequestHandler.cpp | 14 +- src/Storages/System/CMakeLists.txt | 12 +- .../System/attachInformationSchemaTables.cpp | 24 ++- 22 files changed, 268 insertions(+), 263 deletions(-) create mode 160000 contrib/incbin create mode 100644 contrib/incbin-cmake/CMakeLists.txt delete mode 100644 contrib/nlp-data-cmake/CMakeLists.txt create mode 100644 programs/server/resources.cpp create mode 100644 src/Common/FrequencyHolder.cpp diff --git a/.gitmodules b/.gitmodules index ba71a8ae3a7..30085fb8dd4 100644 --- a/.gitmodules +++ b/.gitmodules @@ -340,3 +340,6 @@ [submodule "contrib/c-ares"] path = contrib/c-ares url = https://github.com/c-ares/c-ares.git +[submodule "contrib/incbin"] + path = contrib/incbin + url = https://github.com/graphitemaster/incbin.git diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 0f68c0cbc7c..fdf6e60e58f 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -164,13 +164,13 @@ add_contrib (libpq-cmake libpq) add_contrib (nuraft-cmake NuRaft) add_contrib (fast_float-cmake fast_float) add_contrib (datasketches-cpp-cmake datasketches-cpp) +add_contrib (incbin-cmake incbin) option(ENABLE_NLP "Enable NLP functions support" ${ENABLE_LIBRARIES}) if (ENABLE_NLP) add_contrib (libstemmer-c-cmake libstemmer_c) add_contrib (wordnet-blast-cmake wordnet-blast) add_contrib (lemmagen-c-cmake lemmagen-c) - add_contrib (nlp-data-cmake nlp-data) add_contrib (cld2-cmake cld2) endif() diff --git a/contrib/incbin b/contrib/incbin new file mode 160000 index 00000000000..6e576cae5ab --- /dev/null +++ b/contrib/incbin @@ -0,0 +1 @@ +Subproject commit 6e576cae5ab5810f25e2631f2e0b80cbe7dc8cbf diff --git a/contrib/incbin-cmake/CMakeLists.txt b/contrib/incbin-cmake/CMakeLists.txt new file mode 100644 index 00000000000..e64ebc99c73 --- /dev/null +++ b/contrib/incbin-cmake/CMakeLists.txt @@ -0,0 +1,4 @@ +set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/incbin") +add_library(_incbin INTERFACE) +target_include_directories(_incbin SYSTEM INTERFACE ${LIBRARY_DIR}) +add_library(ch_contrib::incbin ALIAS _incbin) diff --git a/contrib/nlp-data-cmake/CMakeLists.txt b/contrib/nlp-data-cmake/CMakeLists.txt deleted file mode 100644 index 5380269c479..00000000000 --- a/contrib/nlp-data-cmake/CMakeLists.txt +++ /dev/null @@ -1,15 +0,0 @@ -include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake) - 
-set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/nlp-data") - -add_library (_nlp_data INTERFACE) - -clickhouse_embed_binaries( - TARGET nlp_dictionaries - RESOURCE_DIR "${LIBRARY_DIR}" - RESOURCES charset.zst tonality_ru.zst programming.zst -) - -add_dependencies(_nlp_data nlp_dictionaries) -target_link_libraries(_nlp_data INTERFACE "-Wl,${WHOLE_ARCHIVE} $ -Wl,${NO_WHOLE_ARCHIVE}") -add_library(ch_contrib::nlp_data ALIAS _nlp_data) diff --git a/programs/install/CMakeLists.txt b/programs/install/CMakeLists.txt index c3f4d96d631..f3f562bab7c 100644 --- a/programs/install/CMakeLists.txt +++ b/programs/install/CMakeLists.txt @@ -10,3 +10,6 @@ set (CLICKHOUSE_INSTALL_LINK ) clickhouse_program_add_library(install) + +# For incbin +target_include_directories(clickhouse-install-lib PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/../server") diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp index d83e189f7ef..da2c95af62c 100644 --- a/programs/install/Install.cpp +++ b/programs/install/Install.cpp @@ -20,10 +20,7 @@ #include #include #include -#include -#include #include -#include #include #include #include @@ -35,6 +32,12 @@ #include +#include + +/// Embedded configuration files used inside the install program +INCBIN(resource_config_xml, "config.xml"); +INCBIN(resource_users_xml, "users.xml"); + /** This tool can be used to install ClickHouse without a deb/rpm/tgz package, having only "clickhouse" binary. * It also allows to avoid dependency on systemd, upstart, SysV init. @@ -560,7 +563,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv) if (!fs::exists(main_config_file)) { - std::string_view main_config_content = getResource("config.xml"); + std::string_view main_config_content(reinterpret_cast(gresource_config_xmlData), gresource_config_xmlSize); if (main_config_content.empty()) { fmt::print("There is no default config.xml, you have to download it and place to {}.\n", main_config_file.string()); @@ -672,7 +675,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv) if (!fs::exists(users_config_file)) { - std::string_view users_config_content = getResource("users.xml"); + std::string_view users_config_content(reinterpret_cast(gresource_users_xmlData), gresource_users_xmlSize); if (users_config_content.empty()) { fmt::print("There is no default users.xml, you have to download it and place to {}.\n", users_config_file.string()); diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index 940e6848597..317e35959aa 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -1,16 +1,3 @@ -include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake) - -if (OS_LINUX) - set (LINK_RESOURCE_LIB INTERFACE "-Wl,${WHOLE_ARCHIVE} $ -Wl,${NO_WHOLE_ARCHIVE}") - # for some reason INTERFACE linkage doesn't work for standalone binary - set (LINK_RESOURCE_LIB_STANDALONE_KEEPER "-Wl,${WHOLE_ARCHIVE} $ -Wl,${NO_WHOLE_ARCHIVE}") -endif () - -clickhouse_embed_binaries( - TARGET clickhouse_keeper_configs - RESOURCES keeper_config.xml keeper_embedded.xml -) - set(CLICKHOUSE_KEEPER_SOURCES Keeper.cpp ) @@ -29,7 +16,6 @@ set (CLICKHOUSE_KEEPER_LINK clickhouse_program_add(keeper) install(FILES keeper_config.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-keeper" COMPONENT clickhouse-keeper) -add_dependencies(clickhouse-keeper-lib clickhouse_keeper_configs) if (BUILD_STANDALONE_KEEPER) # Straight list of all required sources @@ -215,7 +201,6 @@ if (BUILD_STANDALONE_KEEPER) ${LINK_RESOURCE_LIB_STANDALONE_KEEPER} ) - add_dependencies(clickhouse-keeper 
clickhouse_keeper_configs) set_target_properties(clickhouse-keeper PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../) if (SPLIT_DEBUG_SYMBOLS) diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 6034d63a016..a38467c3369 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -457,8 +457,10 @@ try const std::string key_path = config().getString("openSSL.server.privateKeyFile", ""); std::vector extra_paths = {include_from_path}; - if (!cert_path.empty()) extra_paths.emplace_back(cert_path); - if (!key_path.empty()) extra_paths.emplace_back(key_path); + if (!cert_path.empty()) + extra_paths.emplace_back(cert_path); + if (!key_path.empty()) + extra_paths.emplace_back(key_path); /// ConfigReloader have to strict parameters which are redundant in our case auto main_config_reloader = std::make_unique( diff --git a/programs/server/CMakeLists.txt b/programs/server/CMakeLists.txt index 855973d10e1..e008e65acf6 100644 --- a/programs/server/CMakeLists.txt +++ b/programs/server/CMakeLists.txt @@ -1,12 +1,8 @@ -include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake) - set(CLICKHOUSE_SERVER_SOURCES MetricsTransmitter.cpp Server.cpp ) -set (LINK_RESOURCE_LIB INTERFACE "-Wl,${WHOLE_ARCHIVE} $ -Wl,${NO_WHOLE_ARCHIVE}") - set (CLICKHOUSE_SERVER_LINK PRIVATE clickhouse_aggregate_functions @@ -33,10 +29,4 @@ endif() clickhouse_program_add(server) -install(FILES config.xml users.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-server" COMPONENT clickhouse) - -clickhouse_embed_binaries( - TARGET clickhouse_server_configs - RESOURCES config.xml users.xml embedded.xml play.html dashboard.html js/uplot.js -) -add_dependencies(clickhouse-server-lib clickhouse_server_configs) +target_include_directories(clickhouse-server-lib PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 33fdcc9c1a8..229a169dc1e 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -128,6 +128,10 @@ # include #endif +#include +/// A minimal file used when the server is run without installation +INCBIN(resource_embedded_xml, "embedded.xml"); + namespace CurrentMetrics { extern const Metric Revision; @@ -393,6 +397,7 @@ int Server::run() void Server::initialize(Poco::Util::Application & self) { + ConfigProcessor::registerEmbeddedConfig("config.xml", std::string_view(reinterpret_cast(gresource_embedded_xmlData), gresource_embedded_xmlSize)); BaseDaemon::initialize(self); logger().information("starting up"); @@ -1105,8 +1110,10 @@ try const std::string key_path = config().getString("openSSL.server.privateKeyFile", ""); std::vector extra_paths = {include_from_path}; - if (!cert_path.empty()) extra_paths.emplace_back(cert_path); - if (!key_path.empty()) extra_paths.emplace_back(key_path); + if (!cert_path.empty()) + extra_paths.emplace_back(cert_path); + if (!key_path.empty()) + extra_paths.emplace_back(key_path); auto main_config_reloader = std::make_unique( config_path, diff --git a/programs/server/resources.cpp b/programs/server/resources.cpp new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f870993f080..fda8bafde59 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -210,7 +210,7 @@ if (TARGET ch_contrib::jemalloc) target_link_libraries (clickhouse_storages_system PRIVATE ch_contrib::jemalloc) endif() -target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::sparsehash) +target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::sparsehash 
ch_contrib::incbin) add_subdirectory(Access/Common) add_subdirectory(Common/ZooKeeper) @@ -296,7 +296,7 @@ macro (dbms_target_include_directories) endforeach () endmacro () -dbms_target_include_directories (PUBLIC "${ClickHouse_SOURCE_DIR}/src" "${ClickHouse_BINARY_DIR}/src") +dbms_target_include_directories (PUBLIC "${ClickHouse_SOURCE_DIR}/src" "${ClickHouse_BINARY_DIR}/src" "${ClickHouse_SOURCE_DIR}/programs/server") target_include_directories (clickhouse_common_io PUBLIC "${ClickHouse_SOURCE_DIR}/src" "${ClickHouse_BINARY_DIR}/src") if (TARGET ch_contrib::llvm) @@ -561,7 +561,7 @@ if (ENABLE_NLP) dbms_target_link_libraries (PUBLIC ch_contrib::stemmer) dbms_target_link_libraries (PUBLIC ch_contrib::wnb) dbms_target_link_libraries (PUBLIC ch_contrib::lemmagen) - dbms_target_link_libraries (PUBLIC ch_contrib::nlp_data) + target_include_directories(clickhouse_common_io PUBLIC ${CMAKE_SOURCE_DIR}/contrib/nlp-data) endif() if (TARGET ch_contrib::ulid) diff --git a/src/Common/CMakeLists.txt b/src/Common/CMakeLists.txt index e527b3dec43..b83c8431f0a 100644 --- a/src/Common/CMakeLists.txt +++ b/src/Common/CMakeLists.txt @@ -9,5 +9,5 @@ if (ENABLE_EXAMPLES) endif() if (ENABLE_MYSQL) - add_subdirectory (mysqlxx) + add_subdirectory(mysqlxx) endif () diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index 5bbc8eae0de..c3a8f69cf3f 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -83,6 +83,13 @@ ConfigProcessor::~ConfigProcessor() Poco::Logger::destroy("ConfigProcessor"); } +static std::unordered_map embedded_configs; + +void ConfigProcessor::registerEmbeddedConfig(std::string name, std::string_view content) +{ + embedded_configs[name] = content; +} + /// Vector containing the name of the element and a sorted list of attribute names and values /// (except "remove" and "replace" attributes). @@ -281,15 +288,15 @@ void ConfigProcessor::doIncludesRecursive( { std::string value = node->nodeValue(); - bool replace_occured = false; + bool replace_occurred = false; size_t pos; while ((pos = value.find(substitution.first)) != std::string::npos) { value.replace(pos, substitution.first.length(), substitution.second); - replace_occured = true; + replace_occurred = true; } - if (replace_occured) + if (replace_occurred) node->setNodeValue(value); } } @@ -528,26 +535,14 @@ XMLDocumentPtr ConfigProcessor::processConfig( } else { - /// These embedded files added during build with some cmake magic. - /// Look at the end of programs/server/CMakeLists.txt. - std::string embedded_name; - if (path == "config.xml") - embedded_name = "embedded.xml"; - - if (path == "keeper_config.xml") - embedded_name = "keeper_embedded.xml"; - - /// When we can use config embedded in binary. - if (!embedded_name.empty()) + /// When we can use a config embedded in the binary. 
+ if (auto it = embedded_configs.find(path); it != embedded_configs.end()) { - auto resource = getResource(embedded_name); - if (resource.empty()) - throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Configuration file {} doesn't exist and there is no embedded config", path); LOG_DEBUG(log, "There is no file '{}', will use embedded config.", path); - config = dom_parser.parseMemory(resource.data(), resource.size()); + config = dom_parser.parseMemory(it->second.data(), it->second.size()); } else - throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Configuration file {} doesn't exist", path); + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Configuration file {} doesn't exist and there is no embedded config", path); } std::vector contributing_files; diff --git a/src/Common/Config/ConfigProcessor.h b/src/Common/Config/ConfigProcessor.h index 0ca3e46db88..eefe65ef06c 100644 --- a/src/Common/Config/ConfigProcessor.h +++ b/src/Common/Config/ConfigProcessor.h @@ -65,6 +65,9 @@ public: zkutil::ZooKeeperNodeCache * zk_node_cache = nullptr, const zkutil::EventPtr & zk_changed_event = nullptr); + /// These configurations will be used if there is no configuration file. + static void registerEmbeddedConfig(std::string name, std::string_view content); + /// loadConfig* functions apply processConfig and create Poco::Util::XMLConfiguration. /// The resulting XML document is saved into a file with the name diff --git a/src/Common/FrequencyHolder.cpp b/src/Common/FrequencyHolder.cpp new file mode 100644 index 00000000000..3b755cacacb --- /dev/null +++ b/src/Common/FrequencyHolder.cpp @@ -0,0 +1,181 @@ +#include + +#include + +/// Embedded dictionary files +INCBIN(resource_charset_zst, "charset.zst"); +INCBIN(resource_tonality_ru_zst, "tonality_ru.zst"); +INCBIN(resource_programming_zst, "programming.zst"); + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int FILE_DOESNT_EXIST; +} + + +FrequencyHolder & FrequencyHolder::getInstance() +{ + static FrequencyHolder instance; + return instance; +} + +FrequencyHolder::FrequencyHolder() +{ + loadEmotionalDict(); + loadEncodingsFrequency(); + loadProgrammingFrequency(); +} + +void FrequencyHolder::loadEncodingsFrequency() +{ + Poco::Logger * log = &Poco::Logger::get("EncodingsFrequency"); + + LOG_TRACE(log, "Loading embedded charset frequencies"); + + std::string_view resource(reinterpret_cast(gresource_charset_zstData), gresource_charset_zstSize); + if (resource.empty()) + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There are no embedded charset frequencies"); + + String line; + UInt16 bigram; + Float64 frequency; + String charset_name; + + auto buf = std::make_unique(resource.data(), resource.size()); + ZstdInflatingReadBuffer in(std::move(buf)); + + while (!in.eof()) + { + readString(line, in); + in.ignore(); + + if (line.empty()) + continue; + + ReadBufferFromString buf_line(line); + + // Start loading a new charset + if (line.starts_with("// ")) + { + // Skip "// " + buf_line.ignore(3); + readString(charset_name, buf_line); + + /* In our dictionary we have lines with form: _ + * If we need to find language of data, we return + * If we need to find charset of data, we return . 
+ */ + size_t sep = charset_name.find('_'); + + Encoding enc; + enc.lang = charset_name.substr(0, sep); + enc.name = charset_name.substr(sep + 1); + encodings_freq.push_back(std::move(enc)); + } + else + { + readIntText(bigram, buf_line); + buf_line.ignore(); + readFloatText(frequency, buf_line); + + encodings_freq.back().map[bigram] = frequency; + } + } + LOG_TRACE(log, "Charset frequencies were added, charsets count: {}", encodings_freq.size()); +} + +void FrequencyHolder::loadEmotionalDict() +{ + Poco::Logger * log = &Poco::Logger::get("EmotionalDict"); + LOG_TRACE(log, "Loading embedded emotional dictionary"); + + std::string_view resource(reinterpret_cast(gresource_tonality_ru_zstData), gresource_tonality_ru_zstSize); + if (resource.empty()) + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded emotional dictionary"); + + String line; + String word; + Float64 tonality; + size_t count = 0; + + auto buf = std::make_unique(resource.data(), resource.size()); + ZstdInflatingReadBuffer in(std::move(buf)); + + while (!in.eof()) + { + readString(line, in); + in.ignore(); + + if (line.empty()) + continue; + + ReadBufferFromString buf_line(line); + + readStringUntilWhitespace(word, buf_line); + buf_line.ignore(); + readFloatText(tonality, buf_line); + + StringRef ref{string_pool.insert(word.data(), word.size()), word.size()}; + emotional_dict[ref] = tonality; + ++count; + } + LOG_TRACE(log, "Emotional dictionary was added. Word count: {}", std::to_string(count)); +} + +void FrequencyHolder::loadProgrammingFrequency() +{ + Poco::Logger * log = &Poco::Logger::get("ProgrammingFrequency"); + + LOG_TRACE(log, "Loading embedded programming languages frequencies"); + + std::string_view resource(reinterpret_cast(gresource_programming_zstData), gresource_programming_zstSize); + if (resource.empty()) + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There are no embedded programming languages frequencies"); + + String line; + String bigram; + Float64 frequency; + String programming_language; + + auto buf = std::make_unique(resource.data(), resource.size()); + ZstdInflatingReadBuffer in(std::move(buf)); + + while (!in.eof()) + { + readString(line, in); + in.ignore(); + + if (line.empty()) + continue; + + ReadBufferFromString buf_line(line); + + // Start loading a new language + if (line.starts_with("// ")) + { + // Skip "// " + buf_line.ignore(3); + readString(programming_language, buf_line); + + Language lang; + lang.name = programming_language; + programming_freq.push_back(std::move(lang)); + } + else + { + readStringUntilWhitespace(bigram, buf_line); + buf_line.ignore(); + readFloatText(frequency, buf_line); + + StringRef ref{string_pool.insert(bigram.data(), bigram.size()), bigram.size()}; + programming_freq.back().map[ref] = frequency; + } + } + LOG_TRACE(log, "Programming languages frequencies were added"); +} + +} diff --git a/src/Common/FrequencyHolder.h b/src/Common/FrequencyHolder.h index 74098598441..270e4dbbd2a 100644 --- a/src/Common/FrequencyHolder.h +++ b/src/Common/FrequencyHolder.h @@ -7,7 +7,6 @@ #include #include -#include #include #include #include @@ -20,11 +19,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int FILE_DOESNT_EXIST; -} - /// FrequencyHolder class is responsible for storing and loading dictionaries /// needed for text classification functions: /// @@ -56,11 +50,7 @@ public: using EncodingMap = HashMap; using EncodingContainer = std::vector; - static FrequencyHolder & getInstance() - { - static FrequencyHolder instance; - return instance; 
- } + static FrequencyHolder & getInstance(); const Map & getEmotionalDict() const { @@ -78,161 +68,11 @@ public: } private: + FrequencyHolder(); - FrequencyHolder() - { - loadEmotionalDict(); - loadEncodingsFrequency(); - loadProgrammingFrequency(); - } - - void loadEncodingsFrequency() - { - Poco::Logger * log = &Poco::Logger::get("EncodingsFrequency"); - - LOG_TRACE(log, "Loading embedded charset frequencies"); - - auto resource = getResource("charset.zst"); - if (resource.empty()) - throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded charset frequencies"); - - String line; - UInt16 bigram; - Float64 frequency; - String charset_name; - - auto buf = std::make_unique(resource.data(), resource.size()); - ZstdInflatingReadBuffer in(std::move(buf)); - - while (!in.eof()) - { - readString(line, in); - in.ignore(); - - if (line.empty()) - continue; - - ReadBufferFromString buf_line(line); - - // Start loading a new charset - if (line.starts_with("// ")) - { - // Skip "// " - buf_line.ignore(3); - readString(charset_name, buf_line); - - /* In our dictionary we have lines with form: _ - * If we need to find language of data, we return - * If we need to find charset of data, we return . - */ - size_t sep = charset_name.find('_'); - - Encoding enc; - enc.lang = charset_name.substr(0, sep); - enc.name = charset_name.substr(sep + 1); - encodings_freq.push_back(std::move(enc)); - } - else - { - readIntText(bigram, buf_line); - buf_line.ignore(); - readFloatText(frequency, buf_line); - - encodings_freq.back().map[bigram] = frequency; - } - } - LOG_TRACE(log, "Charset frequencies was added, charsets count: {}", encodings_freq.size()); - } - - void loadEmotionalDict() - { - Poco::Logger * log = &Poco::Logger::get("EmotionalDict"); - LOG_TRACE(log, "Loading embedded emotional dictionary"); - - auto resource = getResource("tonality_ru.zst"); - if (resource.empty()) - throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded emotional dictionary"); - - String line; - String word; - Float64 tonality; - size_t count = 0; - - auto buf = std::make_unique(resource.data(), resource.size()); - ZstdInflatingReadBuffer in(std::move(buf)); - - while (!in.eof()) - { - readString(line, in); - in.ignore(); - - if (line.empty()) - continue; - - ReadBufferFromString buf_line(line); - - readStringUntilWhitespace(word, buf_line); - buf_line.ignore(); - readFloatText(tonality, buf_line); - - StringRef ref{string_pool.insert(word.data(), word.size()), word.size()}; - emotional_dict[ref] = tonality; - ++count; - } - LOG_TRACE(log, "Emotional dictionary was added. 
Word count: {}", std::to_string(count)); - } - - void loadProgrammingFrequency() - { - Poco::Logger * log = &Poco::Logger::get("ProgrammingFrequency"); - - LOG_TRACE(log, "Loading embedded programming languages frequencies loading"); - - auto resource = getResource("programming.zst"); - if (resource.empty()) - throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded programming languages frequencies"); - - String line; - String bigram; - Float64 frequency; - String programming_language; - - auto buf = std::make_unique(resource.data(), resource.size()); - ZstdInflatingReadBuffer in(std::move(buf)); - - while (!in.eof()) - { - readString(line, in); - in.ignore(); - - if (line.empty()) - continue; - - ReadBufferFromString buf_line(line); - - // Start loading a new language - if (line.starts_with("// ")) - { - // Skip "// " - buf_line.ignore(3); - readString(programming_language, buf_line); - - Language lang; - lang.name = programming_language; - programming_freq.push_back(std::move(lang)); - } - else - { - readStringUntilWhitespace(bigram, buf_line); - buf_line.ignore(); - readFloatText(frequency, buf_line); - - StringRef ref{string_pool.insert(bigram.data(), bigram.size()), bigram.size()}; - programming_freq.back().map[ref] = frequency; - } - } - LOG_TRACE(log, "Programming languages frequencies was added"); - } + void loadEncodingsFrequency(); + void loadEmotionalDict(); + void loadProgrammingFrequency(); Arena string_pool; diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index 3852ec5ada5..f61ca054b2a 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -38,7 +38,6 @@ #include #include -#include #include #include #include diff --git a/src/Server/WebUIRequestHandler.cpp b/src/Server/WebUIRequestHandler.cpp index 3997e0f19b6..cb9e8935d8c 100644 --- a/src/Server/WebUIRequestHandler.cpp +++ b/src/Server/WebUIRequestHandler.cpp @@ -6,10 +6,16 @@ #include #include -#include #include +#include + +/// Embedded HTML pages +INCBIN(resource_play_html, "play.html"); +INCBIN(resource_dashboard_html, "dashboard.html"); +INCBIN(resource_uplot_js, "js/uplot.js"); + namespace DB { @@ -34,13 +40,13 @@ void WebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerR if (request.getURI().starts_with("/play")) { response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK); - *response.send() << getResource("play.html"); + *response.send() << std::string_view(reinterpret_cast(gresource_play_htmlData), gresource_play_htmlSize); } else if (request.getURI().starts_with("/dashboard")) { response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK); - std::string html(getResource("dashboard.html")); + std::string html(reinterpret_cast(gresource_dashboard_htmlData), gresource_dashboard_htmlSize); /// Replace a link to external JavaScript file to embedded file. /// This allows to open the HTML without running a server and to host it on server. 
@@ -55,7 +61,7 @@ void WebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerR else if (request.getURI() == "/js/uplot.js") { response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK); - *response.send() << getResource("js/uplot.js"); + *response.send() << std::string_view(reinterpret_cast(gresource_uplot_jsData), gresource_uplot_jsSize); } else { diff --git a/src/Storages/System/CMakeLists.txt b/src/Storages/System/CMakeLists.txt index 1d2a3de5101..6b7d1739e33 100644 --- a/src/Storages/System/CMakeLists.txt +++ b/src/Storages/System/CMakeLists.txt @@ -43,18 +43,9 @@ list (APPEND storages_system_sources ${GENERATED_TIMEZONES_SRC}) # Overlength strings set_source_files_properties(${GENERATED_LICENSES_SRC} PROPERTIES COMPILE_FLAGS -w) -include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake) -clickhouse_embed_binaries( - TARGET information_schema_metadata - RESOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/InformationSchema/" - RESOURCES schemata.sql tables.sql views.sql columns.sql -) - list (SORT storages_system_sources) # Reproducible build add_library(clickhouse_storages_system ${storages_system_sources}) -add_dependencies(clickhouse_storages_system information_schema_metadata) - target_link_libraries(clickhouse_storages_system PRIVATE dbms common @@ -62,5 +53,6 @@ target_link_libraries(clickhouse_storages_system PRIVATE clickhouse_common_zookeeper clickhouse_parsers Poco::JSON - INTERFACE "-Wl,${WHOLE_ARCHIVE} $ -Wl,${NO_WHOLE_ARCHIVE}" ) + +target_include_directories(clickhouse_storages_system PRIVATE InformationSchema) diff --git a/src/Storages/System/attachInformationSchemaTables.cpp b/src/Storages/System/attachInformationSchemaTables.cpp index 61a91685324..bfc5c8c64e2 100644 --- a/src/Storages/System/attachInformationSchemaTables.cpp +++ b/src/Storages/System/attachInformationSchemaTables.cpp @@ -3,14 +3,21 @@ #include #include #include -#include +#include + +/// Embedded SQL definitions +INCBIN(resource_schemata_sql, "schemata.sql"); +INCBIN(resource_tables_sql, "tables.sql"); +INCBIN(resource_views_sql, "views.sql"); +INCBIN(resource_columns_sql, "columns.sql"); + namespace DB { /// View structures are taken from http://www.contrib.andrew.cmu.edu/~shadow/sql/sql1992.txt -static void createInformationSchemaView(ContextMutablePtr context, IDatabase & database, const String & view_name) +static void createInformationSchemaView(ContextMutablePtr context, IDatabase & database, const String & view_name, std::string_view query) { try { @@ -21,12 +28,11 @@ static void createInformationSchemaView(ContextMutablePtr context, IDatabase & d bool is_uppercase = database.getDatabaseName() == DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE; String metadata_resource_name = view_name + ".sql"; - auto attach_query = getResource(metadata_resource_name); - if (attach_query.empty()) + if (query.empty()) return; ParserCreateQuery parser; - ASTPtr ast = parseQuery(parser, attach_query.data(), attach_query.data() + attach_query.size(), + ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(), "Attach query from embedded resource " + metadata_resource_name, DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH); @@ -50,10 +56,10 @@ static void createInformationSchemaView(ContextMutablePtr context, IDatabase & d void attachInformationSchema(ContextMutablePtr context, IDatabase & information_schema_database) { - createInformationSchemaView(context, information_schema_database, "schemata"); - createInformationSchemaView(context, information_schema_database, "tables"); - 
createInformationSchemaView(context, information_schema_database, "views"); - createInformationSchemaView(context, information_schema_database, "columns"); + createInformationSchemaView(context, information_schema_database, "schemata", std::string_view(reinterpret_cast(gresource_schemata_sqlData), gresource_schemata_sqlSize)); + createInformationSchemaView(context, information_schema_database, "tables", std::string_view(reinterpret_cast(gresource_tables_sqlData), gresource_tables_sqlSize)); + createInformationSchemaView(context, information_schema_database, "views", std::string_view(reinterpret_cast(gresource_views_sqlData), gresource_views_sqlSize)); + createInformationSchemaView(context, information_schema_database, "columns", std::string_view(reinterpret_cast(gresource_columns_sqlData), gresource_columns_sqlSize)); } } From 4170d1458bdbccafe2f8cb2c671ee044b3efe9ba Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 23 Jul 2023 05:25:14 +0200 Subject: [PATCH 1897/1997] Use incbin for resources, part 2 --- cmake/embed_binary.cmake | 58 ------------------ contrib/cctz-cmake/CMakeLists.txt | 45 ++++++++------ src/Common/Config/ConfigProcessor.cpp | 1 - src/Common/DateLUTImpl.cpp | 17 ++++-- src/Common/SymbolIndex.cpp | 61 +++---------------- src/Common/SymbolIndex.h | 30 +-------- src/Common/getResource.cpp | 52 ---------------- src/Common/getResource.h | 7 --- src/Common/tests/gtest_DateLUTImpl.cpp | 14 ++--- .../System/StorageSystemTimeZones.cpp | 7 ++- 10 files changed, 58 insertions(+), 234 deletions(-) delete mode 100644 cmake/embed_binary.cmake delete mode 100644 src/Common/getResource.cpp delete mode 100644 src/Common/getResource.h diff --git a/cmake/embed_binary.cmake b/cmake/embed_binary.cmake deleted file mode 100644 index e5428c24939..00000000000 --- a/cmake/embed_binary.cmake +++ /dev/null @@ -1,58 +0,0 @@ -# Embed a set of resource files into a resulting object file. -# -# Signature: `clickhouse_embed_binaries(TARGET RESOURCE_DIR RESOURCES ...) -# -# This will generate a static library target named ``, which contains the contents of -# each `` file. The files should be located in ``. defaults to -# ${CMAKE_CURRENT_SOURCE_DIR}, and the resources may not be empty. -# -# Each resource will result in three symbols in the final archive, based on the name ``. -# These are: -# 1. `_binary__start`: Points to the start of the binary data from ``. -# 2. `_binary__end`: Points to the end of the binary data from ``. -# 2. `_binary__size`: Points to the size of the binary data from ``. -# -# `` is a normalized name derived from ``, by replacing the characters "./-" with -# the character "_", and the character "+" with "_PLUS_". This scheme is similar to those generated -# by `ld -r -b binary`, and matches the expectations in `./base/common/getResource.cpp`. 
-macro(clickhouse_embed_binaries) - set(one_value_args TARGET RESOURCE_DIR) - set(resources RESOURCES) - cmake_parse_arguments(EMBED "" "${one_value_args}" ${resources} ${ARGN}) - - if (NOT DEFINED EMBED_TARGET) - message(FATAL_ERROR "A target name must be provided for embedding binary resources into") - endif() - - if (NOT DEFINED EMBED_RESOURCE_DIR) - set(EMBED_RESOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") - endif() - - list(LENGTH EMBED_RESOURCES N_RESOURCES) - if (N_RESOURCES LESS 1) - message(FATAL_ERROR "The list of binary resources to embed may not be empty") - endif() - - add_library("${EMBED_TARGET}" STATIC) - set_target_properties("${EMBED_TARGET}" PROPERTIES LINKER_LANGUAGE C) - - set(EMBED_TEMPLATE_FILE "${PROJECT_SOURCE_DIR}/programs/embed_binary.S.in") - - foreach(RESOURCE_FILE ${EMBED_RESOURCES}) - set(ASSEMBLY_FILE_NAME "${RESOURCE_FILE}.S") - set(BINARY_FILE_NAME "${RESOURCE_FILE}") - - # Normalize the name of the resource. - string(REGEX REPLACE "[\./-]" "_" SYMBOL_NAME "${RESOURCE_FILE}") # - must be last in regex - string(REPLACE "+" "_PLUS_" SYMBOL_NAME "${SYMBOL_NAME}") - - # Generate the configured assembly file in the output directory. - configure_file("${EMBED_TEMPLATE_FILE}" "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}" @ONLY) - - # Set the include directory for relative paths specified for `.incbin` directive. - set_property(SOURCE "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}" APPEND PROPERTY INCLUDE_DIRECTORIES "${EMBED_RESOURCE_DIR}") - - target_sources("${EMBED_TARGET}" PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}") - set_target_properties("${EMBED_TARGET}" PROPERTIES OBJECT_DEPENDS "${RESOURCE_FILE}") - endforeach() -endmacro() diff --git a/contrib/cctz-cmake/CMakeLists.txt b/contrib/cctz-cmake/CMakeLists.txt index 10070fbd949..8aa3c7886db 100644 --- a/contrib/cctz-cmake/CMakeLists.txt +++ b/contrib/cctz-cmake/CMakeLists.txt @@ -1,4 +1,3 @@ -include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake) set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/cctz") set (SRCS @@ -23,12 +22,10 @@ if (OS_FREEBSD) endif () # Related to time_zones table: -# StorageSystemTimeZones.generated.cpp is autogenerated each time during a build -# data in this file will be used to populate the system.time_zones table, this is specific to OS_LINUX -# as the library that's built using embedded tzdata is also specific to OS_LINUX -set(SYSTEM_STORAGE_TZ_FILE "${PROJECT_BINARY_DIR}/src/Storages/System/StorageSystemTimeZones.generated.cpp") +# TimeZones.generated.cpp is autogenerated each time during a build +set(TIMEZONES_FILE "${CMAKE_CURRENT_BINARY_DIR}/TimeZones.generated.cpp") # remove existing copies so that its generated fresh on each build. -file(REMOVE ${SYSTEM_STORAGE_TZ_FILE}) +file(REMOVE ${TIMEZONES_FILE}) # get the list of timezones from tzdata shipped with cctz set(TZDIR "${LIBRARY_DIR}/testdata/zoneinfo") @@ -36,28 +33,36 @@ file(STRINGS "${LIBRARY_DIR}/testdata/version" TZDATA_VERSION) set_property(GLOBAL PROPERTY TZDATA_VERSION_PROP "${TZDATA_VERSION}") message(STATUS "Packaging with tzdata version: ${TZDATA_VERSION}") -set(TIMEZONE_RESOURCE_FILES) - # each file in that dir (except of tab and localtime) store the info about timezone execute_process(COMMAND bash -c "cd ${TZDIR} && find * -type f -and ! -name '*.tab' -and ! 
-name 'localtime' | LC_ALL=C sort | paste -sd ';' -" OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE TIMEZONES) -file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n") -file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "const char * auto_time_zones[] {\n" ) +file(APPEND ${TIMEZONES_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n") +file(APPEND ${TIMEZONES_FILE} "#include \n") +set (COUNTER 1) foreach(TIMEZONE ${TIMEZONES}) - file(APPEND ${SYSTEM_STORAGE_TZ_FILE} " \"${TIMEZONE}\",\n") - list(APPEND TIMEZONE_RESOURCE_FILES "${TIMEZONE}") + file(APPEND ${TIMEZONES_FILE} "INCBIN(resource_timezone${COUNTER}, \"${TIMEZONE}\");\n") + MATH(EXPR COUNTER "${COUNTER}+1") endforeach(TIMEZONE) -file(APPEND ${SYSTEM_STORAGE_TZ_FILE} " nullptr};\n") -clickhouse_embed_binaries( - TARGET tzdata - RESOURCE_DIR "${TZDIR}" - RESOURCES ${TIMEZONE_RESOURCE_FILES} -) -add_dependencies(_cctz tzdata) -target_link_libraries(_cctz INTERFACE "-Wl,${WHOLE_ARCHIVE} $ -Wl,${NO_WHOLE_ARCHIVE}") + +file(APPEND ${TIMEZONES_FILE} "#include \n") +file(APPEND ${TIMEZONES_FILE} "struct TimeZone { const char * name; const unsigned char * data; size_t size; };\n") +file(APPEND ${TIMEZONES_FILE} "TimeZone auto_time_zones[] {\n" ) + +set (COUNTER 1) +foreach(TIMEZONE ${TIMEZONES}) + file(APPEND ${TIMEZONES_FILE} " {\"${TIMEZONE}\", gresource_timezone${COUNTER}Data, gresource_timezone${COUNTER}Size},\n") + MATH(EXPR COUNTER "${COUNTER}+1") +endforeach(TIMEZONE) + +file(APPEND ${TIMEZONES_FILE} " {nullptr, nullptr, 0}};\n") + +add_library (tzdata ${TIMEZONES_FILE}) +target_link_libraries(tzdata ch_contrib::incbin) +target_include_directories(tzdata PRIVATE ${TZDIR}) +target_link_libraries(_cctz tzdata) add_library(ch_contrib::cctz ALIAS _cctz) diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index c3a8f69cf3f..bda181eceeb 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Common/DateLUTImpl.cpp b/src/Common/DateLUTImpl.cpp index 8146b35cc5f..3619462e79b 100644 --- a/src/Common/DateLUTImpl.cpp +++ b/src/Common/DateLUTImpl.cpp @@ -3,7 +3,6 @@ #include #include #include -#include #include #include @@ -13,6 +12,10 @@ #include +/// Embedded timezones. 
+struct TimeZone { const char * name; const unsigned char * data; size_t size; }; +extern TimeZone auto_time_zones[]; + namespace { @@ -249,9 +252,15 @@ namespace cctz_extension const std::string & name, const std::function(const std::string & name)> & fallback) { - std::string_view resource = getResource(name); - if (!resource.empty()) - return std::make_unique(resource.data(), resource.size()); + const TimeZone * timezone = auto_time_zones; + while (timezone->name != nullptr) + { + if (timezone->name == name) + break; + ++timezone; + } + if (timezone->size) + return std::make_unique(reinterpret_cast(timezone->data), timezone->size); return fallback(name); } diff --git a/src/Common/SymbolIndex.cpp b/src/Common/SymbolIndex.cpp index cb02bb3ff75..ac406538033 100644 --- a/src/Common/SymbolIndex.cpp +++ b/src/Common/SymbolIndex.cpp @@ -87,50 +87,13 @@ namespace /// https://stackoverflow.com/questions/32088140/multiple-string-tables-in-elf-object -void updateResources(ElfW(Addr) base_address, std::string_view object_name, std::string_view name, const void * address, SymbolIndex::Resources & resources) -{ - const char * char_address = static_cast(address); - - if (name.starts_with("_binary_") || name.starts_with("binary_")) - { - if (name.ends_with("_start")) - { - name = name.substr((name[0] == '_') + strlen("binary_")); - name = name.substr(0, name.size() - strlen("_start")); - - auto & resource = resources[name]; - if (!resource.base_address || resource.base_address == base_address) - { - resource.base_address = base_address; - resource.start = std::string_view{char_address, 0}; // NOLINT(bugprone-string-constructor) - resource.object_name = object_name; - } - } - if (name.ends_with("_end")) - { - name = name.substr((name[0] == '_') + strlen("binary_")); - name = name.substr(0, name.size() - strlen("_end")); - - auto & resource = resources[name]; - if (!resource.base_address || resource.base_address == base_address) - { - resource.base_address = base_address; - resource.end = std::string_view{char_address, 0}; // NOLINT(bugprone-string-constructor) - resource.object_name = object_name; - } - } - } -} - - /// Based on the code of musl-libc and the answer of Kanalpiroge on /// https://stackoverflow.com/questions/15779185/list-all-the-functions-symbols-on-the-fly-in-c-code-on-a-linux-architecture /// It does not extract all the symbols (but only public - exported and used for dynamic linking), /// but will work if we cannot find or parse ELF files. void collectSymbolsFromProgramHeaders( dl_phdr_info * info, - std::vector & symbols, - SymbolIndex::Resources & resources) + std::vector & symbols) { /* Iterate over all headers of the current shared lib * (first call is for the executable itself) @@ -248,9 +211,6 @@ void collectSymbolsFromProgramHeaders( /// We are not interested in empty symbols. if (elf_sym[sym_index].st_size) symbols.push_back(symbol); - - /// But resources can be represented by a pair of empty symbols (indicating their boundaries). - updateResources(base_address, info->dlpi_name, symbol.name, symbol.address_begin, resources); } break; @@ -281,8 +241,7 @@ void collectSymbolsFromELFSymbolTable( const Elf & elf, const Elf::Section & symbol_table, const Elf::Section & string_table, - std::vector & symbols, - SymbolIndex::Resources & resources) + std::vector & symbols) { /// Iterate symbol table. 
const ElfSym * symbol_table_entry = reinterpret_cast(symbol_table.begin()); @@ -312,8 +271,6 @@ void collectSymbolsFromELFSymbolTable( if (symbol_table_entry->st_size) symbols.push_back(symbol); - - updateResources(info->dlpi_addr, info->dlpi_name, symbol.name, symbol.address_begin, resources); } } @@ -323,8 +280,7 @@ bool searchAndCollectSymbolsFromELFSymbolTable( const Elf & elf, unsigned section_header_type, const char * string_table_name, - std::vector & symbols, - SymbolIndex::Resources & resources) + std::vector & symbols) { std::optional symbol_table; std::optional string_table; @@ -342,7 +298,7 @@ bool searchAndCollectSymbolsFromELFSymbolTable( return false; } - collectSymbolsFromELFSymbolTable(info, elf, *symbol_table, *string_table, symbols, resources); + collectSymbolsFromELFSymbolTable(info, elf, *symbol_table, *string_table, symbols); return true; } @@ -351,7 +307,6 @@ void collectSymbolsFromELF( dl_phdr_info * info, std::vector & symbols, std::vector & objects, - SymbolIndex::Resources & resources, String & build_id) { String object_name; @@ -462,11 +417,11 @@ void collectSymbolsFromELF( object.name = object_name; objects.push_back(std::move(object)); - searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_SYMTAB, ".strtab", symbols, resources); + searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_SYMTAB, ".strtab", symbols); /// Unneeded if they were parsed from "program headers" of loaded objects. #if defined USE_MUSL - searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_DYNSYM, ".dynstr", symbols, resources); + searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_DYNSYM, ".dynstr", symbols); #endif } @@ -479,8 +434,8 @@ int collectSymbols(dl_phdr_info * info, size_t, void * data_ptr) { SymbolIndex::Data & data = *reinterpret_cast(data_ptr); - collectSymbolsFromProgramHeaders(info, data.symbols, data.resources); - collectSymbolsFromELF(info, data.symbols, data.objects, data.resources, data.build_id); + collectSymbolsFromProgramHeaders(info, data.symbols); + collectSymbolsFromELF(info, data.symbols, data.objects, data.build_id); /* Continue iterations */ return 0; diff --git a/src/Common/SymbolIndex.h b/src/Common/SymbolIndex.h index 4fd108434d5..8c7b8971805 100644 --- a/src/Common/SymbolIndex.h +++ b/src/Common/SymbolIndex.h @@ -8,6 +8,7 @@ #include #include + namespace DB { @@ -45,44 +46,15 @@ public: const std::vector & symbols() const { return data.symbols; } const std::vector & objects() const { return data.objects; } - std::string_view getResource(String name) const - { - if (auto it = data.resources.find(name); it != data.resources.end()) - return it->second.data(); - return {}; - } - /// The BuildID that is generated by compiler. String getBuildID() const { return data.build_id; } String getBuildIDHex() const; - struct ResourcesBlob - { - /// Symbol can be presented in multiple shared objects, - /// base_address will be used to compare only symbols from the same SO. - ElfW(Addr) base_address = 0; - /// Just a human name of the SO. - std::string_view object_name; - /// Data blob. 
- std::string_view start; - std::string_view end; - - std::string_view data() const - { - assert(end.data() >= start.data()); - return std::string_view{start.data(), static_cast(end.data() - start.data())}; - } - }; - using Resources = std::unordered_map; - struct Data { std::vector symbols; std::vector objects; String build_id; - - /// Resources (embedded binary data) are located by symbols in form of _binary_name_start and _binary_name_end. - Resources resources; }; private: Data data; diff --git a/src/Common/getResource.cpp b/src/Common/getResource.cpp deleted file mode 100644 index 72ba24c2f44..00000000000 --- a/src/Common/getResource.cpp +++ /dev/null @@ -1,52 +0,0 @@ -#include "getResource.h" -#include -#include -#include -#include - - -std::string_view getResource(std::string_view name) -{ - // Convert the resource file name into the form generated by `ld -r -b binary`. - std::string name_replaced(name); - std::replace(name_replaced.begin(), name_replaced.end(), '/', '_'); - std::replace(name_replaced.begin(), name_replaced.end(), '-', '_'); - std::replace(name_replaced.begin(), name_replaced.end(), '.', '_'); - boost::replace_all(name_replaced, "+", "_PLUS_"); - -#if defined USE_MUSL - /// If static linking is used, we cannot use dlsym and have to parse ELF symbol table by ourself. - return DB::SymbolIndex::instance().getResource(name_replaced); - -#else - // In most `dlsym(3)` APIs, one passes the symbol name as it appears via - // something like `nm` or `objdump -t`. For example, a symbol `_foo` would be - // looked up with the string `"_foo"`. - // - // Apple's linker is confusingly different. The NOTES on the man page for - // `dlsym(3)` claim that one looks up the symbol with "the name used in C - // source code". In this example, that would mean using the string `"foo"`. - // This apparently applies even in the case where the symbol did not originate - // from C source, such as the embedded binary resource files used here. So - // the symbol name must not have a leading `_` on Apple platforms. It's not - // clear how this applies to other symbols, such as those which _have_ a leading - // underscore in them by design, many leading underscores, etc. -#if defined OS_DARWIN - std::string prefix = "binary_"; -#else - std::string prefix = "_binary_"; -#endif - std::string symbol_name_start = prefix + name_replaced + "_start"; - std::string symbol_name_end = prefix + name_replaced + "_end"; - - const char * sym_start = reinterpret_cast(dlsym(RTLD_DEFAULT, symbol_name_start.c_str())); - const char * sym_end = reinterpret_cast(dlsym(RTLD_DEFAULT, symbol_name_end.c_str())); - - if (sym_start && sym_end) - { - auto resource_size = static_cast(std::distance(sym_start, sym_end)); - return { sym_start, resource_size }; - } - return {}; -#endif -} diff --git a/src/Common/getResource.h b/src/Common/getResource.h deleted file mode 100644 index 8975cc7841e..00000000000 --- a/src/Common/getResource.h +++ /dev/null @@ -1,7 +0,0 @@ -#pragma once - -#include - -/// Get resource from binary if exists. Otherwise return empty string view. -/// Resources are data that is embedded into executable at link time. -std::string_view getResource(std::string_view name); diff --git a/src/Common/tests/gtest_DateLUTImpl.cpp b/src/Common/tests/gtest_DateLUTImpl.cpp index 04f63403ec2..b09319c78d6 100644 --- a/src/Common/tests/gtest_DateLUTImpl.cpp +++ b/src/Common/tests/gtest_DateLUTImpl.cpp @@ -15,7 +15,8 @@ #endif // All timezones present at build time and embedded into ClickHouse binary. 
-extern const char * auto_time_zones[]; +struct TimeZone { const char * name; const unsigned char * data; size_t size; }; +extern TimeZone auto_time_zones[]; namespace { @@ -32,14 +33,14 @@ std::vector allTimezones(bool with_weird_offsets = true) { std::vector result; - const auto * timezone_name = auto_time_zones; - while (*timezone_name) + const TimeZone * timezone = auto_time_zones; + while (timezone->name) { - bool weird_offsets = (std::string_view(*timezone_name) == "Africa/Monrovia"); + bool weird_offsets = (std::string_view(timezone->name) == "Africa/Monrovia"); if (!weird_offsets || with_weird_offsets) - result.push_back(*timezone_name); - ++timezone_name; + result.push_back(timezone->name); + ++timezone; } return result; @@ -548,4 +549,3 @@ INSTANTIATE_TEST_SUITE_P(AllTimezones_Year1970, // {0, 0 + 11 * 3600 * 24 + 12, 11}, })) ); - diff --git a/src/Storages/System/StorageSystemTimeZones.cpp b/src/Storages/System/StorageSystemTimeZones.cpp index dc3711812a6..41227ab7780 100644 --- a/src/Storages/System/StorageSystemTimeZones.cpp +++ b/src/Storages/System/StorageSystemTimeZones.cpp @@ -4,7 +4,8 @@ #include -extern const char * auto_time_zones[]; +struct TimeZone { const char * name; const unsigned char * data; size_t size; }; +extern TimeZone auto_time_zones[]; namespace DB { @@ -17,7 +18,7 @@ NamesAndTypesList StorageSystemTimeZones::getNamesAndTypes() void StorageSystemTimeZones::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const { - for (auto * it = auto_time_zones; *it; ++it) - res_columns[0]->insert(String(*it)); + for (auto * it = auto_time_zones; it->name != nullptr; ++it) + res_columns[0]->insert(String(it->name)); } } From c8f8a23c71dc88ab53318be369ca17b528047b05 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 23 Jul 2023 06:09:15 +0200 Subject: [PATCH 1898/1997] Fix errors --- contrib/cctz-cmake/CMakeLists.txt | 21 +++++++++++++------ src/Common/DateLUTImpl.cpp | 18 +++++++--------- src/Common/tests/gtest_DateLUTImpl.cpp | 13 ++++++------ src/Storages/System/CMakeLists.txt | 2 -- .../System/StorageSystemTimeZones.cpp | 7 +++---- 5 files changed, 31 insertions(+), 30 deletions(-) diff --git a/contrib/cctz-cmake/CMakeLists.txt b/contrib/cctz-cmake/CMakeLists.txt index 8aa3c7886db..7edeada6e59 100644 --- a/contrib/cctz-cmake/CMakeLists.txt +++ b/contrib/cctz-cmake/CMakeLists.txt @@ -48,17 +48,26 @@ foreach(TIMEZONE ${TIMEZONES}) MATH(EXPR COUNTER "${COUNTER}+1") endforeach(TIMEZONE) -file(APPEND ${TIMEZONES_FILE} "#include \n") -file(APPEND ${TIMEZONES_FILE} "struct TimeZone { const char * name; const unsigned char * data; size_t size; };\n") -file(APPEND ${TIMEZONES_FILE} "TimeZone auto_time_zones[] {\n" ) +file(APPEND ${TIMEZONES_FILE} "const char * auto_time_zones[] {\n" ) -set (COUNTER 1) foreach(TIMEZONE ${TIMEZONES}) - file(APPEND ${TIMEZONES_FILE} " {\"${TIMEZONE}\", gresource_timezone${COUNTER}Data, gresource_timezone${COUNTER}Size},\n") + file(APPEND ${TIMEZONES_FILE} " \"${TIMEZONE}\",\n") MATH(EXPR COUNTER "${COUNTER}+1") endforeach(TIMEZONE) -file(APPEND ${TIMEZONES_FILE} " {nullptr, nullptr, 0}};\n") +file(APPEND ${TIMEZONES_FILE} "};\n\n") + +file(APPEND ${TIMEZONES_FILE} "#include \n\n") +file(APPEND ${TIMEZONES_FILE} "std::string_view getTimeZone(const char * name)\n{\n" ) + +set (COUNTER 1) +foreach(TIMEZONE ${TIMEZONES}) + file(APPEND ${TIMEZONES_FILE} " if (std::string_view(\"${TIMEZONE}\") == name) return { reinterpret_cast(gresource_timezone${COUNTER}Data), gresource_timezone${COUNTER}Size };\n") + MATH(EXPR 
COUNTER "${COUNTER}+1") +endforeach(TIMEZONE) + +file(APPEND ${TIMEZONES_FILE} " return {};\n") +file(APPEND ${TIMEZONES_FILE} "}\n") add_library (tzdata ${TIMEZONES_FILE}) target_link_libraries(tzdata ch_contrib::incbin) diff --git a/src/Common/DateLUTImpl.cpp b/src/Common/DateLUTImpl.cpp index 3619462e79b..d5e04238ef9 100644 --- a/src/Common/DateLUTImpl.cpp +++ b/src/Common/DateLUTImpl.cpp @@ -10,11 +10,12 @@ #include #include #include +#include /// Embedded timezones. -struct TimeZone { const char * name; const unsigned char * data; size_t size; }; -extern TimeZone auto_time_zones[]; +std::string_view getTimeZone(const char * name); + namespace { @@ -252,15 +253,10 @@ namespace cctz_extension const std::string & name, const std::function(const std::string & name)> & fallback) { - const TimeZone * timezone = auto_time_zones; - while (timezone->name != nullptr) - { - if (timezone->name == name) - break; - ++timezone; - } - if (timezone->size) - return std::make_unique(reinterpret_cast(timezone->data), timezone->size); + std::string_view tz_file = getTimeZone(name.data()); + + if (!tz_file.empty()) + return std::make_unique(tz_file.data(), tz_file.size()); return fallback(name); } diff --git a/src/Common/tests/gtest_DateLUTImpl.cpp b/src/Common/tests/gtest_DateLUTImpl.cpp index b09319c78d6..3d3a3f04941 100644 --- a/src/Common/tests/gtest_DateLUTImpl.cpp +++ b/src/Common/tests/gtest_DateLUTImpl.cpp @@ -15,8 +15,7 @@ #endif // All timezones present at build time and embedded into ClickHouse binary. -struct TimeZone { const char * name; const unsigned char * data; size_t size; }; -extern TimeZone auto_time_zones[]; +extern const char * auto_time_zones[]; namespace { @@ -33,14 +32,14 @@ std::vector allTimezones(bool with_weird_offsets = true) { std::vector result; - const TimeZone * timezone = auto_time_zones; - while (timezone->name) + const auto * timezone_name = auto_time_zones; + while (*timezone_name) { - bool weird_offsets = (std::string_view(timezone->name) == "Africa/Monrovia"); + bool weird_offsets = (std::string_view(*timezone_name) == "Africa/Monrovia"); if (!weird_offsets || with_weird_offsets) - result.push_back(timezone->name); - ++timezone; + result.push_back(*timezone_name); + ++timezone_name; } return result; diff --git a/src/Storages/System/CMakeLists.txt b/src/Storages/System/CMakeLists.txt index 6b7d1739e33..c3a2e726365 100644 --- a/src/Storages/System/CMakeLists.txt +++ b/src/Storages/System/CMakeLists.txt @@ -30,7 +30,6 @@ endif() add_dependencies(generate-source generate-contributors) set(GENERATED_LICENSES_SRC "${CMAKE_CURRENT_BINARY_DIR}/StorageSystemLicenses.generated.cpp") -set(GENERATED_TIMEZONES_SRC "${CMAKE_CURRENT_BINARY_DIR}/StorageSystemTimeZones.generated.cpp") add_custom_command( OUTPUT StorageSystemLicenses.generated.cpp @@ -38,7 +37,6 @@ add_custom_command( WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) list (APPEND storages_system_sources ${GENERATED_LICENSES_SRC}) -list (APPEND storages_system_sources ${GENERATED_TIMEZONES_SRC}) # Overlength strings set_source_files_properties(${GENERATED_LICENSES_SRC} PROPERTIES COMPILE_FLAGS -w) diff --git a/src/Storages/System/StorageSystemTimeZones.cpp b/src/Storages/System/StorageSystemTimeZones.cpp index 41227ab7780..dc3711812a6 100644 --- a/src/Storages/System/StorageSystemTimeZones.cpp +++ b/src/Storages/System/StorageSystemTimeZones.cpp @@ -4,8 +4,7 @@ #include -struct TimeZone { const char * name; const unsigned char * data; size_t size; }; -extern TimeZone auto_time_zones[]; +extern const char * 
auto_time_zones[]; namespace DB { @@ -18,7 +17,7 @@ NamesAndTypesList StorageSystemTimeZones::getNamesAndTypes() void StorageSystemTimeZones::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const { - for (auto * it = auto_time_zones; it->name != nullptr; ++it) - res_columns[0]->insert(String(it->name)); + for (auto * it = auto_time_zones; *it; ++it) + res_columns[0]->insert(String(*it)); } } From 8013cb1f784f6324b3c7b227499751dc7e666009 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 23 Jul 2023 08:46:44 +0200 Subject: [PATCH 1899/1997] Remove skip_startup_tables from IDatabase::loadStoredObjects() Signed-off-by: Azat Khuzhin --- src/Databases/DatabaseAtomic.cpp | 5 ++--- src/Databases/DatabaseAtomic.h | 2 +- src/Databases/DatabaseLazy.cpp | 3 +-- src/Databases/DatabaseLazy.h | 2 +- src/Databases/DatabaseOrdinary.cpp | 9 +-------- src/Databases/DatabaseOrdinary.h | 2 +- src/Databases/DatabaseReplicated.cpp | 5 ++--- src/Databases/DatabaseReplicated.h | 2 +- src/Databases/IDatabase.h | 3 +-- src/Databases/MySQL/DatabaseMySQL.cpp | 2 +- src/Databases/MySQL/DatabaseMySQL.h | 2 +- src/Databases/PostgreSQL/DatabasePostgreSQL.cpp | 2 +- src/Databases/PostgreSQL/DatabasePostgreSQL.h | 2 +- src/Databases/TablesLoader.cpp | 2 +- 14 files changed, 16 insertions(+), 27 deletions(-) diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp index 7e20b6f6535..0f65069db35 100644 --- a/src/Databases/DatabaseAtomic.cpp +++ b/src/Databases/DatabaseAtomic.cpp @@ -441,11 +441,10 @@ void DatabaseAtomic::beforeLoadingMetadata(ContextMutablePtr /*context*/, Loadin } } -void DatabaseAtomic::loadStoredObjects( - ContextMutablePtr local_context, LoadingStrictnessLevel mode, bool skip_startup_tables) +void DatabaseAtomic::loadStoredObjects(ContextMutablePtr local_context, LoadingStrictnessLevel mode) { beforeLoadingMetadata(local_context, mode); - DatabaseOrdinary::loadStoredObjects(local_context, mode, skip_startup_tables); + DatabaseOrdinary::loadStoredObjects(local_context, mode); } void DatabaseAtomic::startupTables(ThreadPool & thread_pool, LoadingStrictnessLevel mode) diff --git a/src/Databases/DatabaseAtomic.h b/src/Databases/DatabaseAtomic.h index cb275812098..70553b2d5c2 100644 --- a/src/Databases/DatabaseAtomic.h +++ b/src/Databases/DatabaseAtomic.h @@ -48,7 +48,7 @@ public: DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override; - void loadStoredObjects(ContextMutablePtr context, LoadingStrictnessLevel mode, bool skip_startup_tables) override; + void loadStoredObjects(ContextMutablePtr context, LoadingStrictnessLevel mode) override; void beforeLoadingMetadata(ContextMutablePtr context, LoadingStrictnessLevel mode) override; diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index f27c6c0c3ee..896ae99656f 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -37,8 +37,7 @@ DatabaseLazy::DatabaseLazy(const String & name_, const String & metadata_path_, } -void DatabaseLazy::loadStoredObjects( - ContextMutablePtr local_context, LoadingStrictnessLevel /*mode*/, bool /* skip_startup_tables */) +void DatabaseLazy::loadStoredObjects(ContextMutablePtr local_context, LoadingStrictnessLevel /*mode*/) { iterateMetadataFiles(local_context, [this, &local_context](const String & file_name) { diff --git a/src/Databases/DatabaseLazy.h b/src/Databases/DatabaseLazy.h index b01038073ef..2b1b119754d 100644 --- 
a/src/Databases/DatabaseLazy.h +++ b/src/Databases/DatabaseLazy.h @@ -26,7 +26,7 @@ public: bool canContainDistributedTables() const override { return false; } - void loadStoredObjects(ContextMutablePtr context, LoadingStrictnessLevel /*mode*/, bool skip_startup_tables) override; + void loadStoredObjects(ContextMutablePtr context, LoadingStrictnessLevel /*mode*/) override; void createTable( ContextPtr context, diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index 8c92b8064ca..51d37b84e14 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -89,8 +89,7 @@ DatabaseOrdinary::DatabaseOrdinary( { } -void DatabaseOrdinary::loadStoredObjects( - ContextMutablePtr local_context, LoadingStrictnessLevel mode, bool skip_startup_tables) +void DatabaseOrdinary::loadStoredObjects(ContextMutablePtr local_context, LoadingStrictnessLevel mode) { /** Tables load faster if they are loaded in sorted (by name) order. * Otherwise (for the ext4 filesystem), `DirectoryIterator` iterates through them in some order, @@ -159,12 +158,6 @@ void DatabaseOrdinary::loadStoredObjects( } pool.wait(); - - if (!skip_startup_tables) - { - /// After all tables was basically initialized, startup them. - startupTables(pool, mode); - } } void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTablesMetadata & metadata, bool is_startup) diff --git a/src/Databases/DatabaseOrdinary.h b/src/Databases/DatabaseOrdinary.h index f9aa3214ef5..cabc8f9c55b 100644 --- a/src/Databases/DatabaseOrdinary.h +++ b/src/Databases/DatabaseOrdinary.h @@ -21,7 +21,7 @@ public: String getEngineName() const override { return "Ordinary"; } - void loadStoredObjects(ContextMutablePtr context, LoadingStrictnessLevel mode, bool skip_startup_tables) override; + void loadStoredObjects(ContextMutablePtr context, LoadingStrictnessLevel mode) override; bool supportsLoadingInTopologicalOrder() const override { return true; } diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 25c23e2be17..d3b3d4b545f 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -495,11 +495,10 @@ void DatabaseReplicated::beforeLoadingMetadata(ContextMutablePtr /*context*/, Lo tryConnectToZooKeeperAndInitDatabase(mode); } -void DatabaseReplicated::loadStoredObjects( - ContextMutablePtr local_context, LoadingStrictnessLevel mode, bool skip_startup_tables) +void DatabaseReplicated::loadStoredObjects(ContextMutablePtr local_context, LoadingStrictnessLevel mode) { beforeLoadingMetadata(local_context, mode); - DatabaseAtomic::loadStoredObjects(local_context, mode, skip_startup_tables); + DatabaseAtomic::loadStoredObjects(local_context, mode); } UInt64 DatabaseReplicated::getMetadataHash(const String & table_name) const diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h index ff1a4aba41c..8e33f482ac1 100644 --- a/src/Databases/DatabaseReplicated.h +++ b/src/Databases/DatabaseReplicated.h @@ -67,7 +67,7 @@ public: void drop(ContextPtr /*context*/) override; - void loadStoredObjects(ContextMutablePtr context, LoadingStrictnessLevel mode, bool skip_startup_tables) override; + void loadStoredObjects(ContextMutablePtr context, LoadingStrictnessLevel mode) override; void beforeLoadingMetadata(ContextMutablePtr context, LoadingStrictnessLevel mode) override; diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h index a9577dfc84a..9bed3c4bfc5 100644 --- a/src/Databases/IDatabase.h 
+++ b/src/Databases/IDatabase.h @@ -134,8 +134,7 @@ public: /// You can call only once, right after the object is created. virtual void loadStoredObjects( /// NOLINT ContextMutablePtr /*context*/, - LoadingStrictnessLevel /*mode*/, - bool /* skip_startup_tables */) + LoadingStrictnessLevel /*mode*/) { } diff --git a/src/Databases/MySQL/DatabaseMySQL.cpp b/src/Databases/MySQL/DatabaseMySQL.cpp index 70bd32efed9..94e5ba1773e 100644 --- a/src/Databases/MySQL/DatabaseMySQL.cpp +++ b/src/Databases/MySQL/DatabaseMySQL.cpp @@ -402,7 +402,7 @@ String DatabaseMySQL::getMetadataPath() const return metadata_path; } -void DatabaseMySQL::loadStoredObjects(ContextMutablePtr, LoadingStrictnessLevel /*mode*/, bool /* skip_startup_tables */) +void DatabaseMySQL::loadStoredObjects(ContextMutablePtr, LoadingStrictnessLevel /*mode*/) { std::lock_guard lock{mutex}; diff --git a/src/Databases/MySQL/DatabaseMySQL.h b/src/Databases/MySQL/DatabaseMySQL.h index f34a2fff4f7..e5b1f434d2f 100644 --- a/src/Databases/MySQL/DatabaseMySQL.h +++ b/src/Databases/MySQL/DatabaseMySQL.h @@ -76,7 +76,7 @@ public: void createTable(ContextPtr, const String & table_name, const StoragePtr & storage, const ASTPtr & create_query) override; - void loadStoredObjects(ContextMutablePtr, LoadingStrictnessLevel /*mode*/, bool skip_startup_tables) override; + void loadStoredObjects(ContextMutablePtr, LoadingStrictnessLevel /*mode*/) override; StoragePtr detachTable(ContextPtr context, const String & table_name) override; diff --git a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp index f4d750f85d4..812a0d8717e 100644 --- a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp +++ b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp @@ -296,7 +296,7 @@ void DatabasePostgreSQL::drop(ContextPtr /*context*/) } -void DatabasePostgreSQL::loadStoredObjects(ContextMutablePtr /* context */, LoadingStrictnessLevel /*mode*/, bool /* skip_startup_tables */) +void DatabasePostgreSQL::loadStoredObjects(ContextMutablePtr /* context */, LoadingStrictnessLevel /*mode*/) { { std::lock_guard lock{mutex}; diff --git a/src/Databases/PostgreSQL/DatabasePostgreSQL.h b/src/Databases/PostgreSQL/DatabasePostgreSQL.h index 31fa036c0ee..d731e06649b 100644 --- a/src/Databases/PostgreSQL/DatabasePostgreSQL.h +++ b/src/Databases/PostgreSQL/DatabasePostgreSQL.h @@ -44,7 +44,7 @@ public: bool empty() const override; - void loadStoredObjects(ContextMutablePtr, LoadingStrictnessLevel /*mode*/, bool skip_startup_tables) override; + void loadStoredObjects(ContextMutablePtr, LoadingStrictnessLevel /*mode*/) override; DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override; diff --git a/src/Databases/TablesLoader.cpp b/src/Databases/TablesLoader.cpp index ea0f2072430..f8b4e7fe33b 100644 --- a/src/Databases/TablesLoader.cpp +++ b/src/Databases/TablesLoader.cpp @@ -49,7 +49,7 @@ void TablesLoader::loadTables() if (need_resolve_dependencies && database.second->supportsLoadingInTopologicalOrder()) databases_to_load.push_back(database.first); else - database.second->loadStoredObjects(global_context, strictness_mode, /* skip_startup_tables */ true); + database.second->loadStoredObjects(global_context, strictness_mode); } if (databases_to_load.empty()) From 282258a855cfed40e0b2cd7c0ada3ec1defe8e06 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Sun, 23 Jul 2023 11:29:29 +0200 Subject: [PATCH 1900/1997] fix style --- src/Common/OptimizedRegularExpression.cpp | 3 ++- 1 file 
changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Common/OptimizedRegularExpression.cpp b/src/Common/OptimizedRegularExpression.cpp index e636b0b987d..05e6aefbb5e 100644 --- a/src/Common/OptimizedRegularExpression.cpp +++ b/src/Common/OptimizedRegularExpression.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -439,7 +440,7 @@ catch (...) is_trivial = false; required_substring_is_prefix = false; alternatives.clear(); - std::cerr << "Analyze RegularExpression failed, got error: {}" << DB::getCurrentExceptionMessage(false) << "\n"; + LOG_ERROR(&Poco::Logger::get("OptimizeRegularExpression"), "Analyze RegularExpression failed, got error: {}", DB::getCurrentExceptionMessage(false)); } template From 4c1f8f38cd4073b24064e076a677082db546c680 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 23 Jul 2023 17:30:23 +0200 Subject: [PATCH 1901/1997] Fix CI --- docker/test/fasttest/run.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index e25b5fdbfed..60e6199aaa4 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -147,6 +147,7 @@ function clone_submodules contrib/simdjson contrib/liburing contrib/libfiu + contrib/incbin ) git submodule sync From 8902bbdb60b466498ab2825000502195d5d35c91 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 23 Jul 2023 17:59:49 +0200 Subject: [PATCH 1902/1997] Fix fasttest --- src/Common/FrequencyHolder.cpp | 4 ++++ src/Common/FrequencyHolder.h | 6 ++++++ src/Functions/FunctionsCharsetClassification.cpp | 2 +- src/Functions/FunctionsLanguageClassification.cpp | 4 +--- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/Common/FrequencyHolder.cpp b/src/Common/FrequencyHolder.cpp index 3b755cacacb..fe03e6a1b44 100644 --- a/src/Common/FrequencyHolder.cpp +++ b/src/Common/FrequencyHolder.cpp @@ -1,5 +1,7 @@ #include +#if USE_NLP + #include /// Embedded SQL definitions @@ -179,3 +181,5 @@ void FrequencyHolder::loadProgrammingFrequency() } } + +#endif diff --git a/src/Common/FrequencyHolder.h b/src/Common/FrequencyHolder.h index 270e4dbbd2a..73675ed9814 100644 --- a/src/Common/FrequencyHolder.h +++ b/src/Common/FrequencyHolder.h @@ -1,5 +1,9 @@ #pragma once +#include "config.h" + +#if USE_NLP + #include #include @@ -81,3 +85,5 @@ private: EncodingContainer encodings_freq; }; } + +#endif diff --git a/src/Functions/FunctionsCharsetClassification.cpp b/src/Functions/FunctionsCharsetClassification.cpp index a25da8f6c13..237d4c37fa2 100644 --- a/src/Functions/FunctionsCharsetClassification.cpp +++ b/src/Functions/FunctionsCharsetClassification.cpp @@ -3,7 +3,7 @@ #include #include -#include + namespace DB { diff --git a/src/Functions/FunctionsLanguageClassification.cpp b/src/Functions/FunctionsLanguageClassification.cpp index 6088fd52efa..55485d41ce0 100644 --- a/src/Functions/FunctionsLanguageClassification.cpp +++ b/src/Functions/FunctionsLanguageClassification.cpp @@ -5,19 +5,17 @@ #include #include #include -#include #include #include #include -#include #include #include #include #include -#include #include + namespace DB { /* Determine language of Unicode UTF-8 text. 
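A note on the guard pattern used in the patch above: wrapping the whole translation unit in
#if USE_NLP lets fasttest builds, which compile without the NLP libraries and the embedded
frequency dictionaries, skip these sources entirely instead of failing at link time. A
minimal sketch of the same pattern, assuming a generated config.h that defines USE_NLP as
0 or 1 (registerNlpFunctions is a placeholder name, not part of the patch):

    #include "config.h"

    #if USE_NLP

    #include <Common/FrequencyHolder.h>

    /// Everything here is compiled only when the build enables NLP,
    /// so builds without the nlp-data payload still link cleanly.
    void registerNlpFunctions();

    #endif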
From 43bd6d1b8336f282cc4548c0f61b52516f49ac13 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 23 Jul 2023 19:00:49 +0300 Subject: [PATCH 1903/1997] Revert "Add an ability to specify allocations size for sampling memory profiler" --- programs/server/Server.cpp | 21 +++-------- src/Common/MemoryTracker.cpp | 10 +---- src/Common/MemoryTracker.h | 18 --------- src/Core/ServerSettings.h | 8 +--- src/Core/Settings.h | 4 +- src/Interpreters/ProcessList.cpp | 3 -- src/Interpreters/ThreadStatusExt.cpp | 2 - .../__init__.py | 1 - .../configs/max_untracked_memory.xml | 7 ---- .../configs/memory_profiler.xml | 5 --- .../test.py | 37 ------------------- ...r_sample_min_max_allocation_size.reference | 1 - ...profiler_sample_min_max_allocation_size.sh | 18 --------- 13 files changed, 11 insertions(+), 124 deletions(-) delete mode 100644 tests/integration/test_memory_profiler_min_max_borders/__init__.py delete mode 100644 tests/integration/test_memory_profiler_min_max_borders/configs/max_untracked_memory.xml delete mode 100644 tests/integration/test_memory_profiler_min_max_borders/configs/memory_profiler.xml delete mode 100644 tests/integration/test_memory_profiler_min_max_borders/test.py delete mode 100644 tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.reference delete mode 100755 tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.sh diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 33fdcc9c1a8..9202d4b32c1 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1637,26 +1637,17 @@ try global_context->initializeTraceCollector(); /// Set up server-wide memory profiler (for total memory tracker). - if (server_settings.total_memory_profiler_step) + UInt64 total_memory_profiler_step = config().getUInt64("total_memory_profiler_step", 0); + if (total_memory_profiler_step) { - total_memory_tracker.setProfilerStep(server_settings.total_memory_profiler_step); + total_memory_tracker.setProfilerStep(total_memory_profiler_step); } - if (server_settings.total_memory_tracker_sample_probability > 0.0) + double total_memory_tracker_sample_probability = config().getDouble("total_memory_tracker_sample_probability", 0); + if (total_memory_tracker_sample_probability > 0.0) { - total_memory_tracker.setSampleProbability(server_settings.total_memory_tracker_sample_probability); + total_memory_tracker.setSampleProbability(total_memory_tracker_sample_probability); } - - if (server_settings.total_memory_profiler_sample_min_allocation_size) - { - total_memory_tracker.setSampleMinAllocationSize(server_settings.total_memory_profiler_sample_min_allocation_size); - } - - if (server_settings.total_memory_profiler_sample_max_allocation_size) - { - total_memory_tracker.setSampleMaxAllocationSize(server_settings.total_memory_profiler_sample_max_allocation_size); - } - } #endif diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp index 52cae0768dc..81cac2617c5 100644 --- a/src/Common/MemoryTracker.cpp +++ b/src/Common/MemoryTracker.cpp @@ -229,7 +229,7 @@ void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryT } std::bernoulli_distribution sample(sample_probability); - if (unlikely(sample_probability > 0.0 && isSizeOkForSampling(size) && sample(thread_local_rng))) + if (unlikely(sample_probability > 0.0 && sample(thread_local_rng))) { MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global); DB::TraceSender::send(DB::TraceType::MemorySample, StackTrace(), {.size = 
size}); @@ -413,7 +413,7 @@ void MemoryTracker::free(Int64 size) } std::bernoulli_distribution sample(sample_probability); - if (unlikely(sample_probability > 0.0 && isSizeOkForSampling(size) && sample(thread_local_rng))) + if (unlikely(sample_probability > 0.0 && sample(thread_local_rng))) { MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global); DB::TraceSender::send(DB::TraceType::MemorySample, StackTrace(), {.size = -size}); @@ -534,12 +534,6 @@ void MemoryTracker::setOrRaiseProfilerLimit(Int64 value) ; } -bool MemoryTracker::isSizeOkForSampling(UInt64 size) const -{ - /// We can avoid comparison min_allocation_size_bytes with zero, because we cannot have 0 bytes allocation/deallocation - return ((max_allocation_size_bytes == 0 || size <= max_allocation_size_bytes) && size >= min_allocation_size_bytes); -} - bool canEnqueueBackgroundTask() { auto limit = background_memory_tracker.getSoftLimit(); diff --git a/src/Common/MemoryTracker.h b/src/Common/MemoryTracker.h index 768dc8a7404..4e29d40c953 100644 --- a/src/Common/MemoryTracker.h +++ b/src/Common/MemoryTracker.h @@ -67,12 +67,6 @@ private: /// To randomly sample allocations and deallocations in trace_log. double sample_probability = 0; - /// Randomly sample allocations only larger or equal to this size - UInt64 min_allocation_size_bytes = 0; - - /// Randomly sample allocations only smaller or equal to this size - UInt64 max_allocation_size_bytes = 0; - /// Singly-linked list. All information will be passed to subsequent memory trackers also (it allows to implement trackers hierarchy). /// In terms of tree nodes it is the list of parents. Lifetime of these trackers should "include" lifetime of current tracker. std::atomic parent {}; @@ -94,8 +88,6 @@ private: void setOrRaiseProfilerLimit(Int64 value); - bool isSizeOkForSampling(UInt64 size) const; - /// allocImpl(...) and free(...) should not be used directly friend struct CurrentMemoryTracker; void allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryTracker * query_tracker = nullptr); @@ -173,16 +165,6 @@ public: sample_probability = value; } - void setSampleMinAllocationSize(UInt64 value) - { - min_allocation_size_bytes = value; - } - - void setSampleMaxAllocationSize(UInt64 value) - { - max_allocation_size_bytes = value; - } - void setProfilerStep(Int64 value) { profiler_step = value; diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index f7a6c9e950e..1a9f226041b 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -81,12 +81,8 @@ namespace DB M(UInt64, background_schedule_pool_size, 128, "The maximum number of threads that will be used for constantly executing some lightweight periodic operations.", 0) \ M(UInt64, background_message_broker_schedule_pool_size, 16, "The maximum number of threads that will be used for executing background operations for message streaming.", 0) \ M(UInt64, background_distributed_schedule_pool_size, 16, "The maximum number of threads that will be used for executing distributed sends.", 0) \ - M(Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0) \ - \ - M(UInt64, total_memory_profiler_step, 0, "Whenever server memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. 
Values lower than a few megabytes will slow down server.", 0) \ - M(Double, total_memory_tracker_sample_probability, 0, "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation (can be changed with `memory_profiler_sample_min_allocation_size` and `memory_profiler_sample_max_allocation_size`). Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \ - M(UInt64, total_memory_profiler_sample_min_allocation_size, 0, "Collect random allocations of size greater or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \ - M(UInt64, total_memory_profiler_sample_max_allocation_size, 0, "Collect random allocations of size less or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) + M(Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0) + DECLARE_SETTINGS_TRAITS(ServerSettingsTraits, SERVER_SETTINGS) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 4fc93500910..24be644ee55 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -427,9 +427,7 @@ class IColumn; M(UInt64, memory_overcommit_ratio_denominator_for_user, 1_GiB, "It represents soft memory limit on the global level. This value is used to compute query overcommit ratio.", 0) \ M(UInt64, max_untracked_memory, (4 * 1024 * 1024), "Small allocations and deallocations are grouped in thread local variable and tracked or profiled only when amount (in absolute value) becomes larger than specified value. If the value is higher than 'memory_profiler_step' it will be effectively lowered to 'memory_profiler_step'.", 0) \ M(UInt64, memory_profiler_step, (4 * 1024 * 1024), "Whenever query memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. Values lower than a few megabytes will slow down query processing.", 0) \ - M(Float, memory_profiler_sample_probability, 0., "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation (can be changed with `memory_profiler_sample_min_allocation_size` and `memory_profiler_sample_max_allocation_size`). Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \ - M(UInt64, memory_profiler_sample_min_allocation_size, 0, "Collect random allocations of size greater or equal than specified value with probability equal to `memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \ - M(UInt64, memory_profiler_sample_max_allocation_size, 0, "Collect random allocations of size less or equal than specified value with probability equal to `memory_profiler_sample_probability`. 
0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \ + M(Float, memory_profiler_sample_probability, 0., "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation. Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \ M(Bool, trace_profile_events, false, "Send to system.trace_log profile event and value of increment on each increment with 'ProfileEvent' trace_type", 0) \ \ M(UInt64, memory_usage_overcommit_max_wait_microseconds, 5'000'000, "Maximum time thread will wait for memory to be freed in the case of memory overcommit. If timeout is reached and memory is not freed, exception is thrown.", 0) \ diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index c299572ef41..1503e396298 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -223,10 +223,7 @@ ProcessList::insert(const String & query_, const IAST * ast, ContextMutablePtr q { /// Set up memory profiling thread_group->memory_tracker.setProfilerStep(settings.memory_profiler_step); - thread_group->memory_tracker.setSampleProbability(settings.memory_profiler_sample_probability); - thread_group->memory_tracker.setSampleMinAllocationSize(settings.memory_profiler_sample_min_allocation_size); - thread_group->memory_tracker.setSampleMaxAllocationSize(settings.memory_profiler_sample_max_allocation_size); thread_group->performance_counters.setTraceProfileEvents(settings.trace_profile_events); } diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index bac16c05533..398bea26b87 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -83,8 +83,6 @@ ThreadGroupPtr ThreadGroup::createForBackgroundProcess(ContextPtr storage_contex const Settings & settings = storage_context->getSettingsRef(); group->memory_tracker.setProfilerStep(settings.memory_profiler_step); group->memory_tracker.setSampleProbability(settings.memory_profiler_sample_probability); - group->memory_tracker.setSampleMinAllocationSize(settings.memory_profiler_sample_min_allocation_size); - group->memory_tracker.setSampleMaxAllocationSize(settings.memory_profiler_sample_max_allocation_size); group->memory_tracker.setSoftLimit(settings.memory_overcommit_ratio_denominator); group->memory_tracker.setParent(&background_memory_tracker); if (settings.memory_tracker_fault_probability > 0.0) diff --git a/tests/integration/test_memory_profiler_min_max_borders/__init__.py b/tests/integration/test_memory_profiler_min_max_borders/__init__.py deleted file mode 100644 index e5a0d9b4834..00000000000 --- a/tests/integration/test_memory_profiler_min_max_borders/__init__.py +++ /dev/null @@ -1 +0,0 @@ -#!/usr/bin/env python3 diff --git a/tests/integration/test_memory_profiler_min_max_borders/configs/max_untracked_memory.xml b/tests/integration/test_memory_profiler_min_max_borders/configs/max_untracked_memory.xml deleted file mode 100644 index 56fc5ed34ca..00000000000 --- a/tests/integration/test_memory_profiler_min_max_borders/configs/max_untracked_memory.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - 1 - - - diff --git a/tests/integration/test_memory_profiler_min_max_borders/configs/memory_profiler.xml 
b/tests/integration/test_memory_profiler_min_max_borders/configs/memory_profiler.xml deleted file mode 100644 index 5b3e17d145f..00000000000 --- a/tests/integration/test_memory_profiler_min_max_borders/configs/memory_profiler.xml +++ /dev/null @@ -1,5 +0,0 @@ - - 1 - 4096 - 8192 - diff --git a/tests/integration/test_memory_profiler_min_max_borders/test.py b/tests/integration/test_memory_profiler_min_max_borders/test.py deleted file mode 100644 index 6ab971fa9c4..00000000000 --- a/tests/integration/test_memory_profiler_min_max_borders/test.py +++ /dev/null @@ -1,37 +0,0 @@ -from helpers.cluster import ClickHouseCluster -import pytest - -cluster = ClickHouseCluster(__file__) -node = cluster.add_instance( - "node", - main_configs=["configs/memory_profiler.xml"], - user_configs=["configs/max_untracked_memory.xml"], -) - - -@pytest.fixture(scope="module") -def started_cluster(): - try: - cluster.start() - yield cluster - - finally: - cluster.shutdown() - - -def test_trace_boundaries_work(started_cluster): - node.query("select randomPrintableASCII(number) from numbers(1000) FORMAT Null") - node.query("SYSTEM FLUSH LOGS") - - assert ( - node.query( - "SELECT countDistinct(abs(size)) > 0 FROM system.trace_log where trace_type = 'MemorySample'" - ) - == "1\n" - ) - assert ( - node.query( - "SELECT count() FROM system.trace_log where trace_type = 'MemorySample' and (abs(size) > 8192 or abs(size) < 4096)" - ) - == "0\n" - ) diff --git a/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.reference b/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.reference deleted file mode 100644 index d00491fd7e5..00000000000 --- a/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.reference +++ /dev/null @@ -1 +0,0 @@ -1 diff --git a/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.sh b/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.sh deleted file mode 100755 index b1fbea26da7..00000000000 --- a/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env bash -# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-cpu-aarch64, no-random-settings -# requires TraceCollector, does not available under sanitizers and aarch64 - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. 
"$CURDIR"/../shell_config.sh - -query_id="${CLICKHOUSE_DATABASE}_min_max_allocation_size_$RANDOM$RANDOM" -${CLICKHOUSE_CLIENT} --query_id="$query_id" --memory_profiler_sample_min_allocation_size=4096 --memory_profiler_sample_max_allocation_size=8192 --log_queries=1 --max_threads=1 --max_untracked_memory=0 --memory_profiler_sample_probability=1 --query "select randomPrintableASCII(number) from numbers(1000) FORMAT Null" - -${CLICKHOUSE_CLIENT} --query "SYSTEM FLUSH LOGS" - -# at least something allocated -${CLICKHOUSE_CLIENT} --query "SELECT countDistinct(abs(size)) > 0 FROM system.trace_log where query_id='$query_id' and trace_type = 'MemorySample'" - -# show wrong allocations -${CLICKHOUSE_CLIENT} --query "SELECT abs(size) FROM system.trace_log where query_id='$query_id' and trace_type = 'MemorySample' and (abs(size) > 8192 or abs(size) < 4096)" From e56e1ebd5d8fbb808867c1f98e421383acf38b1f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 23 Jul 2023 18:29:07 +0200 Subject: [PATCH 1904/1997] Fix fasttest --- src/Functions/FunctionsCharsetClassification.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/Functions/FunctionsCharsetClassification.cpp b/src/Functions/FunctionsCharsetClassification.cpp index 237d4c37fa2..7704e3eafc0 100644 --- a/src/Functions/FunctionsCharsetClassification.cpp +++ b/src/Functions/FunctionsCharsetClassification.cpp @@ -1,4 +1,9 @@ #include + +#include "config.h" + +#if USE_NLP + #include #include @@ -150,3 +155,5 @@ REGISTER_FUNCTION(DetectCharset) } } + +#endif From 039cac69cf6d30cc58c8531b1efac4d9847cb599 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Sun, 23 Jul 2023 18:35:37 +0200 Subject: [PATCH 1905/1997] Fix test_insert_same_partition_and_merge by increasing wait time --- tests/integration/test_merge_tree_azure_blob_storage/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_merge_tree_azure_blob_storage/test.py b/tests/integration/test_merge_tree_azure_blob_storage/test.py index 761b5257a34..86b70f8db70 100644 --- a/tests/integration/test_merge_tree_azure_blob_storage/test.py +++ b/tests/integration/test_merge_tree_azure_blob_storage/test.py @@ -215,7 +215,7 @@ def test_insert_same_partition_and_merge(cluster, merge_vertical): if attempt == 59: assert parts_count == "(1)" - time.sleep(1) + time.sleep(10) assert azure_query(node, f"SELECT sum(id) FROM {TABLE_NAME} FORMAT Values") == "(0)" assert ( From 311b3adf89b9d54c4b3bf40feb4179d967ed3d2e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 23 Jul 2023 20:10:41 +0200 Subject: [PATCH 1906/1997] Fix fasttest --- src/Functions/FunctionsCharsetClassification.cpp | 2 -- src/Functions/FunctionsProgrammingClassification.cpp | 5 +++++ src/Functions/FunctionsTonalityClassification.cpp | 5 +++++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/Functions/FunctionsCharsetClassification.cpp b/src/Functions/FunctionsCharsetClassification.cpp index 7704e3eafc0..05b173e3d95 100644 --- a/src/Functions/FunctionsCharsetClassification.cpp +++ b/src/Functions/FunctionsCharsetClassification.cpp @@ -1,7 +1,5 @@ #include -#include "config.h" - #if USE_NLP #include diff --git a/src/Functions/FunctionsProgrammingClassification.cpp b/src/Functions/FunctionsProgrammingClassification.cpp index 8a552a30e65..a93e1d9a87d 100644 --- a/src/Functions/FunctionsProgrammingClassification.cpp +++ b/src/Functions/FunctionsProgrammingClassification.cpp @@ -1,4 +1,7 @@ #include + +#if USE_NLP + #include #include #include @@ -118,3 +121,5 @@ 
REGISTER_FUNCTION(DetectProgrammingLanguage) } } + +#endif diff --git a/src/Functions/FunctionsTonalityClassification.cpp b/src/Functions/FunctionsTonalityClassification.cpp index e39f9c63758..3de38d99c88 100644 --- a/src/Functions/FunctionsTonalityClassification.cpp +++ b/src/Functions/FunctionsTonalityClassification.cpp @@ -1,4 +1,7 @@ #include + +#if USE_NLP + #include #include #include @@ -87,3 +90,5 @@ REGISTER_FUNCTION(DetectTonality) } } + +#endif From 49f4ef6ffb9264d8b4a31c8e4ab683f01afd4268 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 23 Jul 2023 20:11:24 +0200 Subject: [PATCH 1907/1997] Fix typo --- src/Functions/FunctionsCharsetClassification.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/FunctionsCharsetClassification.cpp b/src/Functions/FunctionsCharsetClassification.cpp index 05b173e3d95..0a332ab70a9 100644 --- a/src/Functions/FunctionsCharsetClassification.cpp +++ b/src/Functions/FunctionsCharsetClassification.cpp @@ -49,7 +49,7 @@ namespace return res; } - /// Сount how many times each bigram occurs in the text. + /// Count how many times each bigram occurs in the text. template ALWAYS_INLINE inline void calculateStats( const UInt8 * data, From e21a4c4c9a3f50436b8e708b6a38cdf8eee3c6be Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 23 Jul 2023 20:57:16 +0200 Subject: [PATCH 1908/1997] Fix the test --- .../02415_all_new_functions_must_be_documented.reference | 4 ---- .../02415_all_new_functions_must_be_documented.sql | 4 +++- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index 595ebb483d5..b7097ad329b 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -238,10 +238,6 @@ defaultValueOfArgumentType defaultValueOfTypeName degrees demangle -detectCharset -detectLanguageUnknown -detectProgrammingLanguage -detectTonality divide dotProduct dumpColumnStructure diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.sql b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.sql index ed95c06d016..4f40da6c626 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.sql +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.sql @@ -15,5 +15,7 @@ AND name NOT IN ( 'h3ToGeoBoundary', 'h3ToParent', 'h3ToString', 'h3UnidirectionalEdgeIsValid', 'h3kRing', 'stringToH3', 'geoToS2', 's2CapContains', 's2CapUnion', 's2CellsIntersect', 's2GetNeighbors', 's2RectAdd', 's2RectContains', 's2RectIntersection', 's2RectUnion', 's2ToGeo', 'normalizeUTF8NFC', 'normalizeUTF8NFD', 'normalizeUTF8NFKC', 'normalizeUTF8NFKD', - 'lemmatize', 'tokenize', 'stem', 'synonyms' -- these functions are not enabled in fast test + 'lemmatize', 'tokenize', 'stem', 'synonyms', + 'detectCharset', 'detectLanguageUnknown', 'detectProgrammingLanguage', 'detectTonality' + -- these functions are not enabled in fast test ) ORDER BY name; From 67f643f27e5930765d0b6881c415ffacf369c14f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 23 Jul 2023 21:00:28 +0200 Subject: [PATCH 1909/1997] Fix error --- contrib/cctz-cmake/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/cctz-cmake/CMakeLists.txt b/contrib/cctz-cmake/CMakeLists.txt index 
7edeada6e59..fde31dd469d 100644 --- a/contrib/cctz-cmake/CMakeLists.txt +++ b/contrib/cctz-cmake/CMakeLists.txt @@ -55,7 +55,7 @@ foreach(TIMEZONE ${TIMEZONES}) MATH(EXPR COUNTER "${COUNTER}+1") endforeach(TIMEZONE) -file(APPEND ${TIMEZONES_FILE} "};\n\n") +file(APPEND ${TIMEZONES_FILE} " nullptr\n};\n\n") file(APPEND ${TIMEZONES_FILE} "#include \n\n") file(APPEND ${TIMEZONES_FILE} "std::string_view getTimeZone(const char * name)\n{\n" ) From e02948580b31c61e32860da04f966a21231e14c7 Mon Sep 17 00:00:00 2001 From: alesapin Date: Sun, 23 Jul 2023 22:38:59 +0200 Subject: [PATCH 1910/1997] Don't shutdown interserver before tables --- programs/server/Server.cpp | 91 +++++++++++++++++++++++++++++--------- programs/server/Server.h | 11 ++++- 2 files changed, 79 insertions(+), 23 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index c7a7ba71e83..8c6e41d28c6 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -739,11 +739,12 @@ try [&]() -> std::vector { std::vector metrics; - metrics.reserve(servers_to_start_before_tables.size()); + + std::lock_guard lock(servers_lock); + metrics.reserve(servers_to_start_before_tables.size() + servers.size()); for (const auto & server : servers_to_start_before_tables) metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads()}); - std::lock_guard lock(servers_lock); for (const auto & server : servers) metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads()}); return metrics; @@ -1302,7 +1303,7 @@ try global_context->reloadAuxiliaryZooKeepersConfigIfChanged(config); std::lock_guard lock(servers_lock); - updateServers(*config, server_pool, async_metrics, servers); + updateServers(*config, server_pool, async_metrics, servers, servers_to_start_before_tables); } global_context->updateStorageConfiguration(*config); @@ -1404,10 +1405,27 @@ try } - for (auto & server : servers_to_start_before_tables) { - server.start(); - LOG_INFO(log, "Listening for {}", server.getDescription()); + std::lock_guard lock(servers_lock); + /// We should start interserver communications before (and more imporant shutdown after) tables. + /// Because server can wait for a long-running queries (for example in tcp_handler) after interserver handler was already shut down. + /// In this case we will have replicated tables which are unable to send any parts to other replicas, but still can + /// communicate with zookeeper, execute merges, etc. + createInterserverServers( + config(), + interserver_listen_hosts, + listen_try, + server_pool, + async_metrics, + servers_to_start_before_tables, + /* start_servers= */ false); + + + for (auto & server : servers_to_start_before_tables) + { + server.start(); + LOG_INFO(log, "Listening for {}", server.getDescription()); + } } /// Initialize access storages. 
@@ -1527,10 +1545,13 @@ try { LOG_DEBUG(log, "Waiting for current connections to servers for tables to finish."); size_t current_connections = 0; - for (auto & server : servers_to_start_before_tables) { - server.stop(); - current_connections += server.currentConnections(); + std::lock_guard lock(servers_lock); + for (auto & server : servers_to_start_before_tables) + { + server.stop(); + current_connections += server.currentConnections(); + } } if (current_connections) @@ -1709,7 +1730,7 @@ try { std::lock_guard lock(servers_lock); - createServers(config(), listen_hosts, interserver_listen_hosts, listen_try, server_pool, async_metrics, servers); + createServers(config(), listen_hosts, listen_try, server_pool, async_metrics, servers); if (servers.empty()) throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "No servers started (add valid listen_host and 'tcp_port' or 'http_port' " @@ -1967,7 +1988,6 @@ HTTPContextPtr Server::httpContext() const void Server::createServers( Poco::Util::AbstractConfiguration & config, const Strings & listen_hosts, - const Strings & interserver_listen_hosts, bool listen_try, Poco::ThreadPool & server_pool, AsynchronousMetrics & async_metrics, @@ -2189,6 +2209,23 @@ void Server::createServers( httpContext(), createHandlerFactory(*this, config, async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params)); }); } +} + +void Server::createInterserverServers( + Poco::Util::AbstractConfiguration & config, + const Strings & interserver_listen_hosts, + bool listen_try, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + std::vector & servers, + bool start_servers) +{ + const Settings & settings = global_context->getSettingsRef(); + + Poco::Timespan keep_alive_timeout(config.getUInt("keep_alive_timeout", 10), 0); + Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams; + http_params->setTimeout(settings.http_receive_timeout); + http_params->setKeepAliveTimeout(keep_alive_timeout); /// Now iterate over interserver_listen_hosts for (const auto & interserver_listen_host : interserver_listen_hosts) @@ -2237,14 +2274,14 @@ void Server::createServers( #endif }); } - } void Server::updateServers( Poco::Util::AbstractConfiguration & config, Poco::ThreadPool & server_pool, AsynchronousMetrics & async_metrics, - std::vector & servers) + std::vector & servers, + std::vector & servers_to_start_before_tables) { Poco::Logger * log = &logger(); @@ -2270,11 +2307,19 @@ void Server::updateServers( Poco::Util::AbstractConfiguration & previous_config = latest_config ? *latest_config : this->config(); + std::vector all_servers; + all_servers.reserve(servers.size() + servers_to_start_before_tables.size()); for (auto & server : servers) + all_servers.push_back(&server); + + for (auto & server : servers_to_start_before_tables) + all_servers.push_back(&server); + + for (auto * server : all_servers) { - if (!server.isStopping()) + if (!server->isStopping()) { - std::string port_name = server.getPortName(); + std::string port_name = server->getPortName(); bool has_host = false; bool is_http = false; if (port_name.starts_with("protocols.")) @@ -2312,27 +2357,29 @@ void Server::updateServers( /// NOTE: better to compare using getPortName() over using /// dynamic_cast<> since HTTPServer is also used for prometheus and /// internal replication communications. 
- is_http = server.getPortName() == "http_port" || server.getPortName() == "https_port"; + is_http = server->getPortName() == "http_port" || server->getPortName() == "https_port"; } if (!has_host) - has_host = std::find(listen_hosts.begin(), listen_hosts.end(), server.getListenHost()) != listen_hosts.end(); + has_host = std::find(listen_hosts.begin(), listen_hosts.end(), server->getListenHost()) != listen_hosts.end(); bool has_port = !config.getString(port_name, "").empty(); bool force_restart = is_http && !isSameConfiguration(previous_config, config, "http_handlers"); if (force_restart) - LOG_TRACE(log, " had been changed, will reload {}", server.getDescription()); + LOG_TRACE(log, " had been changed, will reload {}", server->getDescription()); - if (!has_host || !has_port || config.getInt(server.getPortName()) != server.portNumber() || force_restart) + if (!has_host || !has_port || config.getInt(server->getPortName()) != server->portNumber() || force_restart) { - server.stop(); - LOG_INFO(log, "Stopped listening for {}", server.getDescription()); + server->stop(); + LOG_INFO(log, "Stopped listening for {}", server->getDescription()); } } } - createServers(config, listen_hosts, interserver_listen_hosts, listen_try, server_pool, async_metrics, servers, /* start_servers= */ true); + createServers(config, listen_hosts, listen_try, server_pool, async_metrics, servers, /* start_servers= */ true); + createInterserverServers(config, interserver_listen_hosts, listen_try, server_pool, async_metrics, servers_to_start_before_tables, /* start_servers= */ true); std::erase_if(servers, std::bind_front(check_server, "")); + std::erase_if(servers_to_start_before_tables, std::bind_front(check_server, "")); } } diff --git a/programs/server/Server.h b/programs/server/Server.h index e9ae6d8d937..d13378dcd65 100644 --- a/programs/server/Server.h +++ b/programs/server/Server.h @@ -102,6 +102,14 @@ private: void createServers( Poco::Util::AbstractConfiguration & config, const Strings & listen_hosts, + bool listen_try, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + std::vector & servers, + bool start_servers = false); + + void createInterserverServers( + Poco::Util::AbstractConfiguration & config, const Strings & interserver_listen_hosts, bool listen_try, Poco::ThreadPool & server_pool, @@ -113,7 +121,8 @@ private: Poco::Util::AbstractConfiguration & config, Poco::ThreadPool & server_pool, AsynchronousMetrics & async_metrics, - std::vector & servers); + std::vector & servers, + std::vector & servers_to_start_before_tables); }; } From c0f16dcf031b62e2eebdef249c132e9351203bc0 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Sun, 23 Jul 2023 21:10:12 +0000 Subject: [PATCH 1911/1997] Test from fuzzer --- .../02831_ast_fuzz_asan_join.reference | 0 .../0_stateless/02831_ast_fuzz_asan_join.sql | 22 +++++++++++++++++++ 2 files changed, 22 insertions(+) create mode 100644 tests/queries/0_stateless/02831_ast_fuzz_asan_join.reference create mode 100644 tests/queries/0_stateless/02831_ast_fuzz_asan_join.sql diff --git a/tests/queries/0_stateless/02831_ast_fuzz_asan_join.reference b/tests/queries/0_stateless/02831_ast_fuzz_asan_join.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02831_ast_fuzz_asan_join.sql b/tests/queries/0_stateless/02831_ast_fuzz_asan_join.sql new file mode 100644 index 00000000000..7c7bfd2df88 --- /dev/null +++ b/tests/queries/0_stateless/02831_ast_fuzz_asan_join.sql @@ -0,0 +1,22 @@ +SELECT + '0', + 
toTypeName(materialize(js2.s)) +FROM +( + SELECT number AS k + FROM numbers(100) +) AS js1 +FULL OUTER JOIN +( + SELECT + toLowCardinality(2147483647 + 256) AS k, + '-0.0000000001', + 1024, + toString(number + 10) AS s + FROM numbers(1024) +) AS js2 ON js1.k = js2.k +ORDER BY + inf DESC NULLS FIRST, + js1.k ASC NULLS LAST, + js2.k ASC +FORMAT `Null` From 1e467867e68c2c382f26291753bab45e2bc87a60 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 24 Jul 2023 00:03:40 +0200 Subject: [PATCH 1912/1997] Attempt to fix LTO --- contrib/cctz-cmake/CMakeLists.txt | 3 +-- programs/install/Install.cpp | 6 ++++-- programs/server/Server.cpp | 2 +- src/Common/FrequencyHolder.cpp | 6 +++--- src/Common/config.h.in | 4 ++++ src/Server/WebUIRequestHandler.cpp | 8 +++++--- src/Storages/System/attachInformationSchemaTables.cpp | 10 ++++++---- src/configure_config.cmake | 2 ++ 8 files changed, 26 insertions(+), 15 deletions(-) diff --git a/contrib/cctz-cmake/CMakeLists.txt b/contrib/cctz-cmake/CMakeLists.txt index fde31dd469d..7161f743de1 100644 --- a/contrib/cctz-cmake/CMakeLists.txt +++ b/contrib/cctz-cmake/CMakeLists.txt @@ -44,7 +44,7 @@ file(APPEND ${TIMEZONES_FILE} "#include \n") set (COUNTER 1) foreach(TIMEZONE ${TIMEZONES}) - file(APPEND ${TIMEZONES_FILE} "INCBIN(resource_timezone${COUNTER}, \"${TIMEZONE}\");\n") + file(APPEND ${TIMEZONES_FILE} "INCBIN(resource_timezone${COUNTER}, \"${TZDIR}/${TIMEZONE}\");\n") MATH(EXPR COUNTER "${COUNTER}+1") endforeach(TIMEZONE) @@ -71,7 +71,6 @@ file(APPEND ${TIMEZONES_FILE} "}\n") add_library (tzdata ${TIMEZONES_FILE}) target_link_libraries(tzdata ch_contrib::incbin) -target_include_directories(tzdata PRIVATE ${TZDIR}) target_link_libraries(_cctz tzdata) add_library(ch_contrib::cctz ALIAS _cctz) diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp index da2c95af62c..d7086c95beb 100644 --- a/programs/install/Install.cpp +++ b/programs/install/Install.cpp @@ -34,9 +34,11 @@ #include +#include "config.h" + /// Embedded configuration files used inside the install program -INCBIN(resource_config_xml, "config.xml"); -INCBIN(resource_users_xml, "users.xml"); +INCBIN(resource_config_xml, SOURCE_DIR "/programs/server/config.xml"); +INCBIN(resource_users_xml, SOURCE_DIR "/programs/server/users.xml"); /** This tool can be used to install ClickHouse without a deb/rpm/tgz package, having only "clickhouse" binary. 
diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 229a169dc1e..2ab89ad048a 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -130,7 +130,7 @@ #include /// A minimal file used when the server is run without installation -INCBIN(resource_embedded_xml, "embedded.xml"); +INCBIN(resource_embedded_xml, SOURCE_DIR "/programs/server/embedded.xml"); namespace CurrentMetrics { diff --git a/src/Common/FrequencyHolder.cpp b/src/Common/FrequencyHolder.cpp index fe03e6a1b44..7dc1f622aeb 100644 --- a/src/Common/FrequencyHolder.cpp +++ b/src/Common/FrequencyHolder.cpp @@ -5,9 +5,9 @@ #include /// Embedded SQL definitions -INCBIN(resource_charset_zst, "charset.zst"); -INCBIN(resource_tonality_ru_zst, "tonality_ru.zst"); -INCBIN(resource_programming_zst, "programming.zst"); +INCBIN(resource_charset_zst, SOURCE_DIR "/contrib/nlp-data/charset.zst"); +INCBIN(resource_tonality_ru_zst, SOURCE_DIR "/contrib/nlp-data/tonality_ru.zst"); +INCBIN(resource_programming_zst, SOURCE_DIR "/contrib/nlp-data/programming.zst"); namespace DB diff --git a/src/Common/config.h.in b/src/Common/config.h.in index a2c18fc330f..628f0847d65 100644 --- a/src/Common/config.h.in +++ b/src/Common/config.h.in @@ -59,3 +59,7 @@ #cmakedefine01 USE_ULID #cmakedefine01 FIU_ENABLE #cmakedefine01 USE_BCRYPT + +/// This is needed for .incbin in assembly. For some reason, include paths don't work there in presence of LTO. +/// That's why we use absolute paths. +#cmakedefine SOURCE_DIR "@SOURCE_DIR@" diff --git a/src/Server/WebUIRequestHandler.cpp b/src/Server/WebUIRequestHandler.cpp index cb9e8935d8c..6fa1d65de42 100644 --- a/src/Server/WebUIRequestHandler.cpp +++ b/src/Server/WebUIRequestHandler.cpp @@ -11,10 +11,12 @@ #include +#include "config.h" + /// Embedded HTML pages -INCBIN(resource_play_html, "play.html"); -INCBIN(resource_dashboard_html, "dashboard.html"); -INCBIN(resource_uplot_js, "js/uplot.js"); +INCBIN(resource_play_html, SOURCE_DIR "/programs/server/play.html"); +INCBIN(resource_dashboard_html, SOURCE_DIR "/programs/server/dashboard.html"); +INCBIN(resource_uplot_js, SOURCE_DIR "/programs/server/js/uplot.js"); namespace DB diff --git a/src/Storages/System/attachInformationSchemaTables.cpp b/src/Storages/System/attachInformationSchemaTables.cpp index bfc5c8c64e2..12cef89b553 100644 --- a/src/Storages/System/attachInformationSchemaTables.cpp +++ b/src/Storages/System/attachInformationSchemaTables.cpp @@ -5,11 +5,13 @@ #include #include +#include "config.h" + /// Embedded SQL definitions -INCBIN(resource_schemata_sql, "schemata.sql"); -INCBIN(resource_tables_sql, "tables.sql"); -INCBIN(resource_views_sql, "views.sql"); -INCBIN(resource_columns_sql, "columns.sql"); +INCBIN(resource_schemata_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/schemata.sql"); +INCBIN(resource_tables_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/tables.sql"); +INCBIN(resource_views_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/views.sql"); +INCBIN(resource_columns_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/columns.sql"); namespace DB diff --git a/src/configure_config.cmake b/src/configure_config.cmake index ae6305705c2..5529e2f2f39 100644 --- a/src/configure_config.cmake +++ b/src/configure_config.cmake @@ -162,3 +162,5 @@ endif () if (TARGET ch_contrib::fiu) set(FIU_ENABLE 1) endif() + +set(SOURCE_DIR ${CMAKE_SOURCE_DIR}) From 7b4d0cf9d5b261eb68bd1db4021fcc350b907fc1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 24 Jul 2023 00:51:20 +0200 Subject: [PATCH 
1913/1997] Fix Darwin --- contrib/incbin-cmake/CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/contrib/incbin-cmake/CMakeLists.txt b/contrib/incbin-cmake/CMakeLists.txt index e64ebc99c73..8f4dad7e0d9 100644 --- a/contrib/incbin-cmake/CMakeLists.txt +++ b/contrib/incbin-cmake/CMakeLists.txt @@ -2,3 +2,7 @@ set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/incbin") add_library(_incbin INTERFACE) target_include_directories(_incbin SYSTEM INTERFACE ${LIBRARY_DIR}) add_library(ch_contrib::incbin ALIAS _incbin) + +# Warning "incbin is incompatible with bitcode. Using the library will break upload to App Store if you have bitcode enabled. +# Add `#define INCBIN_SILENCE_BITCODE_WARNING` before including this header to silence this warning." +target_compile_definitions(_inclin PUBLIC INCBIN_SILENCE_BITCODE_WARNING) From 641c086dbd771c14cc7db089e265ec508da9ccff Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 24 Jul 2023 00:53:11 +0200 Subject: [PATCH 1914/1997] Fix Darwin --- contrib/incbin-cmake/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/incbin-cmake/CMakeLists.txt b/contrib/incbin-cmake/CMakeLists.txt index 8f4dad7e0d9..5778cf83c22 100644 --- a/contrib/incbin-cmake/CMakeLists.txt +++ b/contrib/incbin-cmake/CMakeLists.txt @@ -5,4 +5,4 @@ add_library(ch_contrib::incbin ALIAS _incbin) # Warning "incbin is incompatible with bitcode. Using the library will break upload to App Store if you have bitcode enabled. # Add `#define INCBIN_SILENCE_BITCODE_WARNING` before including this header to silence this warning." -target_compile_definitions(_inclin PUBLIC INCBIN_SILENCE_BITCODE_WARNING) +target_compile_definitions(_incbin INTERFACE INCBIN_SILENCE_BITCODE_WARNING) From 40f5649811bb579b3cf8d634281f862675934773 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 24 Jul 2023 01:02:11 +0200 Subject: [PATCH 1915/1997] Fix test --- .../no_allow_vertical_merges_from_compact_to_wide_parts.xml | 5 +++++ .../test_vertical_merges_from_compact_parts.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 tests/integration/test_backward_compatibility/configs/no_allow_vertical_merges_from_compact_to_wide_parts.xml diff --git a/tests/integration/test_backward_compatibility/configs/no_allow_vertical_merges_from_compact_to_wide_parts.xml b/tests/integration/test_backward_compatibility/configs/no_allow_vertical_merges_from_compact_to_wide_parts.xml new file mode 100644 index 00000000000..c69be846c46 --- /dev/null +++ b/tests/integration/test_backward_compatibility/configs/no_allow_vertical_merges_from_compact_to_wide_parts.xml @@ -0,0 +1,5 @@ + + + 0 + + diff --git a/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py b/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py index 82ffcc20b60..481621cacfe 100644 --- a/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py +++ b/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py @@ -15,7 +15,7 @@ node_old = cluster.add_instance( ) node_new = cluster.add_instance( "node2", - main_configs=["configs/no_compress_marks.xml"], + main_configs=["configs/no_compress_marks.xml", "configs/no_allow_vertical_merges_from_compact_to_wide_parts.xml"], with_zookeeper=True, stay_alive=True, allow_analyzer=False, From dba7a0dffc4927a88c04cb7b9ec93faeeba40b3c Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sun, 23 Jul 2023 23:18:19 +0000 
Subject: [PATCH 1916/1997] Automatic style fix --- .../test_vertical_merges_from_compact_parts.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py b/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py index 481621cacfe..9c9d1a4d312 100644 --- a/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py +++ b/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py @@ -15,7 +15,10 @@ node_old = cluster.add_instance( ) node_new = cluster.add_instance( "node2", - main_configs=["configs/no_compress_marks.xml", "configs/no_allow_vertical_merges_from_compact_to_wide_parts.xml"], + main_configs=[ + "configs/no_compress_marks.xml", + "configs/no_allow_vertical_merges_from_compact_to_wide_parts.xml", + ], with_zookeeper=True, stay_alive=True, allow_analyzer=False, From d7cdfb47d3795a3a09c2a204789c95e9726dc2b6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 24 Jul 2023 01:56:04 +0200 Subject: [PATCH 1917/1997] Fix merge --- src/IO/WriteHelpers.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index 0494cdf22e7..76778543bd0 100644 --- a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -953,6 +953,11 @@ void writeDecimalFractional(const T & x, UInt32 scale, WriteBuffer & ostr, bool { auto remainder = value % 10; value /= 10; + + if (remainder != 0 && last_nonzero_pos == 0) + last_nonzero_pos = pos; + + buf[pos] += static_cast(remainder); } writeChar('.', ostr); From 75efee9675f277fc3405ca5b256296aa406baca4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 24 Jul 2023 05:34:00 +0200 Subject: [PATCH 1918/1997] Fix errors --- programs/install/CMakeLists.txt | 3 --- programs/server/CMakeLists.txt | 2 +- src/CMakeLists.txt | 3 +-- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/programs/install/CMakeLists.txt b/programs/install/CMakeLists.txt index f3f562bab7c..c3f4d96d631 100644 --- a/programs/install/CMakeLists.txt +++ b/programs/install/CMakeLists.txt @@ -10,6 +10,3 @@ set (CLICKHOUSE_INSTALL_LINK ) clickhouse_program_add_library(install) - -# For incbin -target_include_directories(clickhouse-install-lib PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/../server") diff --git a/programs/server/CMakeLists.txt b/programs/server/CMakeLists.txt index e008e65acf6..b8241afa1eb 100644 --- a/programs/server/CMakeLists.txt +++ b/programs/server/CMakeLists.txt @@ -29,4 +29,4 @@ endif() clickhouse_program_add(server) -target_include_directories(clickhouse-server-lib PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) +install(FILES config.xml users.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-server" COMPONENT clickhouse) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index fda8bafde59..975bf9bb618 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -296,7 +296,7 @@ macro (dbms_target_include_directories) endforeach () endmacro () -dbms_target_include_directories (PUBLIC "${ClickHouse_SOURCE_DIR}/src" "${ClickHouse_BINARY_DIR}/src" "${ClickHouse_SOURCE_DIR}/programs/server") +dbms_target_include_directories (PUBLIC "${ClickHouse_SOURCE_DIR}/src" "${ClickHouse_BINARY_DIR}/src") target_include_directories (clickhouse_common_io PUBLIC "${ClickHouse_SOURCE_DIR}/src" "${ClickHouse_BINARY_DIR}/src") if (TARGET ch_contrib::llvm) @@ -561,7 +561,6 @@ if (ENABLE_NLP) dbms_target_link_libraries (PUBLIC ch_contrib::stemmer) 
dbms_target_link_libraries (PUBLIC ch_contrib::wnb) dbms_target_link_libraries (PUBLIC ch_contrib::lemmagen) - target_include_directories(clickhouse_common_io PUBLIC ${CMAKE_SOURCE_DIR}/contrib/nlp-data) endif() if (TARGET ch_contrib::ulid) From 169b9d5cc0c8dc54d31bc7229204b195f294c877 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 24 Jul 2023 05:49:06 +0200 Subject: [PATCH 1919/1997] Fix tidy --- src/Functions/GregorianDate.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Functions/GregorianDate.cpp b/src/Functions/GregorianDate.cpp index da1172c8916..aaaeeb7339d 100644 --- a/src/Functions/GregorianDate.cpp +++ b/src/Functions/GregorianDate.cpp @@ -20,12 +20,12 @@ namespace ErrorCodes namespace { - static inline constexpr bool is_leap_year(int32_t year) + inline constexpr bool is_leap_year(int32_t year) { return (year % 4 == 0) && ((year % 400 == 0) || (year % 100 != 0)); } - static inline constexpr uint8_t monthLength(bool is_leap_year, uint8_t month) + inline constexpr uint8_t monthLength(bool is_leap_year, uint8_t month) { switch (month) { @@ -49,7 +49,7 @@ namespace /** Integer division truncated toward negative infinity. */ template - static inline constexpr I div(I x, J y) + inline constexpr I div(I x, J y) { const auto y_cast = static_cast(y); if (x > 0 && y_cast < 0) @@ -63,7 +63,7 @@ namespace /** Integer modulus, satisfying div(x, y)*y + mod(x, y) == x. */ template - static inline constexpr I mod(I x, J y) + inline constexpr I mod(I x, J y) { const auto y_cast = static_cast(y); const auto r = x % y_cast; @@ -76,13 +76,13 @@ namespace /** Like std::min(), but the type of operands may differ. */ template - static inline constexpr I min(I x, J y) + inline constexpr I min(I x, J y) { const auto y_cast = static_cast(y); return x < y_cast ? 
x : y_cast; } - static inline char readDigit(ReadBuffer & in) + inline char readDigit(ReadBuffer & in) { char c; if (!in.read(c)) @@ -93,7 +93,7 @@ namespace return c - '0'; } - static inline bool tryReadDigit(ReadBuffer & in, char & c) + inline bool tryReadDigit(ReadBuffer & in, char & c) { if (in.read(c) && c >= '0' && c <= '9') { From d7f7f16fbcfa8063e295708b4feb3b0079ad05f0 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 23 Jun 2023 10:44:13 +0200 Subject: [PATCH 1920/1997] Introduce IStorage::supportsTrivialCountOptimization() Signed-off-by: Azat Khuzhin --- src/Interpreters/InterpreterSelectQuery.cpp | 3 +-- src/Planner/PlannerJoinTree.cpp | 3 +++ src/Storages/IStorage.h | 3 +++ src/Storages/MergeTree/MergeTreeData.h | 2 ++ src/Storages/StorageMaterializedMySQL.h | 2 ++ 5 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index d07a6521544..fc3ea3a13ca 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -2274,8 +2274,7 @@ std::optional InterpreterSelectQuery::getTrivialCount(UInt64 max_paralle && !settings.allow_experimental_query_deduplication && !settings.empty_result_for_aggregation_by_empty_set && storage - && storage->getName() != "MaterializedMySQL" - && !storage->hasLightweightDeletedMask() + && storage->supportsTrivialCountOptimization() && query_info.filter_asts.empty() && query_analyzer->hasAggregation() && (query_analyzer->aggregates().size() == 1) diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 5d8f8ca8741..c118fccded4 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -182,6 +182,9 @@ bool applyTrivialCountIfPossible( return false; const auto & storage = table_node.getStorage(); + if (!storage->supportsTrivialCountOptimization()) + return false; + auto storage_id = storage->getStorageID(); auto row_policy_filter = query_context->getRowPolicyFilter(storage_id.getDatabaseName(), storage_id.getTableName(), diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 76641b656a2..701e02a85ac 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -254,6 +254,9 @@ public: /// because those are internally translated into 'ALTER UDPATE' mutations. virtual bool supportsDelete() const { return false; } + /// Return true if the trivial count query could be optimized without reading the data at all. 
+ virtual bool supportsTrivialCountOptimization() const { return false; } + private: StorageID storage_id; diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 41fc4657854..5e6b043c31c 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -434,6 +434,8 @@ public: bool areAsynchronousInsertsEnabled() const override { return getSettings()->async_insert; } + bool supportsTrivialCountOptimization() const override { return !hasLightweightDeletedMask(); } + NamesAndTypesList getVirtuals() const override; bool mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, ContextPtr, const StorageMetadataPtr & metadata_snapshot) const override; diff --git a/src/Storages/StorageMaterializedMySQL.h b/src/Storages/StorageMaterializedMySQL.h index 08fbb61960f..e6fcbc203e6 100644 --- a/src/Storages/StorageMaterializedMySQL.h +++ b/src/Storages/StorageMaterializedMySQL.h @@ -41,6 +41,8 @@ public: void drop() override { nested_storage->drop(); } + bool supportsTrivialCountOptimization() const override { return false; } + private: [[noreturn]] static void throwNotAllowed() { From a0070eda02736903b984518daf3d1c79bfe5fd94 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 23 Jun 2023 10:48:21 +0200 Subject: [PATCH 1921/1997] Slightly optimize code in ClusterProxy::executeQuery() Signed-off-by: Azat Khuzhin --- src/Interpreters/ClusterProxy/executeQuery.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 3dea52faf46..5efba383e4b 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -176,11 +176,9 @@ void executeQuery( size_t shards = query_info.getCluster()->getShardCount(); for (const auto & shard_info : query_info.getCluster()->getShardsInfo()) { - ASTPtr query_ast_for_shard; - if (query_info.optimized_cluster && settings.optimize_skip_unused_shards_rewrite_in && shards > 1) + ASTPtr query_ast_for_shard = query_ast->clone(); + if (sharding_key_expr && query_info.optimized_cluster && settings.optimize_skip_unused_shards_rewrite_in && shards > 1) { - query_ast_for_shard = query_ast->clone(); - OptimizeShardingKeyRewriteInVisitor::Data visitor_data{ sharding_key_expr, sharding_key_expr->getSampleBlock().getByPosition(0).type, @@ -191,8 +189,6 @@ void executeQuery( OptimizeShardingKeyRewriteInVisitor visitor(visitor_data); visitor.visit(query_ast_for_shard); } - else - query_ast_for_shard = query_ast->clone(); if (shard_filter_generator) { From 67095d2150cafc91c0eebea4a17a8dc5f17b307c Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 23 Jun 2023 10:48:58 +0200 Subject: [PATCH 1922/1997] Fix comment for function argument in TableFunctionRemote Signed-off-by: Azat Khuzhin --- src/TableFunctions/TableFunctionRemote.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/TableFunctions/TableFunctionRemote.cpp b/src/TableFunctions/TableFunctionRemote.cpp index 4143014a7b3..e6d72ddf17b 100644 --- a/src/TableFunctions/TableFunctionRemote.cpp +++ b/src/TableFunctions/TableFunctionRemote.cpp @@ -264,7 +264,7 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr secure, /* priority= */ Priority{1}, /* cluster_name= */ "", - /* password= */ "" + /* cluster_secret= */ "" }; cluster = std::make_shared(context->getSettingsRef(), names, params); } From b22247609036020e9bc4da64f1a297e49c29edfa Mon 
Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 23 Jun 2023 14:19:08 +0200 Subject: [PATCH 1923/1997] Add ability to pass table for connections checks per-shard to ReadFromRemote Signed-off-by: Azat Khuzhin --- src/Interpreters/ClusterProxy/SelectStreamFactory.cpp | 1 + src/Interpreters/ClusterProxy/SelectStreamFactory.h | 2 ++ src/Processors/QueryPlan/ReadFromRemote.cpp | 6 ++++-- src/Processors/QueryPlan/ReadFromRemote.h | 1 + 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp index 0cf3f360994..953e38d56cd 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp @@ -124,6 +124,7 @@ void SelectStreamFactory::createForShard( { remote_shards.emplace_back(Shard{ .query = query_ast, + .main_table = main_table, .header = header, .shard_info = shard_info, .lazy = lazy, diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.h b/src/Interpreters/ClusterProxy/SelectStreamFactory.h index 030c0b77dd5..1cc5a3b1a77 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.h +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.h @@ -50,6 +50,8 @@ public: { /// Query and header may be changed depending on shard. ASTPtr query; + /// Used to check the table existence on remote node + StorageID main_table; Block header; Cluster::ShardInfo shard_info; diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index 5cc13f45df4..7a99c363232 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -162,7 +162,9 @@ void ReadFromRemote::addLazyPipe(Pipes & pipes, const ClusterProxy::SelectStream if (my_table_func_ptr) try_results = my_shard.shard_info.pool->getManyForTableFunction(timeouts, ¤t_settings, PoolMode::GET_MANY); else - try_results = my_shard.shard_info.pool->getManyChecked(timeouts, ¤t_settings, PoolMode::GET_MANY, my_main_table.getQualifiedName()); + try_results = my_shard.shard_info.pool->getManyChecked( + timeouts, ¤t_settings, PoolMode::GET_MANY, + my_shard.main_table ? my_shard.main_table.getQualifiedName() : my_main_table.getQualifiedName()); } catch (const Exception & ex) { @@ -241,7 +243,7 @@ void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::SelectStreamFact remote_query_executor->setPoolMode(PoolMode::GET_MANY); if (!table_func_ptr) - remote_query_executor->setMainTable(main_table); + remote_query_executor->setMainTable(shard.main_table ? 
shard.main_table : main_table); pipes.emplace_back(createRemoteSourcePipe(remote_query_executor, add_agg_info, add_totals, add_extremes, async_read, async_query_sending)); addConvertingActions(pipes.back(), output_stream->header); diff --git a/src/Processors/QueryPlan/ReadFromRemote.h b/src/Processors/QueryPlan/ReadFromRemote.h index d4005d81f1b..ac869cd89f9 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.h +++ b/src/Processors/QueryPlan/ReadFromRemote.h @@ -22,6 +22,7 @@ using ThrottlerPtr = std::shared_ptr; class ReadFromRemote final : public ISourceStep { public: + /// @param main_table_ if Shards contains main_table then this parameter will be ignored ReadFromRemote( ClusterProxy::SelectStreamFactory::Shards shards_, Block header_, From 83c0f03b98d6b3cbd10f9690256aed2fada47177 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 23 Jun 2023 14:21:53 +0200 Subject: [PATCH 1924/1997] Change signature of the updateSettingsForCluster() to avoid cluster requirement Signed-off-by: Azat Khuzhin --- src/Interpreters/ClusterProxy/executeQuery.cpp | 11 ++++++++--- src/Interpreters/ClusterProxy/executeQuery.h | 8 ++++++-- src/Storages/getStructureOfRemoteTable.cpp | 4 ++-- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 5efba383e4b..2fed626ffb7 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -35,7 +35,12 @@ namespace ErrorCodes namespace ClusterProxy { -ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, ContextPtr context, const Settings & settings, const StorageID & main_table, const SelectQueryInfo * query_info, Poco::Logger * log) +ContextMutablePtr updateSettingsForCluster(bool interserver_mode, + ContextPtr context, + const Settings & settings, + const StorageID & main_table, + const SelectQueryInfo * query_info, + Poco::Logger * log) { Settings new_settings = settings; new_settings.queue_max_wait_ms = Cluster::saturate(new_settings.queue_max_wait_ms, settings.max_execution_time); @@ -43,7 +48,7 @@ ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, ContextPtr c /// If "secret" (in remote_servers) is not in use, /// user on the shard is not the same as the user on the initiator, /// hence per-user limits should not be applied. - if (cluster.getSecret().empty()) + if (!interserver_mode) { /// Does not matter on remote servers, because queries are sent under different user. 
 new_settings.max_concurrent_queries_for_user = 0;
@@ -170,7 +175,7 @@
 std::vector plans;
 SelectStreamFactory::Shards remote_shards;
-    auto new_context = updateSettingsForCluster(*query_info.getCluster(), context, settings, main_table, &query_info, log);
+    auto new_context = updateSettingsForCluster(!query_info.getCluster()->getSecret().empty(), context, settings, main_table, &query_info, log);
 new_context->increaseDistributedDepth();
 size_t shards = query_info.getCluster()->getShardCount();
diff --git a/src/Interpreters/ClusterProxy/executeQuery.h b/src/Interpreters/ClusterProxy/executeQuery.h
index 41f6da55686..511914e99e4 100644
--- a/src/Interpreters/ClusterProxy/executeQuery.h
+++ b/src/Interpreters/ClusterProxy/executeQuery.h
@@ -34,8 +34,12 @@ class SelectStreamFactory;
 /// - optimize_skip_unused_shards_nesting
 ///
 /// @return new Context with adjusted settings
-ContextMutablePtr updateSettingsForCluster(
-    const Cluster & cluster, ContextPtr context, const Settings & settings, const StorageID & main_table, const SelectQueryInfo * query_info = nullptr, Poco::Logger * log = nullptr);
+ContextMutablePtr updateSettingsForCluster(bool interserver_mode,
+    ContextPtr context,
+    const Settings & settings,
+    const StorageID & main_table,
+    const SelectQueryInfo * query_info = nullptr,
+    Poco::Logger * log = nullptr);
 using AdditionalShardFilterGenerator = std::function;
 /// Execute a distributed query, creating a query plan, from which the query pipeline can be built.
diff --git a/src/Storages/getStructureOfRemoteTable.cpp b/src/Storages/getStructureOfRemoteTable.cpp
index e5fc01be9f4..cbed05e30ed 100644
--- a/src/Storages/getStructureOfRemoteTable.cpp
+++ b/src/Storages/getStructureOfRemoteTable.cpp
@@ -58,7 +58,7 @@ ColumnsDescription getStructureOfRemoteTableInShard(
 }
 ColumnsDescription res;
-    auto new_context = ClusterProxy::updateSettingsForCluster(cluster, context, context->getSettingsRef(), table_id);
+    auto new_context = ClusterProxy::updateSettingsForCluster(!cluster.getSecret().empty(), context, context->getSettingsRef(), table_id);
 /// Ignore limit for result number of rows (that could be set during handling CSE/CTE),
 /// since this is a service query and should not lead to query failure.
@@ -177,7 +177,7 @@ ColumnsDescriptionByShardNum getExtendedObjectsOfRemoteTables(
 const auto & shards_info = cluster.getShardsInfo();
 auto query = "DESC TABLE " + remote_table_id.getFullTableName();
-    auto new_context = ClusterProxy::updateSettingsForCluster(cluster, context, context->getSettingsRef(), remote_table_id);
+    auto new_context = ClusterProxy::updateSettingsForCluster(!cluster.getSecret().empty(), context, context->getSettingsRef(), remote_table_id);
 new_context->setSetting("describe_extend_object_types", true);
 /// Expect only needed columns from the result of DESC TABLE.
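An aside on the refactor in the patch above: the only thing `updateSettingsForCluster()` ever used the `Cluster` for was `getSecret()`, to decide whether per-user limits still apply, so narrowing the parameter to a boolean lets callers that have no `Cluster` object at hand reuse the function. A minimal sketch of the resulting call shape, assuming only the declarations from the hunks above (`cluster`, `context` and `table_id` are hypothetical stand-ins for whatever the caller already has in scope):

```cpp
// Sketch only, not part of the patch.
const Settings & settings = context->getSettingsRef();

// A non-empty secret in remote_servers means interserver mode: the query runs
// on the shard under the same user as on the initiator, so per-user limits
// are kept; otherwise they are dropped inside updateSettingsForCluster().
bool interserver_mode = !cluster.getSecret().empty();

auto new_context = ClusterProxy::updateSettingsForCluster(
    interserver_mode, context, settings, table_id,
    /* query_info = */ nullptr,
    /* log = */ nullptr);
```

The expression `!cluster.getSecret().empty()` is exactly what the updated call sites in `executeQuery.cpp` and `getStructureOfRemoteTable.cpp` now pass as the first argument.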
From 323128df6f3c779f3b2fe4a751fa98372a54fbbb Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 23 Jun 2023 15:02:32 +0200 Subject: [PATCH 1925/1997] Remove non existing ctor of Cluster::Address Signed-off-by: Azat Khuzhin --- src/Interpreters/Cluster.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/Interpreters/Cluster.h b/src/Interpreters/Cluster.h index de10a445d01..b90acd1d576 100644 --- a/src/Interpreters/Cluster.h +++ b/src/Interpreters/Cluster.h @@ -144,12 +144,6 @@ public: UInt32 shard_index_ = 0, UInt32 replica_index_ = 0); - Address( - const String & host_port_, - const ClusterConnectionParameters & params, - UInt32 shard_index_, - UInt32 replica_index_); - Address( const DatabaseReplicaInfo & info, const ClusterConnectionParameters & params, From 4a33e027c518f51d120c60b21ccd962264e1356a Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 20 Jun 2023 17:31:45 +0200 Subject: [PATCH 1926/1997] Split StorageReplicatedMergeTree reading methods Signed-off-by: Azat Khuzhin --- src/Storages/StorageReplicatedMergeTree.cpp | 141 ++++++++++++-------- src/Storages/StorageReplicatedMergeTree.h | 32 ++++- 2 files changed, 119 insertions(+), 54 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 13c0fb3f7c2..4e053c4598c 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -4902,67 +4902,102 @@ void StorageReplicatedMergeTree::read( snapshot_data.alter_conversions = {}; }); - /** The `select_sequential_consistency` setting has two meanings: - * 1. To throw an exception if on a replica there are not all parts which have been written down on quorum of remaining replicas. - * 2. Do not read parts that have not yet been written to the quorum of the replicas. - * For this you have to synchronously go to ZooKeeper. - */ - if (local_context->getSettingsRef().select_sequential_consistency) - { - auto max_added_blocks = std::make_shared(getMaxAddedBlocks()); - if (auto plan = reader.read( - column_names, storage_snapshot, query_info, local_context, - max_block_size, num_streams, processed_stage, std::move(max_added_blocks), /*enable_parallel_reading*/false)) - query_plan = std::move(*plan); - return; - } + const auto & settings = local_context->getSettingsRef(); + + /// The `select_sequential_consistency` setting has two meanings: + /// 1. To throw an exception if on a replica there are not all parts which have been written down on quorum of remaining replicas. + /// 2. Do not read parts that have not yet been written to the quorum of the replicas. + /// For this you have to synchronously go to ZooKeeper. 
+ if (settings.select_sequential_consistency) + return readLocalSequentialConsistencyImpl(query_plan, column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size, num_streams); if (local_context->canUseParallelReplicasOnInitiator()) + return readParallelReplicasImpl(query_plan, column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size, num_streams); + + readLocalImpl(query_plan, column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size, num_streams); +} + +void StorageReplicatedMergeTree::readLocalSequentialConsistencyImpl( + QueryPlan & query_plan, + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr local_context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + size_t num_streams) +{ + auto max_added_blocks = std::make_shared(getMaxAddedBlocks()); + auto plan = reader.read(column_names, storage_snapshot, query_info, local_context, + max_block_size, num_streams, processed_stage, std::move(max_added_blocks), + /* enable_parallel_reading= */false); + if (plan) + query_plan = std::move(*plan); +} + +void StorageReplicatedMergeTree::readParallelReplicasImpl( + QueryPlan & query_plan, + const Names & /*column_names*/, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr local_context, + QueryProcessingStage::Enum processed_stage, + const size_t /*max_block_size*/, + const size_t /*num_streams*/) +{ + auto table_id = getStorageID(); + + auto parallel_replicas_cluster = local_context->getCluster(local_context->getSettingsRef().cluster_for_parallel_replicas); + + ASTPtr modified_query_ast; + Block header; + if (local_context->getSettingsRef().allow_experimental_analyzer) { - auto table_id = getStorageID(); + auto modified_query_tree = buildQueryTreeForShard(query_info, query_info.query_tree); - ASTPtr modified_query_ast; - - Block header; - - if (local_context->getSettingsRef().allow_experimental_analyzer) - { - auto modified_query_tree = buildQueryTreeForShard(query_info, query_info.query_tree); - - header = InterpreterSelectQueryAnalyzer::getSampleBlock( - modified_query_tree, local_context, SelectQueryOptions(processed_stage).analyze()); - modified_query_ast = queryNodeToSelectQuery(modified_query_tree); - } - else - { - modified_query_ast = ClusterProxy::rewriteSelectQuery(local_context, query_info.query, - table_id.database_name, table_id.table_name, /*remote_table_function_ptr*/nullptr); - header - = InterpreterSelectQuery(modified_query_ast, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); - } - - auto cluster = local_context->getCluster(local_context->getSettingsRef().cluster_for_parallel_replicas); - - ClusterProxy::SelectStreamFactory select_stream_factory = - ClusterProxy::SelectStreamFactory( - header, - {}, - storage_snapshot, - processed_stage); - - ClusterProxy::executeQueryWithParallelReplicas( - query_plan, getStorageID(), /*remove_table_function_ptr*/ nullptr, - select_stream_factory, modified_query_ast, - local_context, query_info, cluster); + header = InterpreterSelectQueryAnalyzer::getSampleBlock( + modified_query_tree, local_context, SelectQueryOptions(processed_stage).analyze()); + modified_query_ast = queryNodeToSelectQuery(modified_query_tree); } else { - if (auto plan = reader.read( - column_names, storage_snapshot, query_info, - local_context, max_block_size, num_streams, - processed_stage, nullptr, 
/*enable_parallel_reading*/local_context->canUseParallelReplicasOnFollower())) - query_plan = std::move(*plan); + modified_query_ast = ClusterProxy::rewriteSelectQuery(local_context, query_info.query, + table_id.database_name, table_id.table_name, /*remote_table_function_ptr*/nullptr); + header + = InterpreterSelectQuery(modified_query_ast, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); } + + ClusterProxy::SelectStreamFactory select_stream_factory = ClusterProxy::SelectStreamFactory( + header, + {}, + storage_snapshot, + processed_stage); + + ClusterProxy::executeQueryWithParallelReplicas( + query_plan, getStorageID(), + /* table_func_ptr= */ nullptr, + select_stream_factory, modified_query_ast, + local_context, query_info, parallel_replicas_cluster); +} + +void StorageReplicatedMergeTree::readLocalImpl( + QueryPlan & query_plan, + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr local_context, + QueryProcessingStage::Enum processed_stage, + const size_t max_block_size, + const size_t num_streams) +{ + auto plan = reader.read( + column_names, storage_snapshot, query_info, + local_context, max_block_size, num_streams, + processed_stage, + /* max_block_numbers_to_read= */ nullptr, + /* enable_parallel_reading= */ local_context->canUseParallelReplicasOnFollower()); + if (plan) + query_plan = std::move(*plan); } template diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 1a1b3c3b10c..ded940bc1d2 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -130,7 +130,7 @@ public: const Names & column_names, const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, - ContextPtr context, + ContextPtr local_context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, size_t num_streams) override; @@ -513,6 +513,36 @@ private: static std::optional distributedWriteFromClusterStorage(const std::shared_ptr & src_storage_cluster, const ASTInsertQuery & query, ContextPtr context); + void readLocalImpl( + QueryPlan & query_plan, + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr local_context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + size_t num_streams); + + void readLocalSequentialConsistencyImpl( + QueryPlan & query_plan, + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr local_context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + size_t num_streams); + + void readParallelReplicasImpl( + QueryPlan & query_plan, + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr local_context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + size_t num_streams); + template void foreachActiveParts(Func && func, bool select_sequential_consistency) const; From b22313ef2d721ec0f8687515de58f4e2ba785d1d Mon Sep 17 00:00:00 2001 From: flynn Date: Mon, 24 Jul 2023 03:54:34 +0000 Subject: [PATCH 1927/1997] Replace with three way comparison --- src/Common/IntervalTree.h | 34 ++-------------------------------- 1 file changed, 2 insertions(+), 32 deletions(-) diff --git a/src/Common/IntervalTree.h b/src/Common/IntervalTree.h index 2214a4e842d..ad079a312f2 100644 --- a/src/Common/IntervalTree.h +++ 
b/src/Common/IntervalTree.h @@ -27,39 +27,9 @@ struct Interval }; template -bool operator<(const Interval & lhs, const Interval & rhs) +auto operator<=>(const Interval & lhs, const Interval & rhs) { - return std::tie(lhs.left, lhs.right) < std::tie(rhs.left, rhs.right); -} - -template -bool operator<=(const Interval & lhs, const Interval & rhs) -{ - return std::tie(lhs.left, lhs.right) <= std::tie(rhs.left, rhs.right); -} - -template -bool operator==(const Interval & lhs, const Interval & rhs) -{ - return std::tie(lhs.left, lhs.right) == std::tie(rhs.left, rhs.right); -} - -template -bool operator!=(const Interval & lhs, const Interval & rhs) -{ - return std::tie(lhs.left, lhs.right) != std::tie(rhs.left, rhs.right); -} - -template -bool operator>(const Interval & lhs, const Interval & rhs) -{ - return std::tie(lhs.left, lhs.right) > std::tie(rhs.left, rhs.right); -} - -template -bool operator>=(const Interval & lhs, const Interval & rhs) -{ - return std::tie(lhs.left, lhs.right) >= std::tie(rhs.left, rhs.right); + return std::tie(lhs.left, lhs.right) <=> std::tie(rhs.left, rhs.right); } struct IntervalTreeVoidValue From ac54be9652414e10a1b79ec4f92439db5155310b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 24 Jul 2023 05:56:18 +0200 Subject: [PATCH 1928/1997] Fix a test --- tests/integration/test_backward_compatibility/test_functions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/test_backward_compatibility/test_functions.py b/tests/integration/test_backward_compatibility/test_functions.py index fa24b146fec..c86c3ba0ab2 100644 --- a/tests/integration/test_backward_compatibility/test_functions.py +++ b/tests/integration/test_backward_compatibility/test_functions.py @@ -143,6 +143,7 @@ def test_string_functions(start_cluster): "position", "substring", "CAST", + "getTypeSerializationStreams", # NOTE: no need to ignore now()/now64() since they will fail because they don't accept any argument # 22.8 Backward Incompatible Change: Extended range of Date32 "toDate32OrZero", From 2389e0f0b68d03ecbb117745ed00c54979715ea7 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 17 Jul 2023 09:54:51 +0200 Subject: [PATCH 1929/1997] Randomize timezone in tests across non-deterministic around 1970 and default There was some cases when some patches to the datetime code leads to flaky tests, due to the tests itself had been runned using regular timezone (TZ). But if you will this tests with something "specific" (that is not strictly defined around 1970 year), those tests will fail. So to catch such issues in the PRs itself, let's randomize session_timezone as well. Signed-off-by: Azat Khuzhin --- docker/test/stateless/run.sh | 3 +++ tests/clickhouse-test | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index fe53925ecc8..3694fb7c2f6 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -4,6 +4,9 @@ set -e -x -a # Choose random timezone for this test run. +# +# NOTE: that clickhouse-test will randomize session_timezone by itself as well +# (it will choose between default server timezone and something specific). 
TZ="$(rg -v '#' /usr/share/zoneinfo/zone.tab | awk '{print $3}' | shuf | head -n1)" echo "Choosen random timezone $TZ" ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone diff --git a/tests/clickhouse-test b/tests/clickhouse-test index abd109d00b2..185e3003c95 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -529,6 +529,12 @@ def threshold_generator(always_on_prob, always_off_prob, min_val, max_val): return gen +# To keep dependency list as short as possible, tzdata is not used here (to +# avoid try/except block for import) +def get_localzone(): + return os.getenv("TZ", "/".join(os.readlink("/etc/localtime").split("/")[-2:])) + + class SettingsRandomizer: settings = { "max_insert_threads": lambda: 0 @@ -602,6 +608,19 @@ class SettingsRandomizer: "enable_memory_bound_merging_of_aggregation_results": lambda: random.randint( 0, 1 ), + "session_timezone": lambda: random.choice( + [ + # special non-deterministic around 1970 timezone, see [1]. + # + # [1]: https://github.com/ClickHouse/ClickHouse/issues/42653 + "America/Mazatlan", + "America/Hermosillo", + "Mexico/BajaSur", + # server default that is randomized across all timezones + # NOTE: due to lots of trickery we cannot use empty timezone here, but this should be the same. + get_localzone(), + ] + ), } @staticmethod From bc167dfde81c44bb93ee7dd0c634ff3428ea3c33 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 18 Jul 2023 06:20:05 +0200 Subject: [PATCH 1930/1997] clickhouse-test: add proper escaping for HTTP parameters The problem is that old versions of cURL (7.81.0 at least) handle additional parameters incorrectly if in previous parameter was "/": $ docker run --rm curlimages/curl:8.1.2 --http1.1 --get -vvv 'http://kernel.org/?bar=foo/baz' --data-urlencode "query=select 1 format Null"; echo > GET /?bar=foo/baz&query=select+1+format+Null HTTP/1.1 > User-Agent: curl/8.1.2 $ docker run --rm curlimages/curl:7.81.0 --http1.1 --get -vvv 'http://kernel.org/?bar=foo/baz' --data-urlencode "query=select 1 format Null"; echo > GET /?bar=foo/baz?query=select+1+format+Null HTTP/1.1 > User-Agent: curl/7.81.0-DEV Note, that I thought about making the same for cli, but it is not that easy, even after getting rid of sh -c and string contantenation, it still cannot be done for CLICKHOUSE_CLIENT_OPT. 
Signed-off-by: Azat Khuzhin --- tests/clickhouse-test | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 185e3003c95..c63e1e3ae52 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -625,16 +625,16 @@ class SettingsRandomizer: @staticmethod def get_random_settings(args): - random_settings = [] + random_settings = {} is_debug = BuildFlags.DEBUG in args.build_flags for setting, generator in SettingsRandomizer.settings.items(): if ( is_debug and setting == "allow_prefetched_read_pool_for_remote_filesystem" ): - random_settings.append(f"{setting}=0") + random_settings[setting] = 0 else: - random_settings.append(f"{setting}={generator()}") + random_settings[setting] = generator() return random_settings @@ -670,10 +670,10 @@ class MergeTreeSettingsRandomizer: @staticmethod def get_random_settings(args): - random_settings = [] + random_settings = {} for setting, generator in MergeTreeSettingsRandomizer.settings.items(): if setting not in args.changed_merge_tree_settings: - random_settings.append(f"{setting}={generator()}") + random_settings[setting] = generator() return random_settings @@ -785,7 +785,14 @@ class TestCase: @staticmethod def cli_format_settings(settings_list) -> str: - return " ".join([f"--{setting}" for setting in settings_list]) + out = [] + for k, v in settings_list.items(): + out.extend([f"--{k}", str(v)]) + return " ".join(out) + + @staticmethod + def http_format_settings(settings_list) -> str: + return urllib.parse.urlencode(settings_list) def has_show_create_table_in_test(self): return not subprocess.call(["grep", "-iq", "show create", self.case_file]) @@ -793,11 +800,12 @@ class TestCase: def add_random_settings(self, client_options): new_options = "" if self.randomize_settings: + http_params = self.http_format_settings(self.random_settings) if len(self.base_url_params) == 0: - os.environ["CLICKHOUSE_URL_PARAMS"] = "&".join(self.random_settings) + os.environ["CLICKHOUSE_URL_PARAMS"] = http_params else: os.environ["CLICKHOUSE_URL_PARAMS"] = ( - self.base_url_params + "&" + "&".join(self.random_settings) + self.base_url_params + "&" + http_params ) new_options += f" {self.cli_format_settings(self.random_settings)}" From 6ae4d291800c7d9b32622f1d520f1ab27b9f90b7 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 19 Jul 2023 13:22:31 +0200 Subject: [PATCH 1931/1997] Fix tests after session_timezone randomization Signed-off-by: Azat Khuzhin --- .../0_stateless/00387_use_client_time_zone.sh | 3 ++- tests/queries/0_stateless/00427_alter_primary_key.sh | 11 ++++++----- tests/queries/0_stateless/00933_ttl_simple.sql | 12 ++++++++++++ ...42_system_reload_dictionary_reloads_completely.sh | 4 ++-- .../0_stateless/01070_modify_ttl_recalc_only.sql | 3 +++ .../0_stateless/02530_dictionaries_update_field.sh | 3 ++- 6 files changed, 27 insertions(+), 9 deletions(-) diff --git a/tests/queries/0_stateless/00387_use_client_time_zone.sh b/tests/queries/0_stateless/00387_use_client_time_zone.sh index 2a6d81eebfe..e54d5244eef 100755 --- a/tests/queries/0_stateless/00387_use_client_time_zone.sh +++ b/tests/queries/0_stateless/00387_use_client_time_zone.sh @@ -5,4 +5,5 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -env TZ=UTC ${CLICKHOUSE_CLIENT} --use_client_time_zone=1 --query="SELECT toDateTime(1000000000)" +# NOTE: session_timezone overrides use_client_time_zone, disable it randomization +env TZ=UTC ${CLICKHOUSE_CLIENT} --session_timezone '' --use_client_time_zone=1 --query="SELECT toDateTime(1000000000)" diff --git a/tests/queries/0_stateless/00427_alter_primary_key.sh b/tests/queries/0_stateless/00427_alter_primary_key.sh index 1269e2ad6e3..f9984384d79 100755 --- a/tests/queries/0_stateless/00427_alter_primary_key.sh +++ b/tests/queries/0_stateless/00427_alter_primary_key.sh @@ -7,11 +7,12 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) function perform() { local query=$1 - TZ=UTC $CLICKHOUSE_CLIENT \ - --allow_deprecated_syntax_for_merge_tree=1 \ - --use_client_time_zone=1 \ - --input_format_values_interpret_expressions=0 \ - --query "$query" 2>/dev/null + local settings=( + --allow_deprecated_syntax_for_merge_tree 1 + --session_timezone UTC + --input_format_values_interpret_expressions 0 + ) + TZ=UTC $CLICKHOUSE_CLIENT "${settings[@]}" --query "$query" 2>/dev/null if [ "$?" -ne 0 ]; then echo "query failed" fi diff --git a/tests/queries/0_stateless/00933_ttl_simple.sql b/tests/queries/0_stateless/00933_ttl_simple.sql index 2bf686822d5..ad40e7c7e47 100644 --- a/tests/queries/0_stateless/00933_ttl_simple.sql +++ b/tests/queries/0_stateless/00933_ttl_simple.sql @@ -1,3 +1,15 @@ +-- disable timezone randomization since otherwise TTL may fail at particular datetime, i.e.: +-- +-- SELECT +-- now(), +-- toDate(toTimeZone(now(), 'America/Mazatlan')), +-- today() +-- +-- ┌───────────────now()─┬─toDate(toTimeZone(now(), 'America/Mazatlan'))─┬────today()─┐ +-- │ 2023-07-24 06:24:06 │ 2023-07-23 │ 2023-07-24 │ +-- └─────────────────────┴───────────────────────────────────────────────┴────────────┘ +set session_timezone = ''; + drop table if exists ttl_00933_1; -- Column TTL works only with wide parts, because it's very expensive to apply it for compact parts diff --git a/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.sh b/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.sh index f2b30e05040..9d34470c38d 100755 --- a/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.sh +++ b/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.sh @@ -7,8 +7,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) set -e -o pipefail -# Run the client. 
-$CLICKHOUSE_CLIENT --multiquery <<'EOF' +# NOTE: dictionaries TTLs works with server timezone, so session_timeout cannot be used +$CLICKHOUSE_CLIENT --session_timezone '' --multiquery <<'EOF' DROP DATABASE IF EXISTS dictdb_01042; CREATE DATABASE dictdb_01042; CREATE TABLE dictdb_01042.table(x Int64, y Int64, insert_time DateTime) ENGINE = MergeTree ORDER BY tuple(); diff --git a/tests/queries/0_stateless/01070_modify_ttl_recalc_only.sql b/tests/queries/0_stateless/01070_modify_ttl_recalc_only.sql index 247e412484f..7ac70d41871 100644 --- a/tests/queries/0_stateless/01070_modify_ttl_recalc_only.sql +++ b/tests/queries/0_stateless/01070_modify_ttl_recalc_only.sql @@ -2,6 +2,9 @@ set mutations_sync = 2; +-- system.parts has server default, timezone cannot be randomized +set session_timezone = ''; + drop table if exists ttl; create table ttl (d Date, a Int) engine = MergeTree order by a partition by toDayOfMonth(d) diff --git a/tests/queries/0_stateless/02530_dictionaries_update_field.sh b/tests/queries/0_stateless/02530_dictionaries_update_field.sh index 569466fe606..6ac10ea2308 100755 --- a/tests/queries/0_stateless/02530_dictionaries_update_field.sh +++ b/tests/queries/0_stateless/02530_dictionaries_update_field.sh @@ -5,7 +5,8 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -q " +# NOTE: dictionaries will be updated according to server TZ, not session, so prohibit it's randomization +$CLICKHOUSE_CLIENT --session_timezone '' -q " CREATE TABLE table_for_update_field_dictionary ( key UInt64, From 810137e57a53467e9fea668769749c559af12bc1 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Mon, 24 Jul 2023 05:59:07 +0000 Subject: [PATCH 1932/1997] Add new peak_memory_usage to docs --- docs/en/interfaces/http.md | 18 +++++++++--------- docs/ru/interfaces/http.md | 16 ++++++++-------- docs/zh/interfaces/http.md | 18 +++++++++--------- 3 files changed, 26 insertions(+), 26 deletions(-) diff --git a/docs/en/interfaces/http.md b/docs/en/interfaces/http.md index 3a7f6d4d854..37821f0fee1 100644 --- a/docs/en/interfaces/http.md +++ b/docs/en/interfaces/http.md @@ -56,7 +56,7 @@ Connection: Close Content-Type: text/tab-separated-values; charset=UTF-8 X-ClickHouse-Server-Display-Name: clickhouse.ru-central1.internal X-ClickHouse-Query-Id: 5abe861c-239c-467f-b955-8a201abb8b7f -X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"} 1 ``` @@ -286,9 +286,9 @@ Similarly, you can use ClickHouse sessions in the HTTP protocol. To do this, you You can receive information about the progress of a query in `X-ClickHouse-Progress` response headers. To do this, enable [send_progress_in_http_headers](../operations/settings/settings.md#settings-send_progress_in_http_headers). 
 Example of the header sequence:
 ``` text
-X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128"}
-X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128"}
-X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128"}
+X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","peak_memory_usage":"4371480"}
+X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","peak_memory_usage":"13621616"}
+X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","peak_memory_usage":"23155600"}
 ```
 Possible header fields:
@@ -416,7 +416,7 @@ $ curl -v 'http://localhost:8123/predefined_query'
 < X-ClickHouse-Format: Template
 < X-ClickHouse-Timezone: Asia/Shanghai
 < Keep-Alive: timeout=3
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
 <
 # HELP "Query" "Number of executing queries"
 # TYPE "Query" counter
@@ -581,7 +581,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi'
 < Content-Type: text/html; charset=UTF-8
 < Transfer-Encoding: chunked
 < Keep-Alive: timeout=3
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
 <
 * Connection #0 to host localhost left intact
 Say Hi!%
@@ -621,7 +621,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler'
 < Content-Type: text/plain; charset=UTF-8
 < Transfer-Encoding: chunked
 < Keep-Alive: timeout=3
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
 <
 * Connection #0 to host localhost left intact
% @@ -673,7 +673,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler' < Content-Type: text/html; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"} < Absolute Path File * Connection #0 to host localhost left intact @@ -692,7 +692,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler' < Content-Type: text/html; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"} < Relative Path File * Connection #0 to host localhost left intact diff --git a/docs/ru/interfaces/http.md b/docs/ru/interfaces/http.md index b8c5ee77f0c..981f1c7b5a2 100644 --- a/docs/ru/interfaces/http.md +++ b/docs/ru/interfaces/http.md @@ -50,7 +50,7 @@ Connection: Close Content-Type: text/tab-separated-values; charset=UTF-8 X-ClickHouse-Server-Display-Name: clickhouse.ru-central1.internal X-ClickHouse-Query-Id: 5abe861c-239c-467f-b955-8a201abb8b7f -X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"} 1 ``` @@ -266,9 +266,9 @@ $ echo 'SELECT number FROM system.numbers LIMIT 10' | curl 'http://localhost:812 Прогресс выполнения запроса можно отслеживать с помощью заголовков ответа `X-ClickHouse-Progress`. Для этого включите [send_progress_in_http_headers](../operations/settings/settings.md#settings-send_progress_in_http_headers). 
 Пример последовательности заголовков:
 ``` text
-X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128"}
-X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128"}
-X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128"}
+X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","peak_memory_usage":"4371480"}
+X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","peak_memory_usage":"13621616"}
+X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","peak_memory_usage":"23155600"}
 ```
 Возможные поля заголовка:
@@ -529,7 +529,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi'
 < Content-Type: text/html; charset=UTF-8
 < Transfer-Encoding: chunked
 < Keep-Alive: timeout=3
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
 <
 * Connection #0 to host localhost left intact
 Say Hi!%
@@ -569,7 +569,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler'
 < Content-Type: text/plain; charset=UTF-8
 < Transfer-Encoding: chunked
 < Keep-Alive: timeout=3
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
 <
 * Connection #0 to host localhost left intact
% @@ -621,7 +621,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler' < Content-Type: text/html; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"} < Absolute Path File * Connection #0 to host localhost left intact @@ -640,7 +640,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler' < Content-Type: text/html; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"} < Relative Path File * Connection #0 to host localhost left intact diff --git a/docs/zh/interfaces/http.md b/docs/zh/interfaces/http.md index c7a0f355a92..f84768beccc 100644 --- a/docs/zh/interfaces/http.md +++ b/docs/zh/interfaces/http.md @@ -53,7 +53,7 @@ Connection: Close Content-Type: text/tab-separated-values; charset=UTF-8 X-ClickHouse-Server-Display-Name: clickhouse.ru-central1.internal X-ClickHouse-Query-Id: 5abe861c-239c-467f-b955-8a201abb8b7f -X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"} 1 ``` @@ -262,9 +262,9 @@ $ echo 'SELECT number FROM system.numbers LIMIT 10' | curl 'http://localhost:812 您可以在`X-ClickHouse-Progress`响应头中收到查询进度的信息。为此,启用[Http Header携带进度](../operations/settings/settings.md#settings-send_progress_in_http_headers)。示例: ``` text -X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128"} -X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128"} -X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128"} +X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","peak_memory_usage":"4371480"} +X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","peak_memory_usage":"13621616"} +X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","peak_memory_usage":"23155600"} ``` 显示字段信息: @@ -363,7 +363,7 @@ $ curl -v 'http://localhost:8123/predefined_query' < X-ClickHouse-Format: Template < X-ClickHouse-Timezone: Asia/Shanghai < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"} < # HELP "Query" "Number of executing queries" # TYPE "Query" counter @@ -521,7 +521,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi' < Content-Type: text/html; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: 
{"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"} < * Connection #0 to host localhost left intact Say Hi!% @@ -561,7 +561,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler' < Content-Type: text/plain; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"} < * Connection #0 to host localhost left intact
% @@ -613,7 +613,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler' < Content-Type: text/html; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"} < Absolute Path File * Connection #0 to host localhost left intact @@ -632,7 +632,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler' < Content-Type: text/html; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"} < Relative Path File * Connection #0 to host localhost left intact From ceaaa78fdcfac2243bcf28624336217bd44898f0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 24 Jul 2023 08:04:38 +0200 Subject: [PATCH 1933/1997] Fix transform --- src/Functions/transform.cpp | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/src/Functions/transform.cpp b/src/Functions/transform.cpp index 1fc0e3adf96..a48d8d47489 100644 --- a/src/Functions/transform.cpp +++ b/src/Functions/transform.cpp @@ -156,15 +156,15 @@ namespace { initialize(arguments, result_type); - const auto * in = arguments.front().column.get(); - - if (isColumnConst(*in)) + if (isColumnConst(*arguments[0].column)) return executeConst(arguments, result_type, input_rows_count); ColumnPtr default_non_const; if (!cache.default_column && arguments.size() == 4) default_non_const = castColumn(arguments[3], result_type); + ColumnPtr in = cache.default_column ? 
arguments[0].column : castColumn(arguments[0], result_type); + auto column_result = result_type->createColumn(); if (cache.is_empty) { @@ -174,30 +174,30 @@ namespace } else if (cache.table_num_to_idx) { - if (!executeNum>(in, *column_result, default_non_const) - && !executeNum>(in, *column_result, default_non_const) - && !executeNum>(in, *column_result, default_non_const) - && !executeNum>(in, *column_result, default_non_const) - && !executeNum>(in, *column_result, default_non_const) - && !executeNum>(in, *column_result, default_non_const) - && !executeNum>(in, *column_result, default_non_const) - && !executeNum>(in, *column_result, default_non_const) - && !executeNum>(in, *column_result, default_non_const) - && !executeNum>(in, *column_result, default_non_const) - && !executeNum>(in, *column_result, default_non_const) - && !executeNum>(in, *column_result, default_non_const)) + if (!executeNum>(in.get(), *column_result, default_non_const) + && !executeNum>(in.get(), *column_result, default_non_const) + && !executeNum>(in.get(), *column_result, default_non_const) + && !executeNum>(in.get(), *column_result, default_non_const) + && !executeNum>(in.get(), *column_result, default_non_const) + && !executeNum>(in.get(), *column_result, default_non_const) + && !executeNum>(in.get(), *column_result, default_non_const) + && !executeNum>(in.get(), *column_result, default_non_const) + && !executeNum>(in.get(), *column_result, default_non_const) + && !executeNum>(in.get(), *column_result, default_non_const) + && !executeNum>(in.get(), *column_result, default_non_const) + && !executeNum>(in.get(), *column_result, default_non_const)) { throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", in->getName(), getName()); } } else if (cache.table_string_to_idx) { - if (!executeString(in, *column_result, default_non_const)) - executeContiguous(in, *column_result, default_non_const); + if (!executeString(in.get(), *column_result, default_non_const)) + executeContiguous(in.get(), *column_result, default_non_const); } else if (cache.table_anything_to_idx) { - executeAnything(in, *column_result, default_non_const); + executeAnything(in.get(), *column_result, default_non_const); } else throw Exception(ErrorCodes::LOGICAL_ERROR, "State of the function `transform` is not initialized"); @@ -810,7 +810,6 @@ namespace cache.initialized = true; } }; - } REGISTER_FUNCTION(Transform) From aaa0bf64fd888332bfa59c284508d4e7a84d372c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 24 Jul 2023 08:05:55 +0200 Subject: [PATCH 1934/1997] Add a test --- .../02832_transform_fixed_string_no_default.reference | 1 + .../0_stateless/02832_transform_fixed_string_no_default.sql | 1 + 2 files changed, 2 insertions(+) create mode 100644 tests/queries/0_stateless/02832_transform_fixed_string_no_default.reference create mode 100644 tests/queries/0_stateless/02832_transform_fixed_string_no_default.sql diff --git a/tests/queries/0_stateless/02832_transform_fixed_string_no_default.reference b/tests/queries/0_stateless/02832_transform_fixed_string_no_default.reference new file mode 100644 index 00000000000..9daeafb9864 --- /dev/null +++ b/tests/queries/0_stateless/02832_transform_fixed_string_no_default.reference @@ -0,0 +1 @@ +test diff --git a/tests/queries/0_stateless/02832_transform_fixed_string_no_default.sql b/tests/queries/0_stateless/02832_transform_fixed_string_no_default.sql new file mode 100644 index 00000000000..8d316d3413f --- /dev/null +++ 
b/tests/queries/0_stateless/02832_transform_fixed_string_no_default.sql @@ -0,0 +1 @@ +SELECT transform(name, ['a', 'b'], ['', NULL]) AS name FROM (SELECT 'test'::Nullable(FixedString(4)) AS name); From 890a3754a6a093545122e42bcab066a27c72ed5e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 24 Jul 2023 08:19:46 +0200 Subject: [PATCH 1935/1997] Fix error --- src/Functions/transform.cpp | 55 ++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 25 deletions(-) diff --git a/src/Functions/transform.cpp b/src/Functions/transform.cpp index a48d8d47489..79168d82c54 100644 --- a/src/Functions/transform.cpp +++ b/src/Functions/transform.cpp @@ -156,14 +156,18 @@ namespace { initialize(arguments, result_type); - if (isColumnConst(*arguments[0].column)) + const auto * in = arguments[0].column.get(); + + if (isColumnConst(*in)) return executeConst(arguments, result_type, input_rows_count); ColumnPtr default_non_const; if (!cache.default_column && arguments.size() == 4) default_non_const = castColumn(arguments[3], result_type); - ColumnPtr in = cache.default_column ? arguments[0].column : castColumn(arguments[0], result_type); + ColumnPtr in_casted = arguments[0].column; + if (arguments.size() == 3) + in_casted = castColumn(arguments[0], result_type); auto column_result = result_type->createColumn(); if (cache.is_empty) @@ -174,30 +178,30 @@ namespace } else if (cache.table_num_to_idx) { - if (!executeNum>(in.get(), *column_result, default_non_const) - && !executeNum>(in.get(), *column_result, default_non_const) - && !executeNum>(in.get(), *column_result, default_non_const) - && !executeNum>(in.get(), *column_result, default_non_const) - && !executeNum>(in.get(), *column_result, default_non_const) - && !executeNum>(in.get(), *column_result, default_non_const) - && !executeNum>(in.get(), *column_result, default_non_const) - && !executeNum>(in.get(), *column_result, default_non_const) - && !executeNum>(in.get(), *column_result, default_non_const) - && !executeNum>(in.get(), *column_result, default_non_const) - && !executeNum>(in.get(), *column_result, default_non_const) - && !executeNum>(in.get(), *column_result, default_non_const)) + if (!executeNum>(in, *column_result, default_non_const, *in_casted) + && !executeNum>(in, *column_result, default_non_const, *in_casted) + && !executeNum>(in, *column_result, default_non_const, *in_casted) + && !executeNum>(in, *column_result, default_non_const, *in_casted) + && !executeNum>(in, *column_result, default_non_const, *in_casted) + && !executeNum>(in, *column_result, default_non_const, *in_casted) + && !executeNum>(in, *column_result, default_non_const, *in_casted) + && !executeNum>(in, *column_result, default_non_const, *in_casted) + && !executeNum>(in, *column_result, default_non_const, *in_casted) + && !executeNum>(in, *column_result, default_non_const, *in_casted) + && !executeNum>(in, *column_result, default_non_const, *in_casted) + && !executeNum>(in, *column_result, default_non_const, *in_casted)) { throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", in->getName(), getName()); } } else if (cache.table_string_to_idx) { - if (!executeString(in.get(), *column_result, default_non_const)) - executeContiguous(in.get(), *column_result, default_non_const); + if (!executeString(in, *column_result, default_non_const, *in_casted)) + executeContiguous(in, *column_result, default_non_const, *in_casted); } else if (cache.table_anything_to_idx) { - executeAnything(in.get(), *column_result, 
default_non_const); + executeAnything(in, *column_result, default_non_const, *in_casted); } else throw Exception(ErrorCodes::LOGICAL_ERROR, "State of the function `transform` is not initialized"); @@ -218,7 +222,7 @@ namespace return impl->execute(args, result_type, input_rows_count); } - void executeAnything(const IColumn * in, IColumn & column_result, const ColumnPtr default_non_const) const + void executeAnything(const IColumn * in, IColumn & column_result, const ColumnPtr default_non_const, const IColumn & in_casted) const { const size_t size = in->size(); const auto & table = *cache.table_anything_to_idx; @@ -236,11 +240,11 @@ namespace else if (default_non_const) column_result.insertFrom(*default_non_const, i); else - column_result.insertFrom(*in, i); + column_result.insertFrom(in_casted, i); } } - void executeContiguous(const IColumn * in, IColumn & column_result, const ColumnPtr default_non_const) const + void executeContiguous(const IColumn * in, IColumn & column_result, const ColumnPtr default_non_const, const IColumn & in_casted) const { const size_t size = in->size(); const auto & table = *cache.table_string_to_idx; @@ -255,12 +259,12 @@ namespace else if (default_non_const) column_result.insertFrom(*default_non_const, i); else - column_result.insertFrom(*in, i); + column_result.insertFrom(in_casted, i); } } template - bool executeNum(const IColumn * in_untyped, IColumn & column_result, const ColumnPtr default_non_const) const + bool executeNum(const IColumn * in_untyped, IColumn & column_result, const ColumnPtr default_non_const, const IColumn & in_casted) const { const auto * const in = checkAndGetColumn(in_untyped); if (!in) @@ -297,7 +301,7 @@ namespace else if (default_non_const) column_result.insertFrom(*default_non_const, i); else - column_result.insertFrom(*in, i); + column_result.insertFrom(in_casted, i); } } return true; @@ -451,7 +455,7 @@ namespace } } - bool executeString(const IColumn * in_untyped, IColumn & column_result, const ColumnPtr default_non_const) const + bool executeString(const IColumn * in_untyped, IColumn & column_result, const ColumnPtr default_non_const, const IColumn & in_casted) const { const auto * const in = checkAndGetColumn(in_untyped); if (!in) @@ -488,7 +492,7 @@ namespace else if (default_non_const) column_result.insertFrom(*default_non_const, 0); else - column_result.insertFrom(*in, i); + column_result.insertFrom(in_casted, i); } } return true; @@ -810,6 +814,7 @@ namespace cache.initialized = true; } }; + } REGISTER_FUNCTION(Transform) From c79492240194f0d5dd9053c70a967c39a7536cb3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 24 Jul 2023 08:20:30 +0200 Subject: [PATCH 1936/1997] More tests --- .../02832_transform_fixed_string_no_default.reference | 2 ++ .../0_stateless/02832_transform_fixed_string_no_default.sql | 2 ++ 2 files changed, 4 insertions(+) diff --git a/tests/queries/0_stateless/02832_transform_fixed_string_no_default.reference b/tests/queries/0_stateless/02832_transform_fixed_string_no_default.reference index 9daeafb9864..ea545c90391 100644 --- a/tests/queries/0_stateless/02832_transform_fixed_string_no_default.reference +++ b/tests/queries/0_stateless/02832_transform_fixed_string_no_default.reference @@ -1 +1,3 @@ test + +\N diff --git a/tests/queries/0_stateless/02832_transform_fixed_string_no_default.sql b/tests/queries/0_stateless/02832_transform_fixed_string_no_default.sql index 8d316d3413f..0e58c716c9f 100644 --- a/tests/queries/0_stateless/02832_transform_fixed_string_no_default.sql +++ 
b/tests/queries/0_stateless/02832_transform_fixed_string_no_default.sql @@ -1 +1,3 @@ SELECT transform(name, ['a', 'b'], ['', NULL]) AS name FROM (SELECT 'test'::Nullable(FixedString(4)) AS name); +SELECT transform(name, ['test', 'b'], ['', NULL]) AS name FROM (SELECT 'test'::Nullable(FixedString(4)) AS name); +SELECT transform(name, ['a', 'test'], ['', NULL]) AS name FROM (SELECT 'test'::Nullable(FixedString(4)) AS name); From 0e46cf86b772e1513d837d6019181a6d291b7219 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Mon, 24 Jul 2023 08:52:19 +0200 Subject: [PATCH 1937/1997] Added try-except to check cases when second backup/restore is picked up first --- .../test_disallow_concurrency.py | 69 +++++++++++++++---- 1 file changed, 57 insertions(+), 12 deletions(-) diff --git a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py index d0ce2e03016..a863a6e2047 100644 --- a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py +++ b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py @@ -133,9 +133,21 @@ def test_concurrent_backups_on_same_node(): ) assert status in ["CREATING_BACKUP", "BACKUP_CREATED"] - error = nodes[0].query_and_get_error( - f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}" - ) + try: + error = nodes[0].query_and_get_error( + f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}" + ) + except Exception as e: + status = ( + nodes[0] + .query(f"SELECT status FROM system.backups WHERE id == '{id}'") + .rstrip("\n") + ) + # It is possible that the second backup was picked up first, and then the async backup + if status == "CREATING_BACKUP" or status == "BACKUP_FAILED": + return + else: + raise e expected_errors = [ "Concurrent backups not supported", f"Backup {backup_name} already exists", @@ -179,9 +191,20 @@ def test_concurrent_backups_on_different_nodes(): ) assert status in ["CREATING_BACKUP", "BACKUP_CREATED"] - error = nodes[0].query_and_get_error( - f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}" - ) + try: + error = nodes[0].query_and_get_error( + f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}" + ) + except Exception as e: + status = ( + nodes[1] + .query(f"SELECT status FROM system.backups WHERE id == '{id}'") + .rstrip("\n") + ) + if status == "CREATING_BACKUP" or status == "BACKUP_FAILED": + return + else: + raise e expected_errors = [ "Concurrent backups not supported", f"Backup {backup_name} already exists", @@ -224,9 +247,20 @@ def test_concurrent_restores_on_same_node(): ) assert status in ["RESTORING", "RESTORED"] - error = nodes[0].query_and_get_error( - f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}" - ) + try: + error = nodes[0].query_and_get_error( + f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}" + ) + except Exception as e: + status = ( + nodes[0] + .query(f"SELECT status FROM system.backups WHERE id == '{id}'") + .rstrip("\n") + ) + if status == "RESTORING" or status == "RESTORE_FAILED": + return + else: + raise e expected_errors = [ "Concurrent restores not supported", "Cannot restore the table default.tbl because it already contains some data", @@ -269,9 +303,20 @@ def test_concurrent_restores_on_different_node(): ) assert status in ["RESTORING", "RESTORED"] - error = nodes[1].query_and_get_error( - f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}" - ) + try: + error = nodes[1].query_and_get_error( + f"RESTORE TABLE tbl ON CLUSTER 
'cluster' FROM {backup_name}" + ) + except Exception as e: + status = ( + nodes[0] + .query(f"SELECT status FROM system.backups WHERE id == '{id}'") + .rstrip("\n") + ) + if status == "RESTORING" or status == "RESTORE_FAILED": + return + else: + raise e expected_errors = [ "Concurrent restores not supported", "Cannot restore the table default.tbl because it already contains some data", From 96d40ff3c4dd34a9396c625b8a1d57f697f80dd0 Mon Sep 17 00:00:00 2001 From: flynn Date: Mon, 24 Jul 2023 07:30:32 +0000 Subject: [PATCH 1938/1997] fix --- src/Common/IntervalTree.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/Common/IntervalTree.h b/src/Common/IntervalTree.h index ad079a312f2..9a42aadf70e 100644 --- a/src/Common/IntervalTree.h +++ b/src/Common/IntervalTree.h @@ -32,6 +32,12 @@ auto operator<=>(const Interval & lhs, const Interval std::tie(rhs.left, rhs.right); } +template +bool operator==(const Interval & lhs, const Interval & rhs) +{ + return std::tie(lhs.left, lhs.right) == std::tie(rhs.left, rhs.right); +} + struct IntervalTreeVoidValue { }; From 0401dc453e9502697328879728bf0dbf7c1dd9e0 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 24 Jul 2023 10:14:23 +0200 Subject: [PATCH 1939/1997] Fix flakiness of test_version_update_after_mutation by enabling force_remove_data_recursively_on_drop Since there can be some leftovers: 2023.07.24 07:08:25.238066 [ 140 ] {} Application: Code: 219. DB::Exception: Cannot drop: filesystem error: in remove: Directory not empty ["/var/lib/clickhouse/data/system/"]. Probably database contain some detached tables or metadata leftovers from Ordinary engine. If you want to remove all data anyway, try to attach database back and drop it again with enabled force_remove_data_recursively_on_drop setting: Exception while trying to convert database system from Ordinary to Atomic. It may be in some intermediate state. You can finish conversion manually by moving the rest tables from system to .tmp_convert.system.9396432095832455195 (using RENAME TABLE) and executing DROP DATABASE system and RENAME DATABASE .tmp_convert.system.9396432095832455195 TO system. (DATABASE_NOT_EMPTY), Stack trace (when copying this message, always include the lines below): 0. DB::Exception::Exception(DB::Exception::MessageMasked&&, int, bool) @ 0x000000000e68af57 in /usr/bin/clickhouse 1. ? @ 0x000000000cab443c in /usr/bin/clickhouse 2. DB::DatabaseOnDisk::drop(std::shared_ptr) @ 0x000000001328d617 in /usr/bin/clickhouse 3. DB::DatabaseCatalog::detachDatabase(std::shared_ptr, String const&, bool, bool) @ 0x0000000013524a6c in /usr/bin/clickhouse 4. DB::InterpreterDropQuery::executeToDatabaseImpl(DB::ASTDropQuery const&, std::shared_ptr&, std::vector, DB::UUIDTag>, std::allocator, DB::UUIDTag>>>&) @ 0x0000000013bc05e4 in /usr/bin/clickhouse 5. DB::InterpreterDropQuery::executeToDatabase(DB::ASTDropQuery const&) @ 0x0000000013bbc6b8 in /usr/bin/clickhouse 6. DB::InterpreterDropQuery::execute() @ 0x0000000013bbba22 in /usr/bin/clickhouse 7. ? @ 0x00000000140b13a5 in /usr/bin/clickhouse 8. DB::executeQuery(String const&, std::shared_ptr, bool, DB::QueryProcessingStage::Enum) @ 0x00000000140ad20e in /usr/bin/clickhouse 9. ? @ 0x00000000140d2ef0 in /usr/bin/clickhouse 10. DB::maybeConvertSystemDatabase(std::shared_ptr) @ 0x00000000140d0aaf in /usr/bin/clickhouse 11. DB::Server::main(std::vector> const&) @ 0x000000000e724e55 in /usr/bin/clickhouse 12. Poco::Util::Application::run() @ 0x0000000017ead086 in /usr/bin/clickhouse 13. 
DB::Server::run() @ 0x000000000e714a5d in /usr/bin/clickhouse 14. Poco::Util::ServerApplication::run(int, char**) @ 0x0000000017ec07b9 in /usr/bin/clickhouse 15. mainEntryClickHouseServer(int, char**) @ 0x000000000e711a26 in /usr/bin/clickhouse 16. main @ 0x0000000008cf13cf in /usr/bin/clickhouse 17. __libc_start_main @ 0x0000000000021b97 in /lib/x86_64-linux-gnu/libc-2.27.so 18. _start @ 0x00000000080705ae in /usr/bin/clickhouse (version 23.7.1.2012) Signed-off-by: Azat Khuzhin --- tests/integration/helpers/cluster.py | 9 +++++++++ .../force_remove_data_recursively_on_drop.xml | 7 +++++++ .../test_version_update_after_mutation/test.py | 13 ++++++++++--- 3 files changed, 26 insertions(+), 3 deletions(-) create mode 100644 tests/integration/test_version_update_after_mutation/configs/force_remove_data_recursively_on_drop.xml diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index eff44de842a..0448eb2437f 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -3199,6 +3199,7 @@ class ClickHouseInstance: ): self.name = name self.base_cmd = cluster.base_cmd + self.base_dir = base_path self.docker_id = cluster.get_instance_docker_id(self.name) self.cluster = cluster self.hostname = hostname if hostname is not None else self.name @@ -4193,6 +4194,14 @@ class ClickHouseInstance: ["bash", "-c", f"sed -i 's/{replace}/{replacement}/g' {path_to_config}"] ) + def put_users_config(self, config_path): + """Put new config (useful if you cannot put it at the start)""" + + instance_config_dir = p.abspath(p.join(self.path, "configs")) + users_d_dir = p.abspath(p.join(instance_config_dir, "users.d")) + config_path = p.join(self.base_dir, config_path) + shutil.copy(config_path, users_d_dir) + def create_dir(self): """Create the instance directory and all the needed files there.""" diff --git a/tests/integration/test_version_update_after_mutation/configs/force_remove_data_recursively_on_drop.xml b/tests/integration/test_version_update_after_mutation/configs/force_remove_data_recursively_on_drop.xml new file mode 100644 index 00000000000..7a00648b28e --- /dev/null +++ b/tests/integration/test_version_update_after_mutation/configs/force_remove_data_recursively_on_drop.xml @@ -0,0 +1,7 @@ + + + + 1 + + + diff --git a/tests/integration/test_version_update_after_mutation/test.py b/tests/integration/test_version_update_after_mutation/test.py index c80205d48c1..416220c93c3 100644 --- a/tests/integration/test_version_update_after_mutation/test.py +++ b/tests/integration/test_version_update_after_mutation/test.py @@ -51,6 +51,12 @@ def start_cluster(): cluster.shutdown() +def restart_node(node): + # set force_remove_data_recursively_on_drop (cannot be done before, because the version is too old) + node.put_users_config("configs/force_remove_data_recursively_on_drop.xml") + node.restart_with_latest_version(signal=9, fix_metadata=True) + + def test_mutate_and_upgrade(start_cluster): for node in [node1, node2]: node.query("DROP TABLE IF EXISTS mt") @@ -67,8 +73,9 @@ def test_mutate_and_upgrade(start_cluster): node2.query("DETACH TABLE mt") # stop being leader node1.query("DETACH TABLE mt") # stop being leader - node1.restart_with_latest_version(signal=9, fix_metadata=True) - node2.restart_with_latest_version(signal=9, fix_metadata=True) + + restart_node(node1) + restart_node(node2) # After hard restart table can be in readonly mode exec_query_with_retry( @@ -124,7 +131,7 @@ def test_upgrade_while_mutation(start_cluster): # (We could be in 
process of creating some system table, which will leave empty directory on restart, # so when we start moving system tables from ordinary to atomic db, it will complain about some undeleted files) node3.query("SYSTEM FLUSH LOGS") - node3.restart_with_latest_version(signal=9, fix_metadata=True) + restart_node(node3) # checks for readonly exec_query_with_retry(node3, "OPTIMIZE TABLE mt1", sleep_time=5, retry_count=60) From efa638ef3cc7db3c6149b7c031cc4c7904987abd Mon Sep 17 00:00:00 2001 From: Val Doroshchuk Date: Wed, 19 Jul 2023 12:53:27 +0200 Subject: [PATCH 1940/1997] MaterializedMySQL: Support unquoted utf-8 strings in DDL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since ClickHouse does not support unquoted utf-8 strings but MySQL does. Instead of fixing Lexer to recognize utf-8 chars as TokenType::BareWord, suggesting to quote all unrecognized tokens before applying any DDL. Actual parsing and validating the syntax will be done by particular Parser. If there is any TokenType::Error, the query is unable to be parsed anyway. Quoting such tokens can provide the support of utf-8 names. See `tryQuoteUnrecognizedTokens` and `QuoteUnrecognizedTokensTest`. mysql> CREATE TABLE 道.渠(... is converted to CREATE TABLE `道`.`渠`(... Also fixed the bug with missing * while doing SELECT in full sync because db or table name are back quoted when not needed. --- src/Common/quoteString.cpp | 11 + src/Common/quoteString.h | 3 + .../MySQL/MaterializedMySQLSyncThread.cpp | 7 +- .../gtest_try_quote_unrecognized_tokens.cpp | 289 ++++++++++++++++++ .../MySQL/tryQuoteUnrecognizedTokens.cpp | 96 ++++++ .../MySQL/tryQuoteUnrecognizedTokens.h | 10 + src/Storages/StorageMySQL.cpp | 11 +- .../materialized_with_ddl.py | 122 ++++++++ .../test_materialized_mysql_database/test.py | 6 + 9 files changed, 542 insertions(+), 13 deletions(-) create mode 100644 src/Databases/MySQL/tests/gtest_try_quote_unrecognized_tokens.cpp create mode 100644 src/Databases/MySQL/tryQuoteUnrecognizedTokens.cpp create mode 100644 src/Databases/MySQL/tryQuoteUnrecognizedTokens.h diff --git a/src/Common/quoteString.cpp b/src/Common/quoteString.cpp index b464f4837a1..17129441c8f 100644 --- a/src/Common/quoteString.cpp +++ b/src/Common/quoteString.cpp @@ -44,4 +44,15 @@ String backQuoteIfNeed(StringRef x) return res; } + +String backQuoteMySQL(StringRef x) +{ + String res(x.size, '\0'); + { + WriteBufferFromString wb(res); + writeBackQuotedStringMySQL(x, wb); + } + return res; +} + } diff --git a/src/Common/quoteString.h b/src/Common/quoteString.h index b83988258e2..3f17d6e7621 100644 --- a/src/Common/quoteString.h +++ b/src/Common/quoteString.h @@ -24,4 +24,7 @@ String backQuote(StringRef x); /// Quote the identifier with backquotes, if required. String backQuoteIfNeed(StringRef x); +/// Quote the identifier with backquotes, for use in MySQL queries. 
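+/// A minimal sketch of the expected output (the identifiers below are illustrative,
+/// and the backtick-doubling is an assumption based on MySQL quoting rules):
+///     backQuoteMySQL("道渠") -> `道渠`
+///     backQuoteMySQL("a`b")  -> `a``b`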
+String backQuoteMySQL(StringRef x); + } diff --git a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp index 603bf3d0166..673bd155f77 100644 --- a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp +++ b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp @@ -4,6 +4,7 @@ #include #include +#include #include #include #include @@ -342,9 +343,8 @@ static inline String rewriteMysqlQueryColumn(mysqlxx::Pool::Entry & connection, { std::make_shared(), "column_type" } }; - const String & query = "SELECT COLUMN_NAME AS column_name, COLUMN_TYPE AS column_type FROM INFORMATION_SCHEMA.COLUMNS" - " WHERE TABLE_SCHEMA = '" + backQuoteIfNeed(database_name) + - "' AND TABLE_NAME = '" + backQuoteIfNeed(table_name) + "' ORDER BY ORDINAL_POSITION"; + String query = "SELECT COLUMN_NAME AS column_name, COLUMN_TYPE AS column_type FROM INFORMATION_SCHEMA.COLUMNS" + " WHERE TABLE_SCHEMA = '" + database_name + "' AND TABLE_NAME = '" + table_name + "' ORDER BY ORDINAL_POSITION"; StreamSettings mysql_input_stream_settings(global_settings, false, true); auto mysql_source = std::make_unique(connection, query, tables_columns_sample_block, mysql_input_stream_settings); @@ -812,6 +812,7 @@ void MaterializedMySQLSyncThread::executeDDLAtomic(const QueryEvent & query_even CurrentThread::QueryScope query_scope(query_context); String query = query_event.query; + tryQuoteUnrecognizedTokens(query, query); if (!materialized_tables_list.empty()) { auto table_id = tryParseTableIDFromDDL(query, query_event.schema); diff --git a/src/Databases/MySQL/tests/gtest_try_quote_unrecognized_tokens.cpp b/src/Databases/MySQL/tests/gtest_try_quote_unrecognized_tokens.cpp new file mode 100644 index 00000000000..9c76deb2712 --- /dev/null +++ b/src/Databases/MySQL/tests/gtest_try_quote_unrecognized_tokens.cpp @@ -0,0 +1,289 @@ +#include + +#include + +using namespace DB; + +struct TestCase +{ + String query; + String res; + bool ok; + + TestCase( + const String & query_, + const String & res_, + bool ok_) + : query(query_) + , res(res_) + , ok(ok_) + { + } +}; + +std::ostream & operator<<(std::ostream & ostr, const TestCase & test_case) +{ + return ostr << '"' << test_case.query << "\" -> \"" << test_case.res << "\" ok:" << test_case.ok; +} + +class QuoteUnrecognizedTokensTest : public ::testing::TestWithParam +{ +}; + +TEST_P(QuoteUnrecognizedTokensTest, escape) +{ + const auto & [query, expected, ok] = GetParam(); + String actual; + bool res = tryQuoteUnrecognizedTokens(query, actual); + EXPECT_EQ(ok, res); + EXPECT_EQ(expected, actual); +} + +INSTANTIATE_TEST_SUITE_P(MaterializedMySQL, QuoteUnrecognizedTokensTest, ::testing::ValuesIn(std::initializer_list{ + { + "", + "", + false + }, + { + "test '\"`", + "", + false + }, + { + "SELECT * FROM db.`table`", + "", + false + }, + { + "道渠", + "`道渠`", + true + }, + { + "道", + "`道`", + true + }, + { + "道道(skip) 道(", + "`道道`(skip) `道`(", + true + }, + { + "`道渠`", + "", + false + }, + { + "'道'", + "", + false + }, + { + "\"道\"", + "", + false + }, + { + "` 道 test 渠 `", + "", + false + }, + { + "skip 道 skip 123", + "skip `道` skip 123", + true + }, + { + "skip 123 `道` skip", + "", + false + }, + { + "skip `道 skip 123", + "", + false + }, + { + "skip test道 skip", + "skip `test道` skip", + true + }, + { + "test道2test", + "`test道2test`", + true + }, + { + "skip test道2test 123", + "skip `test道2test` 123", + true + }, + { + "skip 您a您a您a a您a您a您a 1您2您3您4 skip", + "skip `您a您a您a` `a您a您a您a` `1您2您3您4` skip", + true + }, + { + "skip 您a 您a您a b您2您c您4 skip", + 
"skip `您a` `您a您a` `b您2您c您4` skip", + true + }, + { + "123您a skip 56_您a 您a2 b_您2_您c123您_a4 skip", + "`123您a` skip `56_您a` `您a2` `b_您2_您c123您_a4` skip", + true + }, + { + "_您_ 123 skip 56_您_您_您_您_您_您_您_您_您_a 您a2 abc 123_您_您_321 a1b2c3 aaaaa您您_a4 skip", + "`_您_` 123 skip `56_您_您_您_您_您_您_您_您_您_a` `您a2` abc `123_您_您_321` a1b2c3 `aaaaa您您_a4` skip", + true + }, + { + "TABLE 您2 您(", + "TABLE `您2` `您`(", + true + }, + { + "TABLE 您.a您2(日2日2 INT", + "TABLE `您`.`a您2`(`日2日2` INT", + true + }, + { + "TABLE 您$.a_您2a_($日2日_2 INT, 您Hi好 a您b好c)", + "TABLE `您`$.`a_您2a_`($`日2日_2` INT, `您Hi好` `a您b好c`)", + true + }, + { + "TABLE 您a日.您a您a您a(test INT", + "TABLE `您a日`.`您a您a您a`(test INT", + true + }, + { + "TABLE 您a日.您a您a您a(Hi您Hi好Hi INT", + "TABLE `您a日`.`您a您a您a`(`Hi您Hi好Hi` INT", + true + }, + { + "--TABLE 您a日.您a您a您a(test INT", + "", + false + }, + { + "--您a日.您a您a您a(\n您Hi好", + "--您a日.您a您a您a(\n`您Hi好`", + true + }, + { + " /* TABLE 您a日.您a您a您a(test INT", + "", + false + }, + { + "/*您a日.您a您a您a(*/\n您Hi好", + "/*您a日.您a您a您a(*/\n`您Hi好`", + true + }, + { + " 您a日.您您aa您a /* 您a日.您a您a您a */ a您a日a.a您您您a", + " `您a日`.`您您aa您a` /* 您a日.您a您a您a */ `a您a日a`.`a您您您a`", + true + }, + //{ TODO + // "TABLE 您2.您a您a您a(test INT", + // "TABLE `您2`.`您a您a您a`(test INT", + // true + //}, + { + "skip 您a您a您a skip", + "skip `您a您a您a` skip", + true + }, + { + "test 您a2您3a您a 4 again", + "test `您a2您3a您a` 4 again", + true + }, + { + "CREATE TABLE db.`道渠`", + "", + false + }, + { + "CREATE TABLE db.`道渠", + "", + false + }, + { + "CREATE TABLE db.道渠", + "CREATE TABLE db.`道渠`", + true + }, + { + "CREATE TABLE db. 道渠", + "CREATE TABLE db. `道渠`", + true + }, + { + R"sql( + CREATE TABLE gb2312.`道渠` ( `id` int NOT NULL, + 您 INT, + 道渠 DATETIME, + 您test INT, test您 INT, test您test INT, + 道渠test INT, test道渠 INT, test道渠test INT, + 您_ INT, _您 INT, _您_ INT, + 您您__ INT, __您您 INT, __您您__ INT, + 您2 INT, 2您 INT, 2您2 INT, + 您您22 INT, 22您您 INT, 22您您22 INT, + 您_2 INT, _2您 INT, _2您_2 INT, _2您2_ INT, 2_您_2 INT, + 您您__22 INT, __22您您 INT, __22您您__22 INT, __22您您22__ INT, 22__您您__22 INT, + 您2_ INT, 2_您 INT, 2_您2_ INT, + 您您22__ INT, 22__您您 INT, 22__您您22__ INT, + 您_test INT, _test您 INT, _test您_test INT, _test您test_ INT, test_您test_ INT, test_您_test INT, + 您您_test INT, _test您您 INT, _test您您_test INT, _test您您test_ INT, test_您您test_ INT, test_您您_test INT, + 您test3 INT, test3您 INT, test3您test3 INT, test3您3test INT, + 您您test3 INT, test3您您 INT, test3您您test3 INT, test3您您3test INT, + 您3test INT, 3test您 INT, 3test您3test INT, 3test您test3 INT, + 您您3test INT, 3test您您 INT, 3test您您3test INT, 3test您您test3 INT, + 您_test4 INT, _test4您 INT, _test4您_test4 INT, test4_您_test4 INT, _test4您4test_ INT, _test4您test4_ INT, + 您您_test4 INT, _test4您您 INT, _test4您您_test4 INT, test4_您您_test4 INT, _test4您您4test_ INT, _test4您您test4_ INT, + 您_5test INT, _5test您 INT, _5test您_5test INT, 5test_您_test5 INT, _4test您test4_ INT, + test_日期 varchar(256), test_道_2 varchar(256) NOT NULL , + test_道渠您_3 + BIGINT NOT NULL, + 道您3_test INT, + PRIMARY KEY (`id`)) ENGINE=InnoDB DEFAULT CHARSET=gb2312; + )sql", + R"sql( + CREATE TABLE gb2312.`道渠` ( `id` int NOT NULL, + `您` INT, + `道渠` DATETIME, + `您test` INT, `test您` INT, `test您test` INT, + `道渠test` INT, `test道渠` INT, `test道渠test` INT, + `您_` INT, `_您` INT, `_您_` INT, + `您您__` INT, `__您您` INT, `__您您__` INT, + `您2` INT, `2您` INT, `2您2` INT, + `您您22` INT, `22您您` INT, `22您您22` INT, + `您_2` INT, `_2您` INT, `_2您_2` INT, `_2您2_` INT, `2_您_2` INT, + `您您__22` INT, `__22您您` INT, `__22您您__22` INT, `__22您您22__` INT, `22__您您__22` INT, + `您2_` INT, `2_您` INT, `2_您2_` INT, + `您您22__` INT, `22__您您` INT, 
`22__您您22__` INT, + `您_test` INT, `_test您` INT, `_test您_test` INT, `_test您test_` INT, `test_您test_` INT, `test_您_test` INT, + `您您_test` INT, `_test您您` INT, `_test您您_test` INT, `_test您您test_` INT, `test_您您test_` INT, `test_您您_test` INT, + `您test3` INT, `test3您` INT, `test3您test3` INT, `test3您3test` INT, + `您您test3` INT, `test3您您` INT, `test3您您test3` INT, `test3您您3test` INT, + `您3test` INT, `3test您` INT, `3test您3test` INT, `3test您test3` INT, + `您您3test` INT, `3test您您` INT, `3test您您3test` INT, `3test您您test3` INT, + `您_test4` INT, `_test4您` INT, `_test4您_test4` INT, `test4_您_test4` INT, `_test4您4test_` INT, `_test4您test4_` INT, + `您您_test4` INT, `_test4您您` INT, `_test4您您_test4` INT, `test4_您您_test4` INT, `_test4您您4test_` INT, `_test4您您test4_` INT, + `您_5test` INT, `_5test您` INT, `_5test您_5test` INT, `5test_您_test5` INT, `_4test您test4_` INT, + `test_日期` varchar(256), `test_道_2` varchar(256) NOT NULL , + `test_道渠您_3` + BIGINT NOT NULL, + `道您3_test` INT, + PRIMARY KEY (`id`)) ENGINE=InnoDB DEFAULT CHARSET=gb2312; + )sql", + true + }, +})); diff --git a/src/Databases/MySQL/tryQuoteUnrecognizedTokens.cpp b/src/Databases/MySQL/tryQuoteUnrecognizedTokens.cpp new file mode 100644 index 00000000000..cd4603ddaec --- /dev/null +++ b/src/Databases/MySQL/tryQuoteUnrecognizedTokens.cpp @@ -0,0 +1,96 @@ +#include +#include +#include + +namespace DB +{ + +/// Checks if there are no any tokens (like whitespaces) between current and previous pos +static bool noWhitespaces(const char * to, const char * from) +{ + return static_cast(from - to) == 0; +} + +/// Checks if the token should be quoted too together with unrecognized +static bool isWordOrNumber(TokenType type) +{ + return type == TokenType::BareWord || type == TokenType::Number; +} + +static void quoteLiteral( + IParser::Pos & pos, + IParser::Pos & pos_prev, + const char *& pos_unrecognized, + const char *& copy_from, + String & rewritten_query) +{ + /// Copy also whitespaces if any + const auto * end = + isWordOrNumber(pos->type) && noWhitespaces(pos_prev->end, pos->begin) + ? pos->end + : pos_prev->end; + String literal(pos_unrecognized, static_cast(end - pos_unrecognized)); + rewritten_query.append(copy_from, pos_unrecognized - copy_from).append(backQuoteMySQL(literal)); + copy_from = end; +} + +bool tryQuoteUnrecognizedTokens(const String & query, String & res) +{ + Tokens tokens(query.data(), query.data() + query.size()); + IParser::Pos pos(tokens, 0); + Expected expected; + String rewritten_query; + const char * copy_from = query.data(); + auto pos_prev = pos; + const char * pos_unrecognized = nullptr; + for (;pos->type != TokenType::EndOfStream; ++pos) + { + /// Commit quotes if any whitespaces found or the token is not a word + bool commit = !noWhitespaces(pos_prev->end, pos->begin) || (pos->type != TokenType::Error && !isWordOrNumber(pos->type)); + if (pos_unrecognized && commit) + { + quoteLiteral( + pos, + pos_prev, + pos_unrecognized, + copy_from, + rewritten_query); + pos_unrecognized = nullptr; + } + if (pos->type == TokenType::Error) + { + /// Find first appearance of the error token + if (!pos_unrecognized) + { + pos_unrecognized = + isWordOrNumber(pos_prev->type) && noWhitespaces(pos_prev->end, pos->begin) + ? 
pos_prev->begin + : pos->begin; + } + } + pos_prev = pos; + } + + /// There was EndOfStream but not committed unrecognized token + if (pos_unrecognized) + { + quoteLiteral( + pos, + pos_prev, + pos_unrecognized, + copy_from, + rewritten_query); + pos_unrecognized = nullptr; + } + + /// If no Errors found + if (copy_from == query.data()) + return false; + + auto size = static_cast(pos->end - copy_from); + rewritten_query.append(copy_from, size); + res = rewritten_query; + return true; +} + +} diff --git a/src/Databases/MySQL/tryQuoteUnrecognizedTokens.h b/src/Databases/MySQL/tryQuoteUnrecognizedTokens.h new file mode 100644 index 00000000000..582a297c485 --- /dev/null +++ b/src/Databases/MySQL/tryQuoteUnrecognizedTokens.h @@ -0,0 +1,10 @@ +#pragma once + +#include + +namespace DB +{ + +bool tryQuoteUnrecognizedTokens(const String & query, String & res); + +} diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index 3e928c3a811..b0a220eb1d2 100644 --- a/src/Storages/StorageMySQL.cpp +++ b/src/Storages/StorageMySQL.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -34,16 +35,6 @@ namespace ErrorCodes extern const int UNKNOWN_TABLE; } -static String backQuoteMySQL(const String & x) -{ - String res(x.size(), '\0'); - { - WriteBufferFromString wb(res); - writeBackQuotedStringMySQL(x, wb); - } - return res; -} - StorageMySQL::StorageMySQL( const StorageID & table_id_, mysqlxx::PoolWithFailover && pool_, diff --git a/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py b/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py index c97c3e5e2a8..9130ccc359c 100644 --- a/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py +++ b/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py @@ -1581,6 +1581,128 @@ def utf8mb4_test(clickhouse_node, mysql_node, service_name): mysql_node.query("DROP DATABASE utf8mb4_test") +def utf8mb4_column_test(clickhouse_node, mysql_node, service_name): + db = "utf8mb4_column_test" + mysql_node.query(f"DROP DATABASE IF EXISTS {db}") + clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}") + mysql_node.query(f"CREATE DATABASE {db}") + + # Full sync + mysql_node.query(f"CREATE TABLE {db}.unquoted (id INT primary key, 日期 DATETIME)") + mysql_node.query(f"CREATE TABLE {db}.quoted (id INT primary key, `日期` DATETIME)") + mysql_node.query(f"INSERT INTO {db}.unquoted VALUES(1, now())") + mysql_node.query(f"INSERT INTO {db}.quoted VALUES(1, now())") + clickhouse_node.query( + f"CREATE DATABASE {db} ENGINE = MaterializedMySQL('{service_name}:3306', '{db}', 'root', 'clickhouse')" + ) + + # Full sync replicated unquoted columns names since they use SHOW CREATE TABLE + # which returns quoted column names + check_query( + clickhouse_node, + f"/* expect: quoted unquoted */ SHOW TABLES FROM {db}", + "quoted\nunquoted\n", + ) + check_query( + clickhouse_node, + f"/* expect: 1 */ SELECT COUNT() FROM {db}.unquoted", + "1\n", + ) + check_query( + clickhouse_node, + f"/* expect: 1 */ SELECT COUNT() FROM {db}.quoted", + "1\n", + ) + + # Inc sync + mysql_node.query( + f"CREATE TABLE {db}.unquoted_new (id INT primary key, 日期 DATETIME)" + ) + mysql_node.query( + f"CREATE TABLE {db}.quoted_new (id INT primary key, `日期` DATETIME)" + ) + mysql_node.query(f"INSERT INTO {db}.unquoted_new VALUES(1, now())") + mysql_node.query(f"INSERT INTO {db}.quoted_new VALUES(1, now())") + mysql_node.query(f"INSERT INTO {db}.unquoted VALUES(2, now())") + 
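    # These id=2 rows land after the initial dump, so they can only reach
    # ClickHouse through incremental (binlog) replication.
+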
mysql_node.query(f"INSERT INTO {db}.quoted VALUES(2, now())") + check_query( + clickhouse_node, + f"/* expect: 2 */ SELECT COUNT() FROM {db}.quoted", + "2\n", + ) + check_query( + clickhouse_node, + f"/* expect: 1 */ SELECT COUNT() FROM {db}.quoted_new", + "1\n", + ) + check_query( + clickhouse_node, + f"/* expect: 2 */ SELECT COUNT() FROM {db}.unquoted", + "2\n", + ) + check_query( + clickhouse_node, + f"/* expect: 1 */ SELECT COUNT() FROM {db}.unquoted_new", + "1\n", + ) + + clickhouse_node.query(f"DROP DATABASE IF EXISTS `{db}`") + mysql_node.query(f"DROP DATABASE IF EXISTS `{db}`") + + +def utf8mb4_name_test(clickhouse_node, mysql_node, service_name): + db = "您Hi您" + table = "日期" + mysql_node.query(f"DROP DATABASE IF EXISTS `{db}`") + clickhouse_node.query(f"DROP DATABASE IF EXISTS `{db}`") + mysql_node.query(f"CREATE DATABASE `{db}`") + mysql_node.query( + f"CREATE TABLE `{db}`.`{table}` (id INT(11) NOT NULL PRIMARY KEY, `{table}` DATETIME) ENGINE=InnoDB DEFAULT CHARACTER SET utf8mb4" + ) + mysql_node.query(f"INSERT INTO `{db}`.`{table}` VALUES(1, now())") + mysql_node.query( + f"CREATE TABLE {db}.{table}_unquoted (id INT(11) NOT NULL PRIMARY KEY, {table} DATETIME) ENGINE=InnoDB DEFAULT CHARACTER SET utf8mb4" + ) + mysql_node.query(f"INSERT INTO {db}.{table}_unquoted VALUES(1, now())") + clickhouse_node.query( + f"CREATE DATABASE `{db}` ENGINE = MaterializedMySQL('{service_name}:3306', '{db}', 'root', 'clickhouse')" + ) + check_query( + clickhouse_node, + f"/* expect: 1 */ SELECT COUNT() FROM `{db}`.`{table}`", + "1\n", + ) + check_query( + clickhouse_node, + f"/* expect: 1 */ SELECT COUNT() FROM `{db}`.`{table}_unquoted`", + "1\n", + ) + + # Inc sync + mysql_node.query( + f"CREATE TABLE `{db}`.`{table}2` (id INT(11) NOT NULL PRIMARY KEY, `{table}` DATETIME) ENGINE=InnoDB DEFAULT CHARACTER SET utf8mb4" + ) + mysql_node.query(f"INSERT INTO `{db}`.`{table}2` VALUES(1, now())") + check_query( + clickhouse_node, + f"/* expect: 1 */ SELECT COUNT() FROM `{db}`.`{table}2`", + "1\n", + ) + + mysql_node.query( + f"CREATE TABLE {db}.{table}2_unquoted (id INT(11) NOT NULL PRIMARY KEY, {table} DATETIME) ENGINE=InnoDB DEFAULT CHARACTER SET utf8mb4" + ) + mysql_node.query(f"INSERT INTO {db}.{table}2_unquoted VALUES(1, now())") + check_query( + clickhouse_node, + f"/* expect: 1 */ SELECT COUNT() FROM `{db}`.`{table}2_unquoted`", + "1\n", + ) + + clickhouse_node.query(f"DROP DATABASE IF EXISTS `{db}`") + mysql_node.query(f"DROP DATABASE IF EXISTS `{db}`") + + def system_parts_test(clickhouse_node, mysql_node, service_name): mysql_node.query("DROP DATABASE IF EXISTS system_parts_test") clickhouse_node.query("DROP DATABASE IF EXISTS system_parts_test") diff --git a/tests/integration/test_materialized_mysql_database/test.py b/tests/integration/test_materialized_mysql_database/test.py index 32c1da8a2bd..e31ef70b4ad 100644 --- a/tests/integration/test_materialized_mysql_database/test.py +++ b/tests/integration/test_materialized_mysql_database/test.py @@ -381,6 +381,12 @@ def test_utf8mb4( ): materialized_with_ddl.utf8mb4_test(clickhouse_node, started_mysql_5_7, "mysql57") materialized_with_ddl.utf8mb4_test(clickhouse_node, started_mysql_8_0, "mysql80") + materialized_with_ddl.utf8mb4_column_test( + clickhouse_node, started_mysql_8_0, "mysql80" + ) + materialized_with_ddl.utf8mb4_name_test( + clickhouse_node, started_mysql_8_0, "mysql80" + ) def test_system_parts_table(started_cluster, started_mysql_8_0, clickhouse_node): From 3710c7238d9eaf0328170bafb03eb4b15ea5d67c Mon Sep 17 00:00:00 2001 From: Antonio 
Andelic Date: Mon, 24 Jul 2023 09:19:06 +0000 Subject: [PATCH 1941/1997] Fix test_throttling --- tests/integration/test_throttling/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_throttling/test.py b/tests/integration/test_throttling/test.py index ff8e7154d0d..2b5e9312a4c 100644 --- a/tests/integration/test_throttling/test.py +++ b/tests/integration/test_throttling/test.py @@ -114,7 +114,7 @@ def node_update_config(mode, setting, value=None): def assert_took(took, should_took): - assert took >= should_took[0] * 0.9 and took < should_took[1] + assert took >= should_took[0] * 0.85 and took < should_took[1] @pytest.mark.parametrize( From 2471b032ab7a504d1997e9d3681bf97f0564273d Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 24 Jul 2023 09:52:49 +0000 Subject: [PATCH 1942/1997] fix lightweight delete after drop of projection --- src/Storages/MergeTree/MergeTreeData.cpp | 3 ++- .../02792_drop_projection_lwd.reference | 1 + .../0_stateless/02792_drop_projection_lwd.sql | 26 +++++++++++++++++++ 3 files changed, 29 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02792_drop_projection_lwd.reference create mode 100644 tests/queries/0_stateless/02792_drop_projection_lwd.sql diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index d773f380377..06a9b62d9de 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -5693,7 +5693,8 @@ bool MergeTreeData::supportsLightweightDelete() const auto lock = lockParts(); for (const auto & part : data_parts_by_info) { - if (!part->supportLightweightDeleteMutate()) + if (part->getState() == MergeTreeDataPartState::Active + && !part->supportLightweightDeleteMutate()) return false; } return true; diff --git a/tests/queries/0_stateless/02792_drop_projection_lwd.reference b/tests/queries/0_stateless/02792_drop_projection_lwd.reference new file mode 100644 index 00000000000..6529ff889b0 --- /dev/null +++ b/tests/queries/0_stateless/02792_drop_projection_lwd.reference @@ -0,0 +1 @@ +98 diff --git a/tests/queries/0_stateless/02792_drop_projection_lwd.sql b/tests/queries/0_stateless/02792_drop_projection_lwd.sql new file mode 100644 index 00000000000..fd446a8efe8 --- /dev/null +++ b/tests/queries/0_stateless/02792_drop_projection_lwd.sql @@ -0,0 +1,26 @@ +DROP TABLE IF EXISTS t_projections_lwd; + +CREATE TABLE t_projections_lwd (a UInt32, b UInt32) ENGINE = MergeTree ORDER BY a; + +INSERT INTO t_projections_lwd SELECT number, number FROM numbers(100); + +-- LWD works +DELETE FROM t_projections_lwd WHERE a = 0; + +-- add projection +ALTER TABLE t_projections_lwd ADD PROJECTION p_t_projections_lwd (SELECT * ORDER BY b); +ALTER TABLE t_projections_lwd MATERIALIZE PROJECTION p_t_projections_lwd; + +-- LWD does not work, as expected +DELETE FROM t_projections_lwd WHERE a = 1; -- { serverError UNFINISHED } +KILL MUTATION WHERE database = currentDatabase() AND table = 't_projections_lwd' SYNC FORMAT Null; + +-- drop projection +SET mutations_sync = 2; +ALTER TABLE t_projections_lwd DROP projection p_t_projections_lwd; + +DELETE FROM t_projections_lwd WHERE a = 2; + +SELECT count() FROM t_projections_lwd; + +DROP TABLE t_projections_lwd; From 5da6c99f6df90ae5a8dde59f9cccce8cee48fc61 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 24 Jul 2023 12:02:27 +0200 Subject: [PATCH 1943/1997] Add comment --- tests/integration/test_throttling/test.py | 3 +++ 1 file changed, 3 insertions(+) diff --git 
a/tests/integration/test_throttling/test.py b/tests/integration/test_throttling/test.py index 2b5e9312a4c..62640394a85 100644 --- a/tests/integration/test_throttling/test.py +++ b/tests/integration/test_throttling/test.py @@ -114,6 +114,9 @@ def node_update_config(mode, setting, value=None): def assert_took(took, should_took): + # we need to decrease the lower limit because the server limits could + # be enforced by throttling some server background IO instead of query IO + # and we have no control over it assert took >= should_took[0] * 0.85 and took < should_took[1] From 79cc81890316338e35f13576cfd0360494e72645 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 24 Jul 2023 11:06:21 +0000 Subject: [PATCH 1944/1997] try to fix test --- .../02726_async_insert_flush_stress.sh | 33 ++++++++++++------- 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/tests/queries/0_stateless/02726_async_insert_flush_stress.sh b/tests/queries/0_stateless/02726_async_insert_flush_stress.sh index 5fafb773d16..876766d0780 100755 --- a/tests/queries/0_stateless/02726_async_insert_flush_stress.sh +++ b/tests/queries/0_stateless/02726_async_insert_flush_stress.sh @@ -11,7 +11,9 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) function insert1() { url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=0" - while true; do + + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do ${CLICKHOUSE_CURL} -sS "$url" -d 'INSERT INTO async_inserts FORMAT CSV 1,"a" 2,"b" @@ -22,7 +24,9 @@ function insert1() function insert2() { url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=0" - while true; do + + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do ${CLICKHOUSE_CURL} -sS "$url" -d 'INSERT INTO async_inserts FORMAT JSONEachRow {"id": 5, "s": "e"} {"id": 6, "s": "f"}' done } @@ -30,28 +34,33 @@ function insert2() function insert3() { url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=0" - while true; do + + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do ${CLICKHOUSE_CURL} -sS "$url" -d "INSERT INTO FUNCTION remote('127.0.0.1', $CLICKHOUSE_DATABASE, async_inserts) VALUES (7, 'g') (8, 'h')" done } function select1() { - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do ${CLICKHOUSE_CLIENT} -q "SELECT * FROM async_inserts FORMAT Null" done } function select2() { - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do ${CLICKHOUSE_CLIENT} -q "SELECT * FROM system.asynchronous_inserts FORMAT Null" done } function flush1() { - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do sleep 0.2 ${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH ASYNC INSERT QUEUE" done @@ -70,14 +79,14 @@ export -f select2 export -f flush1 for _ in {1..5}; do - timeout $TIMEOUT bash -c insert1 & - timeout $TIMEOUT bash -c insert2 & - timeout $TIMEOUT bash -c insert3 & + insert1 $TIMEOUT & + insert2 $TIMEOUT & + insert3 $TIMEOUT & done -timeout $TIMEOUT bash -c select1 & -timeout $TIMEOUT bash -c select2 & -timeout $TIMEOUT bash -c flush1 & +select1 $TIMEOUT & +select2 $TIMEOUT & +flush1 $TIMEOUT & wait From 21097209d2e709db8022782a02980e52a7bc5df7 Mon Sep 17 00:00:00 2001 From: Andrey Zvonov <32552679+zvonand@users.noreply.github.com> Date: Mon, 24 Jul 2023 15:41:21 +0300 Subject: [PATCH 1945/1997] Revert "Remove `toDecimalString`" --- .../functions/type-conversion-functions.md | 38 +++ .../functions/type-conversion-functions.md | 38 +++ 
src/Functions/FunctionToDecimalString.cpp | 22 ++ src/Functions/FunctionToDecimalString.h | 312 ++++++++++++++++++ src/IO/WriteHelpers.h | 39 ++- .../02676_to_decimal_string.reference | 21 ++ .../0_stateless/02676_to_decimal_string.sql | 35 ++ 7 files changed, 492 insertions(+), 13 deletions(-) create mode 100644 src/Functions/FunctionToDecimalString.cpp create mode 100644 src/Functions/FunctionToDecimalString.h create mode 100644 tests/queries/0_stateless/02676_to_decimal_string.reference create mode 100644 tests/queries/0_stateless/02676_to_decimal_string.sql diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index c2bd525c483..36f40b37238 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -945,6 +945,44 @@ Result: └────────────┴───────┘ ``` +## toDecimalString + +Converts a numeric value to String with the number of fractional digits in the output specified by the user. + +**Syntax** + +``` sql +toDecimalString(number, scale) +``` + +**Parameters** + +- `number` — Value to be represented as String, [Int, UInt](/docs/en/sql-reference/data-types/int-uint.md), [Float](/docs/en/sql-reference/data-types/float.md), [Decimal](/docs/en/sql-reference/data-types/decimal.md), +- `scale` — Number of fractional digits, [UInt8](/docs/en/sql-reference/data-types/int-uint.md). + * Maximum scale for [Decimal](/docs/en/sql-reference/data-types/decimal.md) and [Int, UInt](/docs/en/sql-reference/data-types/int-uint.md) types is 77 (it is the maximum possible number of significant digits for Decimal), + * Maximum scale for [Float](/docs/en/sql-reference/data-types/float.md) is 60. + +**Returned value** + +- Input value represented as [String](/docs/en/sql-reference/data-types/string.md) with given number of fractional digits (scale). + The number is rounded up or down according to common arithmetic in case requested scale is smaller than original number's scale. + +**Example** + +Query: + +``` sql +SELECT toDecimalString(CAST('64.32', 'Float64'), 5); +``` + +Result: + +```response +┌toDecimalString(CAST('64.32', 'Float64'), 5)─┐ +│ 64.32000 │ +└─────────────────────────────────────────────┘ +``` + ## reinterpretAsUInt(8\|16\|32\|64) ## reinterpretAsInt(8\|16\|32\|64) diff --git a/docs/ru/sql-reference/functions/type-conversion-functions.md b/docs/ru/sql-reference/functions/type-conversion-functions.md index 088b1a9a1f1..e53104d8d71 100644 --- a/docs/ru/sql-reference/functions/type-conversion-functions.md +++ b/docs/ru/sql-reference/functions/type-conversion-functions.md @@ -762,6 +762,44 @@ SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut; └────────────┴───────┘ ``` +## toDecimalString + +Принимает любой численный тип первым аргументом, возвращает строковое десятичное представление числа с точностью, заданной вторым аргументом. + +**Синтаксис** + +``` sql +toDecimalString(number, scale) +``` + +**Параметры** + +- `number` — Значение любого числового типа: [Int, UInt](/docs/ru/sql-reference/data-types/int-uint.md), [Float](/docs/ru/sql-reference/data-types/float.md), [Decimal](/docs/ru/sql-reference/data-types/decimal.md), +- `scale` — Требуемое количество десятичных знаков после запятой, [UInt8](/docs/ru/sql-reference/data-types/int-uint.md). 
+ * Значение `scale` для типов [Decimal](/docs/ru/sql-reference/data-types/decimal.md) и [Int, UInt](/docs/ru/sql-reference/data-types/int-uint.md) должно не превышать 77 (так как это наибольшее количество значимых символов для этих типов), + * Значение `scale` для типа [Float](/docs/ru/sql-reference/data-types/float.md) не должно превышать 60. + +**Возвращаемое значение** + +- Строка ([String](/docs/en/sql-reference/data-types/string.md)), представляющая собой десятичное представление входного числа с заданной длиной дробной части. + При необходимости число округляется по стандартным правилам арифметики. + +**Пример использования** + +Запрос: + +``` sql +SELECT toDecimalString(CAST('64.32', 'Float64'), 5); +``` + +Результат: + +```response +┌─toDecimalString(CAST('64.32', 'Float64'), 5)┐ +│ 64.32000 │ +└─────────────────────────────────────────────┘ +``` + ## reinterpretAsUInt(8\|16\|32\|64) {#reinterpretasuint8163264} ## reinterpretAsInt(8\|16\|32\|64) {#reinterpretasint8163264} diff --git a/src/Functions/FunctionToDecimalString.cpp b/src/Functions/FunctionToDecimalString.cpp new file mode 100644 index 00000000000..fe417b19137 --- /dev/null +++ b/src/Functions/FunctionToDecimalString.cpp @@ -0,0 +1,22 @@ +#include +#include +#include + +namespace DB +{ + +REGISTER_FUNCTION(ToDecimalString) +{ + factory.registerFunction( + FunctionDocumentation{ + .description=R"( +Returns string representation of a number. First argument is the number of any numeric type, +second argument is the desired number of digits in fractional part. Returns String. + + )", + .examples{{"toDecimalString", "SELECT toDecimalString(2.1456,2)", ""}}, + .categories{"String"} + }, FunctionFactory::CaseInsensitive); +} + +} diff --git a/src/Functions/FunctionToDecimalString.h b/src/Functions/FunctionToDecimalString.h new file mode 100644 index 00000000000..6ae007e6b66 --- /dev/null +++ b/src/Functions/FunctionToDecimalString.h @@ -0,0 +1,312 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int ILLEGAL_COLUMN; + extern const int CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER; +} + +class FunctionToDecimalString : public IFunction +{ +public: + static constexpr auto name = "toDecimalString"; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override { return name; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + size_t getNumberOfArguments() const override { return 2; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (!isNumber(*arguments[0])) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal first argument for formatDecimal function: got {}, expected numeric type", + arguments[0]->getName()); + + if (!isUInt8(*arguments[1])) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal second argument for formatDecimal function: got {}, expected UInt8", + arguments[1]->getName()); + + return std::make_shared(); + } + + bool useDefaultImplementationForConstants() const override { return true; } + +private: + /// For operations with Integer/Float + template + void vectorConstant(const FromVectorType & vec_from, UInt8 precision, + ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets) const + { + size_t input_rows_count = 
vec_from.size(); + result_offsets.resize(input_rows_count); + + /// Buffer is used here and in functions below because resulting size cannot be precisely anticipated, + /// and buffer resizes on-the-go. Also, .count() provided by buffer is convenient in this case. + WriteBufferFromVector buf_to(vec_to); + + for (size_t i = 0; i < input_rows_count; ++i) + { + format(vec_from[i], buf_to, precision); + result_offsets[i] = buf_to.count(); + } + + buf_to.finalize(); + } + + template + void vectorVector(const FirstArgVectorType & vec_from, const ColumnVector::Container & vec_precision, + ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets) const + { + size_t input_rows_count = vec_from.size(); + result_offsets.resize(input_rows_count); + + WriteBufferFromVector buf_to(vec_to); + + constexpr size_t max_digits = std::numeric_limits::digits10; + + for (size_t i = 0; i < input_rows_count; ++i) + { + if (vec_precision[i] > max_digits) + throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER, + "Too many fractional digits requested, shall not be more than {}", max_digits); + format(vec_from[i], buf_to, vec_precision[i]); + result_offsets[i] = buf_to.count(); + } + + buf_to.finalize(); + } + + template + void constantVector(const FirstArgType & value_from, const ColumnVector::Container & vec_precision, + ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets) const + { + size_t input_rows_count = vec_precision.size(); + result_offsets.resize(input_rows_count); + + WriteBufferFromVector buf_to(vec_to); + + constexpr size_t max_digits = std::numeric_limits::digits10; + + for (size_t i = 0; i < input_rows_count; ++i) + { + if (vec_precision[i] > max_digits) + throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER, + "Too many fractional digits requested, shall not be more than {}", max_digits); + format(value_from, buf_to, vec_precision[i]); + result_offsets[i] = buf_to.count(); + } + + buf_to.finalize(); + } + + /// For operations with Decimal + template + void vectorConstant(const FirstArgVectorType & vec_from, UInt8 precision, + ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets, UInt8 from_scale) const + { + /// There are no more than 77 meaning digits (as it is the max length of UInt256). So we can limit it with 77. 
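+        /// A worked example of what this bound means: toDecimalString(toDecimal64(64.32, 5), 2)
+        /// renders "64.32" (trimmed to 2 fractional digits), while any precision above 77
+        /// is rejected with CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER before formatting starts.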
+ constexpr size_t max_digits = std::numeric_limits::digits10; + if (precision > max_digits) + throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER, + "Too many fractional digits requested for Decimal, must not be more than {}", max_digits); + + WriteBufferFromVector buf_to(vec_to); + size_t input_rows_count = vec_from.size(); + result_offsets.resize(input_rows_count); + + for (size_t i = 0; i < input_rows_count; ++i) + { + writeText(vec_from[i], from_scale, buf_to, true, true, precision); + writeChar(0, buf_to); + result_offsets[i] = buf_to.count(); + } + buf_to.finalize(); + } + + template + void vectorVector(const FirstArgVectorType & vec_from, const ColumnVector::Container & vec_precision, + ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets, UInt8 from_scale) const + { + size_t input_rows_count = vec_from.size(); + result_offsets.resize(input_rows_count); + + WriteBufferFromVector buf_to(vec_to); + + constexpr size_t max_digits = std::numeric_limits::digits10; + + for (size_t i = 0; i < input_rows_count; ++i) + { + if (vec_precision[i] > max_digits) + throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER, + "Too many fractional digits requested for Decimal, must not be more than {}", max_digits); + writeText(vec_from[i], from_scale, buf_to, true, true, vec_precision[i]); + writeChar(0, buf_to); + result_offsets[i] = buf_to.count(); + } + buf_to.finalize(); + } + + template + void constantVector(const FirstArgType & value_from, const ColumnVector::Container & vec_precision, + ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets, UInt8 from_scale) const + { + size_t input_rows_count = vec_precision.size(); + result_offsets.resize(input_rows_count); + + WriteBufferFromVector buf_to(vec_to); + + constexpr size_t max_digits = std::numeric_limits::digits10; + + for (size_t i = 0; i < input_rows_count; ++i) + { + if (vec_precision[i] > max_digits) + throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER, + "Too many fractional digits requested for Decimal, must not be more than {}", max_digits); + writeText(value_from, from_scale, buf_to, true, true, vec_precision[i]); + writeChar(0, buf_to); + result_offsets[i] = buf_to.count(); + } + buf_to.finalize(); + } + + template + static void format(T value, DB::WriteBuffer & out, UInt8 precision) + { + /// Maximum of 60 is hard-coded in 'double-conversion/double-conversion.h' for floating point values, + /// Catch this here to give user a more reasonable error. + if (precision > 60) + throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER, + "Too high precision requested for Float, must not be more than 60, got {}", Int8(precision)); + + DB::DoubleConverter::BufferType buffer; + double_conversion::StringBuilder builder{buffer, sizeof(buffer)}; + + const auto result = DB::DoubleConverter::instance().ToFixed(value, precision, &builder); + + if (!result) + throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER, "Error processing number: {}", value); + + out.write(buffer, builder.position()); + writeChar(0, out); + } + + template + static void format(T value, DB::WriteBuffer & out, UInt8 precision) + { + /// Fractional part for Integer is just trailing zeros. Let's limit it with 77 (like with Decimals). 
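+        /// e.g. toDecimalString(5, 3) produces "5.000": the integer is written verbatim,
+        /// then a point and the requested number of zeros are appended.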
+    template <is_floating_point T>
+    static void format(T value, DB::WriteBuffer & out, UInt8 precision)
+    {
+        /// A maximum of 60 digits is hard-coded in 'double-conversion/double-conversion.h' for floating point values.
+        /// Catch this here to give the user a more reasonable error.
+        if (precision > 60)
+            throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER,
+                                "Too high precision requested for Float, must not be more than 60, got {}", Int8(precision));
+
+        DB::DoubleConverter<false>::BufferType buffer;
+        double_conversion::StringBuilder builder{buffer, sizeof(buffer)};
+
+        const auto result = DB::DoubleConverter<false>::instance().ToFixed(value, precision, &builder);
+
+        if (!result)
+            throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER, "Error processing number: {}", value);
+
+        out.write(buffer, builder.position());
+        writeChar(0, out);
+    }
+
+    template <is_integer T>
+    static void format(T value, DB::WriteBuffer & out, UInt8 precision)
+    {
+        /// The fractional part of an Integer is just trailing zeros. Let's limit it to 77 (as with Decimals).
+        constexpr size_t max_digits = std::numeric_limits<UInt256>::digits10;
+        if (precision > max_digits)
+            throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER,
+                                "Too many fractional digits requested, must not be more than {}", max_digits);
+        writeText(value, out);
+        if (precision > 0) [[likely]]
+        {
+            writeChar('.', out);
+            for (int i = 0; i < precision; ++i)
+                writeChar('0', out);
+        }
+        /// The terminating zero byte must be written for every row, even when precision is 0.
+        writeChar(0, out);
+    }
+
+public:
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
+    {
+        switch (arguments[0].type->getTypeId())
+        {
+            case TypeIndex::UInt8:      return executeType<UInt8>(arguments);
+            case TypeIndex::UInt16:     return executeType<UInt16>(arguments);
+            case TypeIndex::UInt32:     return executeType<UInt32>(arguments);
+            case TypeIndex::UInt64:     return executeType<UInt64>(arguments);
+            case TypeIndex::UInt128:    return executeType<UInt128>(arguments);
+            case TypeIndex::UInt256:    return executeType<UInt256>(arguments);
+            case TypeIndex::Int8:       return executeType<Int8>(arguments);
+            case TypeIndex::Int16:      return executeType<Int16>(arguments);
+            case TypeIndex::Int32:      return executeType<Int32>(arguments);
+            case TypeIndex::Int64:      return executeType<Int64>(arguments);
+            case TypeIndex::Int128:     return executeType<Int128>(arguments);
+            case TypeIndex::Int256:     return executeType<Int256>(arguments);
+            case TypeIndex::Float32:    return executeType<Float32>(arguments);
+            case TypeIndex::Float64:    return executeType<Float64>(arguments);
+            case TypeIndex::Decimal32:  return executeType<Decimal32>(arguments);
+            case TypeIndex::Decimal64:  return executeType<Decimal64>(arguments);
+            case TypeIndex::Decimal128: return executeType<Decimal128>(arguments);
+            case TypeIndex::Decimal256: return executeType<Decimal256>(arguments);
+            default:
+                throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
+                                arguments[0].column->getName(), getName());
+        }
+    }
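For the Float path above, double-conversion's ToFixed fills a fixed-size stack buffer, which is why the precision has to be capped before the call. The standard-library analogue of the contract it implements is a plain %.*f conversion, sketched here purely as an illustration (this is not the library's implementation; double-conversion exists to produce correctly rounded output without locale effects):

    #include <cstdio>
    #include <stdexcept>
    #include <string>

    /// Fixed-point formatting of a double with a capped digit count, roughly
    /// what ToFixed() promises.
    std::string toFixed(double value, int precision)
    {
        constexpr int max_precision = 60;  /// mirrors the hard-coded limit above
        if (precision < 0 || precision > max_precision)
            throw std::invalid_argument("precision out of range");

        char buf[512];  /// enough for ~309 integer digits, 60 fractional digits, sign and dot
        int n = std::snprintf(buf, sizeof(buf), "%.*f", precision, value);
        return std::string(buf, n);
    }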
+
+private:
+    template <typename T>
+    ColumnPtr executeType(const ColumnsWithTypeAndName & arguments) const
+    {
+        const auto * from_col_const = typeid_cast<const ColumnConst *>(arguments[0].column.get());
+        const auto * precision_col = checkAndGetColumn<ColumnVector<UInt8>>(arguments[1].column.get());
+        const auto * precision_col_const = typeid_cast<const ColumnConst *>(arguments[1].column.get());
+
+        auto result_col = ColumnString::create();
+        auto * result_col_string = assert_cast<ColumnString *>(result_col.get());
+        ColumnString::Chars & result_chars = result_col_string->getChars();
+        ColumnString::Offsets & result_offsets = result_col_string->getOffsets();
+
+        if constexpr (is_decimal<T>)
+        {
+            const auto * from_col = checkAndGetColumn<ColumnDecimal<T>>(arguments[0].column.get());
+
+            if (from_col)
+            {
+                /// Read the scale only after the column type has been verified.
+                UInt8 from_scale = from_col->getScale();
+                if (precision_col_const)
+                    vectorConstant(from_col->getData(), precision_col_const->template getValue<UInt8>(), result_chars, result_offsets, from_scale);
+                else
+                    vectorVector(from_col->getData(), precision_col->getData(), result_chars, result_offsets, from_scale);
+            }
+            else if (from_col_const)
+            {
+                const auto * from_col_nested = checkAndGetColumn<ColumnDecimal<T>>(&from_col_const->getDataColumn());
+                constantVector(from_col_const->template getValue<T>(), precision_col->getData(), result_chars, result_offsets, from_col_nested->getScale());
+            }
+            else
+                throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function formatDecimal", arguments[0].column->getName());
+        }
+        else
+        {
+            const auto * from_col = checkAndGetColumn<ColumnVector<T>>(arguments[0].column.get());
+            if (from_col)
+            {
+                if (precision_col_const)
+                    vectorConstant(from_col->getData(), precision_col_const->template getValue<UInt8>(), result_chars, result_offsets);
+                else
+                    vectorVector(from_col->getData(), precision_col->getData(), result_chars, result_offsets);
+            }
+            else if (from_col_const)
+                constantVector(from_col_const->template getValue<T>(), precision_col->getData(), result_chars, result_offsets);
+            else
+                throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function formatDecimal", arguments[0].column->getName());
+        }
+
+        return result_col;
+    }
+};
+
+}
diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h
index 76778543bd0..aa4c9b17e48 100644
--- a/src/IO/WriteHelpers.h
+++ b/src/IO/WriteHelpers.h
@@ -905,26 +905,26 @@ inline void writeText(const IPv4 & x, WriteBuffer & buf) { writeIPv4Text(x, buf)
 inline void writeText(const IPv6 & x, WriteBuffer & buf) { writeIPv6Text(x, buf); }
 
 template <typename T>
-void writeDecimalFractional(const T & x, UInt32 scale, WriteBuffer & ostr, bool trailing_zeros)
+void writeDecimalFractional(const T & x, UInt32 scale, WriteBuffer & ostr, bool trailing_zeros,
+                            bool fixed_fractional_length, UInt32 fractional_length)
 {
     /// If it's a big integer, but the number of digits is small,
     /// use the implementation for smaller integers for more efficient arithmetic.
-
     if constexpr (std::is_same_v<T, Int256>)
     {
         if (x <= std::numeric_limits<UInt32>::max())
         {
-            writeDecimalFractional(static_cast<UInt32>(x), scale, ostr, trailing_zeros);
+            writeDecimalFractional(static_cast<UInt32>(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length);
             return;
         }
         else if (x <= std::numeric_limits<UInt64>::max())
         {
-            writeDecimalFractional(static_cast<UInt64>(x), scale, ostr, trailing_zeros);
+            writeDecimalFractional(static_cast<UInt64>(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length);
             return;
         }
         else if (x <= std::numeric_limits<UInt128>::max())
         {
-            writeDecimalFractional(static_cast<UInt128>(x), scale, ostr, trailing_zeros);
+            writeDecimalFractional(static_cast<UInt128>(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length);
             return;
         }
     }
@@ -932,24 +932,36 @@ void writeDecimalFractional(const T & x, UInt32 scale, WriteBuffer & ostr, bool
     {
         if (x <= std::numeric_limits<UInt32>::max())
         {
-            writeDecimalFractional(static_cast<UInt32>(x), scale, ostr, trailing_zeros);
+            writeDecimalFractional(static_cast<UInt32>(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length);
             return;
         }
         else if (x <= std::numeric_limits<UInt64>::max())
        {
-            writeDecimalFractional(static_cast<UInt64>(x), scale, ostr, trailing_zeros);
+            writeDecimalFractional(static_cast<UInt64>(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length);
             return;
         }
     }
 
     constexpr size_t max_digits = std::numeric_limits<UInt256>::digits10;
     assert(scale <= max_digits);
+    assert(fractional_length <= max_digits);
+
     char buf[max_digits];
-    memset(buf, '0', scale);
+    memset(buf, '0', std::max(scale, fractional_length));
+
     T value = x;
     Int32 last_nonzero_pos = 0;
-    for (Int32 pos = scale - 1; pos >= 0; --pos)
+
+    if (fixed_fractional_length && fractional_length < scale)
+    {
+        T new_value = value / DecimalUtils::scaleMultiplier<Int256>(scale - fractional_length - 1);
+        auto round_carry = new_value % 10;
+        value = new_value / 10;
+        if (round_carry >= 5)
+            value += 1;
+    }
+
+    for (Int32 pos = fixed_fractional_length ? std::min(scale - 1, fractional_length - 1) : scale - 1; pos >= 0; --pos)
     {
         auto remainder = value % 10;
         value /= 10;
@@ -961,11 +973,12 @@ void writeDecimalFractional(const T & x, UInt32 scale, WriteBuffer & ostr, bool
     }
 
     writeChar('.', ostr);
-    ostr.write(buf, trailing_zeros ? scale : last_nonzero_pos + 1);
+    ostr.write(buf, fixed_fractional_length ? fractional_length : (trailing_zeros ? scale : last_nonzero_pos + 1));
 }
 
 template <typename T>
-void writeText(Decimal<T> x, UInt32 scale, WriteBuffer & ostr, bool trailing_zeros)
+void writeText(Decimal<T> x, UInt32 scale, WriteBuffer & ostr, bool trailing_zeros,
+               bool fixed_fractional_length = false, UInt32 fractional_length = 0)
 {
     T part = DecimalUtils::getWholePart(x, scale);
 
@@ -976,7 +989,7 @@ void writeText(Decimal<T> x, UInt32 scale, WriteBuffer & ostr, bool trailing_zer
     writeIntText(part, ostr);
 
-    if (scale)
+    if (scale || (fixed_fractional_length && fractional_length > 0))
     {
         part = DecimalUtils::getFractionalPart(x, scale);
         if (part || trailing_zeros)
@@ -984,7 +997,7 @@ void writeText(Decimal<T> x, UInt32 scale, WriteBuffer & ostr, bool trailing_zer
             if (part < 0)
                 part *= T(-1);
 
-            writeDecimalFractional(part, scale, ostr, trailing_zeros);
+            writeDecimalFractional(part, scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length);
         }
     }
 }
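The narrowing cascade at the top of writeDecimalFractional() is worth calling out: digit extraction (% 10, / 10) on emulated 128/256-bit integers is far more expensive than on native words, so values that happen to be small are routed to a cheaper instantiation first. The idea in miniature, as an illustrative sketch for unsigned values (not the ClickHouse code itself):

    #include <cstdint>
    #include <limits>

    /// Route a wide value through the cheapest integer type that can hold it,
    /// then run the supplied digit-crunching callback on the narrowed value.
    template <typename Wide, typename Fn>
    void withNarrowedValue(Wide x, Fn && process)
    {
        if (x <= std::numeric_limits<uint32_t>::max())
            process(static_cast<uint32_t>(x));        /// cast is safe: the branch proves it fits
        else if (x <= std::numeric_limits<uint64_t>::max())
            process(static_cast<uint64_t>(x));
        else
            process(x);                               /// genuinely wide, pay the full cost
    }

Usage would look like withNarrowedValue(static_cast<unsigned __int128>(12345), [](auto v) { /* v is uint32_t here */ }); the generic lambda lets one body serve every width.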
diff --git a/tests/queries/0_stateless/02676_to_decimal_string.reference b/tests/queries/0_stateless/02676_to_decimal_string.reference
new file mode 100644
index 00000000000..4c27ee5b528
--- /dev/null
+++ b/tests/queries/0_stateless/02676_to_decimal_string.reference
@@ -0,0 +1,21 @@
+2.00000000000000000000000000000000000000000000000000000000000000000000000000000
+2.12
+-2.00000000000000000000000000000000000000000000000000000000000000000000000000000
+-2.12
+2.987600000000000033395508580724708735942840576171875000000000
+2.15
+-2.987600000000000033395508580724708735942840576171875000000000
+-2.15
+64.1230010986
+64.2340000000
+-64.1230010986
+-64.2340000000
+-32.345
+32.34500000000000000000000000000000000000000000000000000000000000000000000000000
+32.46
+-64.5671232345
+128.78932312332132985464
+-128.78932312332132985464
+128.78932312332132985464000000000000000000000000000000000000000000000000000000000
+128.7893231233
+-128.78932312332132985464123123789323123321329854600000000000000000000000000000000
diff --git a/tests/queries/0_stateless/02676_to_decimal_string.sql b/tests/queries/0_stateless/02676_to_decimal_string.sql
new file mode 100644
index 00000000000..563d60c62c7
--- /dev/null
+++ b/tests/queries/0_stateless/02676_to_decimal_string.sql
@@ -0,0 +1,35 @@
+-- Regular types
+SELECT toDecimalString(2, 77); -- more digits required than exist
+SELECT toDecimalString(2.123456, 2); -- rounding
+SELECT toDecimalString(-2, 77); -- more digits required than exist
+SELECT toDecimalString(-2.123456, 2); -- rounding
+
+SELECT toDecimalString(2.9876, 60); -- more digits required than exist (took 60 as it is float by default)
+SELECT toDecimalString(2.1456, 2); -- rounding
+SELECT toDecimalString(-2.9876, 60); -- more digits required than exist
+SELECT toDecimalString(-2.1456, 2); -- rounding
+
+-- Float32 and Float64 tests. No sense to test big float precision -- the result will be a mess anyway.
+SELECT toDecimalString(64.123::Float32, 10);
+SELECT toDecimalString(64.234::Float64, 10);
+SELECT toDecimalString(-64.123::Float32, 10);
+SELECT toDecimalString(-64.234::Float64, 10);
+
+-- Decimals
+SELECT toDecimalString(-32.345::Decimal32(3), 3);
+SELECT toDecimalString(32.345::Decimal32(3), 77); -- more digits required than exist
+SELECT toDecimalString(32.456::Decimal32(3), 2); -- rounding
+SELECT toDecimalString('-64.5671232345'::Decimal64(10), 10);
+SELECT toDecimalString('128.78932312332132985464'::Decimal128(20), 20);
+SELECT toDecimalString('-128.78932312332132985464123123'::Decimal128(26), 20); -- rounding
+SELECT toDecimalString('128.78932312332132985464'::Decimal128(20), 77); -- more digits required than exist
+SELECT toDecimalString('128.789323123321329854641231237893231233213298546'::Decimal256(45), 10); -- rounding
+SELECT toDecimalString('-128.789323123321329854641231237893231233213298546'::Decimal256(45), 77); -- more digits required than exist
+
+-- Max number of decimal fractional digits is defined as 77 for Int/UInt/Decimal and 60 for Float.
+-- These requests exceed the limit, so each of them is expected to throw.
+SELECT toDecimalString('32.32'::Float32, 61); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER}
+SELECT toDecimalString('64.64'::Float64, 61); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER}
+SELECT toDecimalString('88'::UInt8, 78); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER}
+SELECT toDecimalString('646464'::Int256, 78); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER}
+SELECT toDecimalString('-128.789323123321329854641231237893231233213298546'::Decimal256(45), 78); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER}
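The limits exercised by the last block of the test come straight from std::numeric_limits: digits10 of a 256-bit unsigned integer is 77, and double-conversion caps fixed formatting at 60 digits. A quick check of the digits10 values for standard widths (the 256-bit line is stated as a comment because there is no standard 256-bit type to assert on):

    #include <cstdint>
    #include <limits>

    /// digits10 is the largest count of decimal digits such that ANY number with
    /// that many digits fits the type. UInt256's maximum, 2^256 - 1, has 78
    /// decimal digits, so its digits10 is 77 = floor(256 * log10(2)).
    static_assert(std::numeric_limits<uint8_t>::digits10 == 2);   /// max is 255
    static_assert(std::numeric_limits<uint32_t>::digits10 == 9);  /// max is 4294967295
    static_assert(std::numeric_limits<uint64_t>::digits10 == 19); /// max is 18446744073709551615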
From df5ff1383c5c6f7e24cb6933246fc04cf5dfe702 Mon Sep 17 00:00:00 2001
From: Nikita Taranov
Date: Mon, 24 Jul 2023 14:57:05 +0200
Subject: [PATCH 1946/1997] Fix settings not applied for explain query when
 format provided (#51859)
---
 src/Interpreters/InterpreterSetQuery.cpp           |  3 +++
 ..._explain_settings_not_applied_bug.reference    | 11 +++++++++++
 .../02798_explain_settings_not_applied_bug.sql    | 18 ++++++++++++++++++
 3 files changed, 32 insertions(+)
 create mode 100644 tests/queries/0_stateless/02798_explain_settings_not_applied_bug.reference
 create mode 100644 tests/queries/0_stateless/02798_explain_settings_not_applied_bug.sql

diff --git a/src/Interpreters/InterpreterSetQuery.cpp b/src/Interpreters/InterpreterSetQuery.cpp
index 6db57a4f950..e9118b747e5 100644
--- a/src/Interpreters/InterpreterSetQuery.cpp
+++ b/src/Interpreters/InterpreterSetQuery.cpp
@@ -65,6 +65,9 @@ void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMuta
     }
     else if (const auto * explain_query = ast->as<ASTExplainQuery>())
     {
+        if (explain_query->settings_ast)
+            InterpreterSetQuery(explain_query->settings_ast, context_).executeForCurrentContext();
+
         applySettingsFromQuery(explain_query->getExplainedQuery(), context_);
     }
     else if (const auto * query_with_output = dynamic_cast<const ASTQueryWithOutput *>(ast.get()))
diff --git a/tests/queries/0_stateless/02798_explain_settings_not_applied_bug.reference b/tests/queries/0_stateless/02798_explain_settings_not_applied_bug.reference
new file mode 100644
index 00000000000..6fc36a0ba01
--- /dev/null
+++ b/tests/queries/0_stateless/02798_explain_settings_not_applied_bug.reference
@@ -0,0 +1,11 @@
+   explain
+
+(Expression)
+ExpressionTransform
+  (Aggregating)
+  FinalizeAggregatedTransform
+    AggregatingInOrderTransform
+      (Expression)
+      ExpressionTransform
+        (ReadFromMergeTree)
+        MergeTreeInOrder 0 → 1
diff --git a/tests/queries/0_stateless/02798_explain_settings_not_applied_bug.sql b/tests/queries/0_stateless/02798_explain_settings_not_applied_bug.sql
new file mode 100644
index 00000000000..76f2129abfa
--- /dev/null
+++ b/tests/queries/0_stateless/02798_explain_settings_not_applied_bug.sql
@@ -0,0 +1,18 @@
+SET read_in_order_two_level_merge_threshold=1000000;
+
+DROP TABLE IF EXISTS t;
+CREATE TABLE t(a UInt64)
+ENGINE = MergeTree
+ORDER BY a;
+
+INSERT INTO t SELECT * FROM numbers_mt(1e3);
+OPTIMIZE TABLE t FINAL;
+
+EXPLAIN PIPELINE
+SELECT a
+FROM t
+GROUP BY a
+FORMAT PrettySpace
+SETTINGS optimize_aggregation_in_order = 1;
+
+DROP TABLE t;
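The shape of the fix in this patch: settings attached to the EXPLAIN node itself must be applied before recursing into the explained query, otherwise EXPLAIN ... FORMAT x SETTINGS y silently drops the SETTINGS clause. Reduced to a toy AST, as an illustrative sketch (all types here are made up for the example, not ClickHouse's):

    #include <memory>
    #include <string>
    #include <utility>
    #include <vector>

    /// A node either carries its own SETTINGS clause, a nested explained
    /// query, or both (as an EXPLAIN ... SETTINGS ... node does).
    struct Ast
    {
        std::vector<std::pair<std::string, std::string>> settings;  /// SETTINGS k = v pairs
        std::shared_ptr<Ast> explained_query;                       /// set for EXPLAIN nodes
    };

    struct Context
    {
        std::vector<std::pair<std::string, std::string>> applied;
        void apply(const std::pair<std::string, std::string> & kv) { applied.push_back(kv); }
    };

    void applySettingsFromQuery(const std::shared_ptr<Ast> & ast, Context & ctx)
    {
        if (!ast)
            return;
        for (const auto & kv : ast->settings)   /// this was the step the bug skipped
            ctx.apply(kv);
        applySettingsFromQuery(ast->explained_query, ctx);  /// then recurse
    }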
From c7239c64ea36a6994cd88d34edc3774243472a68 Mon Sep 17 00:00:00 2001
From: Raúl Marín
Date: Mon, 24 Jul 2023 15:16:44 +0200
Subject: [PATCH 1947/1997] Remove unused code
---
 src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp
index 48adf36e678..3eba9a9de24 100644
--- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp
+++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp
@@ -145,9 +145,6 @@ bool IMergeTreeSelectAlgorithm::getNewTask()
 
 ChunkAndProgress IMergeTreeSelectAlgorithm::read()
 {
-    size_t num_read_rows = 0;
-    size_t num_read_bytes = 0;
-
     while (!is_cancelled)
     {
         try
@@ -178,10 +175,6 @@ ChunkAndProgress IMergeTreeSelectAlgorithm::read()
                     ordered_columns.push_back(res.block.getByName(name).column);
             }
 
-            /// Account a progress from previous empty chunks.
-            res.num_read_rows += num_read_rows;
-            res.num_read_bytes += num_read_bytes;
-
             return ChunkAndProgress{
                 .chunk = Chunk(ordered_columns, res.row_count),
                 .num_read_rows = res.num_read_rows,
@@ -194,7 +187,7 @@ ChunkAndProgress IMergeTreeSelectAlgorithm::read()
         }
     }
 
-    return {Chunk(), num_read_rows, num_read_bytes, true};
+    return {Chunk(), 0, 0, true};
 }
 
 void IMergeTreeSelectAlgorithm::initializeMergeTreeReadersForCurrentTask(
From c6e6fd761317662c05532d695c20be72f8e847d2 Mon Sep 17 00:00:00 2001
From: Nikita Taranov
Date: Mon, 24 Jul 2023 15:58:21 +0200
Subject: [PATCH 1948/1997] Shard `OpenedFileCache` to avoid lock contention
 (#51341)

* shard OpenedFileCache to avoid lock contention

* Update OpenedFileCache.h

* fix build

---------

Co-authored-by: Alexey Milovidov
---
 src/Common/ProfileEvents.cpp |   1 +
 src/IO/OpenedFileCache.h     | 109 +++++++++++++++++++++--------------
 2 files changed, 68 insertions(+), 42 deletions(-)

diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp
index 4a656e38edf..f18a67fa565 100644
--- a/src/Common/ProfileEvents.cpp
+++ b/src/Common/ProfileEvents.cpp
@@ -45,6 +45,7 @@
     M(MMappedFileCacheMisses, "Number of times a file has not been found in the MMap cache (for the 'mmap' read_method), so we had to mmap it again.") \
     M(OpenedFileCacheHits, "Number of times a file has been found in the opened file cache, so we didn't have to open it again.") \
     M(OpenedFileCacheMisses, "Number of times a file has not been found in the opened file cache, so we had to open it again.") \
+    M(OpenedFileCacheMicroseconds, "Amount of time spent executing OpenedFileCache methods.") \
     M(AIOWrite, "Number of writes with Linux or FreeBSD AIO interface") \
     M(AIOWriteBytes, "Number of bytes written with Linux or FreeBSD AIO interface") \
     M(AIORead, "Number of reads with Linux or FreeBSD AIO interface") \
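The OpenedFileCache diff that follows splits a single map guarded by a single mutex into 1024 independently locked shards selected by a hash of the file path, so threads opening different files no longer serialize on one lock. The pattern in a standalone form, with std::hash instead of CityHash and plain string keys instead of (path, flags) pairs (purely illustrative, not the ClickHouse class):

    #include <functional>
    #include <map>
    #include <mutex>
    #include <string>
    #include <vector>

    /// N shards, each with its own mutex and map; a key's shard is fixed by its
    /// hash, so operations on different shards never contend.
    template <typename Value, size_t N = 1024>
    class ShardedMap
    {
        struct Shard
        {
            std::mutex mutex;
            std::map<std::string, Value> map;
        };
        std::vector<Shard> shards{N};

        Shard & shardFor(const std::string & key)
        {
            return shards[std::hash<std::string>{}(key) % N];
        }

    public:
        void set(const std::string & key, Value value)
        {
            auto & shard = shardFor(key);
            std::lock_guard lock(shard.mutex);
            shard.map[key] = std::move(value);
        }

        bool get(const std::string & key, Value & out)
        {
            auto & shard = shardFor(key);
            std::lock_guard lock(shard.mutex);
            auto it = shard.map.find(key);
            if (it == shard.map.end())
                return false;
            out = it->second;
            return true;
        }
    };

The design only pays off when the key distribution spreads across shards; a hot single key still funnels everything through one mutex, which is fine here because contention came from many distinct files.

diff --git a/src/IO/OpenedFileCache.h b/src/IO/OpenedFileCache.h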
index 61e502a494b..2cecc675af7 100644 --- a/src/IO/OpenedFileCache.h +++ b/src/IO/OpenedFileCache.h @@ -4,14 +4,18 @@ #include #include -#include #include +#include +#include + +#include namespace ProfileEvents { extern const Event OpenedFileCacheHits; extern const Event OpenedFileCacheMisses; + extern const Event OpenedFileCacheMicroseconds; } namespace DB @@ -26,57 +30,79 @@ namespace DB */ class OpenedFileCache { -private: - using Key = std::pair; + class OpenedFileMap + { + using Key = std::pair; - using OpenedFileWeakPtr = std::weak_ptr; - using Files = std::map; + using OpenedFileWeakPtr = std::weak_ptr; + using Files = std::map; - Files files; - std::mutex mutex; + Files files; + std::mutex mutex; + + public: + using OpenedFilePtr = std::shared_ptr; + + OpenedFilePtr get(const std::string & path, int flags) + { + Key key(path, flags); + + std::lock_guard lock(mutex); + + auto [it, inserted] = files.emplace(key, OpenedFilePtr{}); + if (!inserted) + { + if (auto res = it->second.lock()) + { + ProfileEvents::increment(ProfileEvents::OpenedFileCacheHits); + return res; + } + } + ProfileEvents::increment(ProfileEvents::OpenedFileCacheMisses); + + OpenedFilePtr res + { + new OpenedFile(path, flags), + [key, this](auto ptr) + { + { + std::lock_guard another_lock(mutex); + files.erase(key); + } + delete ptr; + } + }; + + it->second = res; + return res; + } + + void remove(const std::string & path, int flags) + { + Key key(path, flags); + std::lock_guard lock(mutex); + files.erase(key); + } + }; + + static constexpr size_t buckets = 1024; + std::vector impls{buckets}; public: - using OpenedFilePtr = std::shared_ptr; + using OpenedFilePtr = OpenedFileMap::OpenedFilePtr; OpenedFilePtr get(const std::string & path, int flags) { - Key key(path, flags); - - std::lock_guard lock(mutex); - - auto [it, inserted] = files.emplace(key, OpenedFilePtr{}); - if (!inserted) - { - if (auto res = it->second.lock()) - { - ProfileEvents::increment(ProfileEvents::OpenedFileCacheHits); - return res; - } - } - ProfileEvents::increment(ProfileEvents::OpenedFileCacheMisses); - - OpenedFilePtr res - { - new OpenedFile(path, flags), - [key, this](auto ptr) - { - { - std::lock_guard another_lock(mutex); - files.erase(key); - } - delete ptr; - } - }; - - it->second = res; - return res; + ProfileEventTimeIncrement watch(ProfileEvents::OpenedFileCacheMicroseconds); + const auto bucket = CityHash_v1_0_2::CityHash64(path.data(), path.length()) % buckets; + return impls[bucket].get(path, flags); } void remove(const std::string & path, int flags) { - Key key(path, flags); - std::lock_guard lock(mutex); - files.erase(key); + ProfileEventTimeIncrement watch(ProfileEvents::OpenedFileCacheMicroseconds); + const auto bucket = CityHash_v1_0_2::CityHash64(path.data(), path.length()) % buckets; + impls[bucket].remove(path, flags); } static OpenedFileCache & instance() @@ -87,5 +113,4 @@ public: }; using OpenedFileCachePtr = std::shared_ptr; - } From d2d100b68a4fc1765708a276b217faf403722fb4 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 24 Jul 2023 17:05:57 +0200 Subject: [PATCH 1949/1997] Cancel execution in PipelineExecutor in case of exception in graph->updateNode --- src/Processors/Executors/PipelineExecutor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index f523e7b7cf9..1508d834592 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ 
b/src/Processors/Executors/PipelineExecutor.cpp @@ -272,7 +272,7 @@ void PipelineExecutor::executeStepImpl(size_t thread_num, std::atomic_bool * yie /// Prepare processor after execution. if (!graph->updateNode(context.getProcessorID(), queue, async_queue)) - finish(); + cancel(); /// Push other tasks to global queue. tasks.pushTasks(queue, async_queue, context); From f067f8c46d2aec217c3f835441ca1a2a281c72fd Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Mon, 24 Jul 2023 15:37:16 +0000 Subject: [PATCH 1950/1997] Make 01951_distributed_push_down_limit analyzer agnostic --- tests/analyzer_tech_debt.txt | 1 - ...1951_distributed_push_down_limit.reference | 32 +++++++++---------- .../01951_distributed_push_down_limit.sql | 4 +-- 3 files changed, 18 insertions(+), 19 deletions(-) diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index b746d1610a4..1d56b2c3a71 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -72,7 +72,6 @@ 01925_test_storage_merge_aliases 01930_optimize_skip_unused_shards_rewrite_in 01947_mv_subquery -01951_distributed_push_down_limit 01952_optimize_distributed_group_by_sharding_key 02000_join_on_const 02001_shard_num_shard_count diff --git a/tests/queries/0_stateless/01951_distributed_push_down_limit.reference b/tests/queries/0_stateless/01951_distributed_push_down_limit.reference index b9a7d17e955..d175d31846b 100644 --- a/tests/queries/0_stateless/01951_distributed_push_down_limit.reference +++ b/tests/queries/0_stateless/01951_distributed_push_down_limit.reference @@ -1,19 +1,19 @@ -- { echo } -explain select * from remote('127.{1,2}', view(select * from numbers(1e6))) order by number limit 10 settings distributed_push_down_limit=0; -Expression (Projection) - Limit (preliminary LIMIT (without OFFSET)) - Sorting (Merge sorted streams after aggregation stage for ORDER BY) +explain description=0 select * from remote('127.{1,2}', view(select * from numbers(1e6))) order by number limit 10 settings distributed_push_down_limit=0; +Expression + Limit + Sorting Union - Sorting (Sorting for ORDER BY) - Expression ((Before ORDER BY + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Projection + Before ORDER BY))))) - ReadFromStorage (SystemNumbers) - ReadFromRemote (Read from remote replica) -explain select * from remote('127.{1,2}', view(select * from numbers(1e6))) order by number limit 10 settings distributed_push_down_limit=1; -Expression (Projection) - Limit (preliminary LIMIT (without OFFSET)) - Sorting (Merge sorted streams after aggregation stage for ORDER BY) + Sorting + Expression + ReadFromStorage + ReadFromRemote +explain description=0 select * from remote('127.{1,2}', view(select * from numbers(1e6))) order by number limit 10 settings distributed_push_down_limit=1; +Expression + Limit + Sorting Union - Sorting (Sorting for ORDER BY) - Expression ((Before ORDER BY + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Projection + Before ORDER BY))))) - ReadFromStorage (SystemNumbers) - ReadFromRemote (Read from remote replica) + Sorting + Expression + ReadFromStorage + ReadFromRemote diff --git a/tests/queries/0_stateless/01951_distributed_push_down_limit.sql b/tests/queries/0_stateless/01951_distributed_push_down_limit.sql index 184e6321988..aee714a494e 100644 --- a/tests/queries/0_stateless/01951_distributed_push_down_limit.sql +++ b/tests/queries/0_stateless/01951_distributed_push_down_limit.sql @@ -3,5 +3,5 @@ 
set prefer_localhost_replica = 1; -- { echo } -explain select * from remote('127.{1,2}', view(select * from numbers(1e6))) order by number limit 10 settings distributed_push_down_limit=0; -explain select * from remote('127.{1,2}', view(select * from numbers(1e6))) order by number limit 10 settings distributed_push_down_limit=1; +explain description=0 select * from remote('127.{1,2}', view(select * from numbers(1e6))) order by number limit 10 settings distributed_push_down_limit=0; +explain description=0 select * from remote('127.{1,2}', view(select * from numbers(1e6))) order by number limit 10 settings distributed_push_down_limit=1; From 0bbf26549f4fb49c599b4a58475c71bccfe9b37b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 24 Jul 2023 18:13:15 +0200 Subject: [PATCH 1951/1997] Fix test --- tests/integration/test_drop_is_lock_free/test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_drop_is_lock_free/test.py b/tests/integration/test_drop_is_lock_free/test.py index 8d92d784226..9f595800bea 100644 --- a/tests/integration/test_drop_is_lock_free/test.py +++ b/tests/integration/test_drop_is_lock_free/test.py @@ -104,7 +104,7 @@ def test_query_is_lock_free(lock_free_query, exclusive_table): select_handler = node.get_query_request( f""" - SELECT sleepEachRow(3) FROM {exclusive_table}; + SELECT sleepEachRow(3) FROM {exclusive_table} SETTINGS function_sleep_max_microseconds_per_block = 0; """, query_id=query_id, ) @@ -173,7 +173,7 @@ def test_query_is_permanent(transaction, permanent, exclusive_table): select_handler = node.get_query_request( f""" - SELECT sleepEachRow(3) FROM {exclusive_table}; + SELECT sleepEachRow(3) FROM {exclusive_table} SETTINGS function_sleep_max_microseconds_per_block = 0; """, query_id=query_id, ) From 032956dd1eeca994d6fa5a66f974cfa10203c205 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 24 Jul 2023 18:42:02 +0200 Subject: [PATCH 1952/1997] fix --- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 2 +- .../test.py | 27 ++++++++++--------- .../01111_create_drop_replicated_db_stress.sh | 2 +- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 2393f45ebb6..e11913fc3d2 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -576,7 +576,7 @@ int32_t ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper /// It's ok if replica became readonly due to connection loss after we got current zookeeper (in this case zookeeper must be expired). /// And it's ok if replica became readonly after shutdown. /// In other cases it's likely that someone called pullLogsToQueue(...) when queue is not initialized yet by RestartingThread. 
- bool not_completely_initialized = storage.is_readonly && !zookeeper->expired() && !storage.shutdown_called; + bool not_completely_initialized = storage.is_readonly && !zookeeper->expired() && !storage.shutdown_prepared_called; if (not_completely_initialized) throw Exception(ErrorCodes::LOGICAL_ERROR, "Tried to pull logs to queue (reason: {}) on readonly replica {}, it's a bug", reason, storage.getStorageID().getNameForLogs()); diff --git a/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py b/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py index a2a4ec92cf7..20b6a6c977f 100644 --- a/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py +++ b/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py @@ -49,25 +49,28 @@ def test_shutdown_and_wait(start_cluster): node.stop_clickhouse(kill=False, stop_wait_sec=60) p = Pool(50) - pm = PartitionManager() - - pm.partition_instances(node1, node2) def insert(value): node1.query(f"INSERT INTO test_table VALUES ({value})") - p.map(insert, range(1, 50)) + with PartitionManager() as pm: + pm.partition_instances(node1, node2) + # iptables rules must be applied immediately, but looks like sometimes they are not... + time.sleep(3) - # Start shutdown async - waiter = p.apply_async(soft_shutdown, (node1,)) - # to be sure that shutdown started - time.sleep(5) + p.map(insert, range(1, 50)) - # node 2 partitioned and don't see any data - assert node2.query("SELECT * FROM test_table") == "0\n" + # Start shutdown async + waiter = p.apply_async(soft_shutdown, (node1,)) + # to be sure that shutdown started + time.sleep(5) + + # node 2 partitioned and don't see any data + assert node2.query("SELECT * FROM test_table") == "0\n" + + # Restore network + pm.heal_all() - # Restore network - pm.heal_all() # wait for shutdown to finish waiter.get() diff --git a/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh b/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh index cc63af3676b..59899e1c14a 100755 --- a/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh +++ b/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh @@ -56,7 +56,7 @@ function create_table() if [ -z "$database" ]; then continue; fi $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=0 -q \ "create table $database.rmt_${RANDOM}_${RANDOM}_${RANDOM} (n int) engine=ReplicatedMergeTree order by tuple() -- suppress $CLICKHOUSE_TEST_ZOOKEEPER_PREFIX" \ - 2>&1| grep -Fa "Exception: " | grep -Fv "Macro 'uuid' and empty arguments" | grep -Fv "Cannot enqueue query" | grep -Fv "ZooKeeper session expired" | grep -Fv UNKNOWN_DATABASE + 2>&1| grep -Fa "Exception: " | grep -Fv "Macro 'uuid' and empty arguments" | grep -Fv "Cannot enqueue query" | grep -Fv "ZooKeeper session expired" | grep -Fv UNKNOWN_DATABASE | grep -Fv TABLE_IS_DROPPED sleep 0.$RANDOM done } From 22a2fa097f3795cb2a483e899482b97f80aa8189 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 24 Jul 2023 19:40:02 +0200 Subject: [PATCH 1953/1997] Improve error messages --- src/Functions/GregorianDate.cpp | 2 +- src/Functions/parseDateTime.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Functions/GregorianDate.cpp b/src/Functions/GregorianDate.cpp index aaaeeb7339d..f28194781c2 100644 --- a/src/Functions/GregorianDate.cpp +++ b/src/Functions/GregorianDate.cpp @@ -125,7 +125,7 @@ void GregorianDate::init(ReadBuffer & in) assertEOF(in); if (month_ < 1 || month_ > 12 || day_of_month_ < 1 || 
day_of_month_ > monthLength(is_leap_year(year_), month_))
-        throw Exception(ErrorCodes::CANNOT_PARSE_DATE, "Invalid date");
+        throw Exception(ErrorCodes::CANNOT_PARSE_DATE, "Invalid date, out of range (year: {}, month: {}, day_of_month: {}).",
+            year_, month_, day_of_month_);
 }
 
 bool GregorianDate::tryInit(ReadBuffer & in)
diff --git a/src/Functions/parseDateTime.cpp b/src/Functions/parseDateTime.cpp
index c3fbc08c4a9..2381def9151 100644
--- a/src/Functions/parseDateTime.cpp
+++ b/src/Functions/parseDateTime.cpp
@@ -398,7 +398,7 @@ namespace
         static Int32 daysSinceEpochFromDayOfYear(Int32 year_, Int32 day_of_year_)
         {
             if (!isDayOfYearValid(year_, day_of_year_))
-                throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Invalid day of year, year:{} day of year:{}", year_, day_of_year_);
+                throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Invalid day of year, out of range (year: {} day of year: {})", year_, day_of_year_);
 
             Int32 res = daysSinceEpochFromDate(year_, 1, 1);
             res += day_of_year_ - 1;
@@ -408,7 +408,7 @@ namespace
         static Int32 daysSinceEpochFromDate(Int32 year_, Int32 month_, Int32 day_)
         {
             if (!isDateValid(year_, month_, day_))
-                throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Invalid date, year:{} month:{} day:{}", year_, month_, day_);
+                throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Invalid date, out of range (year: {} month: {} day_of_month: {})", year_, month_, day_);
 
             Int32 res = cumulativeYearDays[year_ - 1970];
             res += isLeapYear(year_) ? cumulativeLeapDays[month_ - 1] : cumulativeDays[month_ - 1];
From 654af41431423907fdffed93287e9160f78698b9 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 24 Jul 2023 19:45:55 +0200
Subject: [PATCH 1954/1997] Fix race
---
 src/Functions/transform.cpp | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/Functions/transform.cpp b/src/Functions/transform.cpp
index 79168d82c54..e03701327b1 100644
--- a/src/Functions/transform.cpp
+++ b/src/Functions/transform.cpp
@@ -658,13 +658,13 @@ namespace
         std::unique_ptr<StringToIdx> table_string_to_idx;
         std::unique_ptr<AnythingToIdx> table_anything_to_idx;
 
-        bool is_empty = false;
-
         ColumnPtr from_column;
         ColumnPtr to_column;
         ColumnPtr default_column;
 
-        std::atomic<bool> initialized{false};
+        bool is_empty = false;
+        bool initialized = false;
+
         std::mutex mutex;
     };
 
@@ -697,13 +697,12 @@ namespace
         /// Can be called from different threads. It works only on the first call.
         void initialize(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const
        {
+            std::lock_guard lock(cache.mutex);
             if (cache.initialized)
                 return;
 
             const DataTypePtr & from_type = arguments[0].type;
 
-            std::lock_guard lock(cache.mutex);
-
             if (from_type->onlyNull())
             {
                 cache.is_empty = true;
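The race fixed above is the classic check-then-lock bug: two threads can both read initialized == false before either takes the mutex, and both run the initialization. Moving the lock in front of the flag test, as the patch does, makes the check-and-initialize sequence atomic, which also lets the flag be a plain bool instead of an atomic. In miniature (illustrative, standalone):

    #include <mutex>

    struct LazyCache
    {
        std::mutex mutex;
        bool initialized = false;   /// plain bool is fine: only read/written under the mutex
        int value = 0;

        int getOrInit()
        {
            std::lock_guard lock(mutex);  /// lock FIRST, then test the flag
            if (!initialized)
            {
                value = 42;               /// stand-in for the expensive one-time setup
                initialized = true;
            }
            return value;
        }
    };

std::call_once is the standard alternative when nothing else needs the mutex; here the same mutex already guards the cached columns, so reusing it is the natural choice.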
From c35da36ff2b78dff5b964774673b8c713aa22e95 Mon Sep 17 00:00:00 2001
From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com>
Date: Mon, 24 Jul 2023 19:50:53 +0200
Subject: [PATCH 1955/1997] Fix default value
---
 base/poco/Foundation/include/Poco/URI.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/base/poco/Foundation/include/Poco/URI.h b/base/poco/Foundation/include/Poco/URI.h
index f4505147ced..eba8109253d 100644
--- a/base/poco/Foundation/include/Poco/URI.h
+++ b/base/poco/Foundation/include/Poco/URI.h
@@ -57,7 +57,7 @@ public:
     URI();
         /// Creates an empty URI.
 
-    explicit URI(const std::string & uri, bool disable_url_encoding = true);
+    explicit URI(const std::string & uri, bool disable_url_encoding = false);
         /// Parses an URI from the given string. Throws a
         /// SyntaxException if the uri is not valid.
@@ -362,7 +362,7 @@ private:
     std::string _query;
     std::string _fragment;
 
-    bool _disable_url_encoding = true;
+    bool _disable_url_encoding = false;
 };
From 2f99363db0356f146db427934b63e9158b7b9858 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov
Date: Mon, 24 Jul 2023 20:51:53 +0300
Subject: [PATCH 1956/1997] Update 02136_scalar_subquery_metrics.sql
---
 tests/queries/0_stateless/02136_scalar_subquery_metrics.sql | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/02136_scalar_subquery_metrics.sql b/tests/queries/0_stateless/02136_scalar_subquery_metrics.sql
index 180610288aa..17ff367a58d 100644
--- a/tests/queries/0_stateless/02136_scalar_subquery_metrics.sql
+++ b/tests/queries/0_stateless/02136_scalar_subquery_metrics.sql
@@ -6,7 +6,7 @@ SELECT '#02136_scalar_subquery_4', (SELECT max(number) FROM numbers(1000)) as n
 SYSTEM FLUSH LOGS;
 
 SELECT read_rows, query FROM system.query_log WHERE
-    event_date > yesterday()
+    event_date >= yesterday()
     AND type = 'QueryFinish'
     AND current_database == currentDatabase()
     AND query LIKE 'SELECT ''#02136_scalar_subquery_%'
From ab086f15d09048deb30bef84d5d3e7e62fefd898 Mon Sep 17 00:00:00 2001
From: Nikita Taranov
Date: Sat, 3 Jun 2023 21:09:25 +0200
Subject: [PATCH 1957/1997] try to push down more
---
 src/Processors/QueryPlan/JoinStep.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Processors/QueryPlan/JoinStep.cpp b/src/Processors/QueryPlan/JoinStep.cpp
index 2ff8f161e99..33fa7955e0d 100644
--- a/src/Processors/QueryPlan/JoinStep.cpp
+++ b/src/Processors/QueryPlan/JoinStep.cpp
@@ -54,7 +54,7 @@ QueryPipelineBuilderPtr JoinStep::updatePipeline(QueryPipelineBuilders pipelines
 
 bool JoinStep::allowPushDownToRight() const
 {
-    return join->pipelineType() == JoinPipelineType::YShaped;
+    return join->pipelineType() == JoinPipelineType::YShaped || join->pipelineType() == JoinPipelineType::FillRightFirst;
 }
 
 void JoinStep::describePipeline(FormatSettings & settings) const
From b2acbe42b722f83c0ffde1c8697e5f19bb14747f Mon Sep 17 00:00:00 2001
From: Nikita Taranov
Date: Sat, 3 Jun 2023 21:15:19 +0200
Subject: [PATCH 1958/1997] add perf test
---
 tests/performance/join_filter_pushdown.xml | 9 +++++++++
 1 file changed, 9 insertions(+)
 create mode 100644 tests/performance/join_filter_pushdown.xml

diff --git a/tests/performance/join_filter_pushdown.xml b/tests/performance/join_filter_pushdown.xml
new file mode 100644
index 00000000000..3adbbb3029e
--- /dev/null
+++ b/tests/performance/join_filter_pushdown.xml
@@ -0,0 +1,9 @@
+<test>
+    <create_query>create table t(a UInt64) engine=MergeTree order by tuple()</create_query>
+    <fill_query>insert into t select * from numbers_mt(5e6)</fill_query>
+
+    <query>select * from t as t0 inner join t as t1 using(a) where t1.a = 100</query>
+
+    <drop_query>drop table t</drop_query>
+</test>
+
From d0894532feff599d1e73acca1a9010a53a26b004 Mon Sep 17 00:00:00 2001
From: Nikita Taranov
Date: Mon, 26 Jun 2023 21:17:34 +0200
Subject: [PATCH 1959/1997] fix
---
 src/Processors/QueryPlan/Optimizations/filterPushDown.cpp | 4 ++++
 .../0_stateless/02514_analyzer_drop_join_on.reference     | 4 ++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp
index 4336de41b7b..af47b6ff4cd 100644
--- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp
+++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp
@@ -341,6 +341,10 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes
         if (table_join.kind() != JoinKind::Inner &&
table_join.kind() != JoinKind::Cross && table_join.kind() != kind) return 0; + /// There is no ASOF Right join, so we're talking about pushing to the right side + if (kind == JoinKind::Right && table_join.strictness() == JoinStrictness::Asof) + return 0; + bool is_left = kind == JoinKind::Left; const auto & input_header = is_left ? child->getInputStreams().front().header : child->getInputStreams().back().header; const auto & res_header = child->getOutputStream().header; diff --git a/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference b/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference index 0037ab85c07..1b177b84afa 100644 --- a/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference +++ b/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference @@ -107,7 +107,7 @@ Header: bx String bx_0 String c2_5 String c1_3 UInt64 - Filter (( + (JOIN actions + DROP unused columns after JOIN))) + Expression Header: a2_6 String bx_0 String c2_5 String @@ -139,7 +139,7 @@ Header: bx String ReadFromMemoryStorage Header: b1 UInt64 b2 String - Expression ((JOIN actions + Change column names to column identifiers)) + Filter (( + (JOIN actions + Change column names to column identifiers))) Header: c1_3 UInt64 c2_5 String ReadFromMemoryStorage From 104d3bbbae82309d7d55d3a46a28e6f791791fba Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 24 Jul 2023 21:10:00 +0200 Subject: [PATCH 1960/1997] add test --- .../0_stateless/01763_filter_push_down_bugs.sql | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/queries/0_stateless/01763_filter_push_down_bugs.sql b/tests/queries/0_stateless/01763_filter_push_down_bugs.sql index 9a5ef4727c5..8470b4a3379 100644 --- a/tests/queries/0_stateless/01763_filter_push_down_bugs.sql +++ b/tests/queries/0_stateless/01763_filter_push_down_bugs.sql @@ -66,3 +66,17 @@ EXPLAIN indexes=1 SELECT id, delete_time FROM t1 DROP TABLE IF EXISTS t1; DROP TABLE IF EXISTS t2; + +-- expected to get row (1, 3, 1, 4) from JOIN and empty result from the query +SELECT * +FROM +( + SELECT * + FROM Values('id UInt64, t UInt64', (1, 3)) +) AS t1 +ASOF INNER JOIN +( + SELECT * + FROM Values('id UInt64, t UInt64', (1, 1), (1, 2), (1, 3), (1, 4), (1, 5)) +) AS t2 ON (t1.id = t2.id) AND (t1.t < t2.t) +WHERE t2.t != 4; From edc479bbf8c72a7076b092dd880fc7d8d2252e4d Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 24 Jul 2023 23:40:27 +0200 Subject: [PATCH 1961/1997] fix --- src/Storages/StorageReplicatedMergeTree.cpp | 4 +++- src/Storages/StorageReplicatedMergeTree.h | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 6cdcffab50a..e6431927805 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -4861,6 +4861,7 @@ void StorageReplicatedMergeTree::startupImpl(bool from_attach_thread) LOG_TRACE(log, "Waiting for RestartingThread to startup table"); } + std::lock_guard lock{flush_and_shutdown_mutex}; if (shutdown_prepared_called.load() || shutdown_called.load()) throw Exception(ErrorCodes::TABLE_IS_DROPPED, "Cannot startup table because it is dropped"); @@ -4906,6 +4907,7 @@ void StorageReplicatedMergeTree::startupImpl(bool from_attach_thread) void StorageReplicatedMergeTree::flushAndPrepareForShutdown() { + std::lock_guard lock{flush_and_shutdown_mutex}; if (shutdown_prepared_called.exchange(true)) return; @@ -4922,7 +4924,7 @@ void 
StorageReplicatedMergeTree::flushAndPrepareForShutdown() attach_thread->shutdown(); restarting_thread.shutdown(/* part_of_full_shutdown */true); - /// Explicetly set the event, because the restarting thread will not set it again + /// Explicitly set the event, because the restarting thread will not set it again startup_event.set(); shutdown_deadline.emplace(std::chrono::system_clock::now() + std::chrono::milliseconds(settings_ptr->wait_for_unique_parts_send_before_shutdown_ms.totalMilliseconds())); } diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 1c721e3724b..daa39536fa7 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -481,6 +481,9 @@ private: std::atomic shutdown_prepared_called {false}; std::optional shutdown_deadline; + /// We call flushAndPrepareForShutdown before acquiring DDLGuard, so we can shutdown a table that is being created right now + mutable std::mutex flush_and_shutdown_mutex; + mutable std::mutex last_sent_parts_mutex; std::condition_variable last_sent_parts_cv; From 21382afa2b2c686cde3ac0702b548d872373d3b1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 25 Jul 2023 06:10:04 +0200 Subject: [PATCH 1962/1997] Check for punctuation --- .../AggregateFunctionSimpleLinearRegression.cpp | 2 +- src/Common/ConcurrentBoundedQueue.h | 2 +- src/Common/parseRemoteDescription.cpp | 16 ++-------------- src/Common/parseRemoteDescription.h | 2 +- src/Common/tests/gtest_sensitive_data_masker.cpp | 6 +++--- src/Coordination/ZooKeeperDataReader.cpp | 2 +- src/Core/tests/gtest_settings.cpp | 3 +-- src/DataTypes/NumberTraits.h | 2 +- .../getDictionaryConfigurationFromAST.cpp | 2 +- src/Functions/FunctionsStringHash.cpp | 5 ++--- src/Functions/GatherUtils/sliceHasImplAnyAll.h | 4 ++-- src/IO/S3/PocoHTTPClient.cpp | 2 +- src/Interpreters/Aggregator.h | 2 +- src/Interpreters/DDLWorker.cpp | 2 +- src/Interpreters/InterpreterRenameQuery.cpp | 2 +- src/Interpreters/TransactionLog.cpp | 2 +- src/Parsers/Kusto/ParserKQLOperators.h | 16 ++++++++-------- src/Parsers/tests/gtest_Parser.cpp | 4 ++-- .../Formats/Impl/ArrowFieldIndexUtil.h | 2 +- .../Formats/Impl/JSONEachRowRowInputFormat.cpp | 4 ++-- .../QueryPlan/IntersectOrExceptStep.cpp | 2 +- .../Transforms/buildPushingToViewsChain.cpp | 2 +- src/Server/HTTPHandler.cpp | 2 +- src/Storages/StorageFile.cpp | 4 ++-- src/Storages/StorageProxy.h | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- src/TableFunctions/TableFunctionFactory.cpp | 2 +- utils/check-style/check-style | 3 +++ 28 files changed, 45 insertions(+), 56 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.cpp b/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.cpp index 1ed6c83af7d..1489db55857 100644 --- a/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.cpp +++ b/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.cpp @@ -66,7 +66,7 @@ AggregateFunctionPtr createAggregateFunctionSimpleLinearRegression( #undef FOR_LEASTSQR_TYPES #undef DISPATCH - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT , + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal types ({}, {}) of arguments of aggregate function {}, must " "be Native Ints, Native UInts or Floats", x_arg->getName(), y_arg->getName(), name); } diff --git a/src/Common/ConcurrentBoundedQueue.h b/src/Common/ConcurrentBoundedQueue.h index 01910c4caff..922607da813 100644 --- a/src/Common/ConcurrentBoundedQueue.h +++ 
b/src/Common/ConcurrentBoundedQueue.h @@ -110,7 +110,7 @@ public: /// Returns false if queue is finished [[nodiscard]] bool pushFront(const T & x) { - return emplaceImpl(/* timeout_milliseconds= */ std::nullopt , x); + return emplaceImpl(/* timeout_milliseconds= */ std::nullopt, x); } /// Returns false if queue is finished diff --git a/src/Common/parseRemoteDescription.cpp b/src/Common/parseRemoteDescription.cpp index 0bcd62d30c7..8ea3f4a0aa5 100644 --- a/src/Common/parseRemoteDescription.cpp +++ b/src/Common/parseRemoteDescription.cpp @@ -52,20 +52,8 @@ static bool parseNumber(const String & description, size_t l, size_t r, size_t & } -/* Parse a string that generates shards and replicas. Separator - one of two characters | or , - * depending on whether shards or replicas are generated. - * For example: - * host1,host2,... - generates set of shards from host1, host2, ... - * host1|host2|... - generates set of replicas from host1, host2, ... - * abc{8..10}def - generates set of shards abc8def, abc9def, abc10def. - * abc{08..10}def - generates set of shards abc08def, abc09def, abc10def. - * abc{x,yy,z}def - generates set of shards abcxdef, abcyydef, abczdef. - * abc{x|yy|z} def - generates set of replicas abcxdef, abcyydef, abczdef. - * abc{1..9}de{f,g,h} - is a direct product, 27 shards. - * abc{1..9}de{0|1} - is a direct product, 9 shards, in each 2 replicas. - */ -std::vector -parseRemoteDescription(const String & description, size_t l, size_t r, char separator, size_t max_addresses, const String & func_name) +std::vector parseRemoteDescription( + const String & description, size_t l, size_t r, char separator, size_t max_addresses, const String & func_name) { std::vector res; std::vector cur; diff --git a/src/Common/parseRemoteDescription.h b/src/Common/parseRemoteDescription.h index e3e4a3f523c..d97558c4728 100644 --- a/src/Common/parseRemoteDescription.h +++ b/src/Common/parseRemoteDescription.h @@ -3,7 +3,7 @@ #include namespace DB { -/* Parse a string that generates shards and replicas. Separator - one of two characters | or , +/* Parse a string that generates shards and replicas. Separator - one of two characters '|' or ',' * depending on whether shards or replicas are generated. * For example: * host1,host2,... - generates set of shards from host1, host2, ... 
diff --git a/src/Common/tests/gtest_sensitive_data_masker.cpp b/src/Common/tests/gtest_sensitive_data_masker.cpp index 92c4edbac2a..f36c4154684 100644 --- a/src/Common/tests/gtest_sensitive_data_masker.cpp +++ b/src/Common/tests/gtest_sensitive_data_masker.cpp @@ -27,7 +27,7 @@ TEST(Common, SensitiveDataMasker) { Poco::AutoPtr empty_xml_config = new Poco::Util::XMLConfiguration(); - DB::SensitiveDataMasker masker(*empty_xml_config , ""); + DB::SensitiveDataMasker masker(*empty_xml_config, ""); masker.addMaskingRule("all a letters", "a+", "--a--"); masker.addMaskingRule("all b letters", "b+", "--b--"); masker.addMaskingRule("all d letters", "d+", "--d--"); @@ -45,7 +45,7 @@ TEST(Common, SensitiveDataMasker) masker.printStats(); #endif - DB::SensitiveDataMasker masker2(*empty_xml_config , ""); + DB::SensitiveDataMasker masker2(*empty_xml_config, ""); masker2.addMaskingRule("hide root password", "qwerty123", "******"); masker2.addMaskingRule("hide SSN", "[0-9]{3}-[0-9]{2}-[0-9]{4}", "000-00-0000"); masker2.addMaskingRule("hide email", "[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,4}", "hidden@hidden.test"); @@ -58,7 +58,7 @@ TEST(Common, SensitiveDataMasker) "SELECT id FROM mysql('localhost:3308', 'database', 'table', 'root', '******') WHERE " "ssn='000-00-0000' or email='hidden@hidden.test'"); - DB::SensitiveDataMasker maskerbad(*empty_xml_config , ""); + DB::SensitiveDataMasker maskerbad(*empty_xml_config, ""); // gtest has not good way to check exception content, so just do it manually (see https://github.com/google/googletest/issues/952 ) try diff --git a/src/Coordination/ZooKeeperDataReader.cpp b/src/Coordination/ZooKeeperDataReader.cpp index 94fc07bcc4a..79929c4e66e 100644 --- a/src/Coordination/ZooKeeperDataReader.cpp +++ b/src/Coordination/ZooKeeperDataReader.cpp @@ -40,7 +40,7 @@ void deserializeSnapshotMagic(ReadBuffer & in) Coordination::read(dbid, in); static constexpr int32_t SNP_HEADER = 1514885966; /// "ZKSN" if (magic_header != SNP_HEADER) - throw Exception(ErrorCodes::CORRUPTED_DATA ,"Incorrect magic header in file, expected {}, got {}", SNP_HEADER, magic_header); + throw Exception(ErrorCodes::CORRUPTED_DATA, "Incorrect magic header in file, expected {}, got {}", SNP_HEADER, magic_header); } int64_t deserializeSessionAndTimeout(KeeperStorage & storage, ReadBuffer & in) diff --git a/src/Core/tests/gtest_settings.cpp b/src/Core/tests/gtest_settings.cpp index cbeb84ef2e7..a6d8763bfb8 100644 --- a/src/Core/tests/gtest_settings.cpp +++ b/src/Core/tests/gtest_settings.cpp @@ -121,7 +121,7 @@ GTEST_TEST(SettingMySQLDataTypesSupport, SetString) ASSERT_EQ(Field("decimal,datetime64"), setting); // comma with spaces - setting = " datetime64 , decimal "; + setting = " datetime64 , decimal "; /// bad punctuation is ok here ASSERT_TRUE(setting.changed); ASSERT_TRUE(setting.value.isSet(MySQLDataTypesSupport::DECIMAL)); ASSERT_TRUE(setting.value.isSet(MySQLDataTypesSupport::DATETIME64)); @@ -166,4 +166,3 @@ GTEST_TEST(SettingMySQLDataTypesSupport, SetInvalidString) ASSERT_TRUE(setting.changed); ASSERT_EQ(0, setting.value.getValue()); } - diff --git a/src/DataTypes/NumberTraits.h b/src/DataTypes/NumberTraits.h index 6b068b0d8b1..cf283d3358c 100644 --- a/src/DataTypes/NumberTraits.h +++ b/src/DataTypes/NumberTraits.h @@ -174,7 +174,7 @@ template struct ResultOfBitNot * Float, [U]Int -> Float * Decimal, Decimal -> Decimal * UUID, UUID -> UUID - * UInt64 , Int -> Error + * UInt64, Int -> Error * Float, [U]Int64 -> Error */ template diff --git 
a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp index 0b7352e9cbb..b12ffc555d4 100644 --- a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp +++ b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp @@ -322,7 +322,7 @@ void buildSingleAttribute( /** Transforms - * PRIMARY KEY Attr1 ,..., AttrN + * PRIMARY KEY Attr1, ..., AttrN * to the next configuration * Attr1 * or diff --git a/src/Functions/FunctionsStringHash.cpp b/src/Functions/FunctionsStringHash.cpp index d6873d9490e..ff8ff2d2651 100644 --- a/src/Functions/FunctionsStringHash.cpp +++ b/src/Functions/FunctionsStringHash.cpp @@ -292,8 +292,8 @@ struct SimHashImpl continue; // we need to store the new word hash value to the oldest location. - // for example, N = 5, array |a0|a1|a2|a3|a4|, now , a0 is the oldest location, - // so we need to store new word hash into location of a0, then ,this array become + // for example, N = 5, array |a0|a1|a2|a3|a4|, now, a0 is the oldest location, + // so we need to store new word hash into location of a0, then this array become // |a5|a1|a2|a3|a4|, next time, a1 become the oldest location, we need to store new // word hash value into location of a1, then array become |a5|a6|a2|a3|a4| words[offset] = BytesRef{word_start, length}; @@ -793,4 +793,3 @@ REGISTER_FUNCTION(StringHash) factory.registerFunction(); } } - diff --git a/src/Functions/GatherUtils/sliceHasImplAnyAll.h b/src/Functions/GatherUtils/sliceHasImplAnyAll.h index 21c80b742fd..99bf1a7cc33 100644 --- a/src/Functions/GatherUtils/sliceHasImplAnyAll.h +++ b/src/Functions/GatherUtils/sliceHasImplAnyAll.h @@ -375,14 +375,14 @@ bool sliceHasImplAnyAllImplInt16( _mm256_or_si256( _mm256_andnot_si256( _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_nm_mask, first_nm_mask, 1), _mm256_set_epi8(7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8)), - _mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data ,first_data, 1), _mm256_set_epi8(7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8)))), + _mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data, first_data, 1), _mm256_set_epi8(7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8)))), _mm256_andnot_si256( _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_nm_mask, first_nm_mask, 1), _mm256_set_epi8(5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6)), _mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data, first_data, 1), _mm256_set_epi8(5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6))))), _mm256_or_si256( _mm256_andnot_si256( _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_nm_mask, first_nm_mask, 1), _mm256_set_epi8(3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4)), - _mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data ,first_data ,1), _mm256_set_epi8(3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4)))), + _mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data, first_data, 1), _mm256_set_epi8(3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4)))), _mm256_andnot_si256( _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_nm_mask, first_nm_mask, 1), 
_mm256_set_epi8(1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2)), _mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data, first_data, 1), _mm256_set_epi8(1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2)))))) diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index 1a367a8199d..fd825720ac9 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -258,7 +258,7 @@ void PocoHTTPClient::addMetric(const Aws::Http::HttpRequest & request, S3MetricT void PocoHTTPClient::makeRequestInternal( Aws::Http::HttpRequest & request, std::shared_ptr & response, - Aws::Utils::RateLimits::RateLimiterInterface * readLimiter , + Aws::Utils::RateLimits::RateLimiterInterface * readLimiter, Aws::Utils::RateLimits::RateLimiterInterface * writeLimiter) const { /// Most sessions in pool are already connected and it is not possible to set proxy host/port to a connected session. diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h index 05b34e8460f..29096a38be6 100644 --- a/src/Interpreters/Aggregator.h +++ b/src/Interpreters/Aggregator.h @@ -292,7 +292,7 @@ struct AggregationMethodStringNoCache { } - using State = ColumnsHashing::HashMethodString; + using State = ColumnsHashing::HashMethodString; static const bool low_cardinality_optimization = false; static const bool one_key_nullable_optimization = nullable; diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index 193bb5b6ab0..92e6bcb326c 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -551,7 +551,7 @@ void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper) chassert(!task.completely_processed); /// Setup tracing context on current thread for current DDL - OpenTelemetry::TracingContextHolder tracing_ctx_holder(__PRETTY_FUNCTION__ , + OpenTelemetry::TracingContextHolder tracing_ctx_holder(__PRETTY_FUNCTION__, task.entry.tracing_context, this->context->getOpenTelemetrySpanLog()); tracing_ctx_holder.root_span.kind = OpenTelemetry::CONSUMER; diff --git a/src/Interpreters/InterpreterRenameQuery.cpp b/src/Interpreters/InterpreterRenameQuery.cpp index 75d43b541e1..ae79b3f932e 100644 --- a/src/Interpreters/InterpreterRenameQuery.cpp +++ b/src/Interpreters/InterpreterRenameQuery.cpp @@ -193,7 +193,7 @@ AccessRightsElements InterpreterRenameQuery::getRequiredAccess(InterpreterRename required_access.emplace_back(AccessType::CREATE_TABLE | AccessType::INSERT, elem.to.getDatabase(), elem.to.getTable()); if (rename.exchange) { - required_access.emplace_back(AccessType::CREATE_TABLE | AccessType::INSERT , elem.from.getDatabase(), elem.from.getTable()); + required_access.emplace_back(AccessType::CREATE_TABLE | AccessType::INSERT, elem.from.getDatabase(), elem.from.getTable()); required_access.emplace_back(AccessType::SELECT | AccessType::DROP_TABLE, elem.to.getDatabase(), elem.to.getTable()); } } diff --git a/src/Interpreters/TransactionLog.cpp b/src/Interpreters/TransactionLog.cpp index 6257e617d4a..2ef4f4d6218 100644 --- a/src/Interpreters/TransactionLog.cpp +++ b/src/Interpreters/TransactionLog.cpp @@ -482,7 +482,7 @@ CSN TransactionLog::finalizeCommittedTransaction(MergeTreeTransaction * txn, CSN bool removed = running_list.erase(txn->tid.getHash()); if (!removed) { - LOG_ERROR(log , "I's a bug: TID {} {} doesn't exist", txn->tid.getHash(), txn->tid); + LOG_ERROR(log, "It's a bug: TID {} {} doesn't exist", 
txn->tid.getHash(), txn->tid); abort(); } } diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index 9796ae10c07..72e25cc3cf9 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -31,10 +31,10 @@ protected: not_endswith, endswith_cs, not_endswith_cs, - equal, //=~ - not_equal,//!~ - equal_cs, //= - not_equal_cs,//!= + equal, /// =~ + not_equal, /// !~ + equal_cs, /// = + not_equal_cs, /// != has, not_has, has_all, @@ -49,10 +49,10 @@ protected: not_hassuffix, hassuffix_cs, not_hassuffix_cs, - in_cs, //in - not_in_cs, //!in - in, //in~ - not_in ,//!in~ + in_cs, /// in + not_in_cs, /// !in + in, /// in~ + not_in, /// !in~ matches_regex, startswith, not_startswith, diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index d77ae8d3a27..18e91c533e0 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -359,11 +359,11 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, "SELECT *\nFROM Customers\nORDER BY LastName DESC" }, { - "Customers | order by Age desc , FirstName asc ", + "Customers | order by Age desc, FirstName asc ", "SELECT *\nFROM Customers\nORDER BY\n Age DESC,\n FirstName ASC" }, { - "Customers | order by Age asc , FirstName desc", + "Customers | order by Age asc, FirstName desc", "SELECT *\nFROM Customers\nORDER BY\n Age ASC,\n FirstName DESC" }, { diff --git a/src/Processors/Formats/Impl/ArrowFieldIndexUtil.h b/src/Processors/Formats/Impl/ArrowFieldIndexUtil.h index b7adaa35335..676ce50d04f 100644 --- a/src/Processors/Formats/Impl/ArrowFieldIndexUtil.h +++ b/src/Processors/Formats/Impl/ArrowFieldIndexUtil.h @@ -35,7 +35,7 @@ public: /// - key: field name with full path. eg. a struct field's name is like a.x.i /// - value: a pair, first value refers to this field's start index, second value refers to how many /// indices this field take. eg. - /// For a parquet schema {x: int , y: {i: int, j: int}}, the return will be + /// For a parquet schema {x: int, y: {i: int, j: int}}, the return will be /// - x: (0, 1) /// - y: (1, 2) /// - y.i: (1, 1) diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp index e5f52936021..b1b08cdf256 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp @@ -236,10 +236,10 @@ bool JSONEachRowRowInputFormat::readRow(MutableColumns & columns, RowReadExtensi bool JSONEachRowRowInputFormat::checkEndOfData(bool is_first_row) { - /// We consume , or \n before scanning a new row, instead scanning to next row at the end. + /// We consume ',' or '\n' before scanning a new row, instead scanning to next row at the end. /// The reason is that if we want an exact number of rows read with LIMIT x /// from a streaming table engine with text data format, like File or Kafka - /// then seeking to next ;, or \n would trigger reading of an extra row at the end. + /// then seeking to next ';,' or '\n' would trigger reading of an extra row at the end. /// Semicolon is added for convenience as it could be used at end of INSERT query. 
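    /// For example (illustrative), with input '{"a":1}\n{"a":2}\n' and LIMIT 1 we can return right
    /// after the first row; its trailing '\n' is consumed only if the next row is actually requested.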
if (!in->eof()) diff --git a/src/Processors/QueryPlan/IntersectOrExceptStep.cpp b/src/Processors/QueryPlan/IntersectOrExceptStep.cpp index afdff44020f..b132d27670d 100644 --- a/src/Processors/QueryPlan/IntersectOrExceptStep.cpp +++ b/src/Processors/QueryPlan/IntersectOrExceptStep.cpp @@ -30,7 +30,7 @@ static Block checkHeaders(const DataStreams & input_streams_) } IntersectOrExceptStep::IntersectOrExceptStep( - DataStreams input_streams_ , Operator operator_ , size_t max_threads_) + DataStreams input_streams_, Operator operator_, size_t max_threads_) : header(checkHeaders(input_streams_)) , current_operator(operator_) , max_threads(max_threads_) diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 7f7f9058f1b..1b20778877d 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -72,7 +72,7 @@ struct ViewsData std::atomic_bool has_exception = false; std::exception_ptr first_exception; - ViewsData(ThreadStatusesHolderPtr thread_status_holder_, ContextPtr context_, StorageID source_storage_id_, StorageMetadataPtr source_metadata_snapshot_ , StoragePtr source_storage_) + ViewsData(ThreadStatusesHolderPtr thread_status_holder_, ContextPtr context_, StorageID source_storage_id_, StorageMetadataPtr source_metadata_snapshot_, StoragePtr source_storage_) : thread_status_holder(std::move(thread_status_holder_)) , context(std::move(context_)) , source_storage_id(std::move(source_storage_id_)) diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index 069670c84a5..29b75fa6552 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -638,7 +638,7 @@ void HTTPHandler::processQuery( throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected MemoryWriteBuffer"); auto rdbuf = prev_memory_buffer->tryGetReadBuffer(); - copyData(*rdbuf , *next_buffer); + copyData(*rdbuf, *next_buffer); return next_buffer; }; diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index cbd32460f7e..3126d584964 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -242,8 +242,8 @@ void listFilesWithRegexpMatchingImpl( { if (recursive) { - listFilesWithRegexpMatchingImpl(fs::path(full_path).append(it->path().string()) / "" , - looking_for_directory ? suffix_with_globs.substr(next_slash_after_glob_pos) : current_glob , + listFilesWithRegexpMatchingImpl(fs::path(full_path).append(it->path().string()) / "", + looking_for_directory ? 
suffix_with_globs.substr(next_slash_after_glob_pos) : current_glob, total_bytes_to_read, result, recursive); } else if (looking_for_directory && re2::RE2::FullMatch(file_name, matcher)) diff --git a/src/Storages/StorageProxy.h b/src/Storages/StorageProxy.h index 14b7fc15af2..582dc6f882d 100644 --- a/src/Storages/StorageProxy.h +++ b/src/Storages/StorageProxy.h @@ -149,7 +149,7 @@ public: return getNested()->mayBenefitFromIndexForIn(left_in_operand, query_context, metadata_snapshot); } - CheckResults checkData(const ASTPtr & query , ContextPtr context) override { return getNested()->checkData(query, context); } + CheckResults checkData(const ASTPtr & query, ContextPtr context) override { return getNested()->checkData(query, context); } void checkTableCanBeDropped() const override { getNested()->checkTableCanBeDropped(); } bool storesDataOnDisk() const override { return getNested()->storesDataOnDisk(); } Strings getDataPaths() const override { return getNested()->getDataPaths(); } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 4e053c4598c..c3dedd69d0d 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -6569,7 +6569,7 @@ void StorageReplicatedMergeTree::fetchPartition( try { - /// part name , metadata, part_path , true, 0, zookeeper + /// part name, metadata, part_path, true, 0, zookeeper if (!fetchPart(part_name, metadata_snapshot, from_zookeeper_name, part_path, true, 0, zookeeper, /* try_fetch_shared = */ false)) throw Exception(ErrorCodes::UNFINISHED, "Failed to fetch part {} from {}", part_name, from_); } diff --git a/src/TableFunctions/TableFunctionFactory.cpp b/src/TableFunctions/TableFunctionFactory.cpp index 76108f1cdd4..ce3daff0785 100644 --- a/src/TableFunctions/TableFunctionFactory.cpp +++ b/src/TableFunctions/TableFunctionFactory.cpp @@ -41,7 +41,7 @@ TableFunctionPtr TableFunctionFactory::get( { auto hints = getHints(table_function->name); if (!hints.empty()) - throw Exception(ErrorCodes::UNKNOWN_FUNCTION, "Unknown table function {}. Maybe you meant: {}", table_function->name , toString(hints)); + throw Exception(ErrorCodes::UNKNOWN_FUNCTION, "Unknown table function {}. Maybe you meant: {}", table_function->name, toString(hints)); else throw Exception(ErrorCodes::UNKNOWN_FUNCTION, "Unknown table function {}", table_function->name); } diff --git a/utils/check-style/check-style b/utils/check-style/check-style index 0b3b86b4772..c28ca1cfc8a 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -410,3 +410,6 @@ find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep # The stateful directory should only contain the tests that depend on the test dataset (hits or visits). find $ROOT_PATH/tests/queries/1_stateful -name '*.sql' -or -name '*.sh' | grep -v '00076_system_columns_bytes' | xargs -I{} bash -c 'grep -q -P "hits|visits" "{}" || echo "The test {} does not depend on the test dataset (hits or visits table) and should be located in the 0_stateless directory. You can also add an exception to the check-style script."' + +# Check for bad punctuation: whitespace before comma. +find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep -P --line-number '\w ,' | grep -v 'bad punctuation is ok here' && echo "^ There is bad punctuation: whitespace before comma. 
You should write it like this: 'Hello, world!'" From 3e3adc7fecd5f6c409320727bec3a0291aa2430b Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 25 Jul 2023 10:29:59 +0200 Subject: [PATCH 1963/1997] tests: increase throttling for 01923_network_receive_time_metric_insert In debug builds launching the client can take a while, so let's increase the throttling to avoid flakiness CI: https://s3.amazonaws.com/clickhouse-test-reports/52490/9e2526a5f04861fcfac49c2ce85560d08c68af66/stateless_tests__debug__[1_5].html Signed-off-by: Azat Khuzhin --- .../0_stateless/01923_network_receive_time_metric_insert.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh b/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh index ec5aa141859..4d7e79fae52 100755 --- a/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh +++ b/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh @@ -9,7 +9,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ${CLICKHOUSE_CLIENT} --multiquery --query "DROP TABLE IF EXISTS t; CREATE TABLE t (x UInt64) ENGINE = Memory;" # Rate limit is chosen for operation to spent more than one second. -seq 1 1000 | pv --quiet --rate-limit 1000 | ${CLICKHOUSE_CLIENT} --query "INSERT INTO t FORMAT TSV" +seq 1 1000 | pv --quiet --rate-limit 500 | ${CLICKHOUSE_CLIENT} --query "INSERT INTO t FORMAT TSV" # We check that the value of NetworkReceiveElapsedMicroseconds correctly includes the time spent waiting data from the client. ${CLICKHOUSE_CLIENT} --multiquery --query "SYSTEM FLUSH LOGS; From b02e290d5507419e6166433b0a045eaeb3d124d9 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 25 Jul 2023 10:37:02 +0200 Subject: [PATCH 1964/1997] tests: fix 01035_avg_weighted_long flakiness Use one clickhouse-client invocation instead of 300, in debug builds it is significant - each spawn is ~1 second Signed-off-by: Azat Khuzhin --- .../0_stateless/01035_avg_weighted_long.sh | 48 +++++++++---------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/tests/queries/0_stateless/01035_avg_weighted_long.sh b/tests/queries/0_stateless/01035_avg_weighted_long.sh index 138aa03fbb3..8838b07a3d7 100755 --- a/tests/queries/0_stateless/01035_avg_weighted_long.sh +++ b/tests/queries/0_stateless/01035_avg_weighted_long.sh @@ -11,36 +11,36 @@ ${CLICKHOUSE_CLIENT} --query="SELECT avgWeighted(x, y) FROM (select toDecimal256 ${CLICKHOUSE_CLIENT} --query="SELECT avgWeighted(x, y) FROM (select toDecimal32(1, 0) x, toDecimal256(1, 1) y);" types=("Int8" "Int16" "Int32" "Int64" "UInt8" "UInt16" "UInt32" "UInt64" "Float32" "Float64") - -for left in "${types[@]}" -do - for right in "${types[@]}" - do - ${CLICKHOUSE_CLIENT} --query="SELECT avgWeighted(x, w) FROM values('x ${left}, w ${right}', (4, 1), (1, 0), (10, 2))" - ${CLICKHOUSE_CLIENT} --query="SELECT avgWeighted(x, w) FROM values('x ${left}, w ${right}', (0, 0), (1, 0))" - done -done - exttypes=("Int128" "Int256" "UInt256") - -for left in "${exttypes[@]}" -do - for right in "${exttypes[@]}" - do - ${CLICKHOUSE_CLIENT} --query="SELECT avgWeighted(to${left}(1), to${right}(2))" - done -done - # Decimal types dtypes=("32" "64" "128" "256") -for left in "${dtypes[@]}" -do - for right in "${dtypes[@]}" +( + for left in "${types[@]}" do - ${CLICKHOUSE_CLIENT} --query="SELECT avgWeighted(toDecimal${left}(2, 4), toDecimal${right}(1, 4))" + for right in "${types[@]}" + do + echo "SELECT avgWeighted(x, w) FROM values('x ${left}, w 
${right}', (4, 1), (1, 0), (10, 2));" + echo "SELECT avgWeighted(x, w) FROM values('x ${left}, w ${right}', (0, 0), (1, 0));" + done done -done + + for left in "${exttypes[@]}" + do + for right in "${exttypes[@]}" + do + echo "SELECT avgWeighted(to${left}(1), to${right}(2));" + done + done + + for left in "${dtypes[@]}" + do + for right in "${dtypes[@]}" + do + echo "SELECT avgWeighted(toDecimal${left}(2, 4), toDecimal${right}(1, 4));" + done + done +) | clickhouse-client -nm echo "$(${CLICKHOUSE_CLIENT} --server_logs_file=/dev/null --query="SELECT avgWeighted(['string'], toFloat64(0))" 2>&1)" \ | grep -c 'Code: 43. DB::Exception: .* DB::Exception:.* Types .* are non-conforming as arguments for aggregate function avgWeighted' From 2efbeab5afe50fbd734a6729e4cffa7ef12fff04 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 25 Jul 2023 10:43:29 +0200 Subject: [PATCH 1965/1997] tests: fix 00719_parallel_ddl_table flakiness in debug builds In debug bulds each client invocation takes ~1 second, and on CI it can take more if the node is under some load, so let's decrease number of iterations. Anyway CI runs each test ~1K times daily, and if there will be something even this number of iterations should be enough. Signed-off-by: Azat Khuzhin --- tests/queries/0_stateless/00719_parallel_ddl_table.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00719_parallel_ddl_table.sh b/tests/queries/0_stateless/00719_parallel_ddl_table.sh index fdc994aec33..57a7e228341 100755 --- a/tests/queries/0_stateless/00719_parallel_ddl_table.sh +++ b/tests/queries/0_stateless/00719_parallel_ddl_table.sh @@ -10,7 +10,7 @@ ${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS parallel_ddl" function query() { - for _ in {1..100}; do + for _ in {1..50}; do ${CLICKHOUSE_CLIENT} --query "CREATE TABLE IF NOT EXISTS parallel_ddl(a Int) ENGINE = Memory" ${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS parallel_ddl" done From d500e75569c59d1f91ae3de9c43f24f2be703e21 Mon Sep 17 00:00:00 2001 From: zvonand Date: Tue, 25 Jul 2023 12:07:47 +0200 Subject: [PATCH 1966/1997] fix --- src/Functions/FunctionToDecimalString.h | 67 ++----------------- .../0_stateless/02676_to_decimal_string.sql | 6 ++ 2 files changed, 13 insertions(+), 60 deletions(-) diff --git a/src/Functions/FunctionToDecimalString.h b/src/Functions/FunctionToDecimalString.h index 6ae007e6b66..68ad978632e 100644 --- a/src/Functions/FunctionToDecimalString.h +++ b/src/Functions/FunctionToDecimalString.h @@ -22,6 +22,7 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ILLEGAL_COLUMN; extern const int CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } class FunctionToDecimalString : public IFunction @@ -36,17 +37,14 @@ public: size_t getNumberOfArguments() const override { return 2; } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - if (!isNumber(*arguments[0])) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal first argument for formatDecimal function: got {}, expected numeric type", - arguments[0]->getName()); + FunctionArgumentDescriptors mandatory_args = { + {"Value", nullptr, nullptr, nullptr}, + {"precision", &isNativeInteger, &isColumnConst, "const Integer [0-77]"} + }; - if (!isUInt8(*arguments[1])) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal second argument for formatDecimal function: got 
{}, expected UInt8", - arguments[1]->getName()); + validateFunctionArgumentTypes(*this, arguments, mandatory_args, {}); return std::make_shared(); } @@ -98,29 +96,6 @@ private: buf_to.finalize(); } - template - void constantVector(const FirstArgType & value_from, const ColumnVector::Container & vec_precision, - ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets) const - { - size_t input_rows_count = vec_precision.size(); - result_offsets.resize(input_rows_count); - - WriteBufferFromVector buf_to(vec_to); - - constexpr size_t max_digits = std::numeric_limits::digits10; - - for (size_t i = 0; i < input_rows_count; ++i) - { - if (vec_precision[i] > max_digits) - throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER, - "Too many fractional digits requested, shall not be more than {}", max_digits); - format(value_from, buf_to, vec_precision[i]); - result_offsets[i] = buf_to.count(); - } - - buf_to.finalize(); - } - /// For operations with Decimal template void vectorConstant(const FirstArgVectorType & vec_from, UInt8 precision, @@ -168,29 +143,6 @@ private: buf_to.finalize(); } - template - void constantVector(const FirstArgType & value_from, const ColumnVector::Container & vec_precision, - ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets, UInt8 from_scale) const - { - size_t input_rows_count = vec_precision.size(); - result_offsets.resize(input_rows_count); - - WriteBufferFromVector buf_to(vec_to); - - constexpr size_t max_digits = std::numeric_limits::digits10; - - for (size_t i = 0; i < input_rows_count; ++i) - { - if (vec_precision[i] > max_digits) - throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER, - "Too many fractional digits requested for Decimal, must not be more than {}", max_digits); - writeText(value_from, from_scale, buf_to, true, true, vec_precision[i]); - writeChar(0, buf_to); - result_offsets[i] = buf_to.count(); - } - buf_to.finalize(); - } - template static void format(T value, DB::WriteBuffer & out, UInt8 precision) { @@ -263,7 +215,6 @@ private: template ColumnPtr executeType(const ColumnsWithTypeAndName & arguments) const { - const auto * from_col_const = typeid_cast(arguments[0].column.get()); const auto * precision_col = checkAndGetColumn>(arguments[1].column.get()); const auto * precision_col_const = typeid_cast(arguments[1].column.get()); @@ -284,8 +235,6 @@ private: else vectorVector(from_col->getData(), precision_col->getData(), result_chars, result_offsets, from_scale); } - else if (from_col_const) - constantVector(from_col_const->template getValue(), precision_col->getData(), result_chars, result_offsets, from_scale); else throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function formatDecimal", arguments[0].column->getName()); } @@ -299,8 +248,6 @@ private: else vectorVector(from_col->getData(), precision_col->getData(), result_chars, result_offsets); } - else if (from_col_const) - constantVector(from_col_const->template getValue(), precision_col->getData(), result_chars, result_offsets); else throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function formatDecimal", arguments[0].column->getName()); } diff --git a/tests/queries/0_stateless/02676_to_decimal_string.sql b/tests/queries/0_stateless/02676_to_decimal_string.sql index 563d60c62c7..1dae139deb1 100644 --- a/tests/queries/0_stateless/02676_to_decimal_string.sql +++ b/tests/queries/0_stateless/02676_to_decimal_string.sql @@ -33,3 +33,9 @@ SELECT 
toDecimalString('64.64'::Float64, 61); -- {serverError CANNOT_PRINT_FLOAT SELECT toDecimalString('88'::UInt8, 78); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER} SELECT toDecimalString('646464'::Int256, 78); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER} SELECT toDecimalString('-128.789323123321329854641231237893231233213298546'::Decimal256(45), 78); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER} + +-- wrong types: #52407 and similar +SELECT toDecimalString('256.256'::Decimal256(45), *); -- {serverError ILLEGAL_COLUMN} +SELECT toDecimalString('128.128'::Decimal128(30), 'str'); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} +SELECT toDecimalString('64.64'::Decimal64(10)); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} +SELECT toDecimalString('64.64'::Decimal64(10), 3, 3); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} From 8184a289e5441208110bcd2f8f63b57e31ccde33 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Mon, 1 May 2023 01:53:20 +0000 Subject: [PATCH 1967/1997] Partially reimplement Parquet encoder to make it faster and parallelizable --- src/CMakeLists.txt | 4 + src/Common/CurrentMetrics.cpp | 6 +- src/Common/PODArray.cpp | 10 + src/Common/PODArray.h | 11 + src/Core/Settings.h | 4 + src/Formats/FormatFactory.cpp | 7 +- src/Formats/FormatSettings.h | 6 + .../Formats/Impl/CHColumnToArrowColumn.cpp | 5 +- .../Formats/Impl/Parquet/PrepareForWrite.cpp | 618 +++++++++++++ .../Formats/Impl/Parquet/ThriftUtil.cpp | 35 + .../Formats/Impl/Parquet/ThriftUtil.h | 17 + src/Processors/Formats/Impl/Parquet/Write.cpp | 816 ++++++++++++++++++ src/Processors/Formats/Impl/Parquet/Write.h | 135 +++ .../Formats/Impl/ParquetBlockOutputFormat.cpp | 467 +++++++++- .../Formats/Impl/ParquetBlockOutputFormat.h | 116 ++- .../02735_parquet_encoder.reference | 55 ++ .../0_stateless/02735_parquet_encoder.sql | 168 ++++ 17 files changed, 2425 insertions(+), 55 deletions(-) create mode 100644 src/Processors/Formats/Impl/Parquet/PrepareForWrite.cpp create mode 100644 src/Processors/Formats/Impl/Parquet/ThriftUtil.cpp create mode 100644 src/Processors/Formats/Impl/Parquet/ThriftUtil.h create mode 100644 src/Processors/Formats/Impl/Parquet/Write.cpp create mode 100644 src/Processors/Formats/Impl/Parquet/Write.h create mode 100644 tests/queries/0_stateless/02735_parquet_encoder.reference create mode 100644 tests/queries/0_stateless/02735_parquet_encoder.sql diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 975bf9bb618..5c66c7e9495 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -267,6 +267,10 @@ add_object_library(clickhouse_processors_queryplan Processors/QueryPlan) add_object_library(clickhouse_processors_queryplan_optimizations Processors/QueryPlan/Optimizations) add_object_library(clickhouse_user_defined_functions Functions/UserDefined) +if (USE_PARQUET) + add_object_library(clickhouse_processors_formats_impl_parquet Processors/Formats/Impl/Parquet) +endif() + if (TARGET ch_contrib::nuraft) add_object_library(clickhouse_coordination Coordination) endif() diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 583b13cf79d..9a4ffb0577a 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -149,8 +149,10 @@ M(RestartReplicaThreadsActive, "Number of threads in the RESTART REPLICA thread pool running a task.") \ M(QueryPipelineExecutorThreads, "Number of threads in the PipelineExecutor thread pool.") \ M(QueryPipelineExecutorThreadsActive, "Number of threads in the PipelineExecutor thread pool running a task.") \ - 
M(ParquetDecoderThreads, "Number of threads in the ParquetBlockInputFormat thread pool running a task.") \ - M(ParquetDecoderThreadsActive, "Number of threads in the ParquetBlockInputFormat thread pool.") \ + M(ParquetDecoderThreads, "Number of threads in the ParquetBlockInputFormat thread pool.") \ + M(ParquetDecoderThreadsActive, "Number of threads in the ParquetBlockInputFormat thread pool running a task.") \ + M(ParquetEncoderThreads, "Number of threads in ParquetBlockOutputFormat thread pool.") \ + M(ParquetEncoderThreadsActive, "Number of threads in ParquetBlockOutputFormat thread pool running a task.") \ M(OutdatedPartsLoadingThreads, "Number of threads in the threadpool for loading Outdated data parts.") \ M(OutdatedPartsLoadingThreadsActive, "Number of active threads in the threadpool for loading Outdated data parts.") \ M(DistributedBytesToInsert, "Number of pending bytes to process for asynchronous insertion into Distributed tables. Number of bytes for every shard is summed.") \ diff --git a/src/Common/PODArray.cpp b/src/Common/PODArray.cpp index 07c3cf1af1a..d21dc40867d 100644 --- a/src/Common/PODArray.cpp +++ b/src/Common/PODArray.cpp @@ -15,4 +15,14 @@ template class PODArray, PADDING_FOR_SIMD - 1, PADD template class PODArray, PADDING_FOR_SIMD - 1, PADDING_FOR_SIMD>; template class PODArray, PADDING_FOR_SIMD - 1, PADDING_FOR_SIMD>; template class PODArray, PADDING_FOR_SIMD - 1, PADDING_FOR_SIMD>; + +template class PODArray, 0, 0>; +template class PODArray, 0, 0>; +template class PODArray, 0, 0>; +template class PODArray, 0, 0>; + +template class PODArray, 0, 0>; +template class PODArray, 0, 0>; +template class PODArray, 0, 0>; +template class PODArray, 0, 0>; } diff --git a/src/Common/PODArray.h b/src/Common/PODArray.h index b126afd2a37..68c1e325f0c 100644 --- a/src/Common/PODArray.h +++ b/src/Common/PODArray.h @@ -783,4 +783,15 @@ extern template class PODArray, PADDING_FOR_SIMD - extern template class PODArray, PADDING_FOR_SIMD - 1, PADDING_FOR_SIMD>; extern template class PODArray, PADDING_FOR_SIMD - 1, PADDING_FOR_SIMD>; extern template class PODArray, PADDING_FOR_SIMD - 1, PADDING_FOR_SIMD>; + +extern template class PODArray, 0, 0>; +extern template class PODArray, 0, 0>; +extern template class PODArray, 0, 0>; +extern template class PODArray, 0, 0>; + +extern template class PODArray, 0, 0>; +extern template class PODArray, 0, 0>; +extern template class PODArray, 0, 0>; +extern template class PODArray, 0, 0>; + } diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 97c64ba133c..98f7f212aa5 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -953,6 +953,10 @@ class IColumn; M(ParquetVersion, output_format_parquet_version, "2.latest", "Parquet format version for output format. Supported versions: 1.0, 2.4, 2.6 and 2.latest (default)", 0) \ M(ParquetCompression, output_format_parquet_compression_method, "lz4", "Compression method for Parquet output format. Supported codecs: snappy, lz4, brotli, zstd, gzip, none (uncompressed)", 0) \ M(Bool, output_format_parquet_compliant_nested_types, true, "In parquet file schema, use name 'element' instead of 'item' for list elements. This is a historical artifact of Arrow library implementation. Generally increases compatibility, except perhaps with some old versions of Arrow.", 0) \ + M(Bool, output_format_parquet_use_custom_encoder, true, "Use experimental faster Parquet encoder implementation.", 0) \ + M(Bool, output_format_parquet_parallel_encoding, true, "Do Parquet encoding in multiple threads. 
Requires output_format_parquet_use_custom_encoder.", 0) \ + M(UInt64, output_format_parquet_data_page_size, 1024 * 1024, "Target page size in bytes, before compression.", 0) \ + M(UInt64, output_format_parquet_batch_size, 1024, "Check page size every this many rows. Consider decreasing if you have columns with average values size above a few KBs.", 0) \ M(String, output_format_avro_codec, "", "Compression codec used for output. Possible values: 'null', 'deflate', 'snappy'.", 0) \ M(UInt64, output_format_avro_sync_interval, 16 * 1024, "Sync interval in bytes.", 0) \ M(String, output_format_avro_string_column_pattern, "", "For Avro format: regexp of String columns to select as AVRO string.", 0) \ diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 6e3e086859b..663b7f1ba95 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -130,6 +130,10 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.parquet.max_block_size = settings.input_format_parquet_max_block_size; format_settings.parquet.output_compression_method = settings.output_format_parquet_compression_method; format_settings.parquet.output_compliant_nested_types = settings.output_format_parquet_compliant_nested_types; + format_settings.parquet.use_custom_encoder = settings.output_format_parquet_use_custom_encoder; + format_settings.parquet.parallel_encoding = settings.output_format_parquet_parallel_encoding; + format_settings.parquet.data_page_size = settings.output_format_parquet_data_page_size; + format_settings.parquet.write_batch_size = settings.output_format_parquet_batch_size; format_settings.pretty.charset = settings.output_format_pretty_grid_charset.toString() == "ASCII" ? FormatSettings::Pretty::Charset::ASCII : FormatSettings::Pretty::Charset::UTF8; format_settings.pretty.color = settings.output_format_pretty_color; format_settings.pretty.max_column_pad_width = settings.output_format_pretty_max_column_pad_width; @@ -434,7 +438,7 @@ OutputFormatPtr FormatFactory::getOutputFormatParallelIfPossible( return format; } - return getOutputFormat(name, buf, sample, context, _format_settings); + return getOutputFormat(name, buf, sample, context, format_settings); } @@ -453,6 +457,7 @@ OutputFormatPtr FormatFactory::getOutputFormat( context->getQueryContext()->addQueryFactoriesInfo(Context::QueryLogFactories::Format, name); auto format_settings = _format_settings ? *_format_settings : getFormatSettings(context); + format_settings.max_threads = context->getSettingsRef().max_threads; /** TODO: Materialization is needed, because formats can use the functions `IDataType`, * which only work with full columns. 
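For reference, a hypothetical clickhouse-client session exercising the settings added above (the setting names are the ones introduced in this patch; the table, sizes and file name are arbitrary):

    SET output_format_parquet_use_custom_encoder = 1;
    SET output_format_parquet_parallel_encoding = 1;
    SET output_format_parquet_data_page_size = 1048576; -- 1 MiB, matching the default above
    SELECT number AS n, toString(number) AS s
    FROM numbers(1000000)
    INTO OUTFILE 'test.parquet' FORMAT Parquet;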
diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index e321e5264ca..3259c46e5ff 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -100,6 +100,8 @@ struct FormatSettings UInt64 max_parser_depth = DBMS_DEFAULT_MAX_PARSER_DEPTH; + size_t max_threads = 1; + enum class ArrowCompression { NONE, @@ -233,10 +235,14 @@ struct FormatSettings bool output_string_as_string = false; bool output_fixed_string_as_fixed_byte_array = true; bool preserve_order = false; + bool use_custom_encoder = true; + bool parallel_encoding = true; UInt64 max_block_size = 8192; ParquetVersion output_version; ParquetCompression output_compression_method = ParquetCompression::SNAPPY; bool output_compliant_nested_types = true; + size_t data_page_size = 1024 * 1024; + size_t write_batch_size = 1024; } parquet; struct Pretty diff --git a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp index f688efa3290..e2383d1bfab 100644 --- a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp +++ b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp @@ -684,9 +684,6 @@ namespace DB bool output_fixed_string_as_fixed_byte_array, std::unordered_map & dictionary_values) { - const String column_type_name = column_type->getFamilyName(); - WhichDataType which(column_type); - switch (column_type->getTypeId()) { case TypeIndex::Nullable: @@ -796,7 +793,7 @@ namespace DB FOR_INTERNAL_NUMERIC_TYPES(DISPATCH) #undef DISPATCH default: - throw Exception(ErrorCodes::UNKNOWN_TYPE, "Internal type '{}' of a column '{}' is not supported for conversion into {} data format.", column_type_name, column_name, format_name); + throw Exception(ErrorCodes::UNKNOWN_TYPE, "Internal type '{}' of a column '{}' is not supported for conversion into {} data format.", column_type->getFamilyName(), column_name, format_name); } } diff --git a/src/Processors/Formats/Impl/Parquet/PrepareForWrite.cpp b/src/Processors/Formats/Impl/Parquet/PrepareForWrite.cpp new file mode 100644 index 00000000000..a70b6fcfc81 --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/PrepareForWrite.cpp @@ -0,0 +1,618 @@ +#include "Processors/Formats/Impl/Parquet/Write.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/// This file deals with schema conversion and with repetition and definition levels. + +/// Schema conversion is pretty straightforward. + +/// "Repetition and definition levels" are a somewhat tricky way of encoding information about +/// optional fields and lists. +/// +/// If you don't want to learn how these work, feel free to skip the updateRepDefLevels* functions. +/// All you need to know is: +/// * values for nulls are not encoded, so we have to filter nullable columns, +/// * information about all array lengths and nulls is encoded in the arrays `def` and `rep`, +/// which need to be encoded next to the data, +/// * `def` and `rep` arrays can be longer than `primitive_column`, because they include nulls and +/// empty arrays; the values in primitive_column correspond to positions where def[i] == max_def. +/// +/// If you do want to learn it, dremel paper: https://research.google/pubs/pub36632/ +/// Instead of reading the whole paper, try staring at figures 2-3 for a while - it might be enough. +/// (Why does Parquet do all this instead of just storing array lengths and null masks? I'm not +/// really sure.) 
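+///
+/// A small worked example: for a column Array(Nullable(Int64)) holding the rows
+/// [[1, NULL], [], [2]], this code produces max_rep = 1, max_def = 2,
+/// rep = [0, 1, 0, 0], def = [2, 1, 0, 2], and primitive_column filtered down to [1, 2].
+/// Reading it back: def == 2 marks a real value, def == 1 a null element, def == 0 an empty
+/// array, and rep == 0 marks the first entry of each row.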
+/// +/// We calculate the levels recursively, from inner to outer columns. +/// This means scanning the whole array for each Array/Nullable nesting level, which is probably not +/// the most efficient way to do it. But there's usually at most one nesting level, so it's fine. +/// +/// Most of this is moot because ClickHouse doesn't support nullable arrays or tuples right now, so +/// almost none of the tricky cases can happen. We implement it in full generality anyway (mostly +/// because I only learned the previous sentence after writing most of the code). + + +namespace DB::ErrorCodes +{ + extern const int UNKNOWN_TYPE; + extern const int TOO_DEEP_RECURSION; // I'm 14 and this is deep + extern const int UNKNOWN_COMPRESSION_METHOD; + extern const int LOGICAL_ERROR; +} + +namespace DB::Parquet +{ + +/// Thrift structs that Parquet uses for various metadata inside the parquet file. +namespace parq = parquet::format; + +namespace +{ + +void assertNoDefOverflow(ColumnChunkWriteState & s) +{ + if (s.max_def == UINT8_MAX) + throw Exception(ErrorCodes::TOO_DEEP_RECURSION, + "Column has more than 255 levels of nested Array/Nullable. Impressive! Unfortunately, " + "this is not supported by this Parquet encoder (but is supported by Parquet, if you " + "really need this for some reason)."); +} + +void updateRepDefLevelsAndFilterColumnForNullable(ColumnChunkWriteState & s, const NullMap & null_map) +{ + /// Increment definition levels for non-nulls. + /// Filter the column to contain only non-null values. + + assertNoDefOverflow(s); + ++s.max_def; + + /// Normal case: no arrays or nullables inside this nullable. + if (s.max_def == 1) + { + chassert(s.def.empty()); + s.def.resize(null_map.size()); + for (size_t i = 0; i < s.def.size(); ++i) + s.def[i] = !null_map[i]; + + /// We could be more efficient with this: + /// * Instead of doing the filter() here, we could defer it to writeColumnChunkBody(), at + /// least in the simple case of Nullable(Primitive). Then it'll parallelize if the table + /// consists of one big tuple. + /// * Instead of filtering explicitly, we could build filtering into the data encoder. + /// * Instead of filling out the `def` values above, we could point to null_map and build + /// the '!' into the encoder. + /// None of these seem worth the complexity right now. + s.primitive_column = s.primitive_column->filter(s.def, /*result_size_hint*/ -1); + + return; + } + + /// Weird general case: Nullable(Array), Nullable(Nullable), or any arbitrary nesting like that. + /// This is currently not allowed in ClickHouse, but let's support it anyway just in case. + + IColumn::Filter filter; + size_t row_idx = static_cast(-1); + for (size_t i = 0; i < s.def.size(); ++i) + { + row_idx += s.max_rep == 0 || s.rep[i] == 0; + if (s.def[i] == s.max_def - 1) + filter.push_back(!null_map[row_idx]); + s.def[i] += !null_map[row_idx]; + } + s.primitive_column = s.primitive_column->filter(filter, /*result_size_hint*/ -1); +} + +void updateRepDefLevelsForArray(ColumnChunkWriteState & s, const IColumn::Offsets & offsets) +{ + /// Increment all definition levels. + /// For non-first elements of arrays, increment repetition levels. + /// For empty arrays, insert a zero into repetition and definition levels arrays. + + assertNoDefOverflow(s); + ++s.max_def; + ++s.max_rep; + + /// Common case: no arrays or nullables inside this array. 
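+    /// (def and rep start out empty here, so every element gets def = 1; rep is 1 except at the
+    /// first element of each row, and each empty array contributes a single def = 0, rep = 0 entry.)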
+ if (s.max_rep == 1 && s.max_def == 1) + { + s.def.resize_fill(s.primitive_column->size(), 1); + s.rep.resize_fill(s.primitive_column->size(), 1); + size_t i = 0; + for (ssize_t row = 0; row < static_cast(offsets.size()); ++row) + { + size_t n = offsets[row] - offsets[row - 1]; + if (n) + { + s.rep[i] = 0; + i += n; + } + else + { + s.def.push_back(1); + s.rep.push_back(1); + s.def[i] = 0; + s.rep[i] = 0; + i += 1; + } + } + return; + } + + /// General case: Array(Array), Array(Nullable), or any arbitrary nesting like that. + + for (auto & x : s.def) + ++x; + + if (s.max_rep == 1) + s.rep.resize_fill(s.def.size(), 1); + else + for (auto & x : s.rep) + ++x; + + PaddedPODArray mask(s.def.size(), 1); // for inserting zeroes to rep and def + size_t i = 0; // in the input (s.def/s.rep) + size_t empty_arrays = 0; + for (ssize_t row = 0; row < static_cast(offsets.size()); ++row) + { + size_t n = offsets[row] - offsets[row - 1]; + if (n) + { + /// Un-increment the first rep of the array. + /// Skip n "items" in the nested column; first element of each item has rep = 1 + /// (we incremented it above). + chassert(s.rep[i] == 1); + --s.rep[i]; + do + { + ++i; + if (i == s.rep.size()) + { + --n; + chassert(n == 0); + break; + } + n -= s.rep[i] == 1; + } while (n); + } + else + { + mask.push_back(1); + mask[i + empty_arrays] = 0; + ++empty_arrays; + } + } + + if (empty_arrays != 0) + { + expandDataByMask(s.def, mask, false); + expandDataByMask(s.rep, mask, false); + } +} + +parq::CompressionCodec::type compressionMethodToParquet(CompressionMethod c) +{ + switch (c) + { + case CompressionMethod::None: return parq::CompressionCodec::UNCOMPRESSED; + case CompressionMethod::Snappy: return parq::CompressionCodec::SNAPPY; + case CompressionMethod::Gzip: return parq::CompressionCodec::GZIP; + case CompressionMethod::Brotli: return parq::CompressionCodec::BROTLI; + case CompressionMethod::Lz4: return parq::CompressionCodec::LZ4_RAW; + case CompressionMethod::Zstd: return parq::CompressionCodec::ZSTD; + + default: + throw Exception(ErrorCodes::UNKNOWN_COMPRESSION_METHOD, "Compression method {} is not supported by Parquet", toContentEncodingName(c)); + } +} + +/// Depth-first traversal of the schema tree for this column. +void prepareColumnRecursive( + ColumnPtr column, DataTypePtr type, const std::string & name, const WriteOptions & options, + ColumnChunkWriteStates & states, SchemaElements & schemas); + +void preparePrimitiveColumn(ColumnPtr column, DataTypePtr type, const std::string & name, + const WriteOptions & options, ColumnChunkWriteStates & states, SchemaElements & schemas) +{ + /// Add physical column info. + auto & state = states.emplace_back(); + state.primitive_column = column; + state.compression = options.compression; + + state.column_chunk.__isset.meta_data = true; + state.column_chunk.meta_data.__set_path_in_schema({name}); + state.column_chunk.meta_data.__set_codec(compressionMethodToParquet(state.compression)); + + /// Add logical schema leaf. + auto & schema = schemas.emplace_back(); + schema.__set_repetition_type(parq::FieldRepetitionType::REQUIRED); + schema.__set_name(name); + + /// Convert the type enums. 
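+    /// For instance, ClickHouse UInt16 maps to physical type INT32 with converted type UINT_16
+    /// and logical type INTEGER(bitWidth = 16, isSigned = false); see the switch below.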
+ + using T = parq::Type; + using C = parq::ConvertedType; + + auto types = [&](T::type type_, std::optional converted = std::nullopt, std::optional logical = std::nullopt) + { + state.column_chunk.meta_data.__set_type(type_); + schema.__set_type(type_); + if (converted) + schema.__set_converted_type(*converted); + if (logical) + schema.__set_logicalType(*logical); + }; + + auto int_type = [](Int8 bits, bool signed_) + { + parq::LogicalType t; + t.__isset.INTEGER = true; + t.INTEGER.__set_bitWidth(bits); + t.INTEGER.__set_isSigned(signed_); + return t; + }; + + auto fixed_string = [&](size_t size, std::optional converted = std::nullopt, std::optional logical = std::nullopt) + { + state.column_chunk.meta_data.__set_type(parq::Type::FIXED_LEN_BYTE_ARRAY); + schema.__set_type(parq::Type::FIXED_LEN_BYTE_ARRAY); + schema.__set_type_length(static_cast(size)); + if (converted) + schema.__set_converted_type(*converted); + if (logical) + schema.__set_logicalType(*logical); + }; + + auto decimal = [&](Int32 bytes, UInt32 precision, UInt32 scale) + { + state.column_chunk.meta_data.__set_type(parq::Type::FIXED_LEN_BYTE_ARRAY); + schema.__set_type(parq::Type::FIXED_LEN_BYTE_ARRAY); + schema.__set_type_length(bytes); + schema.__set_scale(static_cast(scale)); + schema.__set_precision(static_cast(precision)); + schema.__set_converted_type(parq::ConvertedType::DECIMAL); + parq::DecimalType d; + d.__set_scale(static_cast(scale)); + d.__set_precision(static_cast(precision)); + parq::LogicalType t; + t.__set_DECIMAL(d); + schema.__set_logicalType(t); + }; + + switch (type->getTypeId()) + { + case TypeIndex::UInt8: types(T::INT32, C::UINT_8 , int_type(8 , false)); break; + case TypeIndex::UInt16: types(T::INT32, C::UINT_16, int_type(16, false)); break; + case TypeIndex::UInt32: types(T::INT32, C::UINT_32, int_type(32, false)); break; + case TypeIndex::UInt64: types(T::INT64, C::UINT_64, int_type(64, false)); break; + case TypeIndex::Int8: types(T::INT32, C::INT_8 , int_type(8 , true)); break; + case TypeIndex::Int16: types(T::INT32, C::INT_16 , int_type(16, true)); break; + case TypeIndex::Int32: types(T::INT32); break; + case TypeIndex::Int64: types(T::INT64); break; + case TypeIndex::Float32: types(T::FLOAT); break; + case TypeIndex::Float64: types(T::DOUBLE); break; + + /// These don't have suitable parquet logical types, so we write them as plain numbers. + /// (Parquet has "enums" but they're just strings, with nowhere to declare all possible enum + /// values in advance as part of the data type.) 
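+        /// E.g. Enum8('a' = 1, 'b' = 2) is written as a plain signed 8-bit integer column, so a
+        /// Parquet reader sees 1 and 2 rather than 'a' and 'b'.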
+ case TypeIndex::Enum8: types(T::INT32, C::INT_8 , int_type(8 , true)); break; // Int8 + case TypeIndex::Enum16: types(T::INT32, C::INT_16 , int_type(16, true)); break; // Int16 + case TypeIndex::IPv4: types(T::INT32, C::UINT_32, int_type(32, false)); break; // UInt32 + case TypeIndex::Date: types(T::INT32, C::UINT_16, int_type(16, false)); break; // UInt16 + case TypeIndex::DateTime: types(T::INT32, C::UINT_32, int_type(32, false)); break; // UInt32 + + case TypeIndex::Date32: + { + parq::LogicalType t; + t.__set_DATE({}); + types(T::INT32, C::DATE, t); + break; + } + + case TypeIndex::DateTime64: + { + std::optional converted; + std::optional unit; + switch (assert_cast(*type).getScale()) + { + case 3: + converted = parq::ConvertedType::TIMESTAMP_MILLIS; + unit.emplace().__set_MILLIS({}); + break; + case 6: + converted = parq::ConvertedType::TIMESTAMP_MICROS; + unit.emplace().__set_MICROS({}); + break; + case 9: + unit.emplace().__set_NANOS({}); + break; + } + + std::optional t; + if (unit) + { + parq::TimestampType tt; + tt.__set_isAdjustedToUTC(true); + tt.__set_unit(*unit); + t.emplace().__set_TIMESTAMP(tt); + } + types(T::INT64, converted, t); + break; + } + + case TypeIndex::String: + case TypeIndex::FixedString: + { + if (options.output_fixed_string_as_fixed_byte_array && + type->getTypeId() == TypeIndex::FixedString) + { + fixed_string(assert_cast(*type).getN()); + } + else if (options.output_string_as_string) + { + parq::LogicalType t; + t.__set_STRING({}); + types(T::BYTE_ARRAY, C::UTF8, t); + } + else + { + types(T::BYTE_ARRAY); + } + break; + } + + /// Parquet doesn't have logical types for these. + case TypeIndex::UInt128: fixed_string(16); break; + case TypeIndex::UInt256: fixed_string(32); break; + case TypeIndex::Int128: fixed_string(16); break; + case TypeIndex::Int256: fixed_string(32); break; + case TypeIndex::IPv6: fixed_string(16); break; + + case TypeIndex::Decimal32: decimal(4 , getDecimalPrecision(*type), getDecimalScale(*type)); break; + case TypeIndex::Decimal64: decimal(8 , getDecimalPrecision(*type), getDecimalScale(*type)); break; + case TypeIndex::Decimal128: decimal(16, getDecimalPrecision(*type), getDecimalScale(*type)); break; + case TypeIndex::Decimal256: decimal(32, getDecimalPrecision(*type), getDecimalScale(*type)); break; + + default: + throw Exception(ErrorCodes::UNKNOWN_TYPE, "Internal type '{}' of column '{}' is not supported for conversion into Parquet data format.", type->getFamilyName(), name); + } +} + +void prepareColumnNullable( + ColumnPtr column, DataTypePtr type, const std::string & name, const WriteOptions & options, + ColumnChunkWriteStates & states, SchemaElements & schemas) +{ + const ColumnNullable * column_nullable = assert_cast(column.get()); + ColumnPtr nested_column = column_nullable->getNestedColumnPtr(); + DataTypePtr nested_type = assert_cast(type.get())->getNestedType(); + const NullMap & null_map = column_nullable->getNullMapData(); + + size_t child_states_begin = states.size(); + size_t child_schema_idx = schemas.size(); + + prepareColumnRecursive(nested_column, nested_type, name, options, states, schemas); + + if (schemas[child_schema_idx].repetition_type == parq::FieldRepetitionType::REQUIRED) + { + /// Normal case: we just slap a FieldRepetitionType::OPTIONAL onto the nested column. + schemas[child_schema_idx].repetition_type = parq::FieldRepetitionType::OPTIONAL; + } + else + { + /// Weird case: Nullable(Nullable(...)). Or Nullable(Tuple(Nullable(...))), etc. 
+ /// This is probably not allowed in ClickHouse, but let's support it just in case. + auto & schema = *schemas.insert(schemas.begin() + child_schema_idx, {}); + schema.__set_repetition_type(parq::FieldRepetitionType::OPTIONAL); + schema.__set_name("nullable"); + schema.__set_num_children(1); + for (size_t i = child_states_begin; i < states.size(); ++i) + { + Strings & path = states[i].column_chunk.meta_data.path_in_schema; + path.insert(path.begin(), schema.name + "."); + } + } + + for (size_t i = child_states_begin; i < states.size(); ++i) + { + auto & s = states[i]; + updateRepDefLevelsAndFilterColumnForNullable(s, null_map); + } +} + +void prepareColumnTuple( + ColumnPtr column, DataTypePtr type, const std::string & name, const WriteOptions & options, + ColumnChunkWriteStates & states, SchemaElements & schemas) +{ + const auto * column_tuple = assert_cast(column.get()); + const auto * type_tuple = assert_cast(type.get()); + + auto & tuple_schema = schemas.emplace_back(); + tuple_schema.__set_repetition_type(parq::FieldRepetitionType::REQUIRED); + tuple_schema.__set_name(name); + tuple_schema.__set_num_children(static_cast(type_tuple->getElements().size())); + + size_t child_states_begin = states.size(); + + for (size_t i = 0; i < type_tuple->getElements().size(); ++i) + prepareColumnRecursive(column_tuple->getColumnPtr(i), type_tuple->getElement(i), type_tuple->getNameByPosition(i + 1), options, states, schemas); + + for (size_t i = child_states_begin; i < states.size(); ++i) + { + Strings & path = states[i].column_chunk.meta_data.path_in_schema; + /// O(nesting_depth^2), but who cares. + path.insert(path.begin(), name); + } +} + +void prepareColumnArray( + ColumnPtr column, DataTypePtr type, const std::string & name, const WriteOptions & options, + ColumnChunkWriteStates & states, SchemaElements & schemas) +{ + const auto * column_array = assert_cast(column.get()); + ColumnPtr nested_column = column_array->getDataPtr(); + DataTypePtr nested_type = assert_cast(type.get())->getNestedType(); + const auto & offsets = column_array->getOffsets(); + + /// Schema for lists https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#lists + /// + /// required group `name` (List): + /// repeated group "list": + /// "element" + + /// Add the groups schema. + + schemas.emplace_back(); + schemas.emplace_back(); + auto & list_schema = schemas[schemas.size() - 2]; + auto & item_schema = schemas[schemas.size() - 1]; + + list_schema.__set_repetition_type(parq::FieldRepetitionType::REQUIRED); + list_schema.__set_name(name); + list_schema.__set_num_children(1); + list_schema.__set_converted_type(parq::ConvertedType::LIST); + list_schema.__isset.logicalType = true; + list_schema.logicalType.__set_LIST({}); + + item_schema.__set_repetition_type(parq::FieldRepetitionType::REPEATED); + item_schema.__set_name("list"); + item_schema.__set_num_children(1); + + std::array path_prefix = {list_schema.name, item_schema.name}; + size_t child_states_begin = states.size(); + + /// Recurse. + prepareColumnRecursive(nested_column, nested_type, "element", options, states, schemas); + + /// Update repetition+definition levels and fully-qualified column names (x -> myarray.list.x). 
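+    /// (path_prefix is {`name`, "list"} here, matching the two group levels declared above.)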
+ for (size_t i = child_states_begin; i < states.size(); ++i) + { + Strings & path = states[i].column_chunk.meta_data.path_in_schema; + path.insert(path.begin(), path_prefix.begin(), path_prefix.end()); + + updateRepDefLevelsForArray(states[i], offsets); + } +} + +void prepareColumnMap( + ColumnPtr column, DataTypePtr type, const std::string & name, const WriteOptions & options, + ColumnChunkWriteStates & states, SchemaElements & schemas) +{ + const auto * column_map = assert_cast(column.get()); + const auto * column_array = &column_map->getNestedColumn(); + const auto & offsets = column_array->getOffsets(); + ColumnPtr column_tuple = column_array->getDataPtr(); + + const auto * map_type = assert_cast(type.get()); + DataTypePtr tuple_type = std::make_shared(map_type->getKeyValueTypes(), Strings{"key", "value"}); + + /// Map is an array of tuples + /// https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#maps + /// + /// required group `name` (Map): + /// repeated group "key_value": + /// reqiured <...> "key" + /// <...> "value" + + auto & map_schema = schemas.emplace_back(); + map_schema.__set_repetition_type(parq::FieldRepetitionType::REQUIRED); + map_schema.__set_name(name); + map_schema.__set_num_children(1); + map_schema.__set_converted_type(parq::ConvertedType::MAP); + map_schema.__set_logicalType({}); + map_schema.logicalType.__set_MAP({}); + + size_t tuple_schema_idx = schemas.size(); + size_t child_states_begin = states.size(); + + prepareColumnTuple(column_tuple, tuple_type, "key_value", options, states, schemas); + + schemas[tuple_schema_idx].__set_repetition_type(parq::FieldRepetitionType::REPEATED); + schemas[tuple_schema_idx].__set_converted_type(parq::ConvertedType::MAP_KEY_VALUE); + + for (size_t i = child_states_begin; i < states.size(); ++i) + { + Strings & path = states[i].column_chunk.meta_data.path_in_schema; + path.insert(path.begin(), name); + + updateRepDefLevelsForArray(states[i], offsets); + } +} + +void prepareColumnRecursive( + ColumnPtr column, DataTypePtr type, const std::string & name, const WriteOptions & options, + ColumnChunkWriteStates & states, SchemaElements & schemas) +{ + switch (type->getTypeId()) + { + case TypeIndex::Nullable: prepareColumnNullable(column, type, name, options, states, schemas); break; + case TypeIndex::Array: prepareColumnArray(column, type, name, options, states, schemas); break; + case TypeIndex::Tuple: prepareColumnTuple(column, type, name, options, states, schemas); break; + case TypeIndex::Map: prepareColumnMap(column, type, name, options, states, schemas); break; + case TypeIndex::LowCardinality: + { + auto nested_type = assert_cast(*type).getDictionaryType(); + if (nested_type->isNullable()) + prepareColumnNullable( + column->convertToFullColumnIfLowCardinality(), nested_type, name, options, states, schemas); + else + /// Use nested data type, but keep ColumnLowCardinality. The encoder can deal with it. 
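+                /// E.g. LowCardinality(String) is declared in the schema as a plain BYTE_ARRAY
+                /// column; presumably the encoder maps the dictionary and indexes onto Parquet's
+                /// dictionary encoding instead of materializing a full column first.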
+ preparePrimitiveColumn(column, nested_type, name, options, states, schemas); + break; + } + default: + preparePrimitiveColumn(column, type, name, options, states, schemas); + break; + } +} + +} + +SchemaElements convertSchema(const Block & sample, const WriteOptions & options) +{ + SchemaElements schema; + auto & root = schema.emplace_back(); + root.__set_name("schema"); + root.__set_num_children(static_cast(sample.columns())); + + for (auto & c : sample) + prepareColumnForWrite(c.column, c.type, c.name, options, nullptr, &schema); + + return schema; +} + +void prepareColumnForWrite( + ColumnPtr column, DataTypePtr type, const std::string & name, const WriteOptions & options, + ColumnChunkWriteStates * out_columns_to_write, SchemaElements * out_schema) +{ + if (column->size() == 0 && out_columns_to_write != nullptr) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty column passed to Parquet encoder"); + + ColumnChunkWriteStates states; + SchemaElements schemas; + prepareColumnRecursive(column, type, name, options, states, schemas); + + if (out_columns_to_write) + for (auto & s : states) + out_columns_to_write->push_back(std::move(s)); + if (out_schema) + out_schema->insert(out_schema->end(), schemas.begin(), schemas.end()); + + if (column->empty()) + states.clear(); +} + +} diff --git a/src/Processors/Formats/Impl/Parquet/ThriftUtil.cpp b/src/Processors/Formats/Impl/Parquet/ThriftUtil.cpp new file mode 100644 index 00000000000..2a99b028ae0 --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/ThriftUtil.cpp @@ -0,0 +1,35 @@ +#include +#include + +namespace DB::Parquet +{ + +class WriteBufferTransport : public apache::thrift::transport::TTransport +{ +public: + WriteBuffer & out; + size_t bytes = 0; + + explicit WriteBufferTransport(WriteBuffer & out_) : out(out_) {} + + void write(const uint8_t* buf, uint32_t len) + { + out.write(reinterpret_cast(buf), len); + bytes += len; + } +}; + +template +size_t serializeThriftStruct(const T & obj, WriteBuffer & out) +{ + auto trans = std::make_shared(out); + auto proto = apache::thrift::protocol::TCompactProtocolFactoryT().getProtocol(trans); + obj.write(proto.get()); + return trans->bytes; +} + +template size_t serializeThriftStruct(const parquet::format::PageHeader &, WriteBuffer & out); +template size_t serializeThriftStruct(const parquet::format::ColumnChunk &, WriteBuffer & out); +template size_t serializeThriftStruct(const parquet::format::FileMetaData &, WriteBuffer & out); + +} diff --git a/src/Processors/Formats/Impl/Parquet/ThriftUtil.h b/src/Processors/Formats/Impl/Parquet/ThriftUtil.h new file mode 100644 index 00000000000..1efbe0002d4 --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/ThriftUtil.h @@ -0,0 +1,17 @@ +#pragma once + +#include // in contrib/arrow/cpp/src/ , generated from parquet.thrift +#include + +namespace DB::Parquet +{ + +/// Returns number of bytes written. 
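+/// Typical use (sketch): fill in a parquet::format::PageHeader `header`, then
+///     size_t header_size = serializeThriftStruct(header, out);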
+template +size_t serializeThriftStruct(const T & obj, WriteBuffer & out); + +extern template size_t serializeThriftStruct(const parquet::format::PageHeader &, WriteBuffer & out); +extern template size_t serializeThriftStruct(const parquet::format::ColumnChunk &, WriteBuffer & out); +extern template size_t serializeThriftStruct(const parquet::format::FileMetaData &, WriteBuffer & out); + +} diff --git a/src/Processors/Formats/Impl/Parquet/Write.cpp b/src/Processors/Formats/Impl/Parquet/Write.cpp new file mode 100644 index 00000000000..a29bb81f8dc --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/Write.cpp @@ -0,0 +1,816 @@ +#include "Processors/Formats/Impl/Parquet/Write.h" +#include "Processors/Formats/Impl/Parquet/ThriftUtil.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "config_version.h" + +namespace DB::ErrorCodes +{ + extern const int CANNOT_COMPRESS; + extern const int LIMIT_EXCEEDED; + extern const int LOGICAL_ERROR; +} + +namespace DB::Parquet +{ + +namespace parq = parquet::format; + +namespace +{ + +template +struct StatisticsNumeric +{ + T min = std::numeric_limits::max(); + T max = std::numeric_limits::min(); + + void add(SourceType x) + { + min = std::min(min, static_cast(x)); + max = std::max(max, static_cast(x)); + } + + void merge(const StatisticsNumeric & s) + { + min = std::min(min, s.min); + max = std::max(max, s.max); + } + + void clear() { *this = {}; } + + parq::Statistics get(const WriteOptions &) + { + parq::Statistics s; + s.__isset.min_value = s.__isset.max_value = true; + s.min_value.resize(sizeof(T)); + s.max_value.resize(sizeof(T)); + memcpy(s.min_value.data(), &min, sizeof(T)); + memcpy(s.max_value.data(), &max, sizeof(T)); + + if constexpr (std::is_signed::value) + { + s.__set_min(s.min_value); + s.__set_max(s.max_value); + } + return s; + } +}; + +struct StatisticsFixedString +{ + size_t fixed_string_size = UINT64_MAX; + const uint8_t * min = nullptr; + const uint8_t * max = nullptr; + + void add(parquet::FixedLenByteArray a) + { + chassert(fixed_string_size != UINT64_MAX); + addMin(a.ptr); + addMax(a.ptr); + } + + void merge(const StatisticsFixedString & s) + { + chassert(fixed_string_size == UINT64_MAX || fixed_string_size == s.fixed_string_size); + fixed_string_size = s.fixed_string_size; + if (s.min == nullptr) + return; + addMin(s.min); + addMax(s.max); + } + + void clear() { min = max = nullptr; } + + parq::Statistics get(const WriteOptions & options) + { + parq::Statistics s; + if (min == nullptr || fixed_string_size > options.max_statistics_size) + return s; + s.__set_min_value(std::string(reinterpret_cast(min), fixed_string_size)); + s.__set_max_value(std::string(reinterpret_cast(max), fixed_string_size)); + return s; + } + + void addMin(const uint8_t * p) + { + if (min == nullptr || memcmp(p, min, fixed_string_size) < 0) + min = p; + } + void addMax(const uint8_t * p) + { + if (max == nullptr || memcmp(p, max, fixed_string_size) > 0) + max = p; + } +}; + +struct StatisticsString +{ + parquet::ByteArray min; + parquet::ByteArray max; + + void add(parquet::ByteArray x) + { + addMin(x); + addMax(x); + } + + void merge(const StatisticsString & s) + { + if (s.min.ptr == nullptr) + return; + addMin(s.min); + addMax(s.max); + } + + void clear() { *this = {}; } + + parq::Statistics get(const WriteOptions & options) + { + parq::Statistics s; + if (min.ptr == nullptr) + return s; + if (static_cast(min.len) <= options.max_statistics_size) + 
s.__set_min_value(std::string(reinterpret_cast<const char *>(min.ptr), static_cast<size_t>(min.len)));
+        if (static_cast<size_t>(max.len) <= options.max_statistics_size)
+            s.__set_max_value(std::string(reinterpret_cast<const char *>(max.ptr), static_cast<size_t>(max.len)));
+        return s;
+    }
+
+    void addMin(parquet::ByteArray x)
+    {
+        if (min.ptr == nullptr || compare(x, min) < 0)
+            min = x;
+    }
+
+    void addMax(parquet::ByteArray x)
+    {
+        if (max.ptr == nullptr || compare(x, max) > 0)
+            max = x;
+    }
+
+    static int compare(parquet::ByteArray a, parquet::ByteArray b)
+    {
+        int t = memcmp(a.ptr, b.ptr, std::min(a.len, b.len));
+        if (t != 0)
+            return t;
+        return a.len - b.len;
+    }
+};
+
+/// The column usually needs to be converted to one of the Parquet physical types, e.g. UInt16 -> Int32
+/// or [element of ColumnString] -> std::string_view.
+/// We do this conversion in small batches rather than all at once, just before encoding the batch,
+/// in hopes of getting better performance through cache locality.
+/// The Converter* structs below are responsible for that.
+/// When conversion is not needed, getBatch() will just return a pointer into the original data.
+
+template <typename Col, typename To, typename MinMaxType = typename std::conditional<std::is_signed<typename Col::Container::value_type>::value,
+    To,
+    typename std::make_unsigned<To>::type>::type>
+struct ConverterNumeric
+{
+    using Statistics = StatisticsNumeric<MinMaxType, To>;
+
+    const Col & column;
+    PODArray<To> buf;
+
+    explicit ConverterNumeric(const ColumnPtr & c) : column(assert_cast<const Col &>(*c)) {}
+
+    const To * getBatch(size_t offset, size_t count)
+    {
+        if constexpr (sizeof(*column.getData().data()) == sizeof(To))
+            return reinterpret_cast<const To *>(column.getData().data() + offset);
+        else
+        {
+            buf.resize(count);
+            for (size_t i = 0; i < count; ++i)
+                buf[i] = static_cast<To>(column.getData()[offset + i]);
+            return buf.data();
+        }
+    }
+};
+
+struct ConverterString
+{
+    using Statistics = StatisticsString;
+
+    const ColumnString & column;
+    PODArray<parquet::ByteArray> buf;
+
+    explicit ConverterString(const ColumnPtr & c) : column(assert_cast<const ColumnString &>(*c)) {}
+
+    const parquet::ByteArray * getBatch(size_t offset, size_t count)
+    {
+        buf.resize(count);
+        for (size_t i = 0; i < count; ++i)
+        {
+            StringRef s = column.getDataAt(offset + i);
+            buf[i] = parquet::ByteArray(static_cast<UInt32>(s.size), reinterpret_cast<const uint8_t *>(s.data));
+        }
+        return buf.data();
+    }
+};
+
+struct ConverterFixedString
+{
+    using Statistics = StatisticsFixedString;
+
+    const ColumnFixedString & column;
+    PODArray<parquet::FixedLenByteArray> buf;
+
+    explicit ConverterFixedString(const ColumnPtr & c) : column(assert_cast<const ColumnFixedString &>(*c)) {}
+
+    const parquet::FixedLenByteArray * getBatch(size_t offset, size_t count)
+    {
+        buf.resize(count);
+        for (size_t i = 0; i < count; ++i)
+            buf[i].ptr = reinterpret_cast<const uint8_t *>(column.getChars().data() + (offset + i) * column.getN());
+        return buf.data();
+    }
+
+    size_t fixedStringSize() { return column.getN(); }
+};
+
+struct ConverterFixedStringAsString
+{
+    using Statistics = StatisticsString;
+
+    const ColumnFixedString & column;
+    PODArray<parquet::ByteArray> buf;
+
+    explicit ConverterFixedStringAsString(const ColumnPtr & c) : column(assert_cast<const ColumnFixedString &>(*c)) {}
+
+    const parquet::ByteArray * getBatch(size_t offset, size_t count)
+    {
+        buf.resize(count);
+        for (size_t i = 0; i < count; ++i)
+            buf[i] = parquet::ByteArray(static_cast<UInt32>(column.getN()), reinterpret_cast<const uint8_t *>(column.getChars().data() + (offset + i) * column.getN()));
+        return buf.data();
+    }
+};
+
+template <typename T>
+struct ConverterNumberAsFixedString
+{
+    /// Calculate min/max statistics for little-endian fixed strings, not numbers, because parquet
+    /// doesn't know it's numbers.
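+    /// For example, a UInt128 column is written as FIXED_LEN_BYTE_ARRAY(16): each value is
+    /// passed through as its 16 little-endian bytes, and min/max are plain byte-string comparisons.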
+ using Statistics = StatisticsFixedString; + + const ColumnVector & column; + PODArray buf; + + explicit ConverterNumberAsFixedString(const ColumnPtr & c) : column(assert_cast &>(*c)) {} + + const parquet::FixedLenByteArray * getBatch(size_t offset, size_t count) + { + buf.resize(count); + for (size_t i = 0; i < count; ++i) + buf[i].ptr = reinterpret_cast(column.getData().data() + offset + i); + return buf.data(); + } + + size_t fixedStringSize() { return sizeof(T); } +}; + +/// Like ConverterNumberAsFixedString, but converts to big-endian. Because that's the byte order +/// Parquet uses for decimal types and literally nothing else, for some reason. +template +struct ConverterDecimal +{ + using Statistics = StatisticsFixedString; + + const ColumnDecimal & column; + PODArray data_buf; + PODArray ptr_buf; + + explicit ConverterDecimal(const ColumnPtr & c) : column(assert_cast &>(*c)) {} + + const parquet::FixedLenByteArray * getBatch(size_t offset, size_t count) + { + data_buf.resize(count * sizeof(T)); + ptr_buf.resize(count); + memcpy(data_buf.data(), reinterpret_cast(column.getData().data() + offset), count * sizeof(T)); + for (size_t i = 0; i < count; ++i) + { + std::reverse(data_buf.data() + i * sizeof(T), data_buf.data() + (i + 1) * sizeof(T)); + ptr_buf[i].ptr = data_buf.data() + i * sizeof(T); + } + return ptr_buf.data(); + } + + size_t fixedStringSize() { return sizeof(T); } +}; + +/// Returns either `source` or `scratch`. +PODArray & compress(PODArray & source, PODArray & scratch, CompressionMethod method) +{ + /// We could use wrapWriteBufferWithCompressionMethod() for everything, but I worry about the + /// overhead of creating a bunch of WriteBuffers on each page (thousands of values). + switch (method) + { + case CompressionMethod::None: + return source; + + case CompressionMethod::Lz4: + { + #pragma clang diagnostic push + #pragma clang diagnostic ignored "-Wold-style-cast" + + size_t max_dest_size = LZ4_COMPRESSBOUND(source.size()); + + #pragma clang diagnostic pop + + if (max_dest_size > std::numeric_limits::max()) + throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress column of size {}", formatReadableSizeWithBinarySuffix(source.size())); + + scratch.resize(max_dest_size); + + int compressed_size = LZ4_compress_default( + source.data(), + scratch.data(), + static_cast(source.size()), + static_cast(max_dest_size)); + + scratch.resize(static_cast(compressed_size)); + return scratch; + } + + default: + { + auto dest_buf = std::make_unique>>(scratch); + auto compressed_buf = wrapWriteBufferWithCompressionMethod( + std::move(dest_buf), + method, + /*level*/ 3, + source.size(), + /*existing_memory*/ source.data()); + chassert(compressed_buf->position() == source.data()); + chassert(compressed_buf->available() == source.size()); + compressed_buf->position() += source.size(); + compressed_buf->finalize(); + return scratch; + } + } +} + +void encodeRepDefLevelsRLE(const UInt8 * data, size_t size, UInt8 max_level, PODArray & out) +{ + using arrow::util::RleEncoder; + + chassert(max_level > 0); + size_t offset = out.size(); + size_t prefix_size = sizeof(Int32); + + int bit_width = bitScanReverse(max_level) + 1; + int max_rle_size = RleEncoder::MaxBufferSize(bit_width, static_cast(size)) + + RleEncoder::MinBufferSize(bit_width); + + out.resize(offset + prefix_size + max_rle_size); + + RleEncoder encoder(reinterpret_cast(out.data() + offset + prefix_size), max_rle_size, bit_width); + for (size_t i = 0; i < size; ++i) + encoder.Put(data[i]); + encoder.Flush(); + Int32 len = 
encoder.len(); + + memcpy(out.data() + offset, &len, prefix_size); + out.resize(offset + prefix_size + len); +} + +void addToEncodingsUsed(ColumnChunkWriteState & s, parq::Encoding::type e) +{ + if (!std::count(s.column_chunk.meta_data.encodings.begin(), s.column_chunk.meta_data.encodings.end(), e)) + s.column_chunk.meta_data.encodings.push_back(e); +} + +void writePage(const parq::PageHeader & header, const PODArray & compressed, ColumnChunkWriteState & s, WriteBuffer & out) +{ + size_t header_size = serializeThriftStruct(header, out); + out.write(compressed.data(), compressed.size()); + + /// Remember first data page and first dictionary page. + if (header.__isset.data_page_header && s.column_chunk.meta_data.data_page_offset == -1) + s.column_chunk.meta_data.__set_data_page_offset(s.column_chunk.meta_data.total_compressed_size); + if (header.__isset.dictionary_page_header && !s.column_chunk.meta_data.__isset.dictionary_page_offset) + s.column_chunk.meta_data.__set_dictionary_page_offset(s.column_chunk.meta_data.total_compressed_size); + + s.column_chunk.meta_data.total_uncompressed_size += header.uncompressed_page_size + header_size; + s.column_chunk.meta_data.total_compressed_size += header.compressed_page_size + header_size; +} + +template +void writeColumnImpl( + ColumnChunkWriteState & s, const WriteOptions & options, WriteBuffer & out, Converter && converter) +{ + size_t num_values = s.max_def > 0 ? s.def.size() : s.primitive_column->size(); + auto encoding = options.encoding; + + typename Converter::Statistics page_statistics; + typename Converter::Statistics total_statistics; + + /// We start with dictionary encoding, then switch to `encoding` (non-dictionary) if the + /// dictionary gets too big. That's how arrow does it too. + bool initially_used_dictionary = options.use_dictionary_encoding; + bool currently_using_dictionary = initially_used_dictionary; + + std::optional fixed_string_descr; + if constexpr (std::is_same::value) + { + /// This just communicates one number to MakeTypedEncoder(): the fixed string length. + fixed_string_descr.emplace(parquet::schema::PrimitiveNode::Make( + "", parquet::Repetition::REQUIRED, parquet::Type::FIXED_LEN_BYTE_ARRAY, + parquet::ConvertedType::NONE, static_cast(converter.fixedStringSize())), 0, 0); + + page_statistics.fixed_string_size = converter.fixedStringSize(); + } + + /// Could use an arena here (by passing a custom MemoryPool), to reuse memory across pages. + /// Alternatively, we could avoid using arrow's dictionary encoding code and leverage + /// ColumnLowCardinality instead. It would work basically the same way as what this function + /// currently does: add values to the ColumnRowCardinality (instead of `encoder`) in batches, + /// checking dictionary size after each batch; if it gets big, flush the dictionary and the + /// indices and switch to non-dictionary encoding. Feels like it could even be slightly less code. + auto encoder = parquet::MakeTypedEncoder( + // ignored if using dictionary + static_cast(encoding), + currently_using_dictionary, fixed_string_descr ? &*fixed_string_descr : nullptr); + + struct PageData + { + parq::PageHeader header; + PODArray data; + }; + std::vector dict_encoded_pages; // can't write them out until we have full dictionary + + /// Reused across pages to reduce number of allocations and improve locality. + PODArray encoded; + PODArray compressed_maybe; + + /// Start of current page. 
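+    /// (Each flushed page body is the concatenation of the RLE-encoded rep levels, def levels
+    /// and the encoded values; the level blocks carry a 4-byte length prefix, see
+    /// encodeRepDefLevelsRLE() above.)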
+ size_t def_offset = 0; // index in def and rep + size_t data_offset = 0; // index in primitive_column + + auto flush_page = [&](size_t def_count, size_t data_count) + { + encoded.clear(); + + /// Concatenate encoded rep, def, and data. + + if (s.max_rep > 0) + encodeRepDefLevelsRLE(s.rep.data() + def_offset, def_count, s.max_rep, encoded); + if (s.max_def > 0) + encodeRepDefLevelsRLE(s.def.data() + def_offset, def_count, s.max_def, encoded); + + std::shared_ptr values = encoder->FlushValues(); // resets it for next page + + encoded.resize(encoded.size() + values->size()); + memcpy(encoded.data() + encoded.size() - values->size(), values->data(), values->size()); + values.reset(); + + if (encoded.size() > INT32_MAX) + throw Exception(ErrorCodes::CANNOT_COMPRESS, "Uncompressed page is too big: {}", encoded.size()); + + size_t uncompressed_size = encoded.size(); + auto & compressed = compress(encoded, compressed_maybe, s.compression); + + if (compressed.size() > INT32_MAX) + throw Exception(ErrorCodes::CANNOT_COMPRESS, "Compressed page is too big: {}", compressed.size()); + + parq::PageHeader header; + header.__set_type(parq::PageType::DATA_PAGE); + header.__set_uncompressed_page_size(static_cast(uncompressed_size)); + header.__set_compressed_page_size(static_cast(compressed.size())); + header.__isset.data_page_header = true; + auto & d = header.data_page_header; + d.__set_num_values(static_cast(def_count)); + d.__set_encoding(currently_using_dictionary ? parq::Encoding::RLE_DICTIONARY : encoding); + d.__set_definition_level_encoding(parq::Encoding::RLE); + d.__set_repetition_level_encoding(parq::Encoding::RLE); + /// We could also put checksum in `header.crc`, but apparently no one uses it: + /// https://issues.apache.org/jira/browse/PARQUET-594 + + if (options.write_page_statistics) + { + d.__set_statistics(page_statistics.get(options)); + + if (s.max_def == 1 && s.max_rep == 0) + d.statistics.__set_null_count(static_cast(def_count - data_count)); + } + + total_statistics.merge(page_statistics); + page_statistics.clear(); + + if (currently_using_dictionary) + { + dict_encoded_pages.push_back({.header = std::move(header)}); + std::swap(dict_encoded_pages.back().data, compressed); + } + else + { + writePage(header, compressed, s, out); + } + + def_offset += def_count; + data_offset += data_count; + }; + + auto flush_dict = [&] -> bool + { + auto * dict_encoder = dynamic_cast *>(encoder.get()); + int dict_size = dict_encoder->dict_encoded_size(); + + encoded.resize(static_cast(dict_size)); + dict_encoder->WriteDict(reinterpret_cast(encoded.data())); + + auto & compressed = compress(encoded, compressed_maybe, s.compression); + + if (compressed.size() > INT32_MAX) + throw Exception(ErrorCodes::CANNOT_COMPRESS, "Compressed dictionary page is too big: {}", compressed.size()); + + parq::PageHeader header; + header.__set_type(parq::PageType::DICTIONARY_PAGE); + header.__set_uncompressed_page_size(dict_size); + header.__set_compressed_page_size(static_cast(compressed.size())); + header.__isset.dictionary_page_header = true; + header.dictionary_page_header.__set_num_values(dict_encoder->num_entries()); + header.dictionary_page_header.__set_encoding(parq::Encoding::PLAIN); + + writePage(header, compressed, s, out); + + for (auto & p : dict_encoded_pages) + writePage(p.header, p.data, s, out); + + dict_encoded_pages.clear(); + encoder.reset(); + + return true; + }; + + auto is_dict_too_big = [&] { + auto * dict_encoder = dynamic_cast *>(encoder.get()); + int dict_size = 
dict_encoder->dict_encoded_size(); + return static_cast(dict_size) >= options.dictionary_size_limit; + }; + + while (def_offset < num_values) + { + /// Pick enough data for a page. + size_t next_def_offset = def_offset; + size_t next_data_offset = data_offset; + while (true) + { + /// Bite off a batch of defs and corresponding data values. + size_t def_count = std::min(options.write_batch_size, num_values - next_def_offset); + size_t data_count = 0; + if (s.max_def == 0) + data_count = def_count; + else + for (size_t i = 0; i < def_count; ++i) + data_count += s.def[next_def_offset + i] == s.max_def; + + /// Encode the data (but not the levels yet), so that we can estimate its encoded size. + const typename ParquetDType::c_type * converted = converter.getBatch(next_data_offset, data_count); + + if (options.write_page_statistics || options.write_column_chunk_statistics) + for (size_t i = 0; i < data_count; ++i) + page_statistics.add(converted[i]); + + encoder->Put(converted, static_cast(data_count)); + + next_def_offset += def_count; + next_data_offset += data_count; + + if (currently_using_dictionary && is_dict_too_big()) + { + /// Fallback to non-dictionary encoding. + flush_page(next_def_offset - def_offset, next_data_offset - data_offset); + flush_dict(); + + currently_using_dictionary = false; + encoder = parquet::MakeTypedEncoder( + static_cast(encoding)); + break; + } + + if (next_def_offset == num_values || + static_cast(encoder->EstimatedDataEncodedSize()) >= options.data_page_size) + { + flush_page(next_def_offset - def_offset, next_data_offset - data_offset); + break; + } + } + } + + if (currently_using_dictionary) + flush_dict(); + + chassert(data_offset == s.primitive_column->size()); + + if (options.write_column_chunk_statistics) + { + s.column_chunk.meta_data.__set_statistics(total_statistics.get(options)); + + if (s.max_def == 1 && s.max_rep == 0) + s.column_chunk.meta_data.statistics.__set_null_count(static_cast(def_offset - data_offset)); + } + + /// Report which encodings we've used. + if (s.max_rep > 0 || s.max_def > 0) + addToEncodingsUsed(s, parq::Encoding::RLE); // levels + if (!currently_using_dictionary) + addToEncodingsUsed(s, encoding); // non-dictionary encoding + if (initially_used_dictionary) + { + addToEncodingsUsed(s, parq::Encoding::PLAIN); // dictionary itself + addToEncodingsUsed(s, parq::Encoding::RLE_DICTIONARY); // ids + } +} + +} + +void writeColumnChunkBody(ColumnChunkWriteState & s, const WriteOptions & options, WriteBuffer & out) +{ + s.column_chunk.meta_data.__set_num_values(s.max_def > 0 ? s.def.size() : s.primitive_column->size()); + + /// We'll be updating these as we go. + s.column_chunk.meta_data.__set_encodings({}); + s.column_chunk.meta_data.__set_total_compressed_size(0); + s.column_chunk.meta_data.__set_total_uncompressed_size(0); + s.column_chunk.meta_data.__set_data_page_offset(-1); + + s.primitive_column = s.primitive_column->convertToFullColumnIfLowCardinality(); + + switch (s.primitive_column->getDataType()) + { + /// Numeric conversion to Int32 or Int64. 
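+        /// E.g. the UInt8 case below instantiates writeColumnImpl with a ConverterNumeric that
+        /// widens each UInt8 value to Int32, the narrowest integer physical type in Parquet.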
+ #define N(source_type, parquet_dtype) \ + writeColumnImpl(s, options, out, \ + ConverterNumeric, parquet::parquet_dtype::c_type>( \ + s.primitive_column)) + + case TypeIndex::UInt8 : N(UInt8 , Int32Type); break; + case TypeIndex::UInt16 : N(UInt16, Int32Type); break; + case TypeIndex::UInt32 : N(UInt32, Int32Type); break; + case TypeIndex::UInt64 : N(UInt64, Int64Type); break; + case TypeIndex::Int8 : N(Int8 , Int32Type); break; + case TypeIndex::Int16 : N(Int16 , Int32Type); break; + case TypeIndex::Int32 : N(Int32 , Int32Type); break; + case TypeIndex::Int64 : N(Int64 , Int64Type); break; + + case TypeIndex::Enum8: N(Int8 , Int32Type); break; + case TypeIndex::Enum16: N(Int16 , Int32Type); break; + case TypeIndex::Date: N(UInt16, Int32Type); break; + case TypeIndex::Date32: N(Int32 , Int32Type); break; + case TypeIndex::DateTime: N(UInt32, Int32Type); break; + + #undef N + + case TypeIndex::Float32: + writeColumnImpl( + s, options, out, ConverterNumeric, Float32, Float32>( + s.primitive_column)); + break; + + case TypeIndex::Float64: + writeColumnImpl( + s, options, out, ConverterNumeric, Float64, Float64>( + s.primitive_column)); + break; + + case TypeIndex::DateTime64: + writeColumnImpl( + s, options, out, ConverterNumeric, Int64, Int64>( + s.primitive_column)); + break; + + case TypeIndex::IPv4: + writeColumnImpl( + s, options, out, ConverterNumeric, Int32, UInt32>( + s.primitive_column)); + break; + + case TypeIndex::String: + writeColumnImpl( + s, options, out, ConverterString(s.primitive_column)); + break; + + case TypeIndex::FixedString: + if (options.output_fixed_string_as_fixed_byte_array) + writeColumnImpl( + s, options, out, ConverterFixedString(s.primitive_column)); + else + writeColumnImpl( + s, options, out, ConverterFixedStringAsString(s.primitive_column)); + break; + + #define F(source_type) \ + writeColumnImpl( \ + s, options, out, ConverterNumberAsFixedString(s.primitive_column)) + case TypeIndex::UInt128: F(UInt128); break; + case TypeIndex::UInt256: F(UInt256); break; + case TypeIndex::Int128: F(Int128); break; + case TypeIndex::Int256: F(Int256); break; + case TypeIndex::IPv6: F(IPv6); break; + #undef F + + #define D(source_type) \ + writeColumnImpl( \ + s, options, out, ConverterDecimal(s.primitive_column)) + case TypeIndex::Decimal32: D(Decimal32); break; + case TypeIndex::Decimal64: D(Decimal64); break; + case TypeIndex::Decimal128: D(Decimal128); break; + case TypeIndex::Decimal256: D(Decimal256); break; + #undef D + + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected column type: {}", s.primitive_column->getFamilyName()); + } + + /// Free some memory. + s.primitive_column = {}; + s.def = {}; + s.rep = {}; +} + +void writeFileHeader(WriteBuffer & out) +{ + /// Write the magic bytes. We're a wizard now. 
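+    /// The full file layout is: "PAR1", column chunk data, serialized FileMetaData, a 4-byte
+    /// footer length and a trailing "PAR1" (the last three are written by writeFileFooter()).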
+ out.write("PAR1", 4); +} + +parq::ColumnChunk finalizeColumnChunkAndWriteFooter( + size_t offset_in_file, ColumnChunkWriteState s, const WriteOptions &, WriteBuffer & out) +{ + if (s.column_chunk.meta_data.data_page_offset != -1) + s.column_chunk.meta_data.data_page_offset += offset_in_file; + if (s.column_chunk.meta_data.__isset.dictionary_page_offset) + s.column_chunk.meta_data.dictionary_page_offset += offset_in_file; + s.column_chunk.file_offset = offset_in_file + s.column_chunk.meta_data.total_compressed_size; + + serializeThriftStruct(s.column_chunk, out); + + return std::move(s.column_chunk); +} + +parq::RowGroup makeRowGroup(std::vector column_chunks, size_t num_rows) +{ + parq::RowGroup r; + r.__set_num_rows(num_rows); + r.__set_columns(std::move(column_chunks)); + r.__set_total_compressed_size(0); + for (auto & c : r.columns) + { + r.total_byte_size += c.meta_data.total_uncompressed_size; + r.total_compressed_size += c.meta_data.total_compressed_size; + } + if (!r.columns.empty()) + { + auto & m = r.columns[0].meta_data; + r.__set_file_offset(m.__isset.dictionary_page_offset ? m.dictionary_page_offset : m.data_page_offset); + } + return r; +} + +void writeFileFooter(std::vector row_groups, SchemaElements schema, const WriteOptions & options, WriteBuffer & out) +{ + parq::FileMetaData meta; + meta.version = 2; + meta.schema = std::move(schema); + meta.row_groups = std::move(row_groups); + for (auto & r : meta.row_groups) + meta.num_rows += r.num_rows; + meta.__set_created_by(VERSION_NAME " " VERSION_DESCRIBE); + + if (options.write_page_statistics || options.write_column_chunk_statistics) + { + meta.__set_column_orders({}); + for (auto & s : meta.schema) + if (!s.__isset.num_children) + meta.column_orders.emplace_back(); + for (auto & c : meta.column_orders) + c.__set_TYPE_ORDER({}); + } + + size_t footer_size = serializeThriftStruct(meta, out); + + if (footer_size > INT32_MAX) + throw Exception(ErrorCodes::LIMIT_EXCEEDED, "Parquet file metadata too big: {}", footer_size); + + writeIntBinary(static_cast(footer_size), out); + out.write("PAR1", 4); +} + +} diff --git a/src/Processors/Formats/Impl/Parquet/Write.h b/src/Processors/Formats/Impl/Parquet/Write.h new file mode 100644 index 00000000000..333a32e191f --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/Write.h @@ -0,0 +1,135 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace DB::Parquet +{ + +/// A good resource for learning how Parquet format works is +/// contrib/arrow/cpp/src/parquet/parquet.thrift + +struct WriteOptions +{ + bool output_string_as_string = false; + bool output_fixed_string_as_fixed_byte_array = true; + + CompressionMethod compression = CompressionMethod::Lz4; + + size_t data_page_size = 1024 * 1024; + size_t write_batch_size = 1024; + + bool use_dictionary_encoding = true; + size_t dictionary_size_limit = 1024 * 1024; + /// If using dictionary, this encoding is used as a fallback when dictionary gets too big. + /// Otherwise, this is used for everything. + parquet::format::Encoding::type encoding = parquet::format::Encoding::PLAIN; + + bool write_page_statistics = true; + bool write_column_chunk_statistics = true; + size_t max_statistics_size = 4096; +}; + +/// Information about a primitive column (leaf of the schema tree) to write to Parquet file. +struct ColumnChunkWriteState +{ + /// After writeColumnChunkBody(), offsets in this struct are relative to the start of column chunk. + /// Then finalizeColumnChunkAndWriteFooter() fixes them up before writing to file. 
+ parquet::format::ColumnChunk column_chunk; + + ColumnPtr primitive_column; + CompressionMethod compression; // must match what's inside column_chunk + + /// Repetition and definition levels. Produced by prepareColumnForWrite(). + /// def is empty iff max_def == 0, which means no arrays or nullables. + /// rep is empty iff max_rep == 0, which means no arrays. + PaddedPODArray def; // definition levels + PaddedPODArray rep; // repetition levels + /// Max possible levels, according to schema. Actual max in def/rep may be smaller. + UInt8 max_def = 0; + UInt8 max_rep = 0; + + ColumnChunkWriteState() = default; + /// Prevent accidental copying. + ColumnChunkWriteState(ColumnChunkWriteState &&) = default; + ColumnChunkWriteState & operator=(ColumnChunkWriteState &&) = default; + + /// Estimated memory usage. + size_t allocatedBytes() const + { + size_t r = def.allocated_bytes() + rep.allocated_bytes(); + if (primitive_column) + r += primitive_column->allocatedBytes(); + return r; + } +}; + +using SchemaElements = std::vector; +using ColumnChunkWriteStates = std::vector; + +/// Parquet file consists of row groups, which consist of column chunks. +/// +/// Column chunks can be encoded mostly independently of each other, in parallel. +/// But there are two small complications: +/// 1. One ClickHouse column can translate to multiple leaf columns in parquet. +/// E.g. tuples and maps. +/// If all primitive columns are in one big tuple, we'd like to encode them in parallel too, +/// even though they're one top-level ClickHouse column. +/// 2. At the end of each encoded column chunk there's a footer (struct ColumnMetaData) that +/// contains some absolute offsets in the file. We can't encode it until we know the exact +/// position in the file where the column chunk will go. So these footers have to be serialized +/// sequentially, after we know sizes of all previous column chunks. +/// +/// With that in mind, here's how to write a parquet file: +/// +/// (1) writeFileHeader() +/// (2) For each row group: +/// | (3) For each ClickHouse column: +/// | (4) Call prepareColumnForWrite(). +/// | It'll produce one or more ColumnChunkWriteStates, corresponding to primitive columns that +/// | we need to write. +/// | It'll also produce SchemaElements as a byproduct, describing the logical types and +/// | groupings of the physical columns (e.g. tuples, arrays, maps). +/// | (5) For each ColumnChunkWriteState: +/// | (6) Call writeColumnChunkBody() to write the actual data to the given WriteBuffer. +/// | (7) Call finalizeColumnChunkAndWriteFooter() to write the footer of the column chunk. +/// | (8) Call makeRowGroup() using the ColumnChunk metadata structs from previous step. +/// (9) Call writeFileFooter() using the row groups from previous step and SchemaElements from +/// convertSchema(). +/// +/// Steps (4) and (6) can be parallelized, both within and across row groups. + +/// Parquet schema is a tree of SchemaElements, flattened into a list in depth-first order. +/// Leaf nodes correspond to physical columns of primitive types. Inner nodes describe logical +/// groupings of those columns, e.g. tuples or structs. +SchemaElements convertSchema(const Block & sample, const WriteOptions & options); + +void prepareColumnForWrite( + ColumnPtr column, DataTypePtr type, const std::string & name, const WriteOptions & options, + ColumnChunkWriteStates * out_columns_to_write, SchemaElements * out_schema = nullptr); + +void writeFileHeader(WriteBuffer & out); + +/// Encodes a column chunk, without the footer. 
+/// The ColumnChunkWriteState-s should then passed to finalizeColumnChunkAndWriteFooter(). +void writeColumnChunkBody(ColumnChunkWriteState & s, const WriteOptions & options, WriteBuffer & out); + +/// Unlike most of the column chunk data, the footer (`ColumnMetaData`) needs to know its absolute +/// offset in the file. So we encode it separately, after all previous row groups and column chunks +/// have been encoded. +/// (If you're wondering if the 8-byte offset values can be patched inside the encoded blob - no, +/// they're varint-encoded and can't be padded to a fixed length.) +/// `offset_in_file` is the absolute position in the file where the writeColumnChunkBody()'s output +/// starts. +/// Returns a ColumnChunk to add to the RowGroup. +parquet::format::ColumnChunk finalizeColumnChunkAndWriteFooter( + size_t offset_in_file, ColumnChunkWriteState s, const WriteOptions & options, WriteBuffer & out); + +parquet::format::RowGroup makeRowGroup(std::vector column_chunks, size_t num_rows); + +void writeFileFooter(std::vector row_groups, SchemaElements schema, const WriteOptions & options, WriteBuffer & out); + +} diff --git a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp index 91840cd2c50..9a2d9072860 100644 --- a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp @@ -3,14 +3,23 @@ #if USE_PARQUET #include +#include #include #include "ArrowBufferedStreams.h" #include "CHColumnToArrowColumn.h" +namespace CurrentMetrics +{ + extern const Metric ParquetEncoderThreads; + extern const Metric ParquetEncoderThreadsActive; +} + namespace DB { +using namespace Parquet; + namespace ErrorCodes { extern const int UNKNOWN_EXCEPTION; @@ -67,11 +76,219 @@ namespace ParquetBlockOutputFormat::ParquetBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_) : IOutputFormat(header_, out_), format_settings{format_settings_} { + if (format_settings.parquet.use_custom_encoder) + { + if (format_settings.parquet.parallel_encoding && format_settings.max_threads > 1) + pool = std::make_unique( + CurrentMetrics::ParquetEncoderThreads, CurrentMetrics::ParquetEncoderThreadsActive, + format_settings.max_threads); + + using C = FormatSettings::ParquetCompression; + switch (format_settings.parquet.output_compression_method) + { + case C::NONE: options.compression = CompressionMethod::None; break; + case C::SNAPPY: options.compression = CompressionMethod::Snappy; break; + case C::ZSTD: options.compression = CompressionMethod::Zstd; break; + case C::LZ4: options.compression = CompressionMethod::Lz4; break; + case C::GZIP: options.compression = CompressionMethod::Gzip; break; + case C::BROTLI: options.compression = CompressionMethod::Brotli; break; + } + options.output_string_as_string = format_settings.parquet.output_string_as_string; + options.output_fixed_string_as_fixed_byte_array = format_settings.parquet.output_fixed_string_as_fixed_byte_array; + options.data_page_size = format_settings.parquet.data_page_size; + options.write_batch_size = format_settings.parquet.write_batch_size; + + schema = convertSchema(header_, options); + } } -void ParquetBlockOutputFormat::consumeStaged() +ParquetBlockOutputFormat::~ParquetBlockOutputFormat() { - const size_t columns_num = staging_chunks.at(0).getNumColumns(); + if (pool) + { + is_stopped = true; + pool->wait(); + } +} + +void ParquetBlockOutputFormat::consume(Chunk chunk) +{ + /// Poll background 
tasks. + if (pool) + { + std::unique_lock lock(mutex); + while (true) + { + /// If some row groups are ready to be written to the file, write them. + reapCompletedRowGroups(lock); + + if (background_exception) + std::rethrow_exception(background_exception); + + if (is_stopped) + return; + + /// If there's too much work in flight, wait for some of it to complete. + if (row_groups.size() < 2) + break; + if (bytes_in_flight <= format_settings.parquet.row_group_bytes * 4 && + task_queue.size() <= format_settings.max_threads * 4) + break; + + condvar.wait(lock); + } + } + + /// Do something like SquashingTransform to produce big enough row groups. + /// Because the real SquashingTransform is only used for INSERT, not for SELECT ... INTO OUTFILE. + /// The latter doesn't even have a pipeline where a transform could be inserted, so it's more + /// convenient to do the squashing here. It's also parallelized here. + + if (chunk.getNumRows() != 0) + { + staging_rows += chunk.getNumRows(); + staging_bytes += chunk.bytes(); + staging_chunks.push_back(std::move(chunk)); + } + + const size_t target_rows = std::max(static_cast(1), format_settings.parquet.row_group_rows); + + if (staging_rows < target_rows && + staging_bytes < format_settings.parquet.row_group_bytes) + return; + + /// In the rare case that more than `row_group_rows` rows arrived in one chunk, split the + /// staging chunk into multiple row groups. + if (staging_rows >= target_rows * 2) + { + /// Increase row group size slightly (by < 2x) to avoid a small row group at the end. + size_t num_row_groups = std::max(static_cast(1), staging_rows / target_rows); + size_t row_group_size = (staging_rows - 1) / num_row_groups + 1; // round up + + Chunk concatenated = std::move(staging_chunks[0]); + for (size_t i = 1; i < staging_chunks.size(); ++i) + concatenated.append(staging_chunks[i]); + staging_chunks.clear(); + + for (size_t offset = 0; offset < staging_rows; offset += row_group_size) + { + size_t count = std::min(row_group_size, staging_rows - offset); + MutableColumns columns = concatenated.cloneEmptyColumns(); + for (size_t i = 0; i < columns.size(); ++i) + columns[i]->insertRangeFrom(*concatenated.getColumns()[i], offset, count); + + Chunks piece; + piece.emplace_back(std::move(columns), count, concatenated.getChunkInfo()); + writeRowGroup(std::move(piece)); + } + } + else + { + writeRowGroup(std::move(staging_chunks)); + } + + staging_chunks.clear(); + staging_rows = 0; + staging_bytes = 0; +} + +void ParquetBlockOutputFormat::finalizeImpl() +{ + if (!staging_chunks.empty()) + writeRowGroup(std::move(staging_chunks)); + + if (format_settings.parquet.use_custom_encoder) + { + if (pool) + { + std::unique_lock lock(mutex); + + /// Wait for background work to complete. 
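+            /// (Mirror of the polling loop in consume(): reap finished row groups, surface any
+            /// background exception, and sleep on the condvar until all row groups are written.)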
+ while (true) + { + reapCompletedRowGroups(lock); + + if (background_exception) + std::rethrow_exception(background_exception); + + if (is_stopped) + return; + + if (row_groups.empty()) + break; + + condvar.wait(lock); + } + } + + if (row_groups_complete.empty()) + writeFileHeader(out); + writeFileFooter(std::move(row_groups_complete), schema, options, out); + } + else + { + if (!file_writer) + { + Block header = materializeBlock(getPort(PortKind::Main).getHeader()); + std::vector chunks; + chunks.push_back(Chunk(header.getColumns(), 0)); + writeRowGroup(std::move(chunks)); + } + + if (file_writer) + { + auto status = file_writer->Close(); + if (!status.ok()) + throw Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Error while closing a table: {}", status.ToString()); + } + } +} + +void ParquetBlockOutputFormat::resetFormatterImpl() +{ + if (pool) + { + is_stopped = true; + pool->wait(); + is_stopped = false; + } + + background_exception = nullptr; + threads_running = 0; + task_queue.clear(); + row_groups.clear(); + file_writer.reset(); + row_groups_complete.clear(); + staging_chunks.clear(); + staging_rows = 0; + staging_bytes = 0; +} + +void ParquetBlockOutputFormat::onCancel() +{ + is_stopped = true; +} + +void ParquetBlockOutputFormat::writeRowGroup(std::vector chunks) +{ + if (pool) + writeRowGroupInParallel(std::move(chunks)); + else if (!format_settings.parquet.use_custom_encoder) + writeUsingArrow(std::move(chunks)); + else + { + Chunk concatenated = std::move(chunks[0]); + for (size_t i = 1; i < chunks.size(); ++i) + concatenated.append(chunks[i]); + chunks.clear(); + + writeRowGroupInOneThread(std::move(concatenated)); + } +} + +void ParquetBlockOutputFormat::writeUsingArrow(std::vector chunks) +{ + const size_t columns_num = chunks.at(0).getNumColumns(); std::shared_ptr arrow_table; if (!ch_column_to_arrow_column) @@ -85,7 +302,7 @@ void ParquetBlockOutputFormat::consumeStaged() format_settings.parquet.output_fixed_string_as_fixed_byte_array); } - ch_column_to_arrow_column->chChunkToArrowTable(arrow_table, staging_chunks, columns_num); + ch_column_to_arrow_column->chChunkToArrowTable(arrow_table, chunks, columns_num); if (!file_writer) { @@ -112,64 +329,228 @@ void ParquetBlockOutputFormat::consumeStaged() file_writer = std::move(result.ValueOrDie()); } - // TODO: calculate row_group_size depending on a number of rows and table size - - // allow slightly bigger than row_group_size to avoid a very small tail row group - auto status = file_writer->WriteTable(*arrow_table, std::max(format_settings.parquet.row_group_rows, staging_rows)); + auto status = file_writer->WriteTable(*arrow_table, INT64_MAX); if (!status.ok()) throw Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Error while writing a table: {}", status.ToString()); } -void ParquetBlockOutputFormat::consume(Chunk chunk) +void ParquetBlockOutputFormat::writeRowGroupInOneThread(Chunk chunk) { - /// Do something like SquashingTransform to produce big enough row groups. - /// Because the real SquashingTransform is only used for INSERT, not for SELECT ... INTO OUTFILE. - /// The latter doesn't even have a pipeline where a transform could be inserted, so it's more - /// convenient to do the squashing here. 
- staging_rows += chunk.getNumRows(); - staging_bytes += chunk.bytes(); - staging_chunks.push_back(std::move(chunk)); - chassert(staging_chunks.back().getNumColumns() == staging_chunks.front().getNumColumns()); - if (staging_rows < format_settings.parquet.row_group_rows && - staging_bytes < format_settings.parquet.row_group_bytes) - { + if (chunk.getNumRows() == 0) return; - } - else + + const Block & header = getPort(PortKind::Main).getHeader(); + Parquet::ColumnChunkWriteStates columns_to_write; + chassert(header.columns() == chunk.getNumColumns()); + for (size_t i = 0; i < header.columns(); ++i) + prepareColumnForWrite( + chunk.getColumns()[i], header.getByPosition(i).type, header.getByPosition(i).name, + options, &columns_to_write); + + if (row_groups_complete.empty()) + writeFileHeader(out); + + std::vector column_chunks; + for (auto & s : columns_to_write) { - consumeStaged(); - staging_chunks.clear(); - staging_rows = 0; - staging_bytes = 0; + size_t offset = out.count(); + writeColumnChunkBody(s, options, out); + auto c = finalizeColumnChunkAndWriteFooter(offset, std::move(s), options, out); + column_chunks.push_back(std::move(c)); + } + + auto r = makeRowGroup(std::move(column_chunks), chunk.getNumRows()); + row_groups_complete.push_back(std::move(r)); +} + +void ParquetBlockOutputFormat::writeRowGroupInParallel(std::vector chunks) +{ + std::unique_lock lock(mutex); + + const Block & header = getPort(PortKind::Main).getHeader(); + + RowGroupState & r = row_groups.emplace_back(); + r.column_chunks.resize(header.columns()); + r.tasks_in_flight = r.column_chunks.size(); + + std::vector columnses; + for (auto & chunk : chunks) + { + chassert(header.columns() == chunk.getNumColumns()); + r.num_rows += chunk.getNumRows(); + columnses.push_back(chunk.detachColumns()); + } + + for (size_t i = 0; i < header.columns(); ++i) + { + Task & t = task_queue.emplace_back(&r, i, this); + t.column_type = header.getByPosition(i).type; + t.column_name = header.getByPosition(i).name; + + /// Defer concatenating the columns to the threads. + size_t bytes = 0; + for (size_t j = 0; j < chunks.size(); ++j) + { + auto & col = columnses[j][i]; + bytes += col->allocatedBytes(); + t.column_pieces.push_back(std::move(col)); + } + t.mem.set(bytes); + } + + startMoreThreadsIfNeeded(lock); +} + +void ParquetBlockOutputFormat::reapCompletedRowGroups(std::unique_lock & lock) +{ + while (!row_groups.empty() && row_groups.front().tasks_in_flight == 0 && !is_stopped) + { + RowGroupState & r = row_groups.front(); + + /// Write to the file. + + lock.unlock(); + + if (row_groups_complete.empty()) + writeFileHeader(out); + + std::vector metadata; + for (auto & cols : r.column_chunks) + { + for (ColumnChunk & col : cols) + { + size_t offset = out.count(); + + out.write(col.serialized.data(), col.serialized.size()); + auto m = finalizeColumnChunkAndWriteFooter(offset, std::move(col.state), options, out); + + metadata.push_back(std::move(m)); + } + } + + row_groups_complete.push_back(makeRowGroup(std::move(metadata), r.num_rows)); + + lock.lock(); + + row_groups.pop_front(); } } -void ParquetBlockOutputFormat::finalizeImpl() +void ParquetBlockOutputFormat::startMoreThreadsIfNeeded(const std::unique_lock &) { - if (!file_writer && staging_chunks.empty()) + /// Speculate that all current are already working on tasks. 
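+    /// That is, assume no thread is idle, so at most one new thread per queued task is useful,
+    /// capped by max_threads.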
+ size_t to_add = std::min(task_queue.size(), format_settings.max_threads - threads_running); + for (size_t i = 0; i < to_add; ++i) { - Block header = materializeBlock(getPort(PortKind::Main).getHeader()); + auto job = [this, thread_group = CurrentThread::getGroup()]() + { + if (thread_group) + CurrentThread::attachToGroupIfDetached(thread_group); + SCOPE_EXIT_SAFE(if (thread_group) CurrentThread::detachFromGroupIfNotDetached();); - consume(Chunk(header.getColumns(), 0)); // this will make staging_chunks non-empty + try + { + setThreadName("ParquetEncoder"); + + threadFunction(); + } + catch (...) + { + std::lock_guard lock(mutex); + background_exception = std::current_exception(); + condvar.notify_all(); + --threads_running; + } + }; + + if (threads_running == 0) + { + /// First thread. We need it to succeed; otherwise we may get stuck. + pool->scheduleOrThrowOnError(job); + ++threads_running; + } + else + { + /// More threads. This may be called from inside the thread pool, so avoid waiting; + /// otherwise it may deadlock. + if (!pool->trySchedule(job)) + break; + } } - - if (!staging_chunks.empty()) - { - consumeStaged(); - staging_chunks.clear(); - staging_rows = 0; - staging_bytes = 0; - } - - auto status = file_writer->Close(); - if (!status.ok()) - throw Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Error while closing a table: {}", status.ToString()); } -void ParquetBlockOutputFormat::resetFormatterImpl() +void ParquetBlockOutputFormat::threadFunction() { - file_writer.reset(); + std::unique_lock lock(mutex); + + while (true) + { + if (task_queue.empty() || is_stopped) + { + /// The check and the decrement need to be in the same critical section, to make sure + /// we never get stuck with tasks but no threads. + --threads_running; + return; + } + + auto task = std::move(task_queue.front()); + task_queue.pop_front(); + + if (task.column_type) + { + lock.unlock(); + + IColumn::MutablePtr concatenated = IColumn::mutate(std::move(task.column_pieces[0])); + for (size_t i = 1; i < task.column_pieces.size(); ++i) + { + auto & c = task.column_pieces[i]; + concatenated->insertRangeFrom(*c, 0, c->size()); + c.reset(); + } + task.column_pieces.clear(); + + std::vector subcolumns; + prepareColumnForWrite( + std::move(concatenated), task.column_type, task.column_name, options, &subcolumns); + + lock.lock(); + + for (size_t i = 0; i < subcolumns.size(); ++i) + { + task.row_group->column_chunks[task.column_idx].emplace_back(this); + task.row_group->tasks_in_flight += 1; + + auto & t = task_queue.emplace_back(task.row_group, task.column_idx, this); + t.subcolumn_idx = i; + t.state = std::move(subcolumns[i]); + t.mem.set(t.state.allocatedBytes()); + } + + startMoreThreadsIfNeeded(lock); + } + else + { + lock.unlock(); + + PODArray serialized; + { + WriteBufferFromVector buf(serialized); + writeColumnChunkBody(task.state, options, buf); + } + + lock.lock(); + + auto & c = task.row_group->column_chunks[task.column_idx][task.subcolumn_idx]; + c.state = std::move(task.state); + c.serialized = std::move(serialized); + c.mem.set(c.serialized.size() + c.state.allocatedBytes()); + } + + --task.row_group->tasks_in_flight; + + condvar.notify_all(); + } } void registerOutputFormatParquet(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.h b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.h index 482c778bc52..4c73de007fe 100644 --- a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.h @@ -2,8 +2,11 @@ 
#include "config.h" #if USE_PARQUET -# include -# include + +#include +#include +#include +#include namespace arrow { @@ -28,25 +31,128 @@ class ParquetBlockOutputFormat : public IOutputFormat { public: ParquetBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_); + ~ParquetBlockOutputFormat() override; String getName() const override { return "ParquetBlockOutputFormat"; } String getContentType() const override { return "application/octet-stream"; } private: - void consumeStaged(); + struct MemoryToken + { + ParquetBlockOutputFormat * parent; + size_t bytes = 0; + + explicit MemoryToken(ParquetBlockOutputFormat * p, size_t b = 0) : parent(p) + { + set(b); + } + + MemoryToken(MemoryToken && t) + : parent(std::exchange(t.parent, nullptr)), bytes(std::exchange(t.bytes, 0)) {} + + MemoryToken & operator=(MemoryToken && t) + { + parent = std::exchange(t.parent, nullptr); + bytes = std::exchange(t.bytes, 0); + return *this; + } + + ~MemoryToken() + { + set(0); + } + + void set(size_t new_size) + { + if (new_size == bytes) + return; + parent->bytes_in_flight += new_size - bytes; // overflow is fine + bytes = new_size; + } + }; + + struct ColumnChunk + { + Parquet::ColumnChunkWriteState state; + PODArray serialized; + + MemoryToken mem; + + ColumnChunk(ParquetBlockOutputFormat * p) : mem(p) {} + }; + + struct RowGroupState + { + size_t tasks_in_flight = 0; + std::vector> column_chunks; + size_t num_rows = 0; + }; + + struct Task + { + RowGroupState * row_group; + size_t column_idx; + size_t subcolumn_idx = 0; + + MemoryToken mem; + + /// If not null, we need to call prepareColumnForWrite(). + /// Otherwise we need to call writeColumnChunkBody(). + DataTypePtr column_type; + std::string column_name; + std::vector column_pieces; + + Parquet::ColumnChunkWriteState state; + + Task(RowGroupState * rg, size_t ci, ParquetBlockOutputFormat * p) + : row_group(rg), column_idx(ci), mem(p) {} + }; + void consume(Chunk) override; void finalizeImpl() override; void resetFormatterImpl() override; + void onCancel() override; + void writeRowGroup(std::vector chunks); + void writeUsingArrow(std::vector chunks); + void writeRowGroupInOneThread(Chunk chunk); + void writeRowGroupInParallel(std::vector chunks); + + void threadFunction(); + void startMoreThreadsIfNeeded(const std::unique_lock & lock); + + /// Called in single-threaded fashion. Writes to the file. + void reapCompletedRowGroups(std::unique_lock & lock); + + const FormatSettings format_settings; + + /// Chunks to squash together to form a row group. std::vector staging_chunks; size_t staging_rows = 0; size_t staging_bytes = 0; - const FormatSettings format_settings; - std::unique_ptr file_writer; std::unique_ptr ch_column_to_arrow_column; + + Parquet::WriteOptions options; + Parquet::SchemaElements schema; + std::vector row_groups_complete; + + + std::mutex mutex; + std::condition_variable condvar; // wakes up consume() + std::unique_ptr pool; + + std::atomic_bool is_stopped{false}; + std::exception_ptr background_exception = nullptr; + + /// Invariant: if there's at least one task then there's at least one thread. 
+ size_t threads_running = 0; + std::atomic bytes_in_flight{0}; + + std::deque task_queue; + std::deque row_groups; }; } diff --git a/tests/queries/0_stateless/02735_parquet_encoder.reference b/tests/queries/0_stateless/02735_parquet_encoder.reference new file mode 100644 index 00000000000..c7d79392d85 --- /dev/null +++ b/tests/queries/0_stateless/02735_parquet_encoder.reference @@ -0,0 +1,55 @@ +u8 Nullable(UInt8) +u16 Nullable(UInt16) +u32 Nullable(UInt32) +u64 Nullable(UInt64) +i8 Nullable(Int8) +i16 Nullable(Int16) +i32 Nullable(Int32) +i64 Nullable(Int64) +date Nullable(UInt16) +date32 Nullable(Date32) +datetime Nullable(UInt32) +datetime64 Nullable(DateTime64(3, \'UTC\')) +enum8 Nullable(Int8) +enum16 Nullable(Int16) +float32 Nullable(Float32) +float64 Nullable(Float64) +str Nullable(String) +fstr Nullable(FixedString(12)) +u128 Nullable(FixedString(16)) +u256 Nullable(FixedString(32)) +i128 Nullable(FixedString(16)) +i256 Nullable(FixedString(32)) +decimal32 Nullable(Decimal(9, 3)) +decimal64 Nullable(Decimal(18, 10)) +decimal128 Nullable(Decimal(38, 20)) +decimal256 Nullable(Decimal(76, 40)) +ipv4 Nullable(UInt32) +ipv6 Nullable(FixedString(16)) +0 +0 +0 +0 +1 2 1 +1 2 2 +1 3 3 +1 1000000 1 +3914219105369203805 +4 1000000 1 +(1000000,0,NULL,'100','299') +(1000000,0,NULL,'0','-1294970296') +(1000000,0,NULL,'-2147483296','2147481000') +(100000,900000,NULL,'100009','999999') +[(2,0,NULL,'','[]')] +1 1 +0 1 +16159458007063698496 +16159458007063698496 +BYTE_ARRAY String +FIXED_LEN_BYTE_ARRAY None +BYTE_ARRAY None +BYTE_ARRAY None +BYTE_ARRAY String +never gonna +give you +up diff --git a/tests/queries/0_stateless/02735_parquet_encoder.sql b/tests/queries/0_stateless/02735_parquet_encoder.sql new file mode 100644 index 00000000000..d8d52a13218 --- /dev/null +++ b/tests/queries/0_stateless/02735_parquet_encoder.sql @@ -0,0 +1,168 @@ +-- Tags: no-fasttest + +set output_format_parquet_use_custom_encoder = 1; +set output_format_parquet_row_group_size = 1000; +set output_format_parquet_data_page_size = 800; +set output_format_parquet_batch_size = 100; +set output_format_parquet_row_group_size_bytes = 1000000000; +set engine_file_truncate_on_insert=1; + +-- Write random data to parquet file, then read from it and check that it matches what we wrote. +-- Do this for all kinds of data types: primitive, Nullable(primitive), Array(primitive), +-- Array(Nullable(primitive)), Array(Array(primitive)), Map(primitive, primitive), etc. 
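+--
+-- Each type-group below follows the same round-trip pattern, with the hash difference expected
+-- to come out as 0:
+--   insert into function file(t.parquet) select * from t;
+--   select (select sum(cityHash64(*)) from t) - (select sum(cityHash64(*)) from file(t.parquet));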
+ +drop table if exists basic_types_02735; +create temporary table basic_types_02735 as select * from generateRandom(' + u8 UInt8, + u16 UInt16, + u32 UInt32, + u64 UInt64, + i8 Int8, + i16 Int16, + i32 Int32, + i64 Int64, + date Date, + date32 Date32, + datetime DateTime, + datetime64 DateTime64, + enum8 Enum8(''x'' = 1, ''y'' = 2, ''z'' = 3), + enum16 Enum16(''xx'' = 1000, ''yy'' = 2000, ''zz'' = 3000), + float32 Float32, + float64 Float64, + str String, + fstr FixedString(12), + u128 UInt128, + u256 UInt256, + i128 Int128, + i256 Int256, + decimal32 Decimal32(3), + decimal64 Decimal64(10), + decimal128 Decimal128(20), + decimal256 Decimal256(40), + ipv4 IPv4, + ipv6 IPv6') limit 10101; +insert into function file(basic_types_02735.parquet) select * from basic_types_02735; +desc file(basic_types_02735.parquet); +select (select sum(cityHash64(*)) from basic_types_02735) - (select sum(cityHash64(*)) from file(basic_types_02735.parquet)); +drop table basic_types_02735; + + +drop table if exists nullables_02735; +create temporary table nullables_02735 as select * from generateRandom(' + u16 Nullable(UInt16), + i64 Nullable(Int64), + datetime64 Nullable(DateTime64), + enum8 Nullable(Enum8(''x'' = 1, ''y'' = 2, ''z'' = 3)), + float64 Nullable(Float64), + str Nullable(String), + fstr Nullable(FixedString(12)), + i256 Nullable(Int256), + decimal256 Nullable(Decimal256(40)), + ipv6 Nullable(IPv6)') limit 10000; +insert into function file(nullables_02735.parquet) select * from nullables_02735; +select (select sum(cityHash64(*)) from nullables_02735) - (select sum(cityHash64(*)) from file(nullables_02735.parquet)); +drop table nullables_02735; + + +-- TODO: When cityHash64() fully supports Nullable: https://github.com/ClickHouse/ClickHouse/pull/48625 +-- the next two blocks can be simplified: arrays_out_02735 intermediate table is not needed, +-- a.csv and b.csv are not needed. 
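+-- Until then, the arrays block round-trips through an intermediate table, and the 'madness'
+-- block dumps both sides to CSV and compares hashes of the dumped lines.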
+ +drop table if exists arrays_02735; +drop table if exists arrays_out_02735; +create table arrays_02735 engine = Memory as select * from generateRandom(' + u32 Array(UInt32), + i8 Array(Int8), + datetime Array(DateTime), + enum16 Array(Enum16(''xx'' = 1000, ''yy'' = 2000, ''zz'' = 3000)), + float32 Array(Float32), + str Array(String), + fstr Array(FixedString(12)), + u128 Array(UInt128), + decimal64 Array(Decimal64(10)), + ipv4 Array(IPv4), + msi Map(String, Int16), + tup Tuple(FixedString(3), Array(String), Map(Int8, Date))') limit 10000; +insert into function file(arrays_02735.parquet) select * from arrays_02735; +create temporary table arrays_out_02735 as arrays_02735; +insert into arrays_out_02735 select * from file(arrays_02735.parquet); +select (select sum(cityHash64(*)) from arrays_02735) - (select sum(cityHash64(*)) from arrays_out_02735); +--select (select sum(cityHash64(*)) from arrays_02735) - +-- (select sum(cityHash64(u32, i8, datetime, enum16, float32, str, fstr, arrayMap(x->reinterpret(x, 'UInt128'), u128), decimal64, ipv4, msi, tup)) from file(arrays_02735.parquet)); +drop table arrays_02735; +drop table arrays_out_02735; + + +drop table if exists madness_02735; +create temporary table madness_02735 as select * from generateRandom(' + aa Array(Array(UInt32)), + aaa Array(Array(Array(UInt32))), + an Array(Nullable(String)), + aan Array(Array(Nullable(FixedString(10)))), + l LowCardinality(String), + ln LowCardinality(Nullable(FixedString(11))), + al Array(LowCardinality(UInt128)), + aaln Array(Array(LowCardinality(Nullable(String)))), + mln Map(LowCardinality(String), Nullable(Int8)), + t Tuple(Map(FixedString(5), Tuple(Array(UInt16), Nullable(UInt16), Array(Tuple(Int8, Decimal64(10))))), Tuple(kitchen UInt64, sink String)), + n Nested(hello UInt64, world Tuple(first String, second FixedString(1))) + ') limit 10000; +insert into function file(madness_02735.parquet) select * from madness_02735; +insert into function file(a.csv) select * from madness_02735 order by tuple(*); +insert into function file(b.csv) select aa, aaa, an, aan, l, ln, arrayMap(x->reinterpret(x, 'UInt128'), al) as al_, aaln, mln, t, n.hello, n.world from file(madness_02735.parquet) order by tuple(aa, aaa, an, aan, l, ln, al_, aaln, mln, t, n.hello, n.world); +select (select sum(cityHash64(*)) from file(a.csv, LineAsString)) - (select sum(cityHash64(*)) from file(b.csv, LineAsString)); +--select (select sum(cityHash64(*)) from madness_02735) - +-- (select sum(cityHash64(aa, aaa, an, aan, l, ln, map(x->reinterpret(x, 'UInt128'), al), aaln, mln, t, n.hello, n.world)) from file(madness_02735.parquet)); +drop table madness_02735; + + +-- Merging input blocks into bigger row groups. +insert into function file(squash_02735.parquet) select '012345' union all select '543210' settings max_block_size = 1; +select num_columns, num_rows, num_row_groups from file(squash_02735.parquet, ParquetMetadata); + +-- Row group size limit in bytes. +insert into function file(row_group_bytes_02735.parquet) select '012345' union all select '543210' settings max_block_size = 1, output_format_parquet_row_group_size_bytes = 5; +select num_columns, num_rows, num_row_groups from file(row_group_bytes_02735.parquet, ParquetMetadata); + +-- Row group size limit in rows. 
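+-- (With output_format_parquet_row_group_size = 1, the three input rows must come back as three
+-- row groups.)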
+insert into function file(tiny_row_groups_02735.parquet) select * from numbers(3) settings output_format_parquet_row_group_size = 1; +select num_columns, num_rows, num_row_groups from file(tiny_row_groups_02735.parquet, ParquetMetadata); + +-- 1M unique 8-byte values should exceed dictionary_size_limit (1 MB). +insert into function file(big_column_chunk_02735.parquet) select number from numbers(1000000) settings output_format_parquet_row_group_size = 1000000; +select num_columns, num_rows, num_row_groups from file(big_column_chunk_02735.parquet, ParquetMetadata); +select sum(cityHash64(number)) from file(big_column_chunk_02735.parquet); + +-- Check statistics: signed vs unsigned, null count. Use enough rows to produce multiple pages. +insert into function file(statistics_02735.parquet) select 100 + number%200 as a, toUInt32(number * 3000) as u, toInt32(number * 3000) as i, if(number % 10 == 9, toString(number), null) as s from numbers(1000000) settings output_format_parquet_row_group_size = 1000000; +select num_columns, num_rows, num_row_groups from file(statistics_02735.parquet, ParquetMetadata); +select tupleElement(c, 'statistics') from file(statistics_02735.parquet, ParquetMetadata) array join tupleElement(row_groups[1], 'columns') as c; + +-- Statistics string length limit (max_statistics_size). +insert into function file(long_string_02735.parquet) select toString(range(number * 2000)) from numbers(2); +select tupleElement(tupleElement(row_groups[1], 'columns'), 'statistics') from file(long_string_02735.parquet, ParquetMetadata); + +-- Compression setting. +insert into function file(compressed_02735.parquet) select concat('aaaaaaaaaaaaaaaa', toString(number)) as s from numbers(1000) settings output_format_parquet_row_group_size = 10000, output_format_parquet_compression_method='zstd'; +select total_compressed_size < 10000, total_uncompressed_size > 15000 from file(compressed_02735.parquet, ParquetMetadata); +insert into function file(compressed_02735.parquet) select concat('aaaaaaaaaaaaaaaa', toString(number)) as s from numbers(1000) settings output_format_parquet_row_group_size = 10000, output_format_parquet_compression_method='none'; +select total_compressed_size < 10000, total_uncompressed_size > 15000 from file(compressed_02735.parquet, ParquetMetadata); + +-- Single-threaded encoding and Arrow encoder. +drop table if exists other_encoders_02735; +create temporary table other_encoders_02735 as select number, number*2 from numbers(10000); +insert into function file(single_thread_02735.parquet) select * from other_encoders_02735 settings max_threads = 1; +select sum(cityHash64(*)) from file(single_thread_02735.parquet); +insert into function file(arrow_02735.parquet) select * from other_encoders_02735 settings output_format_parquet_use_custom_encoder = 0; +select sum(cityHash64(*)) from file(arrow_02735.parquet); + +-- String -> binary vs string; FixedString -> fixed-length-binary vs binary vs string. 
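+-- (In the ParquetMetadata output below, columns.5 and columns.6 appear to be each column's
+-- physical type and logical type, e.g. BYTE_ARRAY + String.)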
+insert into function file(strings1_02735.parquet) select 'never', toFixedString('gonna', 5) settings output_format_parquet_string_as_string = 1, output_format_parquet_fixed_string_as_fixed_byte_array = 1; +select columns.5, columns.6 from file(strings1_02735.parquet, ParquetMetadata) array join columns; +insert into function file(strings2_02735.parquet) select 'give', toFixedString('you', 3) settings output_format_parquet_string_as_string = 0, output_format_parquet_fixed_string_as_fixed_byte_array = 0; +select columns.5, columns.6 from file(strings2_02735.parquet, ParquetMetadata) array join columns; +insert into function file(strings3_02735.parquet) select toFixedString('up', 2) settings output_format_parquet_string_as_string = 1, output_format_parquet_fixed_string_as_fixed_byte_array = 0; +select columns.5, columns.6 from file(strings3_02735.parquet, ParquetMetadata) array join columns; +select * from file(strings1_02735.parquet); +select * from file(strings2_02735.parquet); +select * from file(strings3_02735.parquet); From db5cb960508fc20ff7127aa092b89e6002c9f503 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Wed, 17 May 2023 01:56:00 +0000 Subject: [PATCH 1968/1997] Start over when falling back to non-dictionary encoding --- src/Processors/Formats/Impl/Parquet/Write.cpp | 41 +++++++++++-------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/src/Processors/Formats/Impl/Parquet/Write.cpp b/src/Processors/Formats/Impl/Parquet/Write.cpp index a29bb81f8dc..ba67f075a0d 100644 --- a/src/Processors/Formats/Impl/Parquet/Write.cpp +++ b/src/Processors/Formats/Impl/Parquet/Write.cpp @@ -421,10 +421,7 @@ void writeColumnImpl( typename Converter::Statistics page_statistics; typename Converter::Statistics total_statistics; - /// We start with dictionary encoding, then switch to `encoding` (non-dictionary) if the - /// dictionary gets too big. That's how arrow does it too. - bool initially_used_dictionary = options.use_dictionary_encoding; - bool currently_using_dictionary = initially_used_dictionary; + bool use_dictionary = options.use_dictionary_encoding; std::optional fixed_string_descr; if constexpr (std::is_same::value) @@ -441,12 +438,11 @@ void writeColumnImpl( /// Alternatively, we could avoid using arrow's dictionary encoding code and leverage /// ColumnLowCardinality instead. It would work basically the same way as what this function /// currently does: add values to the ColumnRowCardinality (instead of `encoder`) in batches, - /// checking dictionary size after each batch; if it gets big, flush the dictionary and the - /// indices and switch to non-dictionary encoding. Feels like it could even be slightly less code. + /// checking dictionary size after each batch. That might be faster. auto encoder = parquet::MakeTypedEncoder( // ignored if using dictionary static_cast(encoding), - currently_using_dictionary, fixed_string_descr ? &*fixed_string_descr : nullptr); + use_dictionary, fixed_string_descr ? &*fixed_string_descr : nullptr); struct PageData { @@ -496,7 +492,7 @@ void writeColumnImpl( header.__isset.data_page_header = true; auto & d = header.data_page_header; d.__set_num_values(static_cast(def_count)); - d.__set_encoding(currently_using_dictionary ? parq::Encoding::RLE_DICTIONARY : encoding); + d.__set_encoding(use_dictionary ? 
parq::Encoding::RLE_DICTIONARY : encoding); d.__set_definition_level_encoding(parq::Encoding::RLE); d.__set_repetition_level_encoding(parq::Encoding::RLE); /// We could also put checksum in `header.crc`, but apparently no one uses it: @@ -513,7 +509,7 @@ void writeColumnImpl( total_statistics.merge(page_statistics); page_statistics.clear(); - if (currently_using_dictionary) + if (use_dictionary) { dict_encoded_pages.push_back({.header = std::move(header)}); std::swap(dict_encoded_pages.back().data, compressed); @@ -593,13 +589,22 @@ void writeColumnImpl( next_def_offset += def_count; next_data_offset += data_count; - if (currently_using_dictionary && is_dict_too_big()) + if (use_dictionary && is_dict_too_big()) { /// Fallback to non-dictionary encoding. - flush_page(next_def_offset - def_offset, next_data_offset - data_offset); - flush_dict(); + /// + /// Discard encoded data and start over. + /// This is different from what arrow does: arrow writes out the dictionary-encoded + /// data, then uses non-dictionary encoding for later pages. + /// Starting over seems better: it produces slightly smaller files (I saw 1-4%) in + /// exchange for slight decrease in speed (I saw < 5%). This seems like a good + /// trade because encoding speed is much less important than decoding (as evidenced + /// by arrow not supporting parallel encoding, even though it's easy to support). - currently_using_dictionary = false; + def_offset = 0; + data_offset = 0; + dict_encoded_pages.clear(); + use_dictionary = false; encoder = parquet::MakeTypedEncoder( static_cast(encoding)); break; @@ -614,7 +619,7 @@ void writeColumnImpl( } } - if (currently_using_dictionary) + if (use_dictionary) flush_dict(); chassert(data_offset == s.primitive_column->size()); @@ -630,13 +635,15 @@ void writeColumnImpl( /// Report which encodings we've used. 
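    /// Readers pick their decoders based on the `encodings` list in the column chunk metadata,
    /// so it has to mention the level encoding (always RLE here) as well as the data page encodings.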
if (s.max_rep > 0 || s.max_def > 0) addToEncodingsUsed(s, parq::Encoding::RLE); // levels - if (!currently_using_dictionary) - addToEncodingsUsed(s, encoding); // non-dictionary encoding - if (initially_used_dictionary) + if (use_dictionary) { addToEncodingsUsed(s, parq::Encoding::PLAIN); // dictionary itself addToEncodingsUsed(s, parq::Encoding::RLE_DICTIONARY); // ids } + else + { + addToEncodingsUsed(s, encoding); + } } } From dfdf5de972b0b8ee37fd0e89cfeaa8c3f5ea79cf Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Tue, 30 May 2023 01:28:16 +0000 Subject: [PATCH 1969/1997] Fixes --- contrib/arrow-cmake/CMakeLists.txt | 5 +- programs/client/Client.cpp | 5 + .../Formats/Impl/Parquet/PrepareForWrite.cpp | 16 ++- src/Processors/Formats/Impl/Parquet/Write.cpp | 120 +++++++++++++++--- src/Processors/Formats/Impl/Parquet/Write.h | 1 + .../Formats/Impl/ParquetBlockInputFormat.cpp | 7 +- .../Formats/Impl/ParquetBlockOutputFormat.cpp | 18 ++- .../Formats/Impl/ParquetBlockOutputFormat.h | 1 + .../02581_parquet_arrow_orc_compressions.sh | 2 + .../0_stateless/02735_parquet_encoder.sql | 2 +- 10 files changed, 147 insertions(+), 30 deletions(-) diff --git a/contrib/arrow-cmake/CMakeLists.txt b/contrib/arrow-cmake/CMakeLists.txt index 46b86cb4ddb..e3ea0381595 100644 --- a/contrib/arrow-cmake/CMakeLists.txt +++ b/contrib/arrow-cmake/CMakeLists.txt @@ -502,9 +502,10 @@ target_include_directories(_parquet SYSTEM BEFORE "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src" "${CMAKE_CURRENT_SOURCE_DIR}/cpp/src") target_link_libraries(_parquet - PUBLIC _arrow - PRIVATE + PUBLIC + _arrow ch_contrib::thrift + PRIVATE boost::headers_only boost::regex OpenSSL::Crypto OpenSSL::SSL) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index e1a33231592..e73f77819ad 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -812,6 +812,11 @@ bool Client::processWithFuzzing(const String & full_query) } catch (...) { + if (!ast_to_process) + fmt::print(stderr, + "Error while forming new query: {}\n", + getCurrentExceptionMessage(true)); + // Some functions (e.g. protocol parsers) don't throw, but // set last_exception instead, so we'll also do it here for // uniformity. 
diff --git a/src/Processors/Formats/Impl/Parquet/PrepareForWrite.cpp b/src/Processors/Formats/Impl/Parquet/PrepareForWrite.cpp index a70b6fcfc81..0700fc8491c 100644 --- a/src/Processors/Formats/Impl/Parquet/PrepareForWrite.cpp +++ b/src/Processors/Formats/Impl/Parquet/PrepareForWrite.cpp @@ -295,7 +295,17 @@ void preparePrimitiveColumn(ColumnPtr column, DataTypePtr type, const std::strin switch (type->getTypeId()) { - case TypeIndex::UInt8: types(T::INT32, C::UINT_8 , int_type(8 , false)); break; + case TypeIndex::UInt8: + if (isBool(type)) + { + types(T::BOOLEAN); + state.is_bool = true; + } + else + { + types(T::INT32, C::UINT_8 , int_type(8 , false)); + } + break; case TypeIndex::UInt16: types(T::INT32, C::UINT_16, int_type(16, false)); break; case TypeIndex::UInt32: types(T::INT32, C::UINT_32, int_type(32, false)); break; case TypeIndex::UInt64: types(T::INT64, C::UINT_64, int_type(64, false)); break; @@ -588,7 +598,7 @@ SchemaElements convertSchema(const Block & sample, const WriteOptions & options) root.__set_name("schema"); root.__set_num_children(static_cast(sample.columns())); - for (auto & c : sample) + for (const auto & c : sample) prepareColumnForWrite(c.column, c.type, c.name, options, nullptr, &schema); return schema; @@ -598,7 +608,7 @@ void prepareColumnForWrite( ColumnPtr column, DataTypePtr type, const std::string & name, const WriteOptions & options, ColumnChunkWriteStates * out_columns_to_write, SchemaElements * out_schema) { - if (column->size() == 0 && out_columns_to_write != nullptr) + if (column->empty() && out_columns_to_write != nullptr) throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty column passed to Parquet encoder"); ColumnChunkWriteStates states; diff --git a/src/Processors/Formats/Impl/Parquet/Write.cpp b/src/Processors/Formats/Impl/Parquet/Write.cpp index ba67f075a0d..9664d173f29 100644 --- a/src/Processors/Formats/Impl/Parquet/Write.cpp +++ b/src/Processors/Formats/Impl/Parquet/Write.cpp @@ -15,6 +15,10 @@ #include #include "config_version.h" +#if USE_SNAPPY +#include +#endif + namespace DB::ErrorCodes { extern const int CANNOT_COMPRESS; @@ -68,7 +72,7 @@ struct StatisticsNumeric } }; -struct StatisticsFixedString +struct StatisticsFixedStringRef { size_t fixed_string_size = UINT64_MAX; const uint8_t * min = nullptr; @@ -81,7 +85,7 @@ struct StatisticsFixedString addMax(a.ptr); } - void merge(const StatisticsFixedString & s) + void merge(const StatisticsFixedStringRef & s) { chassert(fixed_string_size == UINT64_MAX || fixed_string_size == s.fixed_string_size); fixed_string_size = s.fixed_string_size; @@ -93,7 +97,7 @@ struct StatisticsFixedString void clear() { min = max = nullptr; } - parq::Statistics get(const WriteOptions & options) + parq::Statistics get(const WriteOptions & options) const { parq::Statistics s; if (min == nullptr || fixed_string_size > options.max_statistics_size) @@ -115,7 +119,54 @@ struct StatisticsFixedString } }; -struct StatisticsString +template +struct StatisticsFixedStringCopy +{ + bool empty = true; + std::array min {}; + std::array max {}; + + void add(parquet::FixedLenByteArray a) + { + addMin(a.ptr); + addMax(a.ptr); + empty = false; + } + + void merge(const StatisticsFixedStringCopy & s) + { + if (s.empty) + return; + addMin(&s.min[0]); + addMax(&s.max[0]); + empty = false; + } + + void clear() { empty = true; } + + parq::Statistics get(const WriteOptions &) const + { + parq::Statistics s; + if (empty) + return s; + s.__set_min_value(std::string(reinterpret_cast(min.data()), S)); + 
s.__set_max_value(std::string(reinterpret_cast(max.data()), S)); + return s; + } + + void addMin(const uint8_t * p) + { + if (empty || memcmp(p, min.data(), S) < 0) + memcpy(min.data(), p, S); + } + void addMax(const uint8_t * p) + { + if (empty || memcmp(p, max.data(), S) > 0) + memcpy(max.data(), p, S); + } +}; + +struct StatisticsStringRef { parquet::ByteArray min; parquet::ByteArray max; @@ -126,7 +177,7 @@ struct StatisticsString addMax(x); } - void merge(const StatisticsString & s) + void merge(const StatisticsStringRef & s) { if (s.min.ptr == nullptr) return; @@ -136,7 +187,7 @@ struct StatisticsString void clear() { *this = {}; } - parq::Statistics get(const WriteOptions & options) + parq::Statistics get(const WriteOptions & options) const { parq::Statistics s; if (min.ptr == nullptr) @@ -197,7 +248,7 @@ struct ConverterNumeric { buf.resize(count); for (size_t i = 0; i < count; ++i) - buf[i] = static_cast(column.getData()[offset + i]); + buf[i] = static_cast(column.getData()[offset + i]); // NOLINT return buf.data(); } } @@ -205,7 +256,7 @@ struct ConverterNumeric struct ConverterString { - using Statistics = StatisticsString; + using Statistics = StatisticsStringRef; const ColumnString & column; PODArray buf; @@ -226,7 +277,7 @@ struct ConverterString struct ConverterFixedString { - using Statistics = StatisticsFixedString; + using Statistics = StatisticsFixedStringRef; const ColumnFixedString & column; PODArray buf; @@ -246,7 +297,7 @@ struct ConverterFixedString struct ConverterFixedStringAsString { - using Statistics = StatisticsString; + using Statistics = StatisticsStringRef; const ColumnFixedString & column; PODArray buf; @@ -267,7 +318,7 @@ struct ConverterNumberAsFixedString { /// Calculate min/max statistics for little-endian fixed strings, not numbers, because parquet /// doesn't know it's numbers. 
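    /// (Parquet compares FIXED_LEN_BYTE_ARRAY values byte-wise, and the byte order of little-endian
    /// integers differs from their numeric order, so these stats are only valid as byte strings.)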
- using Statistics = StatisticsFixedString; + using Statistics = StatisticsFixedStringCopy; const ColumnVector & column; PODArray buf; @@ -290,7 +341,7 @@ struct ConverterNumberAsFixedString template struct ConverterDecimal { - using Statistics = StatisticsFixedString; + using Statistics = StatisticsFixedStringCopy; const ColumnDecimal & column; PODArray data_buf; @@ -348,6 +399,24 @@ PODArray & compress(PODArray & source, PODArray & scratch, Com return scratch; } +#if USE_SNAPPY + case CompressionMethod::Snappy: + { + size_t max_dest_size = snappy::MaxCompressedLength(source.size()); + + if (max_dest_size > std::numeric_limits::max()) + throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress column of size {}", formatReadableSizeWithBinarySuffix(source.size())); + + scratch.resize(max_dest_size); + + size_t compressed_size; + snappy::RawCompress(source.data(), source.size(), scratch.data(), &compressed_size); + + scratch.resize(static_cast(compressed_size)); + return scratch; + } +#endif + default: { auto dest_buf = std::make_unique>>(scratch); @@ -421,7 +490,7 @@ void writeColumnImpl( typename Converter::Statistics page_statistics; typename Converter::Statistics total_statistics; - bool use_dictionary = options.use_dictionary_encoding; + bool use_dictionary = options.use_dictionary_encoding && !s.is_bool; std::optional fixed_string_descr; if constexpr (std::is_same::value) @@ -431,7 +500,8 @@ void writeColumnImpl( "", parquet::Repetition::REQUIRED, parquet::Type::FIXED_LEN_BYTE_ARRAY, parquet::ConvertedType::NONE, static_cast(converter.fixedStringSize())), 0, 0); - page_statistics.fixed_string_size = converter.fixedStringSize(); + if constexpr (std::is_same::value) + page_statistics.fixed_string_size = converter.fixedStringSize(); } /// Could use an arena here (by passing a custom MemoryPool), to reuse memory across pages. @@ -605,8 +675,16 @@ void writeColumnImpl( data_offset = 0; dict_encoded_pages.clear(); use_dictionary = false; + +#ifndef NDEBUG + /// Arrow's DictEncoderImpl destructor asserts that FlushValues() was called, so we + /// call it even though we don't need its output. + encoder->FlushValues(); +#endif + encoder = parquet::MakeTypedEncoder( - static_cast(encoding)); + static_cast(encoding), /* use_dictionary */ false, + fixed_string_descr ? 
&*fixed_string_descr : nullptr); break; } @@ -668,7 +746,13 @@ void writeColumnChunkBody(ColumnChunkWriteState & s, const WriteOptions & option ConverterNumeric, parquet::parquet_dtype::c_type>( \ s.primitive_column)) - case TypeIndex::UInt8 : N(UInt8 , Int32Type); break; + case TypeIndex::UInt8: + if (s.is_bool) + writeColumnImpl(s, options, out, + ConverterNumeric, bool, bool>(s.primitive_column)); + else + N(UInt8 , Int32Type); + break; case TypeIndex::UInt16 : N(UInt16, Int32Type); break; case TypeIndex::UInt32 : N(UInt32, Int32Type); break; case TypeIndex::UInt64 : N(UInt64, Int64Type); break; @@ -769,14 +853,14 @@ parq::ColumnChunk finalizeColumnChunkAndWriteFooter( serializeThriftStruct(s.column_chunk, out); - return std::move(s.column_chunk); + return s.column_chunk; } parq::RowGroup makeRowGroup(std::vector column_chunks, size_t num_rows) { parq::RowGroup r; r.__set_num_rows(num_rows); - r.__set_columns(std::move(column_chunks)); + r.__set_columns(column_chunks); r.__set_total_compressed_size(0); for (auto & c : r.columns) { diff --git a/src/Processors/Formats/Impl/Parquet/Write.h b/src/Processors/Formats/Impl/Parquet/Write.h index 333a32e191f..9197eae5384 100644 --- a/src/Processors/Formats/Impl/Parquet/Write.h +++ b/src/Processors/Formats/Impl/Parquet/Write.h @@ -42,6 +42,7 @@ struct ColumnChunkWriteState ColumnPtr primitive_column; CompressionMethod compression; // must match what's inside column_chunk + bool is_bool = false; /// Repetition and definition levels. Produced by prepareColumnForWrite(). /// def is empty iff max_def == 0, which means no arrays or nullables. diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index 3dde8ad6a6c..be9c600f9bd 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -59,7 +59,12 @@ ParquetBlockInputFormat::ParquetBlockInputFormat( pool = std::make_unique(CurrentMetrics::ParquetDecoderThreads, CurrentMetrics::ParquetDecoderThreadsActive, max_decoding_threads); } -ParquetBlockInputFormat::~ParquetBlockInputFormat() = default; +ParquetBlockInputFormat::~ParquetBlockInputFormat() +{ + is_stopped = true; + if (pool) + pool->wait(); +} void ParquetBlockInputFormat::initializeIfNeeded() { diff --git a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp index 9a2d9072860..fbf8b3a7c87 100644 --- a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp @@ -68,9 +68,8 @@ namespace if (method == FormatSettings::ParquetCompression::GZIP) return parquet::Compression::type::GZIP; - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported compression method"); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported parquet compression method"); } - } ParquetBlockOutputFormat::ParquetBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_) @@ -162,7 +161,7 @@ void ParquetBlockOutputFormat::consume(Chunk chunk) if (staging_rows >= target_rows * 2) { /// Increase row group size slightly (by < 2x) to avoid a small row group at the end. 
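    /// E.g. with a target of 1000 rows and 2500 staged rows this gives num_row_groups = 2 and
    /// row_group_size = 1250, instead of writing row groups of 1000, 1000 and 500 rows.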
- size_t num_row_groups = std::max(static_cast(1), staging_rows / target_rows); + size_t num_row_groups = std::max(static_cast(1), staging_rows / target_rows); size_t row_group_size = (staging_rows - 1) / num_row_groups + 1; // round up Chunk concatenated = std::move(staging_chunks[0]); @@ -222,7 +221,10 @@ void ParquetBlockOutputFormat::finalizeImpl() } if (row_groups_complete.empty()) + { + base_offset = out.count(); writeFileHeader(out); + } writeFileFooter(std::move(row_groups_complete), schema, options, out); } else @@ -349,12 +351,15 @@ void ParquetBlockOutputFormat::writeRowGroupInOneThread(Chunk chunk) options, &columns_to_write); if (row_groups_complete.empty()) + { + base_offset = out.count(); writeFileHeader(out); + } std::vector column_chunks; for (auto & s : columns_to_write) { - size_t offset = out.count(); + size_t offset = out.count() - base_offset; writeColumnChunkBody(s, options, out); auto c = finalizeColumnChunkAndWriteFooter(offset, std::move(s), options, out); column_chunks.push_back(std::move(c)); @@ -413,14 +418,17 @@ void ParquetBlockOutputFormat::reapCompletedRowGroups(std::unique_lock metadata; for (auto & cols : r.column_chunks) { for (ColumnChunk & col : cols) { - size_t offset = out.count(); + size_t offset = out.count() - base_offset; out.write(col.serialized.data(), col.serialized.size()); auto m = finalizeColumnChunkAndWriteFooter(offset, std::move(col.state), options, out); diff --git a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.h b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.h index 4c73de007fe..aededc39dc4 100644 --- a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.h @@ -138,6 +138,7 @@ private: Parquet::WriteOptions options; Parquet::SchemaElements schema; std::vector row_groups_complete; + size_t base_offset = 0; std::mutex mutex; diff --git a/tests/queries/0_stateless/02581_parquet_arrow_orc_compressions.sh b/tests/queries/0_stateless/02581_parquet_arrow_orc_compressions.sh index 89b5147f026..d00026d516a 100755 --- a/tests/queries/0_stateless/02581_parquet_arrow_orc_compressions.sh +++ b/tests/queries/0_stateless/02581_parquet_arrow_orc_compressions.sh @@ -5,6 +5,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh +set -o pipefail + $CLICKHOUSE_LOCAL -q "select * from numbers(10) format Parquet settings output_format_parquet_compression_method='none'" | $CLICKHOUSE_LOCAL --input-format=Parquet -q "select count() from table" $CLICKHOUSE_LOCAL -q "select * from numbers(10) format Parquet settings output_format_parquet_compression_method='lz4'" | $CLICKHOUSE_LOCAL --input-format=Parquet -q "select count() from table" $CLICKHOUSE_LOCAL -q "select * from numbers(10) format Parquet settings output_format_parquet_compression_method='snappy'" | $CLICKHOUSE_LOCAL --input-format=Parquet -q "select count() from table" diff --git a/tests/queries/0_stateless/02735_parquet_encoder.sql b/tests/queries/0_stateless/02735_parquet_encoder.sql index d8d52a13218..3701c685120 100644 --- a/tests/queries/0_stateless/02735_parquet_encoder.sql +++ b/tests/queries/0_stateless/02735_parquet_encoder.sql @@ -1,4 +1,4 @@ --- Tags: no-fasttest +-- Tags: no-fasttest, no-parallel set output_format_parquet_use_custom_encoder = 1; set output_format_parquet_row_group_size = 1000; From 6b8752f2931fed6483d9221b6f5388e302245f31 Mon Sep 17 00:00:00 2001 From: zvonand Date: Tue, 25 Jul 2023 12:19:35 +0200 Subject: [PATCH 1970/1997] fix error message --- src/Functions/FunctionToDecimalString.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/FunctionToDecimalString.h b/src/Functions/FunctionToDecimalString.h index 68ad978632e..c16a72115d6 100644 --- a/src/Functions/FunctionToDecimalString.h +++ b/src/Functions/FunctionToDecimalString.h @@ -41,7 +41,7 @@ public: { FunctionArgumentDescriptors mandatory_args = { {"Value", nullptr, nullptr, nullptr}, - {"precision", &isNativeInteger, &isColumnConst, "const Integer [0-77]"} + {"precision", &isNativeInteger, &isColumnConst, "const Integer"} }; validateFunctionArgumentTypes(*this, arguments, mandatory_args, {}); From 5ee71bd643caf26b9f533dab1e369f9dc306296b Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Tue, 25 Jul 2023 10:26:26 +0000 Subject: [PATCH 1971/1997] Work around the clang bug --- src/Processors/Formats/Impl/Parquet/Write.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Processors/Formats/Impl/Parquet/Write.cpp b/src/Processors/Formats/Impl/Parquet/Write.cpp index 9664d173f29..47ef0c53ab5 100644 --- a/src/Processors/Formats/Impl/Parquet/Write.cpp +++ b/src/Processors/Formats/Impl/Parquet/Write.cpp @@ -651,6 +651,10 @@ void writeColumnImpl( const typename ParquetDType::c_type * converted = converter.getBatch(next_data_offset, data_count); if (options.write_page_statistics || options.write_column_chunk_statistics) +/// Workaround for clang bug: https://github.com/llvm/llvm-project/issues/63630 +#ifdef MEMORY_SANITIZER +#pragma clang loop vectorize(disable) +#endif for (size_t i = 0; i < data_count; ++i) page_statistics.add(converted[i]); From 155b90c780733a7712956982367088d856ec139b Mon Sep 17 00:00:00 2001 From: Andrey Zvonov <32552679+zvonand@users.noreply.github.com> Date: Tue, 25 Jul 2023 13:47:59 +0300 Subject: [PATCH 1972/1997] oops --- src/Functions/FunctionToDecimalString.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Functions/FunctionToDecimalString.h b/src/Functions/FunctionToDecimalString.h index c16a72115d6..a965e2c2c90 100644 --- a/src/Functions/FunctionToDecimalString.h +++ b/src/Functions/FunctionToDecimalString.h @@ -19,10 +19,8 @@ namespace DB namespace ErrorCodes { - extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ILLEGAL_COLUMN; extern const int 
CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } class FunctionToDecimalString : public IFunction From f8c90d5964a4c27dc119fd4417c23785a40b9c5e Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 25 Jul 2023 13:36:57 +0200 Subject: [PATCH 1973/1997] Make better --- docs/en/sql-reference/transactions.md | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/docs/en/sql-reference/transactions.md b/docs/en/sql-reference/transactions.md index 1ca2db44b13..cb89a091d68 100644 --- a/docs/en/sql-reference/transactions.md +++ b/docs/en/sql-reference/transactions.md @@ -5,7 +5,7 @@ slug: /en/guides/developer/transactional ## Case 1: INSERT into one partition, of one table, of the MergeTree* family -This is transactional (ACID) if the number of rows inserted is less than or equal to `max_insert_block_size rows`, and in the case of data in TSV, TKSV, CSV, or JSONEachRow format if the number of bytes is less than `min_chunk_bytes_for_parallel_parsing`: +This is transactional (ACID) if the inserted rows are packed and inserted as a single block (see Notes): - Atomic: an INSERT succeeds or is rejected as a whole: if a confirmation is sent to the client, then all rows were inserted; if an error is sent to the client, then no rows were inserted. - Consistent: if there are no table constraints violated, then all rows in an INSERT are inserted and the INSERT succeeds; if constraints are violated, then no rows are inserted. - Isolated: concurrent clients observe a consistent snapshot of the table–the state of the table either as it was before the INSERT attempt, or after the successful INSERT; no partial state is seen @@ -33,14 +33,16 @@ Same as Case 1 above, with this detail: - atomicity is ensured even if `async_insert` is enabled and `wait_for_async_insert` is set to 1 (the default), but if `wait_for_async_insert` is set to 0, then atomicity is not ensured. 
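
For example, assuming an illustrative table `t`, the following insert keeps the all-or-nothing guarantee because the client waits until the whole block is committed:

```sql
SET async_insert = 1, wait_for_async_insert = 1;
INSERT INTO t VALUES (1, 'a'), (2, 'b'); -- either both rows become visible or neither does
```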
## Notes
-- `max_insert_block_size` is 1 000 000 by default and can be adjusted as needed
-- `min_chunk_bytes_for_parallel_parsing` is 1 000 000 by default and can be adjusted as needed
+- rows inserted from the client in some data format are packed into a single block when:
+  - the insert format is row-based (like CSV, TSV, Values, JSONEachRow, etc) and the data contains less than `max_insert_block_size` rows (~1 000 000 by default) or less than `min_chunk_bytes_for_parallel_parsing` bytes (10 MB by default) when parallel parsing is used (it is enabled by default)
+  - the insert format is column-based (like Native, Parquet, ORC, etc) and the data contains only one block of data
+- the size of the inserted block in general may depend on many settings (for example: `max_block_size`, `max_insert_block_size`, `min_insert_block_size_rows`, `min_insert_block_size_bytes`, `preferred_block_size_bytes`, etc)
 - if the client did not receive an answer from the server, the client does not know if the transaction succeeded, and it can repeat the transaction, using exactly-once insertion properties
 - ClickHouse is using MVCC with snapshot isolation internally
 - all ACID properties are valid even in the case of server kill/crash
 - either insert_quorum into different AZ or fsync should be enabled to ensure durable inserts in the typical setup
 - "consistency" in ACID terms does not cover the semantics of distributed systems, see https://jepsen.io/consistency which is controlled by different settings (select_sequential_consistency)
-- this explanation does not cover a new transactions feature that allow to have full-featured transactions over multiple tables, materialized views, for multiple SELECTs, etc. (see the next section on Transactions, Commit, and Rollback).
+- this explanation does not cover a new transactions feature that allows full-featured transactions over multiple tables, materialized views, for multiple SELECTs, etc. (see the next section on Transactions, Commit, and Rollback)

 ## Transactions, Commit, and Rollback

From 93e5d7f51c561af4d9236ef7e146b94754bc8fd8 Mon Sep 17 00:00:00 2001
From: Antonio Andelic
Date: Tue, 25 Jul 2023 11:42:22 +0000
Subject: [PATCH 1974/1997] Fix flaky 00995_exception_while_insert

---
 tests/queries/0_stateless/00995_exception_while_insert.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/queries/0_stateless/00995_exception_while_insert.sh b/tests/queries/0_stateless/00995_exception_while_insert.sh
index 927ac6a54e5..732dba6c6f1 100755
--- a/tests/queries/0_stateless/00995_exception_while_insert.sh
+++ b/tests/queries/0_stateless/00995_exception_while_insert.sh
@@ -7,8 +7,8 @@ CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none

 $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS check;"

-$CLICKHOUSE_CLIENT --query="CREATE TABLE check (x UInt64, y UInt64 DEFAULT throwIf(x > 1500000)) ENGINE = Memory;"
+$CLICKHOUSE_CLIENT --query="CREATE TABLE check (x UInt64, y UInt64 DEFAULT throwIf(x = 1500000)) ENGINE = Memory;"

-seq 1 2000000 | $CLICKHOUSE_CLIENT --query="INSERT INTO check(x) FORMAT TSV" 2>&1 | grep -q "Value passed to 'throwIf' function is non-zero." && echo 'OK' || echo 'FAIL' ||:
+seq 1 1500000 | $CLICKHOUSE_CLIENT --query="INSERT INTO check(x) FORMAT TSV" 2>&1 | grep -q "Value passed to 'throwIf' function is non-zero."
&& echo 'OK' || echo 'FAIL' ||: $CLICKHOUSE_CLIENT --query="DROP TABLE check;" From 328d0a5269407eef6899907d6b9869307a56dfa4 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 25 Jul 2023 14:50:27 +0200 Subject: [PATCH 1975/1997] fix --- src/Storages/StorageReplicatedMergeTree.cpp | 10 +++++++--- .../test.py | 4 +++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index e6431927805..9e4a63f6ba9 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -4861,9 +4861,13 @@ void StorageReplicatedMergeTree::startupImpl(bool from_attach_thread) LOG_TRACE(log, "Waiting for RestartingThread to startup table"); } - std::lock_guard lock{flush_and_shutdown_mutex}; - if (shutdown_prepared_called.load() || shutdown_called.load()) - throw Exception(ErrorCodes::TABLE_IS_DROPPED, "Cannot startup table because it is dropped"); + auto lock = std::unique_lock(flush_and_shutdown_mutex, std::defer_lock); + do + { + if (shutdown_prepared_called.load() || shutdown_called.load()) + throw Exception(ErrorCodes::TABLE_IS_DROPPED, "Cannot startup table because it is dropped"); + } + while (!lock.try_lock()); /// And this is just a callback session_expired_callback_handler = EventNotifier::instance().subscribe(Coordination::Error::ZSESSIONEXPIRED, [this]() diff --git a/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py b/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py index 20b6a6c977f..d971e4ec658 100644 --- a/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py +++ b/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py @@ -3,6 +3,7 @@ import pytest from helpers.cluster import ClickHouseCluster from helpers.network import PartitionManager +from helpers.test_tools import assert_eq_with_retry from multiprocessing.dummy import Pool import time @@ -54,9 +55,10 @@ def test_shutdown_and_wait(start_cluster): node1.query(f"INSERT INTO test_table VALUES ({value})") with PartitionManager() as pm: + assert node2.query("SELECT * FROM test_table") == "0\n" pm.partition_instances(node1, node2) # iptables rules must be applied immediately, but looks like sometimes they are not... - time.sleep(3) + assert_eq_with_retry(node1, "select count() from remote('node1,node2', 'system.one')", "1\n", settings={"skip_unavailable_shards": 1}) p.map(insert, range(1, 50)) From d7de8bf797a7444927e80c7c88d9b7c5a4040e01 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 25 Jul 2023 13:03:12 +0000 Subject: [PATCH 1976/1997] Automatic style fix --- .../test_replicated_merge_tree_wait_on_shutdown/test.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py b/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py index d971e4ec658..d1373d44d0f 100644 --- a/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py +++ b/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py @@ -58,7 +58,12 @@ def test_shutdown_and_wait(start_cluster): assert node2.query("SELECT * FROM test_table") == "0\n" pm.partition_instances(node1, node2) # iptables rules must be applied immediately, but looks like sometimes they are not... 
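        # assert_eq_with_retry re-runs the query until it returns the expected value or a
        # timeout expires, which is more robust than a fixed sleep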
- assert_eq_with_retry(node1, "select count() from remote('node1,node2', 'system.one')", "1\n", settings={"skip_unavailable_shards": 1}) + assert_eq_with_retry( + node1, + "select count() from remote('node1,node2', 'system.one')", + "1\n", + settings={"skip_unavailable_shards": 1}, + ) p.map(insert, range(1, 50)) From b91852de3a311cd03ef571e4470deba3deeba25b Mon Sep 17 00:00:00 2001 From: Julian Maicher Date: Tue, 25 Jul 2023 16:01:19 +0200 Subject: [PATCH 1977/1997] fix(docs): Document correct MODIFY COLUMN REMOVE syntax --- docs/en/sql-reference/statements/alter/column.md | 2 +- docs/ru/sql-reference/statements/alter/column.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/statements/alter/column.md b/docs/en/sql-reference/statements/alter/column.md index dae2c7dd1d3..6ceb9b5849e 100644 --- a/docs/en/sql-reference/statements/alter/column.md +++ b/docs/en/sql-reference/statements/alter/column.md @@ -213,7 +213,7 @@ Removes one of the column properties: `DEFAULT`, `ALIAS`, `MATERIALIZED`, `CODEC Syntax: ```sql -ALTER TABLE table_name MODIFY column_name REMOVE property; +ALTER TABLE table_name MODIFY COLUMN column_name REMOVE property; ``` **Example** diff --git a/docs/ru/sql-reference/statements/alter/column.md b/docs/ru/sql-reference/statements/alter/column.md index a8ace213075..92be30b101a 100644 --- a/docs/ru/sql-reference/statements/alter/column.md +++ b/docs/ru/sql-reference/statements/alter/column.md @@ -182,7 +182,7 @@ ALTER TABLE visits MODIFY COLUMN browser Array(String) Синтаксис: ```sql -ALTER TABLE table_name MODIFY column_name REMOVE property; +ALTER TABLE table_name MODIFY COLUMN column_name REMOVE property; ``` **Пример** From bd09ad6736bac2b9e986993e75f1f8f61b1508a6 Mon Sep 17 00:00:00 2001 From: Val Doroshchuk Date: Tue, 25 Jul 2023 16:19:44 +0200 Subject: [PATCH 1978/1997] MaterializedMySQL: Fix typos in tests --- .../materialized_with_ddl.py | 27 +++++++++++++------ .../test_materialized_mysql_database/test.py | 9 ++++--- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py b/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py index 8b2943c2b73..389d430622d 100644 --- a/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py +++ b/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py @@ -13,25 +13,36 @@ from multiprocessing.dummy import Pool from helpers.test_tools import assert_eq_with_retry -def check_query(clickhouse_node, query, result_set, retry_count=10, interval_seconds=3): - lastest_result = "" +def check_query( + clickhouse_node, + query, + result_set, + retry_count=30, + interval_seconds=1, + on_failure=None, +): + latest_result = "" + if "/* expect: " not in query: + query = "/* expect: " + result_set.rstrip("\n") + "*/ " + query for i in range(retry_count): try: - lastest_result = clickhouse_node.query(query) - if result_set == lastest_result: + latest_result = clickhouse_node.query(query) + if result_set == latest_result: return - logging.debug(f"latest_result {lastest_result}") + logging.debug(f"latest_result {latest_result}") time.sleep(interval_seconds) except Exception as e: logging.debug(f"check_query retry {i+1} exception {e}") time.sleep(interval_seconds) else: - result_got = clickhouse_node.query(query) + latest_result = clickhouse_node.query(query) + if on_failure is not None and latest_result != result_set: + on_failure(latest_result, result_set) assert ( 
- result_got == result_set - ), f"Got result {result_got}, while expected result {result_set}" + latest_result == result_set + ), f"Got result '{latest_result}', expected result '{result_set}'" def dml_with_materialized_mysql_database(clickhouse_node, mysql_node, service_name): diff --git a/tests/integration/test_materialized_mysql_database/test.py b/tests/integration/test_materialized_mysql_database/test.py index c21e04af8db..1fd09f733f0 100644 --- a/tests/integration/test_materialized_mysql_database/test.py +++ b/tests/integration/test_materialized_mysql_database/test.py @@ -52,6 +52,7 @@ def started_cluster(): cluster.start() yield cluster finally: + node_db.stop_clickhouse() # ensures that coverage report is written to disk, even if cluster.shutdown() times out. cluster.shutdown() @@ -86,7 +87,7 @@ class MySQLConnection: else: self.mysql_connection.ping(reconnect=True) logging.debug( - "MySQL Connection establised: {}:{}".format( + "MySQL Connection established: {}:{}".format( self.ip_address, self.port ) ) @@ -94,7 +95,7 @@ class MySQLConnection: except Exception as e: errors += [str(e)] time.sleep(1) - raise Exception("Connection not establised, {}".format(errors)) + raise Exception("Connection not established, {}".format(errors)) def query(self, execution_query): with self.alloc_connection().cursor() as cursor: @@ -118,9 +119,9 @@ class MySQLConnection: if result is not None: print(cursor.fetchall()) - def query_and_get_data(self, executio_query): + def query_and_get_data(self, execution_query): with self.alloc_connection().cursor() as cursor: - cursor.execute(executio_query) + cursor.execute(execution_query) return cursor.fetchall() def close(self): From 2c7c38950d54c009e5268d371dabe8035b817283 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 25 Jul 2023 14:21:12 +0000 Subject: [PATCH 1979/1997] better check for lightweight deletes --- src/Storages/MergeTree/MergeTreeData.cpp | 7 +++++-- .../02792_drop_projection_lwd.reference | 2 +- .../0_stateless/02792_drop_projection_lwd.sql | 16 +++++----------- 3 files changed, 11 insertions(+), 14 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 06a9b62d9de..6179c70ca57 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -5693,8 +5693,11 @@ bool MergeTreeData::supportsLightweightDelete() const auto lock = lockParts(); for (const auto & part : data_parts_by_info) { - if (part->getState() == MergeTreeDataPartState::Active - && !part->supportLightweightDeleteMutate()) + if (part->getState() == MergeTreeDataPartState::Outdated + || part->getState() == MergeTreeDataPartState::Deleting) + continue; + + if (!part->supportLightweightDeleteMutate()) return false; } return true; diff --git a/tests/queries/0_stateless/02792_drop_projection_lwd.reference b/tests/queries/0_stateless/02792_drop_projection_lwd.reference index 6529ff889b0..3ad5abd03ae 100644 --- a/tests/queries/0_stateless/02792_drop_projection_lwd.reference +++ b/tests/queries/0_stateless/02792_drop_projection_lwd.reference @@ -1 +1 @@ -98 +99 diff --git a/tests/queries/0_stateless/02792_drop_projection_lwd.sql b/tests/queries/0_stateless/02792_drop_projection_lwd.sql index fd446a8efe8..a1d8a9c90f3 100644 --- a/tests/queries/0_stateless/02792_drop_projection_lwd.sql +++ b/tests/queries/0_stateless/02792_drop_projection_lwd.sql @@ -1,23 +1,17 @@ +SET mutations_sync = 2; + DROP TABLE IF EXISTS t_projections_lwd; -CREATE TABLE t_projections_lwd (a UInt32, b UInt32) 
ENGINE = MergeTree ORDER BY a; +CREATE TABLE t_projections_lwd (a UInt32, b UInt32, PROJECTION p (SELECT * ORDER BY b)) ENGINE = MergeTree ORDER BY a; INSERT INTO t_projections_lwd SELECT number, number FROM numbers(100); --- LWD works -DELETE FROM t_projections_lwd WHERE a = 0; - --- add projection -ALTER TABLE t_projections_lwd ADD PROJECTION p_t_projections_lwd (SELECT * ORDER BY b); -ALTER TABLE t_projections_lwd MATERIALIZE PROJECTION p_t_projections_lwd; - -- LWD does not work, as expected -DELETE FROM t_projections_lwd WHERE a = 1; -- { serverError UNFINISHED } +DELETE FROM t_projections_lwd WHERE a = 1; -- { serverError BAD_ARGUMENTS } KILL MUTATION WHERE database = currentDatabase() AND table = 't_projections_lwd' SYNC FORMAT Null; -- drop projection -SET mutations_sync = 2; -ALTER TABLE t_projections_lwd DROP projection p_t_projections_lwd; +ALTER TABLE t_projections_lwd DROP projection p; DELETE FROM t_projections_lwd WHERE a = 2; From 79d0343becaa001dca587ee1932a8520e086d0ce Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 25 Jul 2023 16:34:40 +0200 Subject: [PATCH 1980/1997] tests: fix 01821_join_table_race_long flakiness (#52559) By grouping multiple queries into one clickhouse-client invocation, since each execve of the binary can take ~1 second in debug builds. But this slightly changes the logic, so be aware. Signed-off-by: Azat Khuzhin Co-authored-by: Alexander Tokmakov --- tests/queries/0_stateless/01821_join_table_race_long.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/01821_join_table_race_long.sh b/tests/queries/0_stateless/01821_join_table_race_long.sh index e02fe788653..561b856841b 100755 --- a/tests/queries/0_stateless/01821_join_table_race_long.sh +++ b/tests/queries/0_stateless/01821_join_table_race_long.sh @@ -9,13 +9,13 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS join_table_race" $CLICKHOUSE_CLIENT -q "CREATE TABLE join_table_race(id Int32, name String) ENGINE = Join(ANY, LEFT, id)" -for _ in {0..100}; do $CLICKHOUSE_CLIENT -q "INSERT INTO join_table_race VALUES ($RANDOM, '$RANDOM')" > /dev/null 2> /dev/null; done & +for _ in {0..100}; do echo "INSERT INTO join_table_race VALUES ($RANDOM, '$RANDOM');"; done | $CLICKHOUSE_CLIENT --ignore-error -nm > /dev/null 2> /dev/null & -for _ in {0..200}; do $CLICKHOUSE_CLIENT -q "SELECT count() FROM join_table_race FORMAT Null" > /dev/null 2> /dev/null; done & +for _ in {0..200}; do echo "SELECT count() FROM join_table_race FORMAT Null;"; done | $CLICKHOUSE_CLIENT --ignore-error -nm > /dev/null 2> /dev/null & -for _ in {0..100}; do $CLICKHOUSE_CLIENT -q "TRUNCATE TABLE join_table_race" > /dev/null 2> /dev/null; done & +for _ in {0..100}; do echo "TRUNCATE TABLE join_table_race;"; done | $CLICKHOUSE_CLIENT --ignore-error -nm > /dev/null 2> /dev/null & -for _ in {0..100}; do $CLICKHOUSE_CLIENT -q "ALTER TABLE join_table_race DELETE WHERE id % 2 = 0" > /dev/null 2> /dev/null; done & +for _ in {0..100}; do echo "ALTER TABLE join_table_race DELETE WHERE id % 2 = 0;"; done | $CLICKHOUSE_CLIENT --ignore-error -nm > /dev/null 2> /dev/null & wait From 85082ad8f8ee0d1023273d8db888e143e59bd828 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 25 Jul 2023 16:35:01 +0200 Subject: [PATCH 1981/1997] Fix data-race DatabaseReplicated::startupTables()/canExecuteReplicatedMetadataAlter() (#52490) CI founds [1]: Exception: Sanitizer assert found for instance ================== WARNING: ThreadSanitizer: data race 
(pid=348) Write of size 8 at 0x7b58000044a0 by main thread: 2 DB::DatabaseReplicated::startupTables(ThreadPoolImpl>&, DB::LoadingStrictnessLevel) build_docker/./src/Databases/DatabaseReplicated.cpp:526:16 (clickhouse+0x1ec45092) 3 DB::TablesLoader::startupTables() build_docker/./src/Databases/TablesLoader.cpp:87:26 (clickhouse+0x1f9258ab) (BuildId: 7d4ce55d33d4c3e3df9fd39b304e67e53eb61a63) 4 DB::loadMetadata(std::__1::shared_ptr, std::__1::basic_string, std::__1::allocator> const&) build_docker/./src/Interpreters/loadMetadata.cpp:234:12 (clickhouse+0x1fff3834) (BuildId: 7d4ce55d33d4c3e3df9fd39b304e67e53eb61a63) 5 DB::Server::main() build_docker/./programs/server/Server.cpp:1615:9 (clickhouse+0x163e7f78) (BuildId: 7d4ce55d33d4c3e3df9fd39b304e67e53eb61a63) 6 Poco::Util::Application::run() build_docker/./base/poco/Util/src/Application.cpp:315:8 (clickhouse+0x257608fe) (BuildId: 7d4ce55d33d4c3e3df9fd39b304e67e53eb61a63) 7 DB::Server::run() build_docker/./programs/server/Server.cpp:391:25 (clickhouse+0x163d7d7c) (BuildId: 7d4ce55d33d4c3e3df9fd39b304e67e53eb61a63) 8 Poco::Util::ServerApplication::run(int, char**) build_docker/./base/poco/Util/src/ServerApplication.cpp:131:9 (clickhouse+0x25780114) (BuildId: 7d4ce55d33d4c3e3df9fd39b304e67e53eb61a63) 9 mainEntryClickHouseServer(int, char**) build_docker/./programs/server/Server.cpp:196:20 (clickhouse+0x163d4c23) (BuildId: 7d4ce55d33d4c3e3df9fd39b304e67e53eb61a63) 10 main build_docker/./programs/main.cpp:487:12 (clickhouse+0xdf8c877) (BuildId: 7d4ce55d33d4c3e3df9fd39b304e67e53eb61a63) Previous read of size 8 at 0x7b58000044a0 by thread T27 (mutexes: write M0, write M1): 1 DB::DatabaseReplicated::canExecuteReplicatedMetadataAlter() const build_docker/./src/Databases/DatabaseReplicated.cpp:1303:12 (clickhouse+0x1ec5c5bd) 2 DB::ReplicatedMergeTreeQueue::shouldExecuteLogEntry() const build_docker/./src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp:1471:24 (clickhouse+0x2115fb56) (BuildId: 7d4ce55d33d4c3e3df9fd39b304e67e53eb61a63) 3 DB::ReplicatedMergeTreeQueue::selectEntryToProcess(DB::MergeTreeDataMergerMutator&, DB::MergeTreeData&) build_docker/./src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp:1676:13 (clickhouse+0x21163c58) (BuildId: 7d4ce55d33d4c3e3df9fd39b304e67e53eb61a63) 4 DB::StorageReplicatedMergeTree::selectQueueEntry() build_docker/./src/Storages/StorageReplicatedMergeTree.cpp:3240:26 (clickhouse+0x20823db2) (BuildId: 7d4ce55d33d4c3e3df9fd39b304e67e53eb61a63) 5 DB::StorageReplicatedMergeTree::scheduleDataProcessingJob(DB::BackgroundJobsAssignee&) build_docker/./src/Storages/StorageReplicatedMergeTree.cpp:3304:65 (clickhouse+0x208240fc) (BuildId: 7d4ce55d33d4c3e3df9fd39b304e67e53eb61a63) [1]: https://s3.amazonaws.com/clickhouse-test-reports/52395/0b258dda4ee618a4d002e2b5246d68bbd2c77c7e/integration_tests__tsan__[5_6].html Add ddl_worker_initialized flag to avoid this race. Note, that it should be enough to check this flag only in canExecuteReplicatedMetadataAlter() since only it can be run in parallel with ctor before it had been finished. 
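In other words, `ddl_worker_initialized` is a `std::atomic_bool` that is set only after
`ddl_worker` has been fully constructed, and it is cleared before the pointer is reset in
shutdown(), so a thread that observes the flag as true can safely dereference the pointer.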
v0: initialize ddl before startupTables() v2: ddl_worker_initialized Signed-off-by: Azat Khuzhin Co-authored-by: Alexander Tokmakov --- src/Databases/DatabaseReplicated.cpp | 4 +++- src/Databases/DatabaseReplicated.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index d3b3d4b545f..ed56edd7503 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -524,6 +524,7 @@ void DatabaseReplicated::startupTables(ThreadPool & thread_pool, LoadingStrictne ddl_worker = std::make_unique(this, getContext()); ddl_worker->startup(); + ddl_worker_initialized = true; } bool DatabaseReplicated::checkDigestValid(const ContextPtr & local_context, bool debug_check /* = true */) const @@ -1155,6 +1156,7 @@ void DatabaseReplicated::stopReplication() void DatabaseReplicated::shutdown() { stopReplication(); + ddl_worker_initialized = false; ddl_worker = nullptr; DatabaseAtomic::shutdown(); } @@ -1299,7 +1301,7 @@ bool DatabaseReplicated::canExecuteReplicatedMetadataAlter() const /// It may update the metadata digest (both locally and in ZooKeeper) /// before DatabaseReplicatedDDLWorker::initializeReplication() has finished. /// We should not update metadata until the database is initialized. - return ddl_worker && ddl_worker->isCurrentlyActive(); + return ddl_worker_initialized && ddl_worker->isCurrentlyActive(); } void DatabaseReplicated::detachTablePermanently(ContextPtr local_context, const String & table_name) diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h index 8e33f482ac1..7ba91e48085 100644 --- a/src/Databases/DatabaseReplicated.h +++ b/src/Databases/DatabaseReplicated.h @@ -134,6 +134,7 @@ private: std::atomic_bool is_readonly = true; std::atomic_bool is_probably_dropped = false; std::atomic_bool is_recovering = false; + std::atomic_bool ddl_worker_initialized = false; std::unique_ptr ddl_worker; UInt32 max_log_ptr_at_creation = 0; From c75b5bc740cd20ee7f5e6bb5a71b9f8e215eb03c Mon Sep 17 00:00:00 2001 From: Sanjam Panda <36253777+saitama951@users.noreply.github.com> Date: Tue, 25 Jul 2023 20:12:22 +0530 Subject: [PATCH 1982/1997] Update TwoLevelStringHashTable.h --- .../HashTable/TwoLevelStringHashTable.h | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/Common/HashTable/TwoLevelStringHashTable.h b/src/Common/HashTable/TwoLevelStringHashTable.h index ee6dcd05d9a..0527ec67e6e 100644 --- a/src/Common/HashTable/TwoLevelStringHashTable.h +++ b/src/Common/HashTable/TwoLevelStringHashTable.h @@ -114,18 +114,18 @@ public: { memcpy(&n[0], p, 8); if constexpr (std::endian::native == std::endian::little) - n[0] &= -1ULL >> s; - else - n[0] &= -1ULL << s; + n[0] &= -1ULL >> s; + else + n[0] &= -1ULL << s; } else { const char * lp = x.data + x.size - 8; memcpy(&n[0], lp, 8); if constexpr (std::endian::native == std::endian::little) - n[0] >>= s; - else - n[0] <<= s; + n[0] >>= s; + else + n[0] <<= s; } auto res = hash(k8); auto buck = getBucketFromHash(res); @@ -138,9 +138,9 @@ public: const char * lp = x.data + x.size - 8; memcpy(&n[1], lp, 8); if constexpr (std::endian::native == std::endian::little) - n[1] >>= s; + n[1] >>= s; else - n[1] <<= s; + n[1] <<= s; auto res = hash(k16); auto buck = getBucketFromHash(res); keyHolderDiscardKey(key_holder); @@ -152,9 +152,9 @@ public: const char * lp = x.data + x.size - 8; memcpy(&n[2], lp, 8); if constexpr (std::endian::native == std::endian::little) - n[2] >>= 
s; + n[2] >>= s; else - n[2] <<= s; + n[2] <<= s; auto res = hash(k24); auto buck = getBucketFromHash(res); keyHolderDiscardKey(key_holder); From 11016d4c5f36fa39a36c2c2b6c0eec7c1c3dfd5f Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Tue, 25 Jul 2023 16:46:50 +0200 Subject: [PATCH 1983/1997] Revert "Rewrite uniq to count" --- src/Analyzer/Passes/UniqToCountPass.cpp | 198 ------------------ src/Analyzer/Passes/UniqToCountPass.h | 30 --- src/Analyzer/QueryTreePassManager.cpp | 2 - src/Core/Settings.h | 1 - src/Interpreters/InterpreterSelectQuery.cpp | 7 - .../RewriteUniqToCountVisitor.cpp | 163 -------------- src/Interpreters/RewriteUniqToCountVisitor.h | 30 --- .../test_rewrite_uniq_to_count/__init__.py | 0 .../test_rewrite_uniq_to_count/test.py | 127 ----------- 9 files changed, 558 deletions(-) delete mode 100644 src/Analyzer/Passes/UniqToCountPass.cpp delete mode 100644 src/Analyzer/Passes/UniqToCountPass.h delete mode 100644 src/Interpreters/RewriteUniqToCountVisitor.cpp delete mode 100644 src/Interpreters/RewriteUniqToCountVisitor.h delete mode 100644 tests/integration/test_rewrite_uniq_to_count/__init__.py delete mode 100644 tests/integration/test_rewrite_uniq_to_count/test.py diff --git a/src/Analyzer/Passes/UniqToCountPass.cpp b/src/Analyzer/Passes/UniqToCountPass.cpp deleted file mode 100644 index 7533a99107b..00000000000 --- a/src/Analyzer/Passes/UniqToCountPass.cpp +++ /dev/null @@ -1,198 +0,0 @@ -#include "UniqToCountPass.h" - -#include -#include - -#include -#include -#include -#include - -namespace DB -{ - -namespace -{ - -bool matchFnUniq(String func_name) -{ - auto name = Poco::toLower(func_name); - return name == "uniq" || name == "uniqHLL12" || name == "uniqExact" || name == "uniqTheta" || name == "uniqCombined" - || name == "uniqCombined64"; -} - -/// Extract the corresponding projection columns for group by node list. -/// For example: -/// SELECT a as aa, any(b) FROM table group by a; -> aa(ColumnNode) -NamesAndTypes extractProjectionColumnsForGroupBy(const QueryNode * query_node) -{ - if (!query_node->hasGroupBy()) - return {}; - - NamesAndTypes result; - for (const auto & group_by_ele : query_node->getGroupByNode()->getChildren()) - { - const auto & projection_columns = query_node->getProjectionColumns(); - const auto & projection_nodes = query_node->getProjection().getNodes(); - - assert(projection_columns.size() == projection_nodes.size()); - - for (size_t i = 0; i < projection_columns.size(); i++) - { - if (projection_nodes[i]->isEqual(*group_by_ele)) - result.push_back(projection_columns[i]); - } - } - return result; -} - -/// Whether query_columns equals subquery_columns. -/// query_columns: query columns from query -/// subquery_columns: projection columns from subquery -bool nodeListEquals(const QueryTreeNodes & query_columns, const NamesAndTypes & subquery_columns) -{ - if (query_columns.size() != subquery_columns.size()) - return false; - - for (const auto & query_column : query_columns) - { - auto find = std::find_if( - subquery_columns.begin(), - subquery_columns.end(), - [&](const auto & subquery_column) -> bool - { - if (auto * column_node = query_column->as()) - { - return subquery_column == column_node->getColumn(); - } - return false; - }); - - if (find == subquery_columns.end()) - return false; - } - return true; -} - -/// Whether subquery_columns contains all columns in subquery_columns. 
-/// query_columns: query columns from query -/// subquery_columns: projection columns from subquery -bool nodeListContainsAll(const QueryTreeNodes & query_columns, const NamesAndTypes & subquery_columns) -{ - if (query_columns.size() > subquery_columns.size()) - return false; - - for (const auto & query_column : query_columns) - { - auto find = std::find_if( - subquery_columns.begin(), - subquery_columns.end(), - [&](const auto & subquery_column) -> bool - { - if (auto * column_node = query_column->as()) - { - return subquery_column == column_node->getColumn(); - } - return false; - }); - - if (find == subquery_columns.end()) - return false; - } - return true; -} - -} - -class UniqToCountVisitor : public InDepthQueryTreeVisitor -{ -public: - using Base = InDepthQueryTreeVisitor; - using Base::Base; - - void visitImpl(QueryTreeNodePtr & node) - { - auto * query_node = node->as(); - if (!query_node) - return; - - /// Check that query has only single table expression which is subquery - auto * subquery_node = query_node->getJoinTree()->as(); - if (!subquery_node) - return; - - /// Check that query has only single node in projection - auto & projection_nodes = query_node->getProjection().getNodes(); - if (projection_nodes.size() != 1) - return; - - /// Check that projection_node is a function - auto & projection_node = projection_nodes[0]; - auto * function_node = projection_node->as(); - if (!function_node) - return; - - /// Check that query single projection node is `uniq` or its variants - if (!matchFnUniq(function_node->getFunctionName())) - return; - - auto & uniq_arguments_nodes = function_node->getArguments().getNodes(); - - /// Whether query matches 'SELECT uniq(x ...) FROM (SELECT DISTINCT x ...)' - auto match_subquery_with_distinct = [&]() -> bool - { - if (!subquery_node->isDistinct()) - return false; - - /// uniq expression list == subquery projection columns - if (!nodeListEquals(uniq_arguments_nodes, subquery_node->getProjectionColumns())) - return false; - - return true; - }; - - /// Whether query matches 'SELECT uniq(x ...) FROM (SELECT x ... 
GROUP BY x ...)' - auto match_subquery_with_group_by = [&]() -> bool - { - if (!subquery_node->hasGroupBy()) - return false; - - /// uniq argument node list == subquery group by node list - auto group_by_columns = extractProjectionColumnsForGroupBy(subquery_node); - - if (!nodeListEquals(uniq_arguments_nodes, group_by_columns)) - return false; - - /// subquery projection columns must contain all columns in uniq argument node list - if (!nodeListContainsAll(uniq_arguments_nodes, subquery_node->getProjectionColumns())) - return false; - - return true; - }; - - /// Replace uniq of initial query to count - if (match_subquery_with_distinct() || match_subquery_with_group_by()) - { - AggregateFunctionProperties properties; - auto aggregate_function = AggregateFunctionFactory::instance().get("count", {}, {}, properties); - - function_node->resolveAsAggregateFunction(std::move(aggregate_function)); - function_node->getArguments().getNodes().clear(); - - /// Update projection columns - query_node->resolveProjectionColumns({{"count()", function_node->getResultType()}}); - } - } -}; - - -void UniqToCountPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) -{ - if (!context->getSettings().optimize_uniq_to_count) - return; - - UniqToCountVisitor visitor; - visitor.visit(query_tree_node); -} - -} diff --git a/src/Analyzer/Passes/UniqToCountPass.h b/src/Analyzer/Passes/UniqToCountPass.h deleted file mode 100644 index 4992d524e5e..00000000000 --- a/src/Analyzer/Passes/UniqToCountPass.h +++ /dev/null @@ -1,30 +0,0 @@ -#pragma once - -#include - -namespace DB -{ - -/** Optimize `uniq` and its variants(except uniqUpTo) into `count` over subquery. - * Example: 'SELECT uniq(x ...) FROM (SELECT DISTINCT x ...)' to - * Result: 'SELECT count() FROM (SELECT DISTINCT x ...)' - * - * Example: 'SELECT uniq(x ...) FROM (SELECT x ... GROUP BY x ...)' to - * Result: 'SELECT count() FROM (SELECT x ... GROUP BY x ...)' - * - * Note that we can rewrite all uniq variants except uniqUpTo. - */ -class UniqToCountPass final : public IQueryTreePass -{ -public: - String getName() override { return "UniqToCount"; } - - String getDescription() override - { - return "Rewrite uniq and its variants(except uniqUpTo) to count if subquery has distinct or group by clause."; - } - - void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; -}; - -} diff --git a/src/Analyzer/QueryTreePassManager.cpp b/src/Analyzer/QueryTreePassManager.cpp index dd75b0f586d..a6da2a66615 100644 --- a/src/Analyzer/QueryTreePassManager.cpp +++ b/src/Analyzer/QueryTreePassManager.cpp @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -247,7 +246,6 @@ void addQueryTreePasses(QueryTreePassManager & manager) manager.addPass(std::make_unique()); manager.addPass(std::make_unique()); - manager.addPass(std::make_unique()); manager.addPass(std::make_unique()); manager.addPass(std::make_unique()); manager.addPass(std::make_unique()); diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 2ead00cafb4..8bebef5fb00 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -776,7 +776,6 @@ class IColumn; M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \ M(UInt64, extract_kvp_max_pairs_per_row, 1000, "Max number pairs that can be produced by extractKeyValuePairs function. 
Used to safeguard against consuming too much memory.", 0) \ M(Timezone, session_timezone, "", "This setting can be removed in the future due to potential caveats. It is experimental and is not suitable for production usage. The default timezone for current session or query. The server default timezone if empty.", 0) \ - M(Bool, optimize_uniq_to_count, false, "Rewrite uniq and its variants(except uniqUpTo) to count if subquery has distinct or group by clause.", 0) \ M(Bool, allow_create_index_without_type, false, "Allow CREATE INDEX query without TYPE. Query will be ignored. Made for SQL compatibility tests.", 0)\ // End of COMMON_SETTINGS // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS. diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 8402165b62b..fc3ea3a13ca 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -39,7 +39,6 @@ #include #include #include -#include #include #include @@ -427,12 +426,6 @@ InterpreterSelectQuery::InterpreterSelectQuery( RewriteCountDistinctFunctionVisitor(data_rewrite_countdistinct).visit(query_ptr); } - if (settings.optimize_uniq_to_count) - { - RewriteUniqToCountMatcher::Data data_rewrite_uniq_count; - RewriteUniqToCountVisitor(data_rewrite_uniq_count).visit(query_ptr); - } - JoinedTables joined_tables(getSubqueryContext(context), getSelectQuery(), options.with_all_cols, options_.is_create_parameterized_view); bool got_storage_from_query = false; diff --git a/src/Interpreters/RewriteUniqToCountVisitor.cpp b/src/Interpreters/RewriteUniqToCountVisitor.cpp deleted file mode 100644 index 7445068207a..00000000000 --- a/src/Interpreters/RewriteUniqToCountVisitor.cpp +++ /dev/null @@ -1,163 +0,0 @@ -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -namespace DB -{ - -using Aliases = std::unordered_map; - -namespace -{ - -bool matchFnUniq(String func_name) -{ - auto name = Poco::toLower(func_name); - return name == "uniq" || name == "uniqHLL12" || name == "uniqExact" || name == "uniqTheta" || name == "uniqCombined" - || name == "uniqCombined64"; -} - -bool expressionEquals(const ASTPtr & lhs, const ASTPtr & rhs, const Aliases & alias) -{ - if (lhs->getTreeHash() == rhs->getTreeHash()) - { - return true; - } - else - { - auto * lhs_idf = lhs->as(); - auto * rhs_idf = rhs->as(); - if (lhs_idf && rhs_idf) - { - /// compound identifiers, such as: - if (lhs_idf->shortName() == rhs_idf->shortName()) - return true; - - /// translate alias - if (alias.find(lhs_idf->shortName()) != alias.end()) - lhs_idf = alias.find(lhs_idf->shortName())->second->as(); - - if (alias.find(rhs_idf->shortName()) != alias.end()) - rhs_idf = alias.find(rhs_idf->shortName())->second->as(); - - if (lhs_idf->shortName() == rhs_idf->shortName()) - return true; - } - } - return false; -} - -bool expressionListEquals(ASTExpressionList * lhs, ASTExpressionList * rhs, const Aliases & alias) -{ - if (!lhs || !rhs) - return false; - if (lhs->children.size() != rhs->children.size()) - return false; - for (size_t i = 0; i < lhs->children.size(); i++) - { - if (!expressionEquals(lhs->children[i], rhs->children[i], alias)) - return false; - } - return true; -} - -/// Test whether lhs contains all expressions in rhs. 
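The deleted helper below (expressionListContainsAll) enforced that every uniq argument also had to appear in the subquery projection before the rewrite fired. A hedged SQL illustration of that containment rule, using a hypothetical table t with columns a and b:

```sql
-- Rewrite was valid here: uniq(a) matches the GROUP BY key and `a` is projected,
-- so the subquery emits exactly one row per distinct `a`.
SELECT uniq(a) FROM (SELECT a, sum(b) FROM t GROUP BY a);
-- Equivalent form the (now removed) optimization produced:
SELECT count() FROM (SELECT a, sum(b) FROM t GROUP BY a);
```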
-bool expressionListContainsAll(ASTExpressionList * lhs, ASTExpressionList * rhs, const Aliases & alias) -{ - if (!lhs || !rhs) - return false; - if (lhs->children.size() < rhs->children.size()) - return false; - for (const auto & re : rhs->children) - { - auto predicate = [&re, &alias](ASTPtr & le) { return expressionEquals(le, re, alias); }; - if (std::find_if(lhs->children.begin(), lhs->children.end(), predicate) == lhs->children.end()) - return false; - } - return true; -} - -} - -void RewriteUniqToCountMatcher::visit(ASTPtr & ast, Data & /*data*/) -{ - auto * selectq = ast->as(); - if (!selectq || !selectq->tables() || selectq->tables()->children.size() != 1) - return; - auto expr_list = selectq->select(); - if (!expr_list || expr_list->children.size() != 1) - return; - auto * func = expr_list->children[0]->as(); - if (!func || !matchFnUniq(func->name)) - return; - if (selectq->tables()->as()->children[0]->as()->children.size() != 1) - return; - auto * table_expr = selectq->tables() - ->as() - ->children[0] - ->as() - ->children[0] - ->as(); - if (!table_expr || table_expr->children.size() != 1 || !table_expr->subquery) - return; - auto * subquery = table_expr->subquery->as(); - if (!subquery) - return; - auto * sub_selectq = subquery->children[0] - ->as()->children[0] - ->as()->children[0] - ->as(); - if (!sub_selectq) - return; - auto sub_expr_list = sub_selectq->select(); - if (!sub_expr_list) - return; - - /// collect subquery select expressions alias - Aliases alias; - for (const auto & expr : sub_expr_list->children) - { - if (!expr->tryGetAlias().empty()) - alias.insert({expr->tryGetAlias(), expr}); - } - - /// Whether query matches 'SELECT uniq(x ...) FROM (SELECT DISTINCT x ...)' - auto match_subquery_with_distinct = [&]() -> bool - { - if (!sub_selectq->distinct) - return false; - /// uniq expression list == subquery group by expression list - if (!expressionListEquals(func->children[0]->as(), sub_expr_list->as(), alias)) - return false; - return true; - }; - - /// Whether query matches 'SELECT uniq(x ...) FROM (SELECT x ... GROUP BY x ...)' - auto match_subquery_with_group_by = [&]() -> bool - { - auto group_by = sub_selectq->groupBy(); - if (!group_by) - return false; - /// uniq expression list == subquery group by expression list - if (!expressionListEquals(func->children[0]->as(), group_by->as(), alias)) - return false; - /// subquery select expression list must contain all columns in uniq expression list - if (!expressionListContainsAll(sub_expr_list->as(), func->children[0]->as(), alias)) - return false; - return true; - }; - - if (match_subquery_with_distinct() || match_subquery_with_group_by()) - expr_list->children[0] = makeASTFunction("count"); -} - -} diff --git a/src/Interpreters/RewriteUniqToCountVisitor.h b/src/Interpreters/RewriteUniqToCountVisitor.h deleted file mode 100644 index 94528ccf2ee..00000000000 --- a/src/Interpreters/RewriteUniqToCountVisitor.h +++ /dev/null @@ -1,30 +0,0 @@ -#pragma once - -#include -#include -#include "Interpreters/TreeRewriter.h" - -namespace DB -{ - -class ASTFunction; - -/** Optimize `uniq` into `count` over subquery. - * Example: 'SELECT uniq(x ...) FROM (SELECT DISTINCT x ...)' to - * Result: 'SELECT count() FROM (SELECT DISTINCT x ...)' - * - * Example: 'SELECT uniq(x ...) FROM (SELECT x ... GROUP BY x ...)' to - * Result: 'SELECT count() FROM (SELECT x ... GROUP BY x ...)' - * - * Note that we can rewrite all uniq variants except uniqUpTo. 
- */ -class RewriteUniqToCountMatcher -{ -public: - struct Data {}; - static void visit(ASTPtr & ast, Data &); - static bool needChildVisit(const ASTPtr &, const ASTPtr &) { return true; } -}; - -using RewriteUniqToCountVisitor = InDepthNodeVisitor; -} diff --git a/tests/integration/test_rewrite_uniq_to_count/__init__.py b/tests/integration/test_rewrite_uniq_to_count/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/integration/test_rewrite_uniq_to_count/test.py b/tests/integration/test_rewrite_uniq_to_count/test.py deleted file mode 100644 index e38e57f5cee..00000000000 --- a/tests/integration/test_rewrite_uniq_to_count/test.py +++ /dev/null @@ -1,127 +0,0 @@ -import pytest -from helpers.cluster import ClickHouseCluster - -cluster = ClickHouseCluster(__file__) -node = cluster.add_instance("node") - - -@pytest.fixture(scope="module") -def started_cluster(): - try: - cluster.start() - prepare() - yield cluster - finally: - shutdown() - cluster.shutdown() - - -def prepare(): - node.query( - """ - CREATE TABLE IF NOT EXISTS test_rewrite_uniq_to_count - ( - `a` UInt8, - `b` UInt8, - `c` UInt8 - ) - ENGINE = MergeTree - ORDER BY `a` - """ - ) - node.query( - "INSERT INTO test_rewrite_uniq_to_count values ('1', '1', '1'), ('1', '1', '1')" - ) - node.query( - "INSERT INTO test_rewrite_uniq_to_count values ('2', '2', '2'), ('2', '2', '2')" - ) - node.query( - "INSERT INTO test_rewrite_uniq_to_count values ('3', '3', '3'), ('3', '3', '3')" - ) - - -def shutdown(): - node.query("DROP TABLE IF EXISTS test_rewrite_uniq_to_count SYNC") - - -def check(query, result): - # old analyzer - query = query + " settings optimize_uniq_to_count = 1" - assert node.query(query) == f"{result}\n" - assert "count()" in node.query("EXPLAIN SYNTAX " + query) - - # new analyzer - query = query + ", allow_experimental_analyzer = 1" - assert node.query(query) == f"{result}\n" - assert "count()" in node.query("EXPLAIN QUERY TREE " + query) - - -def check_by_old_analyzer(query, result): - # only old analyzer - query = query + " settings optimize_uniq_to_count = 1" - assert node.query(query) == f"{result}\n" - assert "count()" in node.query("EXPLAIN SYNTAX " + query) - - -def test_rewrite_distinct(started_cluster): - # simple test - check( - "SELECT uniq(a) FROM (SELECT DISTINCT a FROM test_rewrite_uniq_to_count)", - 3, - ) - - # test subquery alias - check( - "SELECT uniq(t.a) FROM (SELECT DISTINCT a FROM test_rewrite_uniq_to_count) t", - 3, - ) - - # test compound column name - check( - "SELECT uniq(a) FROM (SELECT DISTINCT test_rewrite_uniq_to_count.a FROM test_rewrite_uniq_to_count) t", - 3, - ) - - # test select expression alias - check( - "SELECT uniq(alias_of_a) FROM (SELECT DISTINCT test_rewrite_uniq_to_count.a as alias_of_a FROM test_rewrite_uniq_to_count) t", - 3, - ) - - # test select expression alias - check( - "SELECT uniq(alias_of_a) FROM (SELECT DISTINCT a as alias_of_a FROM test_rewrite_uniq_to_count) t", - 3, - ) - - -def test_rewrite_group_by(started_cluster): - # simple test - check( - "SELECT uniq(a) FROM (SELECT a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY a)", - 3, - ) - - # test subquery alias - check( - "SELECT uniq(t.a) FROM (SELECT a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY a) t", - 3, - ) - - # test select expression alias - check( - "SELECT uniq(t.alias_of_a) FROM (SELECT a as alias_of_a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY a) t", - 3, - ) - - # test select expression alias - check( - "SELECT uniq(t.alias_of_a) FROM (SELECT a as 
alias_of_a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY alias_of_a) t", - 3, - ) - - # test select expression alias - check( - "SELECT uniq(t.alias_of_a) FROM (SELECT a as alias_of_a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY a) t", - 3, - ) From 2cc1ac45dd8dda3385e2df1db9ea4fab1789a585 Mon Sep 17 00:00:00 2001 From: zvonand Date: Tue, 25 Jul 2023 18:45:56 +0200 Subject: [PATCH 1984/1997] update missed error --- src/Functions/FunctionToDecimalString.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/FunctionToDecimalString.h b/src/Functions/FunctionToDecimalString.h index a965e2c2c90..ce52d8b99f6 100644 --- a/src/Functions/FunctionToDecimalString.h +++ b/src/Functions/FunctionToDecimalString.h @@ -214,7 +214,7 @@ private: ColumnPtr executeType(const ColumnsWithTypeAndName & arguments) const { const auto * precision_col = checkAndGetColumn>(arguments[1].column.get()); - const auto * precision_col_const = typeid_cast(arguments[1].column.get()); + const auto * precision_col_const = checkAndGetColumnConst>(arguments[1].column.get()); auto result_col = ColumnString::create(); auto * result_col_string = assert_cast(result_col.get()); From 413ec520b3027d9f377aa1929a2855429994ffe3 Mon Sep 17 00:00:00 2001 From: Sanjam Panda Date: Tue, 25 Jul 2023 18:54:27 +0200 Subject: [PATCH 1985/1997] fix code style --- src/Common/HashTable/TwoLevelStringHashTable.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/Common/HashTable/TwoLevelStringHashTable.h b/src/Common/HashTable/TwoLevelStringHashTable.h index 0527ec67e6e..54c208c5b60 100644 --- a/src/Common/HashTable/TwoLevelStringHashTable.h +++ b/src/Common/HashTable/TwoLevelStringHashTable.h @@ -113,20 +113,20 @@ public: if ((reinterpret_cast(p) & 2048) == 0) { memcpy(&n[0], p, 8); - if constexpr (std::endian::native == std::endian::little) + if constexpr (std::endian::native == std::endian::little) n[0] &= -1ULL >> s; else n[0] &= -1ULL << s; - } + } else { const char * lp = x.data + x.size - 8; memcpy(&n[0], lp, 8); - if constexpr (std::endian::native == std::endian::little) + if constexpr (std::endian::native == std::endian::little) n[0] >>= s; else n[0] <<= s; - } + } auto res = hash(k8); auto buck = getBucketFromHash(res); keyHolderDiscardKey(key_holder); @@ -139,9 +139,9 @@ public: memcpy(&n[1], lp, 8); if constexpr (std::endian::native == std::endian::little) n[1] >>= s; - else + else n[1] <<= s; - auto res = hash(k16); + auto res = hash(k16); auto buck = getBucketFromHash(res); keyHolderDiscardKey(key_holder); return func(self.impls[buck].m2, k16, res); @@ -153,9 +153,9 @@ public: memcpy(&n[2], lp, 8); if constexpr (std::endian::native == std::endian::little) n[2] >>= s; - else + else n[2] <<= s; - auto res = hash(k24); + auto res = hash(k24); auto buck = getBucketFromHash(res); keyHolderDiscardKey(key_holder); return func(self.impls[buck].m3, k24, res); From 4f7bdf308d215478a718e1fe3c157c043702213e Mon Sep 17 00:00:00 2001 From: zvonand Date: Tue, 25 Jul 2023 18:57:54 +0200 Subject: [PATCH 1986/1997] add explicit else --- src/Functions/FunctionToDecimalString.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/Functions/FunctionToDecimalString.h b/src/Functions/FunctionToDecimalString.h index ce52d8b99f6..3dd946203cc 100644 --- a/src/Functions/FunctionToDecimalString.h +++ b/src/Functions/FunctionToDecimalString.h @@ -38,7 +38,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { 
FunctionArgumentDescriptors mandatory_args = { - {"Value", nullptr, nullptr, nullptr}, + {"Value", &isNumber, nullptr, "Number"}, {"precision", &isNativeInteger, &isColumnConst, "const Integer"} }; @@ -230,8 +230,10 @@ private: { if (precision_col_const) vectorConstant(from_col->getData(), precision_col_const->template getValue(), result_chars, result_offsets, from_scale); - else + else if (precision_col) vectorVector(from_col->getData(), precision_col->getData(), result_chars, result_offsets, from_scale); + else + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of second argument of function formatDecimal", arguments[1].column->getName()); } else throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function formatDecimal", arguments[0].column->getName()); @@ -243,8 +245,11 @@ private: { if (precision_col_const) vectorConstant(from_col->getData(), precision_col_const->template getValue(), result_chars, result_offsets); - else + else if (precision_col) vectorVector(from_col->getData(), precision_col->getData(), result_chars, result_offsets); + else + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of second argument of function formatDecimal", arguments[1].column->getName()); + } else throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function formatDecimal", arguments[0].column->getName()); From 59db21941034a287eea6c1016ed2ca83e6772774 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 25 Jul 2023 19:21:41 +0200 Subject: [PATCH 1987/1997] Fix possible error "Cannot drain connections: cancel first" --- src/QueryPipeline/RemoteQueryExecutor.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/QueryPipeline/RemoteQueryExecutor.cpp b/src/QueryPipeline/RemoteQueryExecutor.cpp index cd6f65b7b43..198c3265a84 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.cpp +++ b/src/QueryPipeline/RemoteQueryExecutor.cpp @@ -591,8 +591,8 @@ void RemoteQueryExecutor::finish() /// Send the request to abort the execution of the request, if not already sent. tryCancel("Cancelling query because enough data has been read"); - /// If connections weren't created yet or query wasn't sent, nothing to do. - if (!connections || !sent_query) + /// If connections weren't created yet, query wasn't sent or was already finished, nothing to do. + if (!connections || !sent_query || finished) return; /// Get the remaining packets so that there is no out of sync in the connections to the replicas. 
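The `finished` guard added above matters for queries that stop reading early: finish() can be entered again during pipeline teardown after the cancel path already ran. A hedged sketch of a query shape that exercises this (the addresses are illustrative, not from the patch):

```sql
-- LIMIT is reached long before the remote stream is exhausted, so the executor
-- cancels the query "because enough data has been read" and then finishes;
-- without the `finished` check, finish() could try to drain connections twice.
SELECT * FROM remote('127.0.0.{1,2}', system.numbers) LIMIT 10;
```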
From d78b3e560f13a6ba8b85b76e2f0d56bea44f2c62 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 25 Jul 2023 17:45:13 +0000 Subject: [PATCH 1988/1997] Fix 02497_trace_events_stress_long again --- .../0_stateless/02497_trace_events_stress_long.sh | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02497_trace_events_stress_long.sh b/tests/queries/0_stateless/02497_trace_events_stress_long.sh index 91f6a9bb541..c111ed40a29 100755 --- a/tests/queries/0_stateless/02497_trace_events_stress_long.sh +++ b/tests/queries/0_stateless/02497_trace_events_stress_long.sh @@ -45,4 +45,11 @@ thread2 $TIMEOUT >/dev/null & wait -$CLICKHOUSE_CLIENT -q "SELECT count() FROM system.processes WHERE query_id LIKE '02497_$CLICKHOUSE_DATABASE%'" | rg '^0$' \ No newline at end of file +for _ in {1..10} +do + # the process list is cleaned only after everything is sent to the client, + # so this check can run before the cleanup has happened; + # to avoid spurious failures we retry the check a couple of times + $CLICKHOUSE_CLIENT -q "SELECT count() FROM system.processes WHERE query_id LIKE '02497_$CLICKHOUSE_DATABASE%'" | rg '^0$' && break + sleep 1 +done \ No newline at end of file From 20300804b13187447e8677573b46ee70175c98cc Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 26 Jul 2023 00:01:46 +0300 Subject: [PATCH 1989/1997] Update test.py --- .../test_replicated_merge_tree_wait_on_shutdown/test.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py b/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py index d1373d44d0f..67dd03098e9 100644 --- a/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py +++ b/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py @@ -55,16 +55,7 @@ def test_shutdown_and_wait(start_cluster): node1.query(f"INSERT INTO test_table VALUES ({value})") with PartitionManager() as pm: - assert node2.query("SELECT * FROM test_table") == "0\n" pm.partition_instances(node1, node2) - # iptables rules must be applied immediately, but looks like sometimes they are not...
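For context on the assertion removed below: it queried both nodes through the remote() table function while one of them was partitioned away, relying on skip_unavailable_shards to keep the query alive. A hedged sketch of that behavior (the host names are the test's own):

```sql
-- Each address is a separate shard; with one host unreachable and
-- skip_unavailable_shards = 1 the query still succeeds and counts only the
-- reachable shard, hence the expected result of 1.
SELECT count() FROM remote('node1,node2', 'system.one')
SETTINGS skip_unavailable_shards = 1;
```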
- assert_eq_with_retry( - node1, - "select count() from remote('node1,node2', 'system.one')", - "1\n", - settings={"skip_unavailable_shards": 1}, - ) - p.map(insert, range(1, 50)) # Start shutdown async From d85f9ddb35f02564fe9d04f20f0a3451530a2b4c Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 26 Jul 2023 00:03:08 +0300 Subject: [PATCH 1990/1997] Update parallel_skip.json --- tests/integration/parallel_skip.json | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/integration/parallel_skip.json b/tests/integration/parallel_skip.json index 407fe7d1b01..1075fbaa0f8 100644 --- a/tests/integration/parallel_skip.json +++ b/tests/integration/parallel_skip.json @@ -69,6 +69,8 @@ "test_server_reload/test.py::test_remove_tcp_port", "test_keeper_map/test.py::test_keeper_map_without_zk", + + "test_replicated_merge_tree_wait_on_shutdown/test.py::test_shutdown_and_wait", "test_http_failover/test.py::test_url_destination_host_with_multiple_addrs", "test_http_failover/test.py::test_url_invalid_hostname", From 3928f7ef460f4f4603ceaa065733ac0a7ebc4d16 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 26 Jul 2023 09:19:35 +0200 Subject: [PATCH 1991/1997] Remove peak memory usage from the final message in the client --- src/Common/ProgressIndication.cpp | 3 --- tests/queries/0_stateless/01921_test_progress_bar.py | 1 - 2 files changed, 4 deletions(-) diff --git a/src/Common/ProgressIndication.cpp b/src/Common/ProgressIndication.cpp index 960d864660c..5a1929d4ec2 100644 --- a/src/Common/ProgressIndication.cpp +++ b/src/Common/ProgressIndication.cpp @@ -101,9 +101,6 @@ void ProgressIndication::writeFinalProgress() << formatReadableSizeWithDecimalSuffix(progress.read_bytes * 1000000000.0 / elapsed_ns) << "/s.)"; else std::cout << ". "; - auto peak_memory_usage = getMemoryUsage().peak; - if (peak_memory_usage >= 0) - std::cout << "\nPeak memory usage (for query) " << formatReadableSizeWithBinarySuffix(peak_memory_usage) << "."; } void ProgressIndication::writeProgress(WriteBufferFromFileDescriptor & message) diff --git a/tests/queries/0_stateless/01921_test_progress_bar.py b/tests/queries/0_stateless/01921_test_progress_bar.py index 9ce2168e2ae..89eecbc3987 100755 --- a/tests/queries/0_stateless/01921_test_progress_bar.py +++ b/tests/queries/0_stateless/01921_test_progress_bar.py @@ -17,4 +17,3 @@ with client(name="client1>", log=log) as client1: client1.send("SELECT number FROM numbers(1000) FORMAT Null") client1.expect("Progress: 1\.00 thousand rows, 8\.00 KB .*" + end_of_block) client1.expect("0 rows in set. 
Elapsed: [\\w]{1}\.[\\w]{3} sec.") - client1.expect("Peak memory usage \(for query\) .*B" + end_of_block) From 93e10077bad715235dfe7d4da6d103ffbb30f55a Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 26 Jul 2023 11:53:19 +0200 Subject: [PATCH 1992/1997] Fix attaching gdb in stress tests (#51445) * Fix attaching gdb in stress tests * Fix * Update run.sh * Try remove run_with_retry * Return run_with_retry * Don't set -e in run_with_retry if it was't set before * Update tests/ci/utils.lib * Fix bash --------- Co-authored-by: Alexander Tokmakov --- docker/test/stress/run.sh | 3 ++- docker/test/upgrade/run.sh | 1 + tests/ci/stress_tests.lib | 2 -- tests/ci/utils.lib | 11 +++++++++-- 4 files changed, 12 insertions(+), 5 deletions(-) diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 4926967d2d2..9217fcfddd9 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -14,6 +14,7 @@ ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test # Stress tests and upgrade check uses similar code that was placed # in a separate bash library. See tests/ci/stress_tests.lib +source /usr/share/clickhouse-test/ci/attach_gdb.lib source /usr/share/clickhouse-test/ci/stress_tests.lib install_packages package_folder @@ -52,7 +53,7 @@ azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log & start -shellcheck disable=SC2086 # No quotes because I want to split it into words. +# shellcheck disable=SC2086 # No quotes because I want to split it into words. /s3downloader --url-prefix "$S3_URL" --dataset-names $DATASETS chmod 777 -R /var/lib/clickhouse clickhouse-client --query "ATTACH DATABASE IF NOT EXISTS datasets ENGINE = Ordinary" diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index b8061309342..73a2965bf44 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -16,6 +16,7 @@ ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_pre # Stress tests and upgrade check uses similar code that was placed # in a separate bash library. See tests/ci/stress_tests.lib +source /usr/share/clickhouse-test/ci/attach_gdb.lib source /usr/share/clickhouse-test/ci/stress_tests.lib azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log & diff --git a/tests/ci/stress_tests.lib b/tests/ci/stress_tests.lib index 190f3f39f9e..85b376ac39d 100644 --- a/tests/ci/stress_tests.lib +++ b/tests/ci/stress_tests.lib @@ -9,8 +9,6 @@ FAIL="\tFAIL\t\\N\t" FAILURE_CONTEXT_LINES=100 FAILURE_CONTEXT_MAX_LINE_WIDTH=300 -source attach_gdb.lib - function escaped() { # That's the simplest way I found to escape a string in bash. Yep, bash is the most convenient programming language. 
diff --git a/tests/ci/utils.lib b/tests/ci/utils.lib index b5ce4ae0d78..c90b7ebe6f6 100644 --- a/tests/ci/utils.lib +++ b/tests/ci/utils.lib @@ -2,6 +2,11 @@ function run_with_retry() { + if [[ $- =~ e ]]; then + set_e=true + else + set_e=false + fi set +e local total_retries="$1" @@ -12,7 +17,9 @@ function run_with_retry() until [ "$retry" -ge "$total_retries" ] do if "$@"; then - set -e + if $set_e; then + set -e + fi return else retry=$((retry + 1)) @@ -26,4 +33,4 @@ function run_with_retry() function fn_exists() { declare -F "$1" > /dev/null; -} \ No newline at end of file +} From 017d34d40fdd8fe5b03e993b030385ccb20b0ebc Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 28 Jun 2023 23:41:51 +0200 Subject: [PATCH 1993/1997] determine task size by prewhere columns --- src/Core/Settings.h | 1 + src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp | 9 ++++++--- src/Storages/MergeTree/MergeTreeReadPool.cpp | 6 ++++-- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 24be644ee55..d14121a97a3 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -673,6 +673,7 @@ class IColumn; M(UInt64, remote_read_min_bytes_for_seek, 4 * DBMS_DEFAULT_BUFFER_SIZE, "Min bytes required for remote read (url, s3) to do seek, instead of read with ignore.", 0) \ M(UInt64, merge_tree_min_bytes_per_task_for_remote_reading, 4 * DBMS_DEFAULT_BUFFER_SIZE, "Min bytes to read per task.", 0) \ M(Bool, merge_tree_use_const_size_tasks_for_remote_reading, true, "Whether to use constant size tasks for reading from a remote table.", 0) \ + M(Bool, merge_tree_determine_task_size_by_prewhere_columns, true, "Whether to use only prewhere columns size to determine reading task size.", 0) \ \ M(UInt64, async_insert_threads, 16, "Maximum number of threads to actually parse and insert data in background. Zero means asynchronous mode is disabled", 0) \ M(Bool, async_insert, false, "If true, data from INSERT query is stored in queue and later flushed to table in background. If wait_for_async_insert is false, INSERT query is processed almost instantly, otherwise client will wait until data will be flushed to table", 0) \ diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp index fbad7d2f7be..e9e2138d995 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp @@ -328,7 +328,10 @@ MergeTreePrefetchedReadPool::PartsInfos MergeTreePrefetchedReadPool::getPartsInf for (const auto & range : part.ranges) part_info->sum_marks += range.end - range.begin; - part_info->approx_size_of_mark = getApproximateSizeOfGranule(*part_info->data_part, column_names); + const auto & columns = settings.merge_tree_determine_task_size_by_prewhere_columns && prewhere_info + ? 
prewhere_info->prewhere_actions->getRequiredColumnsNames() + : column_names; + part_info->approx_size_of_mark = getApproximateSizeOfGranule(*part_info->data_part, columns); const auto task_columns = getReadTaskColumns( part_reader_info, @@ -369,9 +372,9 @@ MergeTreePrefetchedReadPool::PartsInfos MergeTreePrefetchedReadPool::getPartsInf } if (prewhere_info) { - for (const auto & columns : task_columns.pre_columns) + for (const auto & cols : task_columns.pre_columns) { - for (const auto & col : columns) + for (const auto & col : cols) { const size_t col_size = part.data_part->getColumnSize(col.name).data_compressed; part_info->estimated_memory_usage_for_single_prefetch += std::min(col_size, settings.prefetch_buffer_size); diff --git a/src/Storages/MergeTree/MergeTreeReadPool.cpp b/src/Storages/MergeTree/MergeTreeReadPool.cpp index 2ab90189f9d..896769d9355 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -73,8 +73,10 @@ MergeTreeReadPool::MergeTreeReadPool( size_t total_marks = 0; for (const auto & part : parts_ranges) { - total_compressed_bytes += getApproxSizeOfPart( - *part.data_part, prewhere_info ? prewhere_info->prewhere_actions->getRequiredColumnsNames() : column_names_); + const auto & columns = settings.merge_tree_determine_task_size_by_prewhere_columns && prewhere_info + ? prewhere_info->prewhere_actions->getRequiredColumnsNames() + : column_names_; + total_compressed_bytes += getApproxSizeOfPart(*part.data_part, columns); total_marks += part.getMarksCount(); } From 04180549b094c231a01642cb70fa051bed2f7abb Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 26 Jul 2023 13:15:58 +0200 Subject: [PATCH 1994/1997] Fix possible double-free in Aggregator (#52439) --- src/Interpreters/Aggregator.cpp | 6 ++++-- .../test.py | 2 +- .../0_stateless/02355_control_block_size_in_aggregator.sql | 3 ++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index c7d4b87694b..36cd32910b5 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -2020,7 +2020,8 @@ template NO_INLINE Aggregator::convertToBlockImplFinal(Method & method, Table & data, Arena * arena, Arenas & aggregates_pools, size_t) const { - const size_t max_block_size = params.max_block_size; + /// +1 for nullKeyData, if `data` doesn't have it - not a problem, just some memory for one excessive row will be preallocated + const size_t max_block_size = (return_single_block ? data.size() : std::min(params.max_block_size, data.size())) + 1; const bool final = true; ConvertToBlockRes res; @@ -2097,7 +2098,8 @@ template Aggregator::ConvertToBlockRes NO_INLINE Aggregator::convertToBlockImplNotFinal(Method & method, Table & data, Arenas & aggregates_pools, size_t) const { - const size_t max_block_size = params.max_block_size; + /// +1 for nullKeyData, if `data` doesn't have it - not a problem, just some memory for one excessive row will be preallocated + const size_t max_block_size = (return_single_block ? 
data.size() : std::min(params.max_block_size, data.size())) + 1; const bool final = false; ConvertToBlockRes res; diff --git a/tests/integration/test_distributed_directory_monitor_split_batch_on_failure/test.py b/tests/integration/test_distributed_directory_monitor_split_batch_on_failure/test.py index faa38af6533..e66631460f7 100644 --- a/tests/integration/test_distributed_directory_monitor_split_batch_on_failure/test.py +++ b/tests/integration/test_distributed_directory_monitor_split_batch_on_failure/test.py @@ -68,7 +68,7 @@ def test_distributed_directory_monitor_split_batch_on_failure_OFF(started_cluste settings={ # max_memory_usage is the limit for the batch on the remote node # (local query should not be affected since 30MB is enough for 100K rows) - "max_memory_usage": "30Mi", + "max_memory_usage": "20Mi", "max_untracked_memory": "0", }, ) diff --git a/tests/queries/0_stateless/02355_control_block_size_in_aggregator.sql b/tests/queries/0_stateless/02355_control_block_size_in_aggregator.sql index b4754c6d6fe..f9f9661a7c4 100644 --- a/tests/queries/0_stateless/02355_control_block_size_in_aggregator.sql +++ b/tests/queries/0_stateless/02355_control_block_size_in_aggregator.sql @@ -1,6 +1,7 @@ SET max_block_size = 4213; ---- We allocate space for one more row in case nullKeyData is present. -SELECT DISTINCT (blockSize() <= 4213) +SELECT DISTINCT (blockSize() <= 4214) FROM ( SELECT number From 0a838dc6d19af963a021aa1910f2144839f21d4a Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 26 Jul 2023 18:30:18 +0200 Subject: [PATCH 1995/1997] Revert "Remove `mmap/mremap/munmap` from Allocator.h" (#52589) --- src/Common/Allocator.cpp | 26 ++- src/Common/Allocator.h | 182 ++++++++++++++---- src/Common/Allocator_fwd.h | 2 +- src/Common/CurrentMetrics.cpp | 2 + src/Common/HashTable/HashTableAllocator.h | 2 +- .../01778_mmap_cache_infra.reference | 2 + 6 files changed, 177 insertions(+), 39 deletions(-) diff --git a/src/Common/Allocator.cpp b/src/Common/Allocator.cpp index 769df70d71e..0fb90e5a47e 100644 --- a/src/Common/Allocator.cpp +++ b/src/Common/Allocator.cpp @@ -1,4 +1,26 @@ #include "Allocator.h" -template class Allocator; -template class Allocator; +/** Keep definition of this constant in cpp file; otherwise its value + * is inlined into allocator code making it impossible to override it + * in third-party code. + * + * Note: extern may seem redundant, but is actually needed due to a bug in GCC. + * See also: https://gcc.gnu.org/legacy-ml/gcc-help/2017-12/msg00021.html + */ +#ifdef NDEBUG + __attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 128 * (1ULL << 20); +#else + /** + * In a debug build, use a small mmap threshold to reproduce more memory + * stomping bugs. Along with ASLR it will hopefully detect more issues than + * ASan. The program may fail due to the limit on the number of memory mappings. + * + * Not too small, though, to avoid exhausting memory mappings too quickly.
+ */ + __attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 16384; +#endif + +template class Allocator; +template class Allocator; +template class Allocator; +template class Allocator; diff --git a/src/Common/Allocator.h b/src/Common/Allocator.h index 1e77e988326..5180fbdaa2d 100644 --- a/src/Common/Allocator.h +++ b/src/Common/Allocator.h @@ -36,26 +36,51 @@ #include +/// Required for older Darwin builds, that lack definition of MAP_ANONYMOUS +#ifndef MAP_ANONYMOUS +#define MAP_ANONYMOUS MAP_ANON +#endif + +/** + * Many modern allocators (for example, tcmalloc) do not do a mremap for + * realloc, even in case of large enough chunks of memory. Although this allows + * you to increase performance and reduce memory consumption during realloc. + * To fix this, we do mremap manually if the chunk of memory is large enough. + * The threshold (64 MB) is chosen quite large, since changing the address + * space is very slow, especially in the case of a large number of threads. We + * expect that the set of operations mmap/something to do/mremap can only be + * performed about 1000 times per second. + * + * P.S. This is also required, because tcmalloc can not allocate a chunk of + * memory greater than 16 GB. + * + * P.P.S. Note that MMAP_THRESHOLD symbol is intentionally made weak. It allows + * to override it during linkage when using ClickHouse as a library in + * third-party applications which may already use own allocator doing mmaps + * in the implementation of alloc/realloc. + */ +extern const size_t MMAP_THRESHOLD; + static constexpr size_t MALLOC_MIN_ALIGNMENT = 8; +namespace CurrentMetrics +{ + extern const Metric MMappedAllocs; + extern const Metric MMappedAllocBytes; +} + namespace DB { - namespace ErrorCodes { + extern const int BAD_ARGUMENTS; extern const int CANNOT_ALLOCATE_MEMORY; + extern const int CANNOT_MUNMAP; + extern const int CANNOT_MREMAP; extern const int LOGICAL_ERROR; } - } -/** Previously there was a code which tried to use manual mmap and mremap (clickhouse_mremap.h) for large allocations/reallocations (64MB+). - * Most modern allocators (including jemalloc) don't use mremap, so the idea was to take advantage from mremap system call for large reallocs. - * Actually jemalloc had support for mremap, but it was intentionally removed from codebase https://github.com/jemalloc/jemalloc/commit/e2deab7a751c8080c2b2cdcfd7b11887332be1bb. - * Our performance tests also shows that without manual mmap/mremap/munmap clickhouse is overall faster for about 1-2% and up to 5-7x for some types of queries. - * That is why we don't do manuall mmap/mremap/munmap here and completely rely on jemalloc for allocations of any size. - */ - /** Responsible for allocating / freeing memory. Used, for example, in PODArray, Arena. * Also used in hash tables. * The interface is different from std::allocator @@ -63,8 +88,10 @@ namespace ErrorCodes * - passing the size into the `free` method; * - by the presence of the `alignment` argument; * - the possibility of zeroing memory (used in hash tables); + * - random hint address for mmap + * - mmap_threshold for using mmap less or more */ -template +template class Allocator { public: @@ -82,7 +109,7 @@ public: try { checkSize(size); - freeNoTrack(buf); + freeNoTrack(buf, size); CurrentMemoryTracker::free(size); } catch (...) @@ -105,26 +132,49 @@ public: /// nothing to do. /// BTW, it's not possible to change alignment while doing realloc. 
} - else if (alignment <= MALLOC_MIN_ALIGNMENT) + else if (old_size < MMAP_THRESHOLD && new_size < MMAP_THRESHOLD + && alignment <= MALLOC_MIN_ALIGNMENT) { /// Resize malloc'd memory region with no special alignment requirement. CurrentMemoryTracker::realloc(old_size, new_size); void * new_buf = ::realloc(buf, new_size); if (nullptr == new_buf) - { - DB::throwFromErrno( - fmt::format("Allocator: Cannot realloc from {} to {}.", ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); - } + DB::throwFromErrno(fmt::format("Allocator: Cannot realloc from {} to {}.", ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); buf = new_buf; if constexpr (clear_memory) if (new_size > old_size) memset(reinterpret_cast(buf) + old_size, 0, new_size - old_size); } + else if (old_size >= MMAP_THRESHOLD && new_size >= MMAP_THRESHOLD) + { + /// Resize mmap'd memory region. + CurrentMemoryTracker::realloc(old_size, new_size); + + // On apple and freebsd self-implemented mremap used (common/mremap.h) + buf = clickhouse_mremap(buf, old_size, new_size, MREMAP_MAYMOVE, + PROT_READ | PROT_WRITE, mmap_flags, -1, 0); + if (MAP_FAILED == buf) + DB::throwFromErrno(fmt::format("Allocator: Cannot mremap memory chunk from {} to {}.", + ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_MREMAP); + + /// No need for zero-fill, because mmap guarantees it. + } + else if (new_size < MMAP_THRESHOLD) + { + /// Small allocs that requires a copy. Assume there's enough memory in system. Call CurrentMemoryTracker once. + CurrentMemoryTracker::realloc(old_size, new_size); + + void * new_buf = allocNoTrack(new_size, alignment); + memcpy(new_buf, buf, std::min(old_size, new_size)); + freeNoTrack(buf, old_size); + buf = new_buf; + } else { /// Big allocs that requires a copy. MemoryTracker is called inside 'alloc', 'free' methods. + void * new_buf = alloc(new_size, alignment); memcpy(new_buf, buf, std::min(old_size, new_size)); free(buf, old_size); @@ -142,38 +192,83 @@ protected: static constexpr bool clear_memory = clear_memory_; + // Freshly mmapped pages are copy-on-write references to a global zero page. + // On the first write, a page fault occurs, and an actual writable page is + // allocated. If we are going to use this memory soon, such as when resizing + // hash tables, it makes sense to pre-fault the pages by passing + // MAP_POPULATE to mmap(). This takes some time, but should be faster + // overall than having a hot loop interrupted by page faults. + // It is only supported on Linux. + static constexpr int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS +#if defined(OS_LINUX) + | (mmap_populate ? 
MAP_POPULATE : 0) +#endif + ; + private: void * allocNoTrack(size_t size, size_t alignment) { void * buf; - if (alignment <= MALLOC_MIN_ALIGNMENT) - { - if constexpr (clear_memory) - buf = ::calloc(size, 1); - else - buf = ::malloc(size); + size_t mmap_min_alignment = ::getPageSize(); - if (nullptr == buf) - DB::throwFromErrno(fmt::format("Allocator: Cannot malloc {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); + if (size >= MMAP_THRESHOLD) + { + if (alignment > mmap_min_alignment) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, + "Too large alignment {}: more than page size when allocating {}.", + ReadableSize(alignment), ReadableSize(size)); + + buf = mmap(getMmapHint(), size, PROT_READ | PROT_WRITE, + mmap_flags, -1, 0); + if (MAP_FAILED == buf) + DB::throwFromErrno(fmt::format("Allocator: Cannot mmap {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); + /// No need for zero-fill, because mmap guarantees it. + + CurrentMetrics::add(CurrentMetrics::MMappedAllocs); + CurrentMetrics::add(CurrentMetrics::MMappedAllocBytes, size); } else { - buf = nullptr; - int res = posix_memalign(&buf, alignment, size); + if (alignment <= MALLOC_MIN_ALIGNMENT) + { + if constexpr (clear_memory) + buf = ::calloc(size, 1); + else + buf = ::malloc(size); - if (0 != res) - DB::throwFromErrno(fmt::format("Cannot allocate memory (posix_memalign) {}.", ReadableSize(size)), - DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, res); + if (nullptr == buf) + DB::throwFromErrno(fmt::format("Allocator: Cannot malloc {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); + } + else + { + buf = nullptr; + int res = posix_memalign(&buf, alignment, size); - if constexpr (clear_memory) - memset(buf, 0, size); + if (0 != res) + DB::throwFromErrno(fmt::format("Cannot allocate memory (posix_memalign) {}.", ReadableSize(size)), + DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, res); + + if constexpr (clear_memory) + memset(buf, 0, size); + } } return buf; } - void freeNoTrack(void * buf) + void freeNoTrack(void * buf, size_t size) { - ::free(buf); + if (size >= MMAP_THRESHOLD) + { + if (0 != munmap(buf, size)) + DB::throwFromErrno(fmt::format("Allocator: Cannot munmap {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_MUNMAP); + + CurrentMetrics::sub(CurrentMetrics::MMappedAllocs); + CurrentMetrics::sub(CurrentMetrics::MMappedAllocBytes, size); + } + else + { + ::free(buf); + } } void checkSize(size_t size) @@ -182,6 +277,21 @@ private: if (size >= 0x8000000000000000ULL) throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Too large size ({}) passed to allocator. It indicates an error.", size); } + +#ifndef NDEBUG + /// In debug builds, request mmap() at random addresses (a kind of ASLR), to + /// reproduce more memory stomping bugs. Note that Linux doesn't do it by + /// default. This may lead to worse TLB performance. + void * getMmapHint() + { + return reinterpret_cast(std::uniform_int_distribution(0x100000000000UL, 0x700000000000UL)(thread_local_rng)); + } +#else + void * getMmapHint() + { + return nullptr; + } +#endif }; @@ -257,5 +367,7 @@ constexpr size_t allocatorInitialBytes; -extern template class Allocator; +extern template class Allocator; +extern template class Allocator; +extern template class Allocator; +extern template class Allocator; diff --git a/src/Common/Allocator_fwd.h b/src/Common/Allocator_fwd.h index a96bc2a503b..a13a4398654 100644 --- a/src/Common/Allocator_fwd.h +++ b/src/Common/Allocator_fwd.h @@ -3,7 +3,7 @@ * This file provides forward declarations for Allocator. 
*/ -template +template class Allocator; template diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 583b13cf79d..e290fc8ccd3 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -173,6 +173,8 @@ M(PartsInMemory, "In-memory parts.") \ M(MMappedFiles, "Total number of mmapped files.") \ M(MMappedFileBytes, "Sum size of mmapped file regions.") \ + M(MMappedAllocs, "Total number of mmapped allocations") \ + M(MMappedAllocBytes, "Sum bytes of mmapped allocations") \ M(AsynchronousReadWait, "Number of threads waiting for asynchronous read.") \ M(PendingAsyncInsert, "Number of asynchronous inserts that are waiting for flush.") \ M(KafkaConsumers, "Number of active Kafka consumers") \ diff --git a/src/Common/HashTable/HashTableAllocator.h b/src/Common/HashTable/HashTableAllocator.h index 8252265111d..47e3fdfc4b6 100644 --- a/src/Common/HashTable/HashTableAllocator.h +++ b/src/Common/HashTable/HashTableAllocator.h @@ -8,7 +8,7 @@ * table, so it makes sense to pre-fault the pages so that page faults don't * interrupt the resize loop. Set the allocator parameter accordingly. */ -using HashTableAllocator = Allocator; +using HashTableAllocator = Allocator; template using HashTableAllocatorWithStackMemory = AllocatorWithStackMemory; diff --git a/tests/queries/0_stateless/01778_mmap_cache_infra.reference b/tests/queries/0_stateless/01778_mmap_cache_infra.reference index 0e82b277bc1..ed365028ecc 100644 --- a/tests/queries/0_stateless/01778_mmap_cache_infra.reference +++ b/tests/queries/0_stateless/01778_mmap_cache_infra.reference @@ -2,5 +2,7 @@ CreatedReadBufferMMap CreatedReadBufferMMapFailed MMappedFileCacheHits MMappedFileCacheMisses +MMappedAllocBytes +MMappedAllocs MMappedFileBytes MMappedFiles From d89e2e6a27746dbb8febd2990d1ed3c23fcf153b Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> Date: Wed, 26 Jul 2023 19:58:41 +0200 Subject: [PATCH 1996/1997] Add SYSTEM STOP LISTEN query (#51016) Co-authored-by: Nikita Mikhaylov Co-authored-by: Nikita Mikhaylov --- docs/en/sql-reference/statements/system.md | 26 + programs/keeper/CMakeLists.txt | 1 + programs/server/Server.cpp | 448 +++++++++++------- programs/server/Server.h | 14 +- src/Access/Common/AccessType.h | 1 + src/Interpreters/Context.cpp | 34 ++ src/Interpreters/Context.h | 8 + src/Interpreters/InterpreterSystemQuery.cpp | 19 +- src/Parsers/ASTSystemQuery.cpp | 11 + src/Parsers/ASTSystemQuery.h | 7 +- src/Parsers/ParserSystemQuery.cpp | 36 ++ src/Parsers/examples/CMakeLists.txt | 4 +- src/Server/ServerType.cpp | 138 ++++++ src/Server/ServerType.h | 44 ++ .../test_system_start_stop_listen/__init__.py | 0 .../configs/cluster.xml | 16 + .../test_system_start_stop_listen/test.py | 40 ++ .../01271_show_privileges.reference | 1 + .../02117_show_create_table_system.reference | 6 +- 19 files changed, 663 insertions(+), 191 deletions(-) create mode 100644 src/Server/ServerType.cpp create mode 100644 src/Server/ServerType.h create mode 100644 tests/integration/test_system_start_stop_listen/__init__.py create mode 100644 tests/integration/test_system_start_stop_listen/configs/cluster.xml create mode 100644 tests/integration/test_system_start_stop_listen/test.py diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md index 65a35f03fbe..fb601cd5d35 100644 --- a/docs/en/sql-reference/statements/system.md +++ b/docs/en/sql-reference/statements/system.md @@ -414,3 +414,29 @@ Will do sync syscall. 
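The documentation added in the hunk below introduces the new statements; a hedged usage sketch (the cluster name is hypothetical):

```sql
-- Stop accepting new MySQL-protocol connections on this server, then re-enable them.
SYSTEM STOP LISTEN MYSQL;
SYSTEM START LISTEN MYSQL;
-- Stop all default protocols across a cluster.
SYSTEM STOP LISTEN ON CLUSTER my_cluster QUERIES DEFAULT;
```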
```sql SYSTEM SYNC FILE CACHE [ON CLUSTER cluster_name] ``` + + +### SYSTEM STOP LISTEN + +Closes the socket and gracefully terminates the existing connections to the server on the specified port with the specified protocol. + +However, if the corresponding protocol settings were not specified in the clickhouse-server configuration, this command will have no effect. + +```sql +SYSTEM STOP LISTEN [ON CLUSTER cluster_name] [QUERIES ALL | QUERIES DEFAULT | QUERIES CUSTOM | TCP | TCP_WITH_PROXY | TCP_SECURE | HTTP | HTTPS | MYSQL | GRPC | POSTGRESQL | PROMETHEUS | CUSTOM 'protocol'] +``` + +- If `CUSTOM 'protocol'` modifier is specified, the custom protocol with the specified name defined in the protocols section of the server configuration will be stopped. +- If `QUERIES ALL` modifier is specified, all protocols are stopped. +- If `QUERIES DEFAULT` modifier is specified, all default protocols are stopped. +- If `QUERIES CUSTOM` modifier is specified, all custom protocols are stopped. + +### SYSTEM START LISTEN + +Allows new connections to be established on the specified protocols. + +However, if the server on the specified port and protocol was not stopped using the SYSTEM STOP LISTEN command, this command will have no effect. + +```sql +SYSTEM START LISTEN [ON CLUSTER cluster_name] [QUERIES ALL | QUERIES DEFAULT | QUERIES CUSTOM | TCP | TCP_WITH_PROXY | TCP_SECURE | HTTP | HTTPS | MYSQL | GRPC | POSTGRESQL | PROMETHEUS | CUSTOM 'protocol'] +``` diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index abf31a7a499..43a8d84b513 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -65,6 +65,7 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/PrometheusRequestHandler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/PrometheusMetricsWriter.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/waitServersToFinish.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/ServerType.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTPRequestHandlerFactoryMain.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/ReadHeaders.cpp diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 960b6574633..dce52ecdb12 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1457,6 +1457,24 @@ try access_control.reload(AccessControl::ReloadMode::USERS_CONFIG_ONLY); }); + global_context->setStopServersCallback([&](const ServerType & server_type) + { + stopServers(servers, server_type); + }); + + global_context->setStartServersCallback([&](const ServerType & server_type) + { + createServers( + config(), + listen_hosts, + listen_try, + server_pool, + async_metrics, + servers, + /* start_servers= */ true, + server_type); + }); + /// Limit on total number of concurrently executed queries. global_context->getProcessList().setMaxSize(server_settings.max_concurrent_queries); @@ -1998,7 +2016,8 @@ void Server::createServers( Poco::ThreadPool & server_pool, AsynchronousMetrics & async_metrics, std::vector & servers, - bool start_servers) + bool start_servers, + const ServerType & server_type) { const Settings & settings = global_context->getSettingsRef(); @@ -2012,6 +2031,9 @@ void Server::createServers( for (const auto & protocol : protocols) { + if (!server_type.shouldStart(ServerType::Type::CUSTOM, protocol)) + continue; + std::vector hosts; if (config.has("protocols." + protocol + ".host")) hosts.push_back(config.getString("protocols." 
+ protocol + ".host")); @@ -2058,162 +2080,190 @@ void Server::createServers( for (const auto & listen_host : listen_hosts) { - /// HTTP - const char * port_name = "http_port"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port); - socket.setReceiveTimeout(settings.http_receive_timeout); - socket.setSendTimeout(settings.http_send_timeout); + const char * port_name; - return ProtocolServerAdapter( - listen_host, - port_name, - "http://" + address.toString(), - std::make_unique( - httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPHandler-factory"), server_pool, socket, http_params)); - }); - - /// HTTPS - port_name = "https_port"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + if (server_type.shouldStart(ServerType::Type::HTTP)) { + /// HTTP + port_name = "http_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port); + socket.setReceiveTimeout(settings.http_receive_timeout); + socket.setSendTimeout(settings.http_send_timeout); + + return ProtocolServerAdapter( + listen_host, + port_name, + "http://" + address.toString(), + std::make_unique( + httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPHandler-factory"), server_pool, socket, http_params)); + }); + } + + if (server_type.shouldStart(ServerType::Type::HTTPS)) + { + /// HTTPS + port_name = "https_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { #if USE_SSL - Poco::Net::SecureServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); - socket.setReceiveTimeout(settings.http_receive_timeout); - socket.setSendTimeout(settings.http_send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "https://" + address.toString(), - std::make_unique( - httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPSHandler-factory"), server_pool, socket, http_params)); + Poco::Net::SecureServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); + socket.setReceiveTimeout(settings.http_receive_timeout); + socket.setSendTimeout(settings.http_send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "https://" + address.toString(), + std::make_unique( + httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPSHandler-factory"), server_pool, socket, http_params)); #else - UNUSED(port); - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "HTTPS protocol is disabled because Poco library was built without NetSSL support."); + UNUSED(port); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "HTTPS protocol is disabled because Poco library was built without NetSSL support."); #endif - }); + }); + } - /// TCP - port_name = "tcp_port"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + if (server_type.shouldStart(ServerType::Type::TCP)) { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port); - 
socket.setReceiveTimeout(settings.receive_timeout); - socket.setSendTimeout(settings.send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "native protocol (tcp): " + address.toString(), - std::make_unique( - new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ false), - server_pool, - socket, - new Poco::Net::TCPServerParams)); - }); + /// TCP + port_name = "tcp_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "native protocol (tcp): " + address.toString(), + std::make_unique( + new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ false), + server_pool, + socket, + new Poco::Net::TCPServerParams)); + }); + } - /// TCP with PROXY protocol, see https://github.com/wolfeidau/proxyv2/blob/master/docs/proxy-protocol.txt - port_name = "tcp_with_proxy_port"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + if (server_type.shouldStart(ServerType::Type::TCP_WITH_PROXY)) { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port); - socket.setReceiveTimeout(settings.receive_timeout); - socket.setSendTimeout(settings.send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "native protocol (tcp) with PROXY: " + address.toString(), - std::make_unique( - new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ true), - server_pool, - socket, - new Poco::Net::TCPServerParams)); - }); + /// TCP with PROXY protocol, see https://github.com/wolfeidau/proxyv2/blob/master/docs/proxy-protocol.txt + port_name = "tcp_with_proxy_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "native protocol (tcp) with PROXY: " + address.toString(), + std::make_unique( + new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ true), + server_pool, + socket, + new Poco::Net::TCPServerParams)); + }); + } - /// TCP with SSL - port_name = "tcp_port_secure"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + if (server_type.shouldStart(ServerType::Type::TCP_SECURE)) { -#if USE_SSL - Poco::Net::SecureServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); - socket.setReceiveTimeout(settings.receive_timeout); - socket.setSendTimeout(settings.send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "secure native protocol (tcp_secure): " + address.toString(), - std::make_unique( - new TCPHandlerFactory(*this, /* secure */ true, /* proxy protocol */ false), - server_pool, - socket, - new Poco::Net::TCPServerParams)); -#else - UNUSED(port); - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without 
NetSSL support."); -#endif - }); + /// TCP with SSL + port_name = "tcp_port_secure"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + #if USE_SSL + Poco::Net::SecureServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "secure native protocol (tcp_secure): " + address.toString(), + std::make_unique( + new TCPHandlerFactory(*this, /* secure */ true, /* proxy protocol */ false), + server_pool, + socket, + new Poco::Net::TCPServerParams)); + #else + UNUSED(port); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); + #endif + }); + } - port_name = "mysql_port"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + if (server_type.shouldStart(ServerType::Type::MYSQL)) { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); - socket.setReceiveTimeout(Poco::Timespan()); - socket.setSendTimeout(settings.send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "MySQL compatibility protocol: " + address.toString(), - std::make_unique(new MySQLHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams)); - }); + port_name = "mysql_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); + socket.setReceiveTimeout(Poco::Timespan()); + socket.setSendTimeout(settings.send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "MySQL compatibility protocol: " + address.toString(), + std::make_unique(new MySQLHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams)); + }); + } - port_name = "postgresql_port"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + if (server_type.shouldStart(ServerType::Type::POSTGRESQL)) { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); - socket.setReceiveTimeout(Poco::Timespan()); - socket.setSendTimeout(settings.send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "PostgreSQL compatibility protocol: " + address.toString(), - std::make_unique(new PostgreSQLHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams)); - }); + port_name = "postgresql_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); + socket.setReceiveTimeout(Poco::Timespan()); + socket.setSendTimeout(settings.send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "PostgreSQL compatibility protocol: " + address.toString(), + std::make_unique(new PostgreSQLHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams)); + }); + } #if USE_GRPC - port_name = "grpc_port"; - 
createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + if (server_type.shouldStart(ServerType::Type::GRPC)) { - Poco::Net::SocketAddress server_address(listen_host, port); - return ProtocolServerAdapter( - listen_host, - port_name, - "gRPC protocol: " + server_address.toString(), - std::make_unique(*this, makeSocketAddress(listen_host, port, &logger()))); - }); + port_name = "grpc_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::SocketAddress server_address(listen_host, port); + return ProtocolServerAdapter( + listen_host, + port_name, + "gRPC protocol: " + server_address.toString(), + std::make_unique(*this, makeSocketAddress(listen_host, port, &logger()))); + }); + } #endif - - /// Prometheus (if defined and not setup yet with http_port) - port_name = "prometheus.port"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + if (server_type.shouldStart(ServerType::Type::PROMETHEUS)) { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port); - socket.setReceiveTimeout(settings.http_receive_timeout); - socket.setSendTimeout(settings.http_send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "Prometheus: http://" + address.toString(), - std::make_unique( - httpContext(), createHandlerFactory(*this, config, async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params)); - }); + /// Prometheus (if defined and not setup yet with http_port) + port_name = "prometheus.port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port); + socket.setReceiveTimeout(settings.http_receive_timeout); + socket.setSendTimeout(settings.http_send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "Prometheus: http://" + address.toString(), + std::make_unique( + httpContext(), createHandlerFactory(*this, config, async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params)); + }); + } } } @@ -2224,7 +2274,8 @@ void Server::createInterserverServers( Poco::ThreadPool & server_pool, AsynchronousMetrics & async_metrics, std::vector & servers, - bool start_servers) + bool start_servers, + const ServerType & server_type) { const Settings & settings = global_context->getSettingsRef(); @@ -2236,52 +2287,97 @@ void Server::createInterserverServers( /// Now iterate over interserver_listen_hosts for (const auto & interserver_listen_host : interserver_listen_hosts) { - /// Interserver IO HTTP - const char * port_name = "interserver_http_port"; - createServer(config, interserver_listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, interserver_listen_host, port); - socket.setReceiveTimeout(settings.http_receive_timeout); - socket.setSendTimeout(settings.http_send_timeout); - return ProtocolServerAdapter( - interserver_listen_host, - port_name, - "replica communication (interserver): http://" + address.toString(), - std::make_unique( - httpContext(), - createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPHandler-factory"), - server_pool, - 
socket, - http_params)); - }); + const char * port_name; - port_name = "interserver_https_port"; - createServer(config, interserver_listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + if (server_type.shouldStart(ServerType::Type::INTERSERVER_HTTP)) { + /// Interserver IO HTTP + port_name = "interserver_http_port"; + createServer(config, interserver_listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, interserver_listen_host, port); + socket.setReceiveTimeout(settings.http_receive_timeout); + socket.setSendTimeout(settings.http_send_timeout); + return ProtocolServerAdapter( + interserver_listen_host, + port_name, + "replica communication (interserver): http://" + address.toString(), + std::make_unique( + httpContext(), + createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPHandler-factory"), + server_pool, + socket, + http_params)); + }); + } + + if (server_type.shouldStart(ServerType::Type::INTERSERVER_HTTPS)) + { + port_name = "interserver_https_port"; + createServer(config, interserver_listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { #if USE_SSL - Poco::Net::SecureServerSocket socket; - auto address = socketBindListen(config, socket, interserver_listen_host, port, /* secure = */ true); - socket.setReceiveTimeout(settings.http_receive_timeout); - socket.setSendTimeout(settings.http_send_timeout); - return ProtocolServerAdapter( - interserver_listen_host, - port_name, - "secure replica communication (interserver): https://" + address.toString(), - std::make_unique( - httpContext(), - createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPSHandler-factory"), - server_pool, - socket, - http_params)); + Poco::Net::SecureServerSocket socket; + auto address = socketBindListen(config, socket, interserver_listen_host, port, /* secure = */ true); + socket.setReceiveTimeout(settings.http_receive_timeout); + socket.setSendTimeout(settings.http_send_timeout); + return ProtocolServerAdapter( + interserver_listen_host, + port_name, + "secure replica communication (interserver): https://" + address.toString(), + std::make_unique( + httpContext(), + createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPSHandler-factory"), + server_pool, + socket, + http_params)); #else - UNUSED(port); - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); + UNUSED(port); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); #endif - }); + }); + } } } +void Server::stopServers( + std::vector & servers, + const ServerType & server_type +) const +{ + Poco::Logger * log = &logger(); + + /// Remove servers once all their connections are closed + auto check_server = [&log](const char prefix[], auto & server) + { + if (!server.isStopping()) + return false; + size_t current_connections = server.currentConnections(); + LOG_DEBUG(log, "Server {}{}: {} ({} connections)", + server.getDescription(), + prefix, + !current_connections ? 
"finished" : "waiting", + current_connections); + return !current_connections; + }; + + std::erase_if(servers, std::bind_front(check_server, " (from one of previous remove)")); + + for (auto & server : servers) + { + if (!server.isStopping()) + { + const std::string server_port_name = server.getPortName(); + + if (server_type.shouldStop(server_port_name)) + server.stop(); + } + } + + std::erase_if(servers, std::bind_front(check_server, "")); +} + void Server::updateServers( Poco::Util::AbstractConfiguration & config, Poco::ThreadPool & server_pool, diff --git a/programs/server/Server.h b/programs/server/Server.h index d13378dcd65..3f03dd137ef 100644 --- a/programs/server/Server.h +++ b/programs/server/Server.h @@ -3,8 +3,9 @@ #include #include -#include "Server/HTTP/HTTPContext.h" +#include #include +#include #include /** Server provides three interfaces: @@ -106,7 +107,8 @@ private: Poco::ThreadPool & server_pool, AsynchronousMetrics & async_metrics, std::vector & servers, - bool start_servers = false); + bool start_servers = false, + const ServerType & server_type = ServerType(ServerType::Type::QUERIES_ALL)); void createInterserverServers( Poco::Util::AbstractConfiguration & config, @@ -115,7 +117,8 @@ private: Poco::ThreadPool & server_pool, AsynchronousMetrics & async_metrics, std::vector & servers, - bool start_servers = false); + bool start_servers = false, + const ServerType & server_type = ServerType(ServerType::Type::QUERIES_ALL)); void updateServers( Poco::Util::AbstractConfiguration & config, @@ -123,6 +126,11 @@ private: AsynchronousMetrics & async_metrics, std::vector & servers, std::vector & servers_to_start_before_tables); + + void stopServers( + std::vector & servers, + const ServerType & server_type + ) const; }; } diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index 374a1dd04a4..b253a0e13ce 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -187,6 +187,7 @@ enum class AccessType M(SYSTEM_THREAD_FUZZER, "SYSTEM START THREAD FUZZER, SYSTEM STOP THREAD FUZZER, START THREAD FUZZER, STOP THREAD FUZZER", GLOBAL, SYSTEM) \ M(SYSTEM_UNFREEZE, "SYSTEM UNFREEZE", GLOBAL, SYSTEM) \ M(SYSTEM_FAILPOINT, "SYSTEM ENABLE FAILPOINT, SYSTEM DISABLE FAILPOINT", GLOBAL, SYSTEM) \ + M(SYSTEM_LISTEN, "SYSTEM START LISTEN, SYSTEM STOP LISTEN", GLOBAL, SYSTEM) \ M(SYSTEM, "", GROUP, ALL) /* allows to execute SYSTEM {SHUTDOWN|RELOAD CONFIG|...} */ \ \ M(dictGet, "dictHas, dictGetHierarchy, dictIsIn", DICTIONARY, ALL) /* allows to execute functions dictGet(), dictHas(), dictGetHierarchy(), dictIsIn() */\ diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index cc77e0fe723..f83e524ffb9 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -357,6 +358,9 @@ struct ContextSharedPart : boost::noncopyable Context::ConfigReloadCallback config_reload_callback; + Context::StartStopServersCallback start_servers_callback; + Context::StartStopServersCallback stop_servers_callback; + bool is_server_completely_started = false; #if USE_ROCKSDB @@ -3688,6 +3692,36 @@ void Context::reloadConfig() const shared->config_reload_callback(); } +void Context::setStartServersCallback(StartStopServersCallback && callback) +{ + /// Is initialized at server startup, so lock isn't required. Otherwise use mutex. 
+ shared->start_servers_callback = std::move(callback); +} + +void Context::setStopServersCallback(StartStopServersCallback && callback) +{ + /// Is initialized at server startup, so lock isn't required. Otherwise use mutex. + shared->stop_servers_callback = std::move(callback); +} + +void Context::startServers(const ServerType & server_type) const +{ + /// Use mutex if callback may be changed after startup. + if (!shared->start_servers_callback) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't start servers because start_servers_callback is not set."); + + shared->start_servers_callback(server_type); +} + +void Context::stopServers(const ServerType & server_type) const +{ + /// Use mutex if callback may be changed after startup. + if (!shared->stop_servers_callback) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't stop servers because stop_servers_callback is not set."); + + shared->stop_servers_callback(server_type); +} + void Context::shutdown() {
diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index fa210f04451..75752774d4c 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -134,6 +134,7 @@ using StoragePolicyPtr = std::shared_ptr; using StoragePoliciesMap = std::map; class StoragePolicySelector; using StoragePolicySelectorPtr = std::shared_ptr; +class ServerType; template class MergeTreeBackgroundExecutor; @@ -1057,6 +1058,13 @@ public: void setConfigReloadCallback(ConfigReloadCallback && callback); void reloadConfig() const; + using StartStopServersCallback = std::function<void(const ServerType &)>; + void setStartServersCallback(StartStopServersCallback && callback); + void setStopServersCallback(StartStopServersCallback && callback); + + void startServers(const ServerType & server_type) const; + void stopServers(const ServerType & server_type) const; + void shutdown(); bool isInternalQuery() const { return is_internal_query; }
diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 1bd30e06888..3207da9941a 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -556,6 +556,14 @@ BlockIO InterpreterSystemQuery::execute() ); break; } + case Type::STOP_LISTEN: + getContext()->checkAccess(AccessType::SYSTEM_LISTEN); + getContext()->stopServers(query.server_type); + break; + case Type::START_LISTEN: + getContext()->checkAccess(AccessType::SYSTEM_LISTEN); + getContext()->startServers(query.server_type); + break; case Type::FLUSH_ASYNC_INSERT_QUEUE: { getContext()->checkAccess(AccessType::SYSTEM_FLUSH_ASYNC_INSERT_QUEUE); @@ -567,9 +575,6 @@ BlockIO InterpreterSystemQuery::execute() queue->flushAll(); break; } - case Type::STOP_LISTEN_QUERIES: - case Type::START_LISTEN_QUERIES: - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not supported yet", query.type); case Type::STOP_THREAD_FUZZER: getContext()->checkAccess(AccessType::SYSTEM_THREAD_FUZZER); ThreadFuzzer::stop(); @@ -1181,8 +1186,12 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() required_access.emplace_back(AccessType::SYSTEM_SYNC_FILE_CACHE); break; } - case Type::STOP_LISTEN_QUERIES: - case Type::START_LISTEN_QUERIES: + case Type::STOP_LISTEN: + case Type::START_LISTEN: + { + required_access.emplace_back(AccessType::SYSTEM_LISTEN); + break; + } case Type::STOP_THREAD_FUZZER: case Type::START_THREAD_FUZZER: case Type::ENABLE_FAILPOINT:
diff --git a/src/Parsers/ASTSystemQuery.cpp b/src/Parsers/ASTSystemQuery.cpp index a91449ff035..754eb825dcc 100644 ---
a/src/Parsers/ASTSystemQuery.cpp +++ b/src/Parsers/ASTSystemQuery.cpp @@ -220,6 +220,17 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &, { settings.ostr << (settings.hilite ? hilite_none : ""); } + else if (type == Type::START_LISTEN || type == Type::STOP_LISTEN) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " " << ServerType::serverTypeToString(server_type.type) + << (settings.hilite ? hilite_none : ""); + + if (server_type.type == ServerType::CUSTOM) + { + settings.ostr << (settings.hilite ? hilite_identifier : "") << " " << backQuoteIfNeed(server_type.custom_name); + } + + } }
diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index b18f8fc7b07..ebaf357c0ab 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -3,6 +3,7 @@ #include #include #include +#include #include "config.h" @@ -35,8 +36,8 @@ public: #if USE_AWS_S3 DROP_S3_CLIENT_CACHE, #endif - STOP_LISTEN_QUERIES, - START_LISTEN_QUERIES, + STOP_LISTEN, + START_LISTEN, RESTART_REPLICAS, RESTART_REPLICA, RESTORE_REPLICA, @@ -116,6 +117,8 @@ public: SyncReplicaMode sync_replica_mode = SyncReplicaMode::DEFAULT; + ServerType server_type; + String getID(char) const override { return "SYSTEM query"; } ASTPtr clone() const override
diff --git a/src/Parsers/ParserSystemQuery.cpp b/src/Parsers/ParserSystemQuery.cpp index 48dbe60e241..9aff0e8879e 100644 --- a/src/Parsers/ParserSystemQuery.cpp +++ b/src/Parsers/ParserSystemQuery.cpp @@ -442,6 +442,42 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & break; } + case Type::START_LISTEN: + case Type::STOP_LISTEN: + { + if (!parseQueryWithOnCluster(res, pos, expected)) + return false; + + ServerType::Type current_type = ServerType::Type::END; + std::string current_custom_name; + + for (const auto & type : magic_enum::enum_values<ServerType::Type>()) + { + if (ParserKeyword{ServerType::serverTypeToString(type)}.ignore(pos, expected)) + { + current_type = type; + break; + } + } + + if (current_type == ServerType::Type::END) + return false; + + if (current_type == ServerType::CUSTOM) + { + ASTPtr ast; + + if (!ParserStringLiteral{}.parse(pos, ast, expected)) + return false; + + current_custom_name = ast->as<ASTLiteral &>().value.get<const String &>(); + } + + res->server_type = ServerType(current_type, current_custom_name); + + break; + } + default: { if (!parseQueryWithOnCluster(res, pos, expected))
diff --git a/src/Parsers/examples/CMakeLists.txt b/src/Parsers/examples/CMakeLists.txt index 82ca7bc0688..e411574bd65 100644 --- a/src/Parsers/examples/CMakeLists.txt +++ b/src/Parsers/examples/CMakeLists.txt @@ -3,8 +3,8 @@ set(SRCS) clickhouse_add_executable(lexer lexer.cpp ${SRCS}) target_link_libraries(lexer PRIVATE clickhouse_parsers) -clickhouse_add_executable(select_parser select_parser.cpp ${SRCS}) +clickhouse_add_executable(select_parser select_parser.cpp ${SRCS} "../../Server/ServerType.cpp") target_link_libraries(select_parser PRIVATE clickhouse_parsers) -clickhouse_add_executable(create_parser create_parser.cpp ${SRCS}) +clickhouse_add_executable(create_parser create_parser.cpp ${SRCS} "../../Server/ServerType.cpp") target_link_libraries(create_parser PRIVATE clickhouse_parsers)
diff --git a/src/Server/ServerType.cpp b/src/Server/ServerType.cpp new file mode 100644 index 00000000000..c6916ee39d9 --- /dev/null +++ b/src/Server/ServerType.cpp @@ -0,0 +1,138 @@ +#include + +#include +#include +#include + +#include + +namespace DB +{ + +namespace +{ + std::vector<std::string> getTypeIndexToTypeName() + { + constexpr
std::size_t types_size = magic_enum::enum_count<ServerType::Type>(); + + std::vector<std::string> type_index_to_type_name; + type_index_to_type_name.resize(types_size); + + auto entries = magic_enum::enum_entries<ServerType::Type>(); + for (const auto & [entry, str] : entries) + { + auto str_copy = String(str); + std::replace(str_copy.begin(), str_copy.end(), '_', ' '); + type_index_to_type_name[static_cast<size_t>(entry)] = std::move(str_copy); + } + + return type_index_to_type_name; + } +} + +const char * ServerType::serverTypeToString(ServerType::Type type) +{ + /** During parsing, if a SystemQuery is not parsed properly, this string is added to the Expected variants as a description (see IParser.h). + * The description string must therefore be statically allocated. + */ + static std::vector<std::string> type_index_to_type_name = getTypeIndexToTypeName(); + const auto & type_name = type_index_to_type_name[static_cast<size_t>(type)]; + return type_name.data(); +} + +bool ServerType::shouldStart(Type server_type, const std::string & custom_name_) const +{ + if (type == Type::QUERIES_ALL) + return true; + + if (type == Type::QUERIES_DEFAULT) + { + switch (server_type) + { + case Type::TCP: + case Type::TCP_WITH_PROXY: + case Type::TCP_SECURE: + case Type::HTTP: + case Type::HTTPS: + case Type::MYSQL: + case Type::GRPC: + case Type::POSTGRESQL: + case Type::PROMETHEUS: + case Type::INTERSERVER_HTTP: + case Type::INTERSERVER_HTTPS: + return true; + default: + return false; + } + } + + if (type == Type::QUERIES_CUSTOM) + { + switch (server_type) + { + case Type::CUSTOM: + return true; + default: + return false; + } + } + + return type == server_type && custom_name == custom_name_; +} + +bool ServerType::shouldStop(const std::string & port_name) const +{ + Type port_type; + std::string port_custom_name; + + if (port_name == "http_port") + port_type = Type::HTTP; + + else if (port_name == "https_port") + port_type = Type::HTTPS; + + else if (port_name == "tcp_port") + port_type = Type::TCP; + + else if (port_name == "tcp_with_proxy_port") + port_type = Type::TCP_WITH_PROXY; + + else if (port_name == "tcp_port_secure") + port_type = Type::TCP_SECURE; + + else if (port_name == "mysql_port") + port_type = Type::MYSQL; + + else if (port_name == "postgresql_port") + port_type = Type::POSTGRESQL; + + else if (port_name == "grpc_port") + port_type = Type::GRPC; + + else if (port_name == "prometheus.port") + port_type = Type::PROMETHEUS; + + else if (port_name == "interserver_http_port") + port_type = Type::INTERSERVER_HTTP; + + else if (port_name == "interserver_https_port") + port_type = Type::INTERSERVER_HTTPS; + + else if (port_name.starts_with("protocols.") && port_name.ends_with(".port")) + { + constexpr size_t protocols_size = std::string_view("protocols.").size(); + constexpr size_t port_size = std::string_view(".port").size(); + + port_type = Type::CUSTOM; + port_custom_name = port_name.substr(protocols_size, port_name.size() - protocols_size - port_size); + } + else + port_type = Type::UNKNOWN; + + if (port_type == Type::UNKNOWN) + return false; + + return shouldStart(port_type, port_custom_name); +} + +}
diff --git a/src/Server/ServerType.h b/src/Server/ServerType.h new file mode 100644 index 00000000000..345d1a10119 --- /dev/null +++ b/src/Server/ServerType.h @@ -0,0 +1,44 @@ +#pragma once + +#include +namespace DB +{ + +class ServerType +{ +public: + + enum Type + { + UNKNOWN, + TCP, + TCP_WITH_PROXY, + TCP_SECURE, + HTTP, + HTTPS, + MYSQL, + GRPC, + POSTGRESQL, + PROMETHEUS, + CUSTOM, + INTERSERVER_HTTP, + INTERSERVER_HTTPS, + QUERIES_ALL, + QUERIES_DEFAULT, + QUERIES_CUSTOM, + END + }; + + ServerType() = default;
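/// Construction sketches (the custom protocol name is illustrative): ServerType(Type::TCP)
/// selects only the native TCP listener; ServerType(Type::CUSTOM, "mysql_1") selects a listener
/// declared under <protocols.mysql_1.port> in the server config; ServerType(Type::QUERIES_ALL)
/// selects every listener the server runs.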
+ explicit ServerType(Type type_, const std::string & custom_name_ = "") : type(type_), custom_name(custom_name_) {} + + static const char * serverTypeToString(Type type); + + bool shouldStart(Type server_type, const std::string & custom_name_ = "") const; + bool shouldStop(const std::string & port_name) const; + + Type type; + std::string custom_name; +}; + +}
diff --git a/tests/integration/test_system_start_stop_listen/__init__.py b/tests/integration/test_system_start_stop_listen/__init__.py new file mode 100644 index 00000000000..e69de29bb2d
diff --git a/tests/integration/test_system_start_stop_listen/configs/cluster.xml b/tests/integration/test_system_start_stop_listen/configs/cluster.xml new file mode 100644 index 00000000000..93d8f890f40 --- /dev/null +++ b/tests/integration/test_system_start_stop_listen/configs/cluster.xml @@ -0,0 +1,16 @@ +<clickhouse> + <remote_servers> + <default> + <shard> + <replica> + <host>node1</host> + <port>9000</port> + </replica> + <replica> + <host>node2</host> + <port>9000</port> + </replica> + </shard> + </default> + </remote_servers> +</clickhouse>
diff --git a/tests/integration/test_system_start_stop_listen/test.py b/tests/integration/test_system_start_stop_listen/test.py new file mode 100644 index 00000000000..ec1a000c599 --- /dev/null +++ b/tests/integration/test_system_start_stop_listen/test.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python3 + + +import pytest +import time +from helpers.cluster import ClickHouseCluster +from helpers.network import PartitionManager +from helpers.test_tools import assert_eq_with_retry +import random +import string +import json + +cluster = ClickHouseCluster(__file__) +node1 = cluster.add_instance( + "node1", main_configs=["configs/cluster.xml"], with_zookeeper=True +) +node2 = cluster.add_instance( + "node2", main_configs=["configs/cluster.xml"], with_zookeeper=True +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + yield cluster + + finally: + cluster.shutdown() + + +def test_system_start_stop_listen_queries(started_cluster): + node1.query("SYSTEM STOP LISTEN QUERIES ALL") + + assert "Connection refused" in node1.query_and_get_error("SELECT 1", timeout=3) + + node2.query("SYSTEM START LISTEN ON CLUSTER default QUERIES ALL") + + node1.query("SELECT 1")
diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index e6f7fa1ed2b..db0f2d8235b 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -138,6 +138,7 @@ SYSTEM FLUSH [] \N SYSTEM SYSTEM THREAD FUZZER ['SYSTEM START THREAD FUZZER','SYSTEM STOP THREAD FUZZER','START THREAD FUZZER','STOP THREAD FUZZER'] GLOBAL SYSTEM SYSTEM UNFREEZE ['SYSTEM UNFREEZE'] GLOBAL SYSTEM SYSTEM FAILPOINT ['SYSTEM ENABLE FAILPOINT','SYSTEM DISABLE FAILPOINT'] GLOBAL SYSTEM +SYSTEM LISTEN ['SYSTEM START LISTEN','SYSTEM STOP LISTEN'] GLOBAL SYSTEM SYSTEM [] \N ALL dictGet ['dictHas','dictGetHierarchy','dictIsIn'] DICTIONARY ALL displaySecretsInShowAndSelect [] GLOBAL ALL
diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index d58c76260c5..46d1f0e3a0b 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -297,7 +297,7 @@ CREATE TABLE system.grants ( `user_name` Nullable(String), `role_name` Nullable(String), - `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER
UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH ASYNC INSERT QUEUE' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 
140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'REDIS' = 152, 'MEILISEARCH' = 153, 'MYSQL' = 154, 'POSTGRES' = 155, 'SQLITE' = 156, 'ODBC' = 157, 'JDBC' = 158, 'HDFS' = 159, 'S3' = 160, 'HIVE' = 161, 'AZURE' = 162, 'SOURCES' = 163, 'CLUSTER' = 164, 'ALL' = 165, 'NONE' = 166), + `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL 
MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH ASYNC INSERT QUEUE' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM LISTEN' = 140, 'SYSTEM' = 141, 'dictGet' = 142, 'displaySecretsInShowAndSelect' = 143, 'addressToLine' = 144, 'addressToLineWithInlines' = 145, 'addressToSymbol' = 146, 'demangle' = 147, 'INTROSPECTION' = 148, 'FILE' = 149, 'URL' = 150, 'REMOTE' = 151, 'MONGO' = 152, 'REDIS' = 153, 'MEILISEARCH' = 154, 'MYSQL' = 155, 'POSTGRES' = 156, 'SQLITE' = 157, 'ODBC' = 158, 'JDBC' = 159, 'HDFS' = 160, 'S3' = 161, 'HIVE' = 162, 'AZURE' = 163, 'SOURCES' = 164, 'CLUSTER' = 165, 'ALL' = 166, 'NONE' = 167), `database` Nullable(String), `table` Nullable(String), `column` Nullable(String), @@ -584,10 +584,10 @@ ENGINE = SystemPartsColumns COMMENT 'SYSTEM TABLE is built on the fly.' CREATE TABLE system.privileges ( - `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' 
= 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH ASYNC INSERT QUEUE' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'REDIS' = 152, 'MEILISEARCH' = 153, 'MYSQL' = 154, 'POSTGRES' = 155, 'SQLITE' = 156, 'ODBC' = 157, 'JDBC' = 158, 'HDFS' = 159, 'S3' = 160, 'HIVE' = 161, 'AZURE' = 162, 'SOURCES' = 163, 'CLUSTER' = 164, 'ALL' = 165, 'NONE' = 166), + `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 
'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH ASYNC INSERT QUEUE' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM LISTEN' = 140, 'SYSTEM' = 141, 'dictGet' = 142, 'displaySecretsInShowAndSelect' = 143, 'addressToLine' = 144, 'addressToLineWithInlines' = 145, 'addressToSymbol' = 146, 'demangle' = 147, 'INTROSPECTION' = 148, 'FILE' = 149, 'URL' = 150, 'REMOTE' = 151, 'MONGO' = 152, 'REDIS' = 153, 'MEILISEARCH' = 154, 'MYSQL' = 155, 'POSTGRES' = 156, 'SQLITE' = 157, 'ODBC' = 158, 'JDBC' = 159, 'HDFS' = 160, 'S3' = 161, 'HIVE' = 162, 'AZURE' = 163, 'SOURCES' = 164, 'CLUSTER' = 165, 'ALL' = 166, 'NONE' = 167), `aliases` Array(String), `level` Nullable(Enum8('GLOBAL' = 0, 'DATABASE' = 1, 'TABLE' = 2, 'DICTIONARY' = 3, 'VIEW' = 4, 'COLUMN' = 5, 'NAMED_COLLECTION' = 6)), - `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER 
DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH ASYNC INSERT QUEUE' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'REDIS' = 152, 'MEILISEARCH' = 153, 'MYSQL' = 154, 'POSTGRES' = 155, 'SQLITE' = 156, 'ODBC' = 157, 'JDBC' = 158, 'HDFS' = 159, 'S3' = 160, 'HIVE' = 161, 'AZURE' = 162, 'SOURCES' = 163, 'CLUSTER' = 164, 'ALL' = 165, 'NONE' = 166)) + `parent_group` 
Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 
'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH ASYNC INSERT QUEUE' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM LISTEN' = 140, 'SYSTEM' = 141, 'dictGet' = 142, 'displaySecretsInShowAndSelect' = 143, 'addressToLine' = 144, 'addressToLineWithInlines' = 145, 'addressToSymbol' = 146, 'demangle' = 147, 'INTROSPECTION' = 148, 'FILE' = 149, 'URL' = 150, 'REMOTE' = 151, 'MONGO' = 152, 'REDIS' = 153, 'MEILISEARCH' = 154, 'MYSQL' = 155, 'POSTGRES' = 156, 'SQLITE' = 157, 'ODBC' = 158, 'JDBC' = 159, 'HDFS' = 160, 'S3' = 161, 'HIVE' = 162, 'AZURE' = 163, 'SOURCES' = 164, 'CLUSTER' = 165, 'ALL' = 166, 'NONE' = 167)) ) ENGINE = SystemPrivileges COMMENT 'SYSTEM TABLE is built on the fly.' From 954a1d3edec8117a135c23b7ec60065249fa0f02 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 26 Jul 2023 23:38:14 +0300 Subject: [PATCH 1997/1997] Improve logging macros (#52519) * improve logging macros * fix * Update logger_useful.h * fix * fix --- base/poco/Foundation/include/Poco/Message.h | 2 + base/poco/Foundation/src/Message.cpp | 13 +++ src/Common/LoggingFormatStringHelpers.h | 63 ++++++++---- src/Common/logger_useful.h | 97 ++++++++++++++----- src/Common/tests/gtest_log.cpp | 53 ++++++++++ .../01164_detach_attach_partition_race.sh | 2 +- 6 files changed, 189 insertions(+), 41 deletions(-) diff --git a/base/poco/Foundation/include/Poco/Message.h b/base/poco/Foundation/include/Poco/Message.h index e8f04888ab4..282c7fb5fd1 100644 --- a/base/poco/Foundation/include/Poco/Message.h +++ b/base/poco/Foundation/include/Poco/Message.h @@ -67,6 +67,8 @@ public: Message( const std::string & source, const std::string & text, Priority prio, const char * file, int line, std::string_view fmt_str = {}); + Message( + std::string && source, std::string && text, Priority prio, const char * file, int line, std::string_view fmt_str); /// Creates a Message with the given source, text, priority, /// source file path and line. 
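/// A call-site sketch (names assumed): both strings are rvalues here, so the new overload
/// moves them instead of copying, e.g.
///     Poco::Message msg("Server", fmt::format("Stopping {} servers", n),
///                       Poco::Message::PRIO_INFORMATION, __FILE__, __LINE__, "Stopping {} servers");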
/// diff --git a/base/poco/Foundation/src/Message.cpp b/base/poco/Foundation/src/Message.cpp index 663c96e47a2..54118cc0fc5 100644 --- a/base/poco/Foundation/src/Message.cpp +++ b/base/poco/Foundation/src/Message.cpp @@ -60,6 +60,19 @@ Message::Message(const std::string& source, const std::string& text, Priority pr } +Message::Message(std::string && source, std::string && text, Priority prio, const char * file, int line, std::string_view fmt_str): + _source(std::move(source)), + _text(std::move(text)), + _prio(prio), + _tid(0), + _file(file), + _line(line), + _pMap(0), + _fmt_str(fmt_str) +{ + init(); +} + Message::Message(const Message& msg): _source(msg._source), _text(msg._text), diff --git a/src/Common/LoggingFormatStringHelpers.h b/src/Common/LoggingFormatStringHelpers.h index 3afa3fb089d..6dffd202807 100644 --- a/src/Common/LoggingFormatStringHelpers.h +++ b/src/Common/LoggingFormatStringHelpers.h @@ -43,6 +43,17 @@ struct PreformattedMessage operator const std::string & () const { return text; } operator std::string () && { return std::move(text); } operator fmt::format_string<> () const { UNREACHABLE(); } + + void apply(std::string & out_text, std::string_view & out_format_string) const & + { + out_text = text; + out_format_string = format_string; + } + void apply(std::string & out_text, std::string_view & out_format_string) && + { + out_text = std::move(text); + out_format_string = format_string; + } }; template @@ -99,10 +110,33 @@ template constexpr std::string_view tryGetStaticFormatString(T && x } } +/// Constexpr ifs are not like ifdefs, and compiler still checks that unneeded code can be compiled +/// This template is useful to avoid compilation failures when condition of some "constexpr if" is false +template struct ConstexprIfsAreNotIfdefs +{ + template constexpr static std::string_view getStaticFormatString(T &&) { return {}; } + template static PreformattedMessage getPreformatted(T &&) { return {}; } +}; + +template<> struct ConstexprIfsAreNotIfdefs +{ + template consteval static std::string_view getStaticFormatString(T && x) + { + /// See tryGetStaticFormatString(...) + static_assert(!std::is_same_v>); + static_assert(std::is_nothrow_convertible::value); + static_assert(!std::is_pointer::value); + return std::string_view(x); + } + + template static T && getPreformatted(T && x) { return std::forward(x); } +}; + template constexpr size_t numArgs(Ts &&...) { return sizeof...(Ts); } template constexpr auto firstArg(T && x, Ts &&...) { return std::forward(x); } /// For implicit conversion of fmt::basic_runtime<> to char* for std::string ctor template constexpr auto firstArg(fmt::basic_runtime && data, Ts &&...) { return data.str.data(); } +template constexpr auto firstArg(const fmt::basic_runtime & data, Ts &&...) 
{ return data.str.data(); } consteval ssize_t formatStringCountArgsNum(const char * const str, size_t len) { @@ -142,26 +176,19 @@ consteval void formatStringCheckArgsNumImpl(std::string_view str, size_t nargs) functionThatFailsCompilationOfConstevalFunctions("unexpected number of arguments in a format string"); } -template -struct CheckArgsNumHelperImpl +template +consteval void formatStringCheckArgsNum(T && str, size_t nargs) { - template - consteval CheckArgsNumHelperImpl(T && str) - { - formatStringCheckArgsNumImpl(tryGetStaticFormatString(str), sizeof...(Args)); - } - - /// No checks for fmt::runtime and PreformattedMessage - template CheckArgsNumHelperImpl(fmt::basic_runtime &&) {} - template<> CheckArgsNumHelperImpl(PreformattedMessage &) {} - template<> CheckArgsNumHelperImpl(const PreformattedMessage &) {} - template<> CheckArgsNumHelperImpl(PreformattedMessage &&) {} - -}; - -template using CheckArgsNumHelper = CheckArgsNumHelperImpl...>; -template void formatStringCheckArgsNum(CheckArgsNumHelper, Args &&...) {} + formatStringCheckArgsNumImpl(tryGetStaticFormatString(str), nargs); +} +template inline void formatStringCheckArgsNum(fmt::basic_runtime &&, size_t) {} +template<> inline void formatStringCheckArgsNum(PreformattedMessage &, size_t) {} +template<> inline void formatStringCheckArgsNum(const PreformattedMessage &, size_t) {} +template<> inline void formatStringCheckArgsNum(PreformattedMessage &&, size_t) {} +template struct FormatStringTypeInfo{ static constexpr bool is_static = true; static constexpr bool has_format = true; }; +template struct FormatStringTypeInfo> { static constexpr bool is_static = false; static constexpr bool has_format = false; }; +template<> struct FormatStringTypeInfo { static constexpr bool is_static = false; static constexpr bool has_format = true; }; /// This wrapper helps to avoid too frequent and noisy log messages. /// For each pair (logger_name, format_string) it remembers when such a message was logged the last time. diff --git a/src/Common/logger_useful.h b/src/Common/logger_useful.h index 3ebb1d25075..d9fe5ac9190 100644 --- a/src/Common/logger_useful.h +++ b/src/Common/logger_useful.h @@ -1,7 +1,7 @@ #pragma once /// Macros for convenient usage of Poco logger. - +#include #include #include #include @@ -28,33 +28,86 @@ namespace #define LOG_IMPL_FIRST_ARG(X, ...) X +/// Copy-paste from contrib/libpq/include/c.h +/// There's no easy way to count the number of arguments without evaluating these arguments... +#define CH_VA_ARGS_NARGS(...) \ + CH_VA_ARGS_NARGS_(__VA_ARGS__, \ + 63,62,61,60, \ + 59,58,57,56,55,54,53,52,51,50, \ + 49,48,47,46,45,44,43,42,41,40, \ + 39,38,37,36,35,34,33,32,31,30, \ + 29,28,27,26,25,24,23,22,21,20, \ + 19,18,17,16,15,14,13,12,11,10, \ + 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) +#define CH_VA_ARGS_NARGS_( \ + _01,_02,_03,_04,_05,_06,_07,_08,_09,_10, \ + _11,_12,_13,_14,_15,_16,_17,_18,_19,_20, \ + _21,_22,_23,_24,_25,_26,_27,_28,_29,_30, \ + _31,_32,_33,_34,_35,_36,_37,_38,_39,_40, \ + _41,_42,_43,_44,_45,_46,_47,_48,_49,_50, \ + _51,_52,_53,_54,_55,_56,_57,_58,_59,_60, \ + _61,_62,_63, N, ...) \ + (N) + +#define LINE_NUM_AS_STRING_IMPL2(x) #x +#define LINE_NUM_AS_STRING_IMPL(x) LINE_NUM_AS_STRING_IMPL2(x) +#define LINE_NUM_AS_STRING LINE_NUM_AS_STRING_IMPL(__LINE__) +#define MESSAGE_FOR_EXCEPTION_ON_LOGGING "Failed to write a log message: " __FILE__ ":" LINE_NUM_AS_STRING "\n" + /// Logs a message to a specified logger with that level. 
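/// For example (a sketch): LOG_INFO(log, "Loaded {} tables in {} ms", count, elapsed)
/// substitutes the two values at runtime and validates the placeholder count at compile time,
/// while LOG_INFO(log, "Server started") logs the literal text unchanged.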
diff --git a/src/Common/tests/gtest_log.cpp b/src/Common/tests/gtest_log.cpp
index f92866626f9..e755c22ba75 100644
--- a/src/Common/tests/gtest_log.cpp
+++ b/src/Common/tests/gtest_log.cpp
@@ -1,6 +1,7 @@
 #include <string>
 #include <vector>
 #include <Common/logger_useful.h>
+#include <Common/thread_local_rng.h>
 #include <Poco/Logger.h>
 #include <Poco/AutoPtr.h>
 
@@ -50,3 +51,55 @@
     }
 }
 
+
+static size_t global_counter = 0;
+
+static std::string getLogMessage()
+{
+    ++global_counter;
+    return "test1 " + std::to_string(thread_local_rng());
+}
+
+static size_t getLogMessageParam()
+{
+    ++global_counter;
+    return thread_local_rng();
+}
+
+static PreformattedMessage getPreformatted()
+{
+    ++global_counter;
+    return PreformattedMessage::create("test3 {}", thread_local_rng());
+}
+
+static size_t getLogMessageParamOrThrow()
+{
+    size_t x = thread_local_rng();
+    if (x % 1000 == 0)
+        return x;
+    throw Poco::Exception("error", 42);
+}
+
+TEST(Logger, SideEffects)
+{
+    std::ostringstream oss;     // STYLE_CHECK_ALLOW_STD_STRING_STREAM
+    auto my_channel = Poco::AutoPtr<Poco::StreamChannel>(new Poco::StreamChannel(oss));
+    auto * log = &Poco::Logger::create("Logger", my_channel.get());
+    log->setLevel("trace");
+
+    /// Ensure that parameters are evaluated only once
+    global_counter = 0;
+    LOG_TRACE(log, fmt::runtime(getLogMessage()));
+    EXPECT_EQ(global_counter, 1);
+    LOG_TRACE(log, "test2 {}", getLogMessageParam());
+    EXPECT_EQ(global_counter, 2);
+    LOG_TRACE(log, getPreformatted());
+    EXPECT_EQ(global_counter, 3);
+
+    auto var = PreformattedMessage::create("test4 {}", thread_local_rng());
+    LOG_TRACE(log, var);
+    EXPECT_EQ(var.text.starts_with("test4 "), true);
+    EXPECT_EQ(var.format_string, "test4 {}");
+
+    LOG_TRACE(log, "test no throw {}", getLogMessageParamOrThrow());
+}
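The last LOG_TRACE above exercises the new try/catch: getLogMessageParamOrThrow() almost always throws while the macro is formatting its arguments, and the only observable effect must be the fixed fallback line on stderr rather than an escaping exception. The same pattern in isolation (the helper and macro names below are illustrative, not from the patch): because the fallback message is assembled entirely by the preprocessor from string literals, emitting it cannot itself throw or allocate:

    #include <unistd.h>

    // Built by string-literal concatenation at compile time, so no formatting
    // or allocation is needed on the failure path.
    #define FALLBACK_MSG "Failed to write a log message: " __FILE__ "\n"

    template <typename F>
    void log_or_fallback(F && try_log)
    {
        try
        {
            try_log();  // may throw while formatting log arguments
        }
        catch (...)
        {
            // sizeof on a string literal includes the trailing '\0'; skip it.
            (void)::write(STDERR_FILENO, static_cast<const void *>(FALLBACK_MSG), sizeof(FALLBACK_MSG) - 1);
        }
    }

    int main()
    {
        log_or_fallback([] { throw 42; });  // prints the fallback line to stderr
    }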
diff --git a/tests/queries/0_stateless/01164_detach_attach_partition_race.sh b/tests/queries/0_stateless/01164_detach_attach_partition_race.sh
index e645cb5aae7..07b39723c37 100755
--- a/tests/queries/0_stateless/01164_detach_attach_partition_race.sh
+++ b/tests/queries/0_stateless/01164_detach_attach_partition_race.sh
@@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 
 $CLICKHOUSE_CLIENT -q "drop table if exists mt"
-$CLICKHOUSE_CLIENT -q "create table mt (n int) engine=MergeTree order by n settings parts_to_throw_insert=1000"
+$CLICKHOUSE_CLIENT -q "create table mt (n int) engine=MergeTree order by n settings parts_to_throw_insert=5000"
 $CLICKHOUSE_CLIENT -q "insert into mt values (1)"
 $CLICKHOUSE_CLIENT -q "insert into mt values (2)"
 $CLICKHOUSE_CLIENT -q "insert into mt values (3)"